LCOV - code coverage report
Current view: top level - gdk - gdk_project.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 402 726 55.4 %
Date: 2024-11-15 19:37:45 Functions: 21 22 95.5 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "gdk.h"
      15             : #include "gdk_private.h"
      16             : 
      17             : /*
      18             :  * BATproject returns a BAT aligned with the left input whose values
      19             :  * are the values from the right input that were referred to by the
      20             :  * OIDs in the left input.
      21             :  *
      22             :  * BATproject2 is similar, except that instead of a single right input
      23             :  * there are two, where the second's hseqbase is equal to the first's
      24             :  * hseqbase plus the first's batCount.
      25             :  */
      26             : 
      /*
       * project1_loop(TYPE) defines
       *     project1_TYPE(bn, li, r1i, qry_ctx)
       * the projection loop for a single right input.
       *
       *   bn      - result BAT; its tail (Tloc(bn, 0)) is written as TYPE[].
       *   li      - iterator over the left input that supplies the OIDs.
       *   r1i     - iterator over the right input; r1i->base is read as
       *             TYPE[] and r1i->b->hseqbase is the OID of element 0.
       *   qry_ctx - handed to the TIMEOUT_* macros (defined elsewhere).
       *
       * For every position lo in [0, li->count) the value r1t[o - r1seq]
       * is stored in bt[lo], where o is the OID at position lo of li.
       *
       *   - li dense: the OID range [li->tseq, li->tseq + li->count) is
       *     checked once against [r1seq, r1end) and the values are copied
       *     as one contiguous run.  The end of that range is exclusive, so
       *     it is compared with '>' against r1end: a range that ends
       *     exactly at the end of r1 is valid.
       *   - otherwise: li->base is read as oid[] and each OID is range
       *     checked on its own.  No nil test is done in this loop; both
       *     callers in this file (project_TYPE and project_oid) test
       *     li->nonil before dispatching here.
       *
       * Returns GDK_FAIL after GDKerror("does not match always") when an
       * OID lies outside the right input; otherwise calls
       * BATsetcount(bn, lo) and returns GDK_SUCCEED.  TIMEOUT_CHECK with
       * TIMEOUT_HANDLER(GDK_FAIL, ...) presumably returns GDK_FAIL on a
       * query timeout - confirm against the macro definitions.
       */
      27             : #define project1_loop(TYPE)                                             \
      28             : static gdk_return                                                       \
      29             : project1_##TYPE(BAT *restrict bn, BATiter *restrict li,                 \
      30             :                 BATiter *restrict r1i, QryCtx *qry_ctx)                 \
      31             : {                                                                       \
      32             :         BUN lo;                                                         \
      33             :         const TYPE *restrict r1t;                                       \
      34             :         TYPE *restrict bt;                                              \
      35             :         oid r1seq, r1end;                                               \
      36             :                                                                         \
      37             :         MT_thread_setalgorithm(__func__);                               \
      38             :         r1t = (const TYPE *) r1i->base;                                      \
      39             :         bt = (TYPE *) Tloc(bn, 0);                                      \
      40             :         r1seq = r1i->b->hseqbase;                                 \
      41             :         r1end = r1seq + r1i->count;                                  \
      42             :         if (BATtdensebi(li)) {                                          \
      43             :                 if (li->tseq < r1seq ||                                   \
      44             :                     (li->tseq + li->count) > r1end) {                  \
      45             :                         GDKerror("does not match always\n");          \
      46             :                         return GDK_FAIL;                                \
      47             :                 }                                                       \
      48             :                 oid off = li->tseq - r1seq;                          \
      49             :                 r1t += off;                                             \
      50             :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx)             \
      51             :                         bt[lo] = r1t[lo];                               \
      52             :         } else {                                                        \
      53             :                 assert(li->type);                                    \
      54             :                 const oid *restrict ot = (const oid *) li->base;     \
      55             :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {           \
      56             :                         oid o = ot[lo];                                 \
      57             :                         if (o < r1seq || o >= r1end) {                    \
      58             :                                 GDKerror("does not match always\n");  \
      59             :                                 return GDK_FAIL;                        \
      60             :                         }                                               \
      61             :                         bt[lo] = r1t[o - r1seq];                        \
      62             :                 }                                                       \
      63             :         }                                                               \
      64             :         TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));     \
      65             :         BATsetcount(bn, lo);                                            \
      66             :         return GDK_SUCCEED;                                             \
      67             : }
      68             : 
      69             : /* instantiate project1_TYPE() for each element type listed below (hge only when HAVE_HGE is defined) */
      70    61552481 : project1_loop(bte)
      71    29056973 : project1_loop(sht)
      72   579326998 : project1_loop(int)
      73        1564 : project1_loop(flt)
      74      910396 : project1_loop(dbl)
      75   184931115 : project1_loop(lng)
      76             : #ifdef HAVE_HGE
      77    61959434 : project1_loop(hge)
      78             : #endif
      79         150 : project1_loop(uuid)
      80             : 
      /*
       * project_loop(TYPE) defines
       *     project_TYPE(bn, li, ci, r1i, r2i, qry_ctx)
       * the general projection loop for values of type TYPE.
       *
       *   bn      - result BAT; its tail (Tloc(bn, 0)) is written as TYPE[].
       *   li      - iterator over the left input.
       *   ci      - optional candidate iterator; when non-NULL the OIDs are
       *             taken from canditer_next(ci) instead of from li.
       *   r1i     - iterator over the first right input.
       *   r2i     - iterator over the second right input, or NULL.
       *   qry_ctx - handed to the TIMEOUT_* macros (defined elsewhere).
       *
       * Fast path: when r2i is NULL, ci is NULL (or ci->tpe is cand_dense
       * and li is dense), li->nonil is set, r1i->type is non-zero and r1 is
       * not dense, the work is delegated to project1_TYPE.
       *
       * Otherwise each OID o selects r1t[o - r1seq] when o < r1end, else
       * r2t[o - r2seq].  Without r2i, r2seq and r2end are both set to
       * r1end, so every OID that passes the range check is below r1end and
       * the NULL r2t is never dereferenced.
       *
       * The OIDs come from the first source that applies:
       *   1. ci != NULL: canditer_next(ci), ci->ncand times;
       *   2. li dense:   li->tseq + lo for lo in [0, li->count);
       *   3. otherwise:  li->base read as oid[].  Only this branch accepts
       *      a nil OID; it stores TYPE##_nil and sets bn->tnil.
       *      NOTE(review): unlike project_oid, bn->tnonil is not cleared
       *      here - presumably the caller maintains it; confirm.
       *
       * An OID outside [r1seq, r2end) triggers
       * GDKerror("does not match always") and GDK_FAIL.  On success
       * BATsetcount(bn, lo) is called and GDK_SUCCEED is returned.
       */
      81             : #define project_loop(TYPE)                                              \
      82             : static gdk_return                                                       \
      83             : project_##TYPE(BAT *restrict bn, BATiter *restrict li,                  \
      84             :                struct canditer *restrict ci,                            \
      85             :                BATiter *restrict r1i, BATiter *restrict r2i,            \
      86             :                QryCtx *qry_ctx)                                         \
      87             : {                                                                       \
      88             :         BUN lo;                                                         \
      89             :         const TYPE *restrict r1t;                                       \
      90             :         const TYPE *restrict r2t;                                       \
      91             :         TYPE *restrict bt;                                              \
      92             :         TYPE v;                                                         \
      93             :         oid r1seq, r1end;                                               \
      94             :         oid r2seq, r2end;                                               \
      95             :                                                                         \
      96             :         if (r2i == NULL &&                                              \
      97             :             (ci == NULL || (ci->tpe == cand_dense && BATtdensebi(li))) && \
      98             :             li->nonil && r1i->type && !BATtdensebi(r1i))          \
      99             :                 return project1_##TYPE(bn, li, r1i, qry_ctx);           \
     100             :         MT_thread_setalgorithm(__func__);                               \
     101             :         r1t = (const TYPE *) r1i->base;                                      \
     102             :         bt = (TYPE *) Tloc(bn, 0);                                      \
     103             :         r1seq = r1i->b->hseqbase;                                 \
     104             :         r1end = r1seq + r1i->count;                                  \
     105             :         if (r2i) {                                                      \
     106             :                 r2t = (const TYPE *) r2i->base;                              \
     107             :                 r2seq = r2i->b->hseqbase;                         \
     108             :                 r2end = r2seq + r2i->count;                          \
     109             :         } else {                                                        \
     110             :                 r2t = NULL;                                             \
     111             :                 r2seq = r2end = r1end;                                  \
     112             :         }                                                               \
     113             :         if (ci) {                                                       \
     114             :                 TIMEOUT_LOOP_IDX(lo, ci->ncand, qry_ctx) {           \
     115             :                         oid o = canditer_next(ci);                      \
     116             :                         if (o < r1seq || o >= r2end) {                    \
     117             :                                 GDKerror("does not match always\n");  \
     118             :                                 return GDK_FAIL;                        \
     119             :                         }                                               \
     120             :                         if (o < r1end)                                       \
     121             :                                 v = r1t[o - r1seq];                     \
     122             :                         else                                            \
     123             :                                 v = r2t[o - r2seq];                     \
     124             :                         bt[lo] = v;                                     \
     125             :                 }                                                       \
     126             :         } else if (BATtdensebi(li)) {                                   \
     127             :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {           \
     128             :                         oid o = li->tseq + lo;                               \
     129             :                         if (o < r1seq || o >= r2end) {                    \
     130             :                                 GDKerror("does not match always\n");  \
     131             :                                 return GDK_FAIL;                        \
     132             :                         }                                               \
     133             :                         if (o < r1end)                                       \
     134             :                                 v = r1t[o - r1seq];                     \
     135             :                         else                                            \
     136             :                                 v = r2t[o - r2seq];                     \
     137             :                         bt[lo] = v;                                     \
     138             :                 }                                                       \
     139             :         } else {                                                        \
     140             :                 const oid *restrict ot = (const oid *) li->base;     \
     141             :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {           \
     142             :                         oid o = ot[lo];                                 \
     143             :                         if (is_oid_nil(o)) {                            \
     144             :                                 bt[lo] = v = TYPE##_nil;                \
     145             :                                 bn->tnil = true;                     \
     146             :                         } else if (o < r1seq || o >= r2end) {             \
     147             :                                 GDKerror("does not match always\n");  \
     148             :                                 return GDK_FAIL;                        \
     149             :                         } else if (o < r1end) {                              \
     150             :                                 v = r1t[o - r1seq];                     \
     151             :                                 bt[lo] = v;                             \
     152             :                         } else {                                        \
     153             :                                 v = r2t[o - r2seq];                     \
     154             :                                 bt[lo] = v;                             \
     155             :                         }                                               \
     156             :                 }                                                       \
     157             :         }                                                               \
     158             :         TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));     \
     159             :         BATsetcount(bn, lo);                                            \
     160             :         return GDK_SUCCEED;                                             \
     161             : }
     162             : 
     163             : 
     164             : /* instantiate project_TYPE() for each element type listed below (hge only when HAVE_HGE is defined) */
     165       14686 : project_loop(bte)
     166       36081 : project_loop(sht)
     167    61969132 : project_loop(int)
     168         112 : project_loop(flt)
     169        5390 : project_loop(dbl)
     170       24632 : project_loop(lng)
     171             : #ifdef HAVE_HGE
     172     1221924 : project_loop(hge)
     173             : #endif
     174          27 : project_loop(uuid)
     175             : 
     /*
      * project_oid - projection loop for right inputs whose values are OIDs.
      *
      *   bn      - result BAT; its tail (Tloc(bn, 0)) is written as oid[].
      *   li      - iterator over the left input.
      *   lci     - optional candidate iterator; when non-NULL the OIDs are
      *             taken from canditer_next(lci) instead of from li.
      *   r1i     - iterator over the first right input.
      *   r2i     - iterator over the second right input, or NULL.
      *   qry_ctx - handed to the TIMEOUT_* macros (defined elsewhere).
      *
      * A value of a right input is obtained in one of three ways, chosen
      * once before the loops:
      *   - complex_cand(b) is true: canditer_idx() on r1ci / r2ci;
      *   - the input is not dense:  the oid array r1t / r2t;
      *   - otherwise (both unset):  computed as o - seq + tseq.
      *
      * Returns GDK_SUCCEED after BATsetcount(bn, lo).  An OID outside
      * [r1seq, r2end) jumps to nomatch, which reports
      * "does not match always" and returns GDK_FAIL.
      */
     176             : static gdk_return
     177       15770 : project_oid(BAT *restrict bn, BATiter *restrict li,
     178             :             struct canditer *restrict lci,
     179             :             BATiter *restrict r1i, BATiter *restrict r2i, QryCtx *qry_ctx)
     180             : {
     181       15770 :         BUN lo;
     182       15770 :         oid *restrict bt;
     183       15770 :         oid r1seq, r1end;
     184       15770 :         oid r2seq, r2end;
     185       15770 :         const oid *restrict r1t = NULL;
     186       15770 :         const oid *restrict r2t = NULL;
     187       15770 :         struct canditer r1ci = {0}, r2ci = {0};
     188             : 
                               /* Fast path: no second input, li->nonil set, r1i->type non-zero
                                * and r1 not dense.  Reuse the project1 loop of the integer
                                * type whose size equals sizeof(oid). */
     189       15770 :         if ((!lci || (lci->tpe == cand_dense && BATtdensebi(li))) && r1i->type && !BATtdensebi(r1i) && !r2i && li->nonil) {
     190        7301 :                 if (sizeof(oid) == sizeof(lng))
     191        7301 :                         return project1_lng(bn, li, r1i, qry_ctx);
     192             :                 else
     193             :                         return project1_int(bn, li, r1i, qry_ctx);
     194             :         }
     195        8469 :         MT_thread_setalgorithm(__func__);
                               /* Pick the access method for r1; if neither r1ci nor r1t is set
                                * the loops below compute the value from r1i->tseq. */
     196        8484 :         if (complex_cand(r1i->b))
     197          15 :                 canditer_init(&r1ci, NULL, r1i->b);
     198        8469 :         else if (!BATtdensebi(r1i))
     199           3 :                 r1t = (const oid *) r1i->base;
     200        8484 :         r1seq = r1i->b->hseqbase;
     201        8484 :         r1end = r1seq + r1i->count;
     202        8484 :         if (r2i) {
     203           0 :                 if (complex_cand(r2i->b))
     204           0 :                         canditer_init(&r2ci, NULL, r2i->b);
     205           0 :                 else if (!BATtdensebi(r2i))
     206           0 :                         r2t = (const oid *) r2i->base;
     207           0 :                 r2seq = r2i->b->hseqbase;
     208           0 :                 r2end = r2seq + r2i->count;
     209             :         } else {
                                       /* no r2: r2end == r1end, so every OID that passes the
                                        * range check is below r1end and the r2 branches
                                        * (which dereference r2i) are never taken */
     210             :                 r2seq = r2end = r1end;
     211             :         }
     212        8484 :         bt = (oid *) Tloc(bn, 0);
     213        8484 :         if (lci) {
                                       /* OIDs supplied by the candidate iterator */
     214    61755615 :                 TIMEOUT_LOOP_IDX(lo, lci->ncand, qry_ctx) {
     215    61751705 :                         oid o = canditer_next(lci);
     216    61751705 :                         if (o < r1seq || o >= r2end) {
     217           0 :                                 goto nomatch;
     218             :                         }
     219    61751705 :                         if (o < r1end) {
     220    61751705 :                                 if (r1ci.s)
     221           0 :                                         bt[lo] = canditer_idx(&r1ci, o - r1seq);
     222    61751705 :                                 else if (r1t)
     223         179 :                                         bt[lo] = r1t[o - r1seq];
     224             :                                 else
     225    61751526 :                                         bt[lo] = o - r1seq + r1i->tseq;
     226             :                         } else {
     227           0 :                                 if (r2ci.s)
     228           0 :                                         bt[lo] = canditer_idx(&r2ci, o - r2seq);
     229           0 :                                 else if (r2t)
     230           0 :                                         bt[lo] = r2t[o - r2seq];
     231             :                                 else
     232           0 :                                         bt[lo] = o - r2seq + r2i->tseq;
     233             :                         }
     234             :                 }
     235        8385 :         } else if (BATtdensebi(li)) {
                                       /* dense left input: OIDs are li->tseq, li->tseq + 1, ... */
     236           0 :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
     237           0 :                         oid o = li->tseq + lo;
     238           0 :                         if (o < r1seq || o >= r2end) {
     239           0 :                                 goto nomatch;
     240             :                         }
     241           0 :                         if (o < r1end) {
     242           0 :                                 if (r1ci.s)
     243           0 :                                         bt[lo] = canditer_idx(&r1ci, o - r1seq);
     244           0 :                                 else if (r1t)
     245           0 :                                         bt[lo] = r1t[o - r1seq];
     246             :                                 else
     247           0 :                                         bt[lo] = o - r1seq + r1i->tseq;
     248             :                         } else {
     249           0 :                                 if (r2ci.s)
     250           0 :                                         bt[lo] = canditer_idx(&r2ci, o - r2seq);
     251           0 :                                 else if (r2t)
     252           0 :                                         bt[lo] = r2t[o - r2seq];
     253             :                                 else
     254           0 :                                         bt[lo] = o - r2seq + r2i->tseq;
     255             :                         }
     256             :                 }
     257             :         } else {
                                       /* left input read as an oid array; only this branch
                                        * accepts nil OIDs */
     258        8385 :                 const oid *ot = (const oid *) li->base;
     259   175516095 :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
     260   175487838 :                         oid o = ot[lo];
     261   175487838 :                         if (is_oid_nil(o)) {
                                                       /* nil in, nil out; record it in bn's nil flags */
     262           0 :                                 bt[lo] = oid_nil;
     263           0 :                                 bn->tnonil = false;
     264           0 :                                 bn->tnil = true;
     265   175487838 :                         } else if (o < r1seq || o >= r2end) {
     266           0 :                                 goto nomatch;
     267   175487838 :                         } else if (o < r1end) {
     268   175487838 :                                 if (r1ci.s)
     269        3972 :                                         bt[lo] = canditer_idx(&r1ci, o - r1seq);
     270   175483866 :                                 else if (r1t)
     271           0 :                                         bt[lo] = r1t[o - r1seq];
     272             :                                 else
     273   175483866 :                                         bt[lo] = o - r1seq + r1i->tseq;
     274             :                         } else {
     275           0 :                                 if (r2ci.s)
     276           0 :                                         bt[lo] = canditer_idx(&r2ci, o - r2seq);
     277           0 :                                 else if (r2t)
     278           0 :                                         bt[lo] = r2t[o - r2seq];
     279             :                                 else
     280           0 :                                         bt[lo] = o - r2seq + r2i->tseq;
     281             :                         }
     282             :                 }
     283             :         }
     284        8503 :         TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
     285        8499 :         BATsetcount(bn, lo);
     286        8499 :         return GDK_SUCCEED;
                               /* shared error exit for an OID outside [r1seq, r2end) */
     287           0 :   nomatch:
     288           0 :         GDKerror("does not match always\n");
     289           0 :         return GDK_FAIL;
     290             : }
     291             : 
     /*
      * project_any - type-agnostic projection loop.
      *
      * Each value is fetched with BUNtail() from r1 (when o < r1end) or
      * from r2 (otherwise) and stored in bn at position lo with
      * tfastins_nocheck().
      *
      *   bn      - result BAT that receives the values.
      *   li      - iterator over the left input.
      *   ci      - optional candidate iterator; when non-NULL the OIDs are
      *             taken from canditer_next(ci) instead of from li.
      *   r1i     - iterator over the first right input.
      *   r2i     - iterator over the second right input, or NULL.
      *   qry_ctx - handed to the TIMEOUT_* macros (defined elsewhere).
      *
      * Returns GDK_FAIL when an OID lies outside [r1seq, r2end) (after
      * GDKerror("does not match always")) or when tfastins_nocheck() does
      * not return GDK_SUCCEED.  On success it calls BATsetcount(bn, lo),
      * marks bn->theap dirty and returns GDK_SUCCEED.
      */
     292             : static gdk_return
     293         334 : project_any(BAT *restrict bn, BATiter *restrict li,
     294             :             struct canditer *restrict ci,
     295             :             BATiter *restrict r1i, BATiter *restrict r2i, QryCtx *qry_ctx)
     296             : {
     297         334 :         BUN lo;
                               /* nil representation for r1's type; inserted for nil OIDs */
     298         334 :         const void *nil = ATOMnilptr(r1i->type);
     299         334 :         const void *v;
     300         334 :         oid r1seq, r1end;
     301         334 :         oid r2seq, r2end;
     302             : 
     303         334 :         MT_thread_setalgorithm(__func__);
     304         333 :         r1seq = r1i->b->hseqbase;
     305         333 :         r1end = r1seq + r1i->count;
     306         333 :         if (r2i) {
     307           0 :                 r2seq = r2i->b->hseqbase;
     308           0 :                 r2end = r2seq + r2i->count;
     309             :         } else {
                                       /* no r2: r2end == r1end, so the BUNtail(*r2i, ...) branches
                                        * below are never reached */
     310             :                 r2seq = r2end = r1end;
     311             :         }
     312         333 :         if (ci) {
                                       /* OIDs supplied by the candidate iterator */
     313           0 :                 TIMEOUT_LOOP_IDX(lo, ci->ncand, qry_ctx) {
     314           0 :                         oid o = canditer_next(ci);
     315           0 :                         if (o < r1seq || o >= r2end) {
     316           0 :                                 GDKerror("does not match always\n");
     317           0 :                                 return GDK_FAIL;
     318             :                         }
     319           0 :                         if (o < r1end)
     320           0 :                                 v = BUNtail(*r1i, o - r1seq);
     321             :                         else
     322           0 :                                 v = BUNtail(*r2i, o - r2seq);
     323           0 :                         if (tfastins_nocheck(bn, lo, v) != GDK_SUCCEED) {
     324             :                                 return GDK_FAIL;
     325             :                         }
     326             :                 }
     327         334 :         } else if (BATtdensebi(li)) {
                                       /* dense left input: OIDs are li->tseq, li->tseq + 1, ... */
     328           0 :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
     329           0 :                         oid o = li->tseq + lo;
     330           0 :                         if (o < r1seq || o >= r2end) {
     331           0 :                                 GDKerror("does not match always\n");
     332           0 :                                 return GDK_FAIL;
     333             :                         }
     334           0 :                         if (o < r1end)
     335           0 :                                 v = BUNtail(*r1i, o - r1seq);
     336             :                         else
     337           0 :                                 v = BUNtail(*r2i, o - r2seq);
     338           0 :                         if (tfastins_nocheck(bn, lo, v) != GDK_SUCCEED) {
     339             :                                 return GDK_FAIL;
     340             :                         }
     341             :                 }
     342             :         } else {
                                       /* left input read as an oid array; only this branch
                                        * accepts nil OIDs */
     343         334 :                 const oid *restrict ot = (const oid *) li->base;
     344             : 
     345        7350 :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
     346        6681 :                         oid o = ot[lo];
     347        6681 :                         if (is_oid_nil(o)) {
     348           0 :                                 v = nil;
     349           0 :                                 bn->tnil = true;
     350        6681 :                         } else if (o < r1seq || o >= r2end) {
     351           0 :                                 GDKerror("does not match always\n");
     352           0 :                                 return GDK_FAIL;
     353        6681 :                         } else if (o < r1end) {
     354        6681 :                                 v = BUNtail(*r1i, o - r1seq);
     355             :                         } else {
     356           0 :                                 v = BUNtail(*r2i, o - r2seq);
     357             :                         }
     358        6681 :                         if (tfastins_nocheck(bn, lo, v) != GDK_SUCCEED) {
     359             :                                 return GDK_FAIL;
     360             :                         }
     361             :                 }
     362             :         }
     363         334 :         TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
     364         335 :         BATsetcount(bn, lo);
     365         335 :         bn->theap->dirty = true;
     366         335 :         return GDK_SUCCEED;
     367             : }
     368             : 
     369             : static BAT *
     370           0 : project_str(BATiter *restrict li, struct canditer *restrict ci, int tpe,
     371             :             BATiter *restrict r1i, BATiter *restrict r2i,
     372             :             QryCtx *qry_ctx, lng t0)
     373             : {
     374           0 :         BAT *bn;
     375           0 :         BUN lo;
     376           0 :         oid r1seq, r1end;
     377           0 :         oid r2seq, r2end;
     378           0 :         BUN h1off;
     379           0 :         BUN off;
     380           0 :         oid seq;
     381           0 :         var_t v;
     382           0 :         BATiter *ri;
     383             : 
     384           0 :         if ((bn = COLnew(li->b->hseqbase, tpe, ci ? ci->ncand : li->count,
     385             :                          TRANSIENT)) == NULL)
     386             :                 return NULL;
     387             : 
     388           0 :         v = (var_t) r1i->vhfree;
     389           0 :         if (r1i->vh == r2i->vh) {
     390           0 :                 h1off = 0;
     391           0 :                 assert(bn->tvheap->parentid == bn->batCacheid);
     392           0 :                 HEAPdecref(bn->tvheap, true);
     393           0 :                 HEAPincref(r1i->vh);
     394           0 :                 bn->tvheap = r1i->vh;
     395           0 :                 assert(bn->tvheap->parentid != bn->batCacheid);
     396           0 :                 BBPretain(bn->tvheap->parentid);
     397             :         } else {
     398           0 :                 v = (v + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
     399           0 :                 h1off = (BUN) v;
     400           0 :                 v += ((var_t) r2i->vhfree + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
     401           0 :                 if (HEAPextend(bn->tvheap, v, false) != GDK_SUCCEED) {
     402           0 :                         BBPreclaim(bn);
     403           0 :                         return NULL;
     404             :                 }
     405           0 :                 memcpy(bn->tvheap->base, r1i->vh->base, r1i->vhfree);
     406             : #ifndef NDEBUG
     407           0 :                 if (h1off > r1i->vhfree)
     408           0 :                         memset(bn->tvheap->base + r1i->vhfree, 0, h1off - r1i->vhfree);
     409             : #endif
     410           0 :                 memcpy(bn->tvheap->base + h1off, r2i->vh->base, r2i->vhfree);
     411           0 :                 bn->tvheap->free = h1off + r2i->vhfree;
     412           0 :                 bn->tvheap->dirty = true;
     413             :         }
     414             : 
     415           0 :         if (v >= ((var_t) 1 << (8 << bn->tshift)) &&
     416           0 :             GDKupgradevarheap(bn, v, false, 0) != GDK_SUCCEED) {
     417           0 :                 BBPreclaim(bn);
     418           0 :                 return NULL;
     419             :         }
     420             : 
     421           0 :         r1seq = r1i->b->hseqbase;
     422           0 :         r1end = r1seq + r1i->count;
     423           0 :         r2seq = r2i->b->hseqbase;
     424           0 :         r2end = r2seq + r2i->count;
     425           0 :         if (ci) {
     426           0 :                 TIMEOUT_LOOP_IDX(lo, ci->ncand, qry_ctx) {
     427           0 :                         oid o = canditer_next(ci);
     428           0 :                         if (o < r1seq || o >= r2end) {
     429           0 :                                 GDKerror("does not match always\n");
     430           0 :                                 BBPreclaim(bn);
     431           0 :                                 return NULL;
     432             :                         }
     433           0 :                         if (o < r1end) {
     434             :                                 ri = r1i;
     435             :                                 off = 0;
     436             :                                 seq = r1seq;
     437             :                         } else {
     438           0 :                                 ri = r2i;
     439           0 :                                 off = h1off;
     440           0 :                                 seq = r2seq;
     441             :                         }
     442           0 :                         switch (ri->width) {
     443           0 :                         case 1:
     444           0 :                                 v = (var_t) ((uint8_t *) ri->base)[o - seq] + GDK_VAROFFSET;
     445           0 :                                 break;
     446           0 :                         case 2:
     447           0 :                                 v = (var_t) ((uint16_t *) ri->base)[o - seq] + GDK_VAROFFSET;
     448           0 :                                 break;
     449           0 :                         case 4:
     450           0 :                                 v = (var_t) ((uint32_t *) ri->base)[o - seq];
     451           0 :                                 break;
     452           0 :                         case 8:
     453           0 :                                 v = (var_t) ((uint64_t *) ri->base)[o - seq];
     454           0 :                                 break;
     455             :                         }
     456           0 :                         v += off;
     457           0 :                         switch (bn->twidth) {
     458           0 :                         case 1:
     459           0 :                                 ((uint8_t *) bn->theap->base)[lo] = (uint8_t) (v - GDK_VAROFFSET);
     460           0 :                                 break;
     461           0 :                         case 2:
     462           0 :                                 ((uint16_t *) bn->theap->base)[lo] = (uint16_t) (v - GDK_VAROFFSET);
     463           0 :                                 break;
     464           0 :                         case 4:
     465           0 :                                 ((uint32_t *) bn->theap->base)[lo] = (uint32_t) v;
     466           0 :                                 break;
     467           0 :                         case 8:
     468           0 :                                 ((uint64_t *) bn->theap->base)[lo] = (uint64_t) v;
     469           0 :                                 break;
     470             :                         }
     471             :                 }
     472           0 :         } else if (BATtdensebi(li)) {
     473           0 :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
     474           0 :                         oid o = li->tseq + lo;
     475           0 :                         if (o < r1seq || o >= r2end) {
     476           0 :                                 GDKerror("does not match always\n");
     477           0 :                                 BBPreclaim(bn);
     478           0 :                                 return NULL;
     479             :                         }
     480           0 :                         if (o < r1end) {
     481             :                                 ri = r1i;
     482             :                                 off = 0;
     483             :                                 seq = r1seq;
     484             :                         } else {
     485           0 :                                 ri = r2i;
     486           0 :                                 off = h1off;
     487           0 :                                 seq = r2seq;
     488             :                         }
     489           0 :                         switch (ri->width) {
     490           0 :                         case 1:
     491           0 :                                 v = (var_t) ((uint8_t *) ri->base)[o - seq] + GDK_VAROFFSET;
     492           0 :                                 break;
     493           0 :                         case 2:
     494           0 :                                 v = (var_t) ((uint16_t *) ri->base)[o - seq] + GDK_VAROFFSET;
     495           0 :                                 break;
     496           0 :                         case 4:
     497           0 :                                 v = (var_t) ((uint32_t *) ri->base)[o - seq];
     498           0 :                                 break;
     499           0 :                         case 8:
     500           0 :                                 v = (var_t) ((uint64_t *) ri->base)[o - seq];
     501           0 :                                 break;
     502             :                         }
     503           0 :                         v += off;
     504           0 :                         switch (bn->twidth) {
     505           0 :                         case 1:
     506           0 :                                 ((uint8_t *) bn->theap->base)[lo] = (uint8_t) (v - GDK_VAROFFSET);
     507           0 :                                 break;
     508           0 :                         case 2:
     509           0 :                                 ((uint16_t *) bn->theap->base)[lo] = (uint16_t) (v - GDK_VAROFFSET);
     510           0 :                                 break;
     511           0 :                         case 4:
     512           0 :                                 ((uint32_t *) bn->theap->base)[lo] = (uint32_t) v;
     513           0 :                                 break;
     514           0 :                         case 8:
     515           0 :                                 ((uint64_t *) bn->theap->base)[lo] = (uint64_t) v;
     516           0 :                                 break;
     517             :                         }
     518             :                 }
     519             :         } else {
     520           0 :                 const oid *restrict ot = (const oid *) li->base;
     521           0 :                 TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
     522           0 :                         oid o = ot[lo];
     523           0 :                         if (o < r1seq || o >= r2end) {
     524           0 :                                 GDKerror("does not match always\n");
     525           0 :                                 BBPreclaim(bn);
     526           0 :                                 return NULL;
     527             :                         }
     528           0 :                         if (o < r1end) {
     529             :                                 ri = r1i;
     530             :                                 off = 0;
     531             :                                 seq = r1seq;
     532             :                         } else {
     533           0 :                                 ri = r2i;
     534           0 :                                 off = h1off;
     535           0 :                                 seq = r2seq;
     536             :                         }
     537           0 :                         switch (ri->width) {
     538           0 :                         case 1:
     539           0 :                                 v = (var_t) ((uint8_t *) ri->base)[o - seq] + GDK_VAROFFSET;
     540           0 :                                 break;
     541           0 :                         case 2:
     542           0 :                                 v = (var_t) ((uint16_t *) ri->base)[o - seq] + GDK_VAROFFSET;
     543           0 :                                 break;
     544           0 :                         case 4:
     545           0 :                                 v = (var_t) ((uint32_t *) ri->base)[o - seq];
     546           0 :                                 break;
     547           0 :                         case 8:
     548           0 :                                 v = (var_t) ((uint64_t *) ri->base)[o - seq];
     549           0 :                                 break;
     550             :                         }
     551           0 :                         v += off;
     552           0 :                         switch (bn->twidth) {
     553           0 :                         case 1:
     554           0 :                                 ((uint8_t *) bn->theap->base)[lo] = (uint8_t) (v - GDK_VAROFFSET);
     555           0 :                                 break;
     556           0 :                         case 2:
     557           0 :                                 ((uint16_t *) bn->theap->base)[lo] = (uint16_t) (v - GDK_VAROFFSET);
     558           0 :                                 break;
     559           0 :                         case 4:
     560           0 :                                 ((uint32_t *) bn->theap->base)[lo] = (uint32_t) v;
     561           0 :                                 break;
     562           0 :                         case 8:
     563           0 :                                 ((uint64_t *) bn->theap->base)[lo] = (uint64_t) v;
     564           0 :                                 break;
     565             :                         }
     566             :                 }
     567             :         }
     568           0 :         TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     569           0 :         BATsetcount(bn, lo);
     570           0 :         bn->tsorted = bn->trevsorted = false;
     571           0 :         bn->tnil = false;
     572           0 :         bn->tnonil = r1i->nonil & r2i->nonil;
     573           0 :         bn->tkey = false;
     574           0 :         bn->tunique_est =
     575           0 :                 MIN(li->b->tunique_est?li->b->tunique_est:BATcount(li->b),
     576             :                    r1i->b->tunique_est?r1i->b->tunique_est:BATcount(r1i->b));
     577           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOBATFMT
     578             :                   " -> " ALGOBATFMT "%s " LLFMT "us\n",
     579             :                   ALGOBATPAR(li->b), ALGOBATPAR(r1i->b), ALGOBATPAR(r2i->b),
     580             :                   ALGOBATPAR(bn),
     581             :                   bn && bn->ttype == TYPE_str && bn->tvheap == r1i->vh ? " sharing string heap" : "",
     582             :                   GDKusec() - t0);
     583             :         return bn;
     584           0 :   bailout:
     585           0 :         BBPreclaim(bn);
     586           0 :         return NULL;
     587             : }
     588             : 
     589             : BAT * /* returns the projection result, or NULL on failure */
     590     2418085 : BATproject2(BAT *restrict l, BAT *restrict r1, BAT *restrict r2) /* l: oid/void/msk BAT of positions; r1 (+ optional r2 directly following r1) supply the values */
     591             : {
     592     2418085 :         BAT *bn = NULL;
     593     2418085 :         BAT *or1 = r1, *or2 = r2, *ol = l; /* caller-owned inputs; at doreturn anything that differs from these is a local replacement and gets reclaimed */
     594     2418085 :         oid lo, hi;
     595     2418085 :         gdk_return res;
     596     2418085 :         int tpe = ATOMtype(r1->ttype), otpe = tpe; /* tpe may be rewritten below; otpe keeps the original type */
     597     2418085 :         bool stringtrick = false; /* true: copy var-sized offsets as integers and share/copy the heap afterwards */
     598     2418085 :         struct canditer ci, *lci = NULL; /* lci is non-NULL only when l must be walked as a candidate list */
     599     2418085 :         const char *msg = ""; /* suffix for the trace line naming the shortcut taken */
     600     2418085 :         lng t0 = 0;
     601     2418085 :         BATiter li = bat_iterator(l);
     602     2428583 :         BATiter r1i = bat_iterator(r1);
     603     2429614 :         BATiter r2i = bat_iterator(r2);
     604     2426562 :         BUN lcount = li.count;
     605             : 
     606     2426562 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
     607             : 
     608     2426562 :         assert(ATOMtype(li.type) == TYPE_oid || li.type == TYPE_msk);
     609     2426562 :         assert(r2 == NULL || tpe == ATOMtype(r2i.type));
     610           0 :         assert(r2 == NULL || r1->hseqbase + r1i.count == r2->hseqbase);
     611             : 
     612     2426562 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     613             : 
     614     2422109 :         if (r2 && r1i.count == 0) {
     615             :                 /* unlikely special case: r1 is empty, so we just have r2 */
     616           0 :                 r1 = or1 = r2; /* r2 is still the caller's BAT: update or1 too so doreturn does not BBPreclaim it */
     617           0 :                 r2 = or2 = NULL; /* keep or2 in step so doreturn sees no replacement for r2 */
     618           0 :                 bat_iterator_end(&r1i);
     619           0 :                 r1i = r2i;
     620           0 :                 r2i = bat_iterator(NULL);
     621             :         }
     622             : 
     623     2422109 :         if (BATtdensebi(&li) && lcount > 0) { /* dense l: try to answer with a plain slice */
     624      746997 :                 lo = l->tseqbase;
     625      746997 :                 hi = l->tseqbase + lcount;
     626      746997 :                 if (lo >= r1->hseqbase && hi <= r1->hseqbase + r1i.count) { /* range lies wholly inside r1 */
     627      746997 :                         bn = BATslice(r1, lo - r1->hseqbase, hi - r1->hseqbase);
     628      742948 :                         BAThseqbase(bn, l->hseqbase);
     629      741235 :                         msg = " (slice)";
     630      741235 :                         goto doreturn;
     631             :                 }
     632           0 :                 if (lo < r1->hseqbase || r2 == NULL || hi > r2->hseqbase + r2i.count) { /* range falls outside r1+r2 */
     633           0 :                         GDKerror("does not match always\n");
     634           0 :                         bat_iterator_end(&li);
     635           0 :                         bat_iterator_end(&r1i);
     636           0 :                         bat_iterator_end(&r2i);
     637           0 :                         return NULL;
     638             :                 }
     639           0 :                 if (lo >= r2->hseqbase) { /* range lies wholly inside r2 */
     640           0 :                         bn = BATslice(r2, lo - r2->hseqbase, hi - r2->hseqbase);
     641           0 :                         BAThseqbase(bn, l->hseqbase);
     642           0 :                         msg = " (slice2)";
     643           0 :                         goto doreturn;
     644             :                 }
     645             :         }
     646     1675112 :         if (complex_cand(l)) {
     647             :                 /* l is candidate list with exceptions or is a bitmask */
     648        1085 :                 assert(li.type == TYPE_msk || !is_oid_nil(l->tseqbase));
     649        1085 :                 canditer_init(&ci, NULL, l);
     650        1082 :                 lcount = ci.ncand;
     651        1082 :                 lci = &ci;
     652     1674027 :         } else if (li.type == TYPE_msk) {
     653           0 :                 l = BATunmask(l); /* from here on l != ol, so doreturn reclaims the unmasked copy */
     654           0 :                 if (l == NULL)
     655           0 :                         goto doreturn;
     656           0 :                 if (complex_cand(l)) {
     657           0 :                         canditer_init(&ci, NULL, l);
     658           0 :                         lcount = ci.ncand;
     659           0 :                         lci = &ci;
     660             :                 }
     661             :         }
     662     1675109 :         if (lcount == 0 ||
     663        1101 :             (li.type == TYPE_void && is_oid_nil(l->tseqbase)) ||
     664      169043 :             (r1i.type == TYPE_void && is_oid_nil(r1->tseqbase) &&
     665           0 :              (r2 == NULL ||
     666           0 :               (r2i.type == TYPE_void && is_oid_nil(r2->tseqbase))))) {
     667             :                 /* trivial: all values are nil (includes no entries at all) */
     668     1506066 :                 const void *nil = r1i.type == TYPE_msk ? &oid_nil : ATOMnilptr(r1i.type);
     669             : 
     670     3002917 :                 bn = BATconstant(l->hseqbase, r1i.type == TYPE_oid || r1i.type == TYPE_msk ? TYPE_void : r1i.type,
     671             :                                  nil, lcount, TRANSIENT);
     672     1505627 :                 if (bn != NULL &&
     673     1505627 :                     ATOMtype(bn->ttype) == TYPE_oid &&
     674      233363 :                     BATcount(bn) == 0) {
     675      233688 :                         BATtseqbase(bn, 0); /* empty oid result gets tseqbase 0 rather than nil */
     676             :                 }
     677     1505157 :                 msg = " (constant)";
     678     1505157 :                 goto doreturn;
     679             :         }
     680             : 
     681      169043 :         if (ATOMstorage(tpe) == TYPE_str) {
     682       16476 :                 if (li.nonil &&
     683       16477 :                     r2 == NULL &&
     684       16477 :                     (r1i.count == 0 ||
     685       16479 :                      lcount > (r1i.count >> 3) ||
     686        3220 :                      r1i.restricted == BAT_READ)) {
     687             :                         /* insert strings as ints, we need to copy the
     688             :                          * string heap wholesale; we can't do this if
     689             :                          * there are nils in the left column, and we
     690             :                          * won't do it if the left is much smaller than
     691             :                          * the right and the right is writable (meaning
     692             :                          * we have to actually copy the right string
     693             :                          * heap) */
     694       16157 :                         tpe = r1i.width == 1 ? TYPE_bte : (r1i.width == 2 ? TYPE_sht : (r1i.width == 4 ? TYPE_int : TYPE_lng));
     695             :                         stringtrick = true;
     696         319 :                 } else if (li.nonil &&
     697           0 :                            r2 != NULL &&
     698           0 :                            (r1i.vh == r2i.vh ||
     699           0 :                             (!GDK_ELIMDOUBLES(r1i.vh) /* && size tests */))) {
     700             :                         /* r1 and r2 may explicitly share their vheap,
     701             :                          * if they do, the result will also share the
     702             :                          * vheap; this also means that for this case we
     703             :                          * don't care about duplicate elimination: it
     704             :                          * will remain the same */
     705           0 :                         bn = project_str(&li, lci, tpe, &r1i, &r2i, qry_ctx, t0);
     706           0 :                         bat_iterator_end(&li);
     707           0 :                         bat_iterator_end(&r1i);
     708           0 :                         bat_iterator_end(&r2i);
     709           0 :                         return bn;
     710             :                 }
     711      152567 :         } else if (ATOMvarsized(tpe) &&
     712         152 :                    li.nonil &&
     713         152 :                    r2 == NULL &&
     714         152 :                    (r1i.count == 0 ||
     715         152 :                     lcount > (r1i.count >> 3) ||
     716           0 :                     r1i.restricted == BAT_READ)) {
     717         152 :                 tpe = r1i.width == 4 ? TYPE_int : TYPE_lng; /* same offset-copy trick for other var-sized types */
     718             :                 stringtrick = true;
     719      152415 :         } else if (tpe == TYPE_msk || mask_cand(r1)) {
     720          10 :                 r1 = BATunmask(r1); /* project from unmasked copies; doreturn reclaims them */
     721          10 :                 if (r1 == NULL)
     722           0 :                         goto doreturn;
     723          10 :                 if (r2) {
     724           0 :                         r2 = BATunmask(r2);
     725           0 :                         if (r2 == NULL)
     726           0 :                                 goto doreturn;
     727             :                 }
     728          10 :                 tpe = TYPE_oid;
     729          10 :                 bat_iterator_end(&r1i);
     730          10 :                 bat_iterator_end(&r2i);
     731          10 :                 r1i = bat_iterator(r1);
     732          10 :                 r2i = bat_iterator(r2);
     733             :         }
     734      329870 :         bn = COLnew2(l->hseqbase, ATOMtype(r1i.type), lcount, TRANSIENT, stringtrick ? r1i.width : 0);
     735      167513 :         if (bn == NULL) {
     736           0 :                 goto doreturn;
     737             :         }
     738      167513 :         bn->tnil = false;
     739      167513 :         if (r2) {
     740           0 :                 bn->tnonil = li.nonil & r1i.nonil & r2i.nonil;
     741           0 :                 bn->tsorted = li.count <= 1;
     742           0 :                 bn->trevsorted = li.count <= 1;
     743           0 :                 bn->tkey = li.count <= 1;
     744             :         } else {
     745      168767 :                 bn->tnonil = li.nonil & r1i.nonil;
     746      337534 :                 bn->tsorted = li.count <= 1
     747      167947 :                         || (li.sorted & r1i.sorted)
     748      111668 :                         || (li.revsorted & r1i.revsorted)
     749      279264 :                         || r1i.count <= 1;
     750      337534 :                 bn->trevsorted = li.count <= 1
     751      168080 :                         || (li.sorted & r1i.revsorted)
     752      150540 :                         || (li.revsorted & r1i.sorted)
     753      317770 :                         || r1i.count <= 1;
     754      192597 :                 bn->tkey = li.count <= 1 || (li.key & r1i.key);
     755             :         }
     756             : 
     757      167513 :         if (!stringtrick && tpe != TYPE_oid)
     758      136362 :                 tpe = ATOMbasetype(tpe);
     759      167513 :         switch (tpe) { /* dispatch to the type-specialised projection loop */
     760       11119 :         case TYPE_bte:
     761       11119 :                 res = project_bte(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     762       11119 :                 break;
     763       12771 :         case TYPE_sht:
     764       12771 :                 res = project_sht(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     765       12771 :                 break;
     766      113059 :         case TYPE_int:
     767      113059 :                 res = project_int(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     768      113059 :                 break;
     769          97 :         case TYPE_flt:
     770          97 :                 res = project_flt(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     771          97 :                 break;
     772        5259 :         case TYPE_dbl:
     773        5259 :                 res = project_dbl(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     774        5259 :                 break;
     775        8551 :         case TYPE_lng:
     776        8551 :                 res = project_lng(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     777        8551 :                 break;
     778             : #ifdef HAVE_HGE
     779         512 :         case TYPE_hge:
     780         512 :                 res = project_hge(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     781         512 :                 break;
     782             : #endif
     783       15786 :         case TYPE_oid:
     784       15786 :                 res = project_oid(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     785       15786 :                 break;
     786          25 :         case TYPE_uuid:
     787          25 :                 res = project_uuid(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     788          25 :                 break;
     789         334 :         default:
     790         334 :                 res = project_any(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
     791         334 :                 break;
     792             :         }
     793             : 
     794      168422 :         if (res != GDK_SUCCEED)
     795         652 :                 goto bailout;
     796             : 
     797             :         /* handle string trick */
     798      167770 :         if (stringtrick) {
     799       16205 :                 assert(r1i.vh);
     800       16205 :                 if (r1i.restricted == BAT_READ || VIEWvtparent(r1)) {
     801             :                         /* really share string heap */
     802       15582 :                         assert(r1i.vh->parentid > 0);
     803             :                         /* there is no file, so we don't need to remove it */
     804       15582 :                         HEAPdecref(bn->tvheap, false);
     805       15687 :                         bn->tvheap = r1i.vh;
     806       15687 :                         HEAPincref(r1i.vh);
     807       15687 :                         assert(bn->tvheap->parentid != bn->batCacheid);
     808       15687 :                         BBPretain(bn->tvheap->parentid);
     809             :                 } else {
     810             :                         /* make copy of string heap */
     811         623 :                         bn->tvheap->parentid = bn->batCacheid;
     812         623 :                         bn->tvheap->farmid = BBPselectfarm(bn->batRole, otpe, varheap);
     813         624 :                         strconcat_len(bn->tvheap->filename,
     814             :                                       sizeof(bn->tvheap->filename),
     815         624 :                                       BBP_physical(bn->batCacheid), ".theap",
     816             :                                       NULL);
     817         627 :                         if (HEAPcopy(bn->tvheap, r1i.vh, 0) != GDK_SUCCEED)
     818           0 :                                 goto bailout;
     819             :                 }
     820       16316 :                 bn->ttype = r1i.type; /* restore the real type/width after filling bn as integers */
     821       16316 :                 bn->twidth = r1i.width;
     822       16316 :                 bn->tshift = r1i.shift;
     823       16316 :                 bn->tascii = r1i.ascii;
     824             :         }
     825             : 
     826      335762 :         bn->tunique_est =
     827      167881 :                 MIN(li.b->tunique_est?li.b->tunique_est:BATcount(li.b),
     828             :                    r1i.b->tunique_est?r1i.b->tunique_est:BATcount(r1i.b));
     829      167881 :         if (!BATtdensebi(&r1i) || (r2 && !BATtdensebi(&r2i)))
     830      159409 :                 BATtseqbase(bn, oid_nil);
     831             : 
     832        8472 :   doreturn: /* common exit: l may be NULL here (failed BATunmask), so the trace prints ol; ALGOBATPAR presumably needs a non-NULL BAT, unlike ALGOOPTBATPAR */
     833     2413124 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOOPTBATFMT
     834             :                   " -> " ALGOOPTBATFMT "%s%s " LLFMT "us\n",
     835             :                   ALGOBATPAR(ol), ALGOBATPAR(or1), ALGOOPTBATPAR(or2),
     836             :                   ALGOOPTBATPAR(bn),
     837             :                   bn && bn->ttype == TYPE_str && bn->tvheap == r1i.vh ? " sharing string heap" : "",
     838             :                   msg, GDKusec() - t0);
     839     2413124 :         bat_iterator_end(&li);
     840     2422819 :         bat_iterator_end(&r1i);
     841     2426929 :         bat_iterator_end(&r2i);
     842     2422868 :         if (l != ol) /* reclaim only BATs created locally (BATunmask results), never the caller's */
     843           0 :                 BBPreclaim(l);
     844     2422922 :         if (r1 != or1)
     845          10 :                 BBPreclaim(r1);
     846     2396206 :         if (r2 != or2)
     847           0 :                 BBPreclaim(r2);
     848             :         return bn;
     849             : 
     850         652 :   bailout: /* failure after bn was allocated: drop it and leave via the common cleanup */
     851         652 :         BBPreclaim(bn);
     852           0 :         bn = NULL;
     853           0 :         goto doreturn;
     854             : }
     855             : 
     856             : BAT * /* whatever BATproject2 returns, passed through unchanged */
     857      649559 : BATproject(BAT *restrict l, BAT *restrict r) /* project r through l: the single-right-input form of BATproject2 */
     858             : {
     859      649559 :         return BATproject2(l, r, NULL); /* NULL: there is no second right-hand BAT */
     860             : }
     861             : 
     862             : /* Calculate a chain of BATproject calls.
     863             :  * The argument is a NULL-terminated array of BAT pointers.
     864             :  * This function is equivalent (apart from reference counting) to a
     865             :  * sequence of calls
     866             :  * bn = BATproject(bats[0], bats[1]);
     867             :  * bn = BATproject(bn, bats[2]);
     868             :  * ...
     869             :  * bn = BATproject(bn, bats[n-1]);
     870             :  * return bn;
     871             :  * where none of the intermediates are actually produced (and bats[n]==NULL).
     872             :  * Note that all BATs except the last must have type oid/void or msk.
     873             :  *
     874             :  * We assume that all but the last BAT in the chain is temporary and
     875             :  * therefore there is no chance that another thread will modify it while
     876             :  * we're busy.  This is not necessarily the case for that last BAT, so
     877             :  * it uses a BAT iterator.
     878             :  */
     879             : BAT *
     880      555234 : BATprojectchain(BAT **bats)
     881             : {
     882      555234 :         struct ba {
     883             :                 BAT *b;
     884             :                 oid hlo;
     885             :                 oid hhi;
     886             :                 BUN cnt;
     887             :                 oid *t;
     888             :                 struct canditer ci; /* used if .ci.s != NULL */
     889             :         } *ba;
     890      555234 :         BAT **tobedeleted = NULL;
     891      555234 :         int ndelete = 0;
     892      555234 :         int n, i;
     893      555234 :         BAT *b = NULL, *bn = NULL;
     894      555234 :         BATiter bi;
     895      555234 :         bool allnil = false;
     896      555234 :         bool issorted = true;
     897      555234 :         bool nonil = true;
     898      555234 :         bool stringtrick = false;
     899      555234 :         const void *nil;
     900      555234 :         int tpe;
     901      555234 :         lng t0 = 0;
     902             : 
     903      555234 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
     904             : 
     905      555234 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     906             : 
     907             :         /* count number of participating BATs and allocate some
     908             :          * temporary work space */
     909     6849206 :         for (n = 0; bats[n]; n++) {
     910     5740134 :                 b = bats[n];
     911     5740134 :                 ndelete += (b->ttype == TYPE_msk || mask_cand(b));
     912     5740134 :                 TRC_DEBUG(ALGO, "arg %d: " ALGOBATFMT "\n",
     913             :                           n + 1, ALGOBATPAR(b));
     914             :         }
     915      553838 :         if (n == 0) {
     916           0 :                 GDKerror("must have BAT arguments\n");
     917           0 :                 return NULL;
     918             :         }
     919      553838 :         if (n == 1) {
     920           0 :                 bn = COLcopy(b, b->ttype, true, TRANSIENT);
     921           0 :                 TRC_DEBUG(ALGO, "single bat: copy -> " ALGOOPTBATFMT
     922             :                           " " LLFMT " usec\n",
     923             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
     924           0 :                 return bn;
     925             :         }
     926             : 
     927      553838 :         if (ndelete > 0 &&
     928          44 :             (tobedeleted = GDKmalloc(sizeof(BAT *) * ndelete)) == NULL)
     929             :                 return NULL;
     930      553838 :         ba = GDKmalloc(sizeof(*ba) * n);
     931      555257 :         if (ba == NULL) {
     932           0 :                 GDKfree(tobedeleted);
     933           0 :                 return NULL;
     934             :         }
     935             : 
     936             :         ndelete = 0;
     937     6201011 :         for (n = 0, i = 0; bats[n]; n++) {
     938     5648515 :                 b = bats[n];
     939     5648515 :                 if (b->ttype == TYPE_msk || mask_cand(b)) {
     940           0 :                         if ((b = BATunmask(b)) == NULL) {
     941           0 :                                 goto bunins_failed;
     942             :                         }
     943          44 :                         tobedeleted[ndelete++] = b;
     944             :                 }
     945     5648922 :                 if (bats[n+1] && BATtdense(b) && b->hseqbase == b->tseqbase && b->tseqbase == bats[n+1]->hseqbase && BATcount(b) == BATcount(bats[n+1]))
     946     4577674 :                         continue; /* skip dense bat */
     947     1071248 :                 ba[i] = (struct ba) {
     948             :                         .b = b,
     949     1071248 :                         .hlo = b->hseqbase,
     950     1071248 :                         .hhi = b->hseqbase + b->batCount,
     951             :                         .cnt = b->batCount,
     952     1071248 :                         .t = (oid *) Tloc(b, 0),
     953             :                 };
     954     1071248 :                 allnil |= b->ttype == TYPE_void && is_oid_nil(b->tseqbase);
     955     1071248 :                 issorted &= b->tsorted;
     956     1071248 :                 if (bats[n + 1])
     957      515340 :                         nonil &= b->tnonil;
     958     1071248 :                 if (b->tnonil && b->tkey && b->tsorted &&
     959      604382 :                     ATOMtype(b->ttype) == TYPE_oid) {
     960      489285 :                         canditer_init(&ba[i].ci, NULL, b);
     961             :                 }
     962     1068080 :                 i++;
     963             :         }
     964      552496 :         n = i;
     965      552496 :         if (i<=2) {
     966      506144 :                 if (i == 1) {
     967       85591 :                         bn = ba[0].b;
     968       85591 :                         BBPfix(bn->batCacheid);
     969             :                 } else {
     970      420553 :                         bn = BATproject(ba[0].b, ba[1].b);
     971             :                 }
     972      507019 :                 while (ndelete-- > 0)
     973          12 :                         BBPunfix(tobedeleted[ndelete]->batCacheid);
     974      507007 :                 GDKfree(tobedeleted);
     975      503258 :                 GDKfree(ba);
     976      503258 :                 return bn;
     977             :         }
     978             :         /* b is last BAT in bats array */
     979       46352 :         tpe = ATOMtype(b->ttype);
     980       46352 :         nil = ATOMnilptr(tpe);
     981       46352 :         if (allnil || ba[0].cnt == 0) {
     982        7683 :                 bn = BATconstant(ba[0].hlo, tpe == TYPE_oid ? TYPE_void : tpe,
     983             :                                  nil, ba[0].cnt, TRANSIENT);
     984        7683 :                 while (ndelete-- > 0)
     985        7718 :                         BBPreclaim(tobedeleted[ndelete]);
     986        7708 :                 GDKfree(tobedeleted);
     987        7668 :                 GDKfree(ba);
     988        7731 :                 TRC_DEBUG(ALGO, "with %d bats: nil/empty -> " ALGOOPTBATFMT
     989             :                           " " LLFMT " usec\n",
     990             :                           n, ALGOOPTBATPAR(bn), GDKusec() - t0);
     991        7731 :                 return bn;
     992             :         }
     993             : 
     994       38669 :         bi = bat_iterator(b);
     995       38669 :         if (nonil && ATOMstorage(tpe) == TYPE_str && bi.restricted == BAT_READ) {
     996        6374 :                 stringtrick = true;
     997        6374 :                 bn = COLnew2(ba[0].hlo, tpe, ba[0].cnt, TRANSIENT, bi.width);
     998        6357 :                 if (bn && bn->tvheap) {
     999             :                         /* no need to remove any files since they were
    1000             :                          * never created for this bat */
    1001        6358 :                         HEAPdecref(bn->tvheap, false);
    1002        6370 :                         bn->tvheap = NULL;
    1003             :                 }
    1004        6369 :                 tpe = bi.width == 1 ? TYPE_bte : (bi.width == 2 ? TYPE_sht : (bi.width == 4 ? TYPE_int : TYPE_lng));
    1005             :         } else {
    1006       32295 :                 bn = COLnew(ba[0].hlo, tpe, ba[0].cnt, TRANSIENT);
    1007             :         }
    1008       38510 :         if (bn == NULL) {
    1009           0 :                 bat_iterator_end(&bi);
    1010           0 :                 goto bunins_failed;
    1011             :         }
    1012             : 
    1013       38510 :         assert(ba[n - 1].b == b);
    1014       38510 :         ba[n - 1].t = bi.base;
    1015       38510 :         if (ATOMtype(b->ttype) == TYPE_oid) {
    1016             :                 /* oid all the way */
    1017         664 :                 oid *d = (oid *) Tloc(bn, 0);
    1018         664 :                 assert(!stringtrick);
    1019    15100537 :                 TIMEOUT_LOOP_IDX_DECL(p, ba[0].cnt, qry_ctx) {
    1020    15096725 :                         oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p];
    1021    60221222 :                         for (int i = 1; i < n; i++) {
    1022    45124490 :                                 if (is_oid_nil(o)) {
    1023           0 :                                         bn->tnil = true;
    1024           0 :                                         break;
    1025             :                                 }
    1026    45124490 :                                 if (o < ba[i].hlo || o >= ba[i].hhi) {
    1027           0 :                                         GDKerror("does not match always\n");
    1028           0 :                                         bat_iterator_end(&bi);
    1029           0 :                                         goto bunins_failed;
    1030             :                                 }
    1031    45124490 :                                 o -= ba[i].hlo;
    1032    45124490 :                                 o = ba[i].ci.s ?
    1033    23715792 :                                     (ba[i].ci.tpe == cand_dense) ?
    1034    23715792 :                                         canditer_idx_dense(&ba[i].ci, o) :
    1035    68065802 :                                         canditer_idx(&ba[i].ci, o) : ba[i].t[o];
    1036             :                         }
    1037    15096732 :                         *d++ = o;
    1038             :                 }
    1039       37846 :         } else if (!ATOMvarsized(tpe)) {
    1040       37716 :                 const void *v;
    1041       37716 :                 char *d = Tloc(bn, 0);
    1042             : 
    1043       37716 :                 bn->tnil = false;
    1044       37716 :                 n--;    /* stop one before the end, also ba[n] is last */
    1045   123207113 :                 TIMEOUT_LOOP_IDX_DECL(p, ba[0].cnt, qry_ctx) {
    1046   123079255 :                         oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p];
    1047             : 
    1048   278586164 :                         for (int i = 1; i < n; i++) {
    1049   155563337 :                                 if (is_oid_nil(o)) {
    1050          15 :                                         bn->tnil = true;
    1051          15 :                                         break;
    1052             :                                 }
    1053   155563322 :                                 if (o < ba[i].hlo || o >= ba[i].hhi) {
    1054           0 :                                         GDKerror("does not match always\n");
    1055           0 :                                         bat_iterator_end(&bi);
    1056           0 :                                         goto bunins_failed;
    1057             :                                 }
    1058   155563322 :                                 o -= ba[i].hlo;
    1059   155563322 :                                 o = ba[i].ci.s ?
    1060    53308893 :                                     (ba[i].ci.tpe == cand_dense) ?
    1061    53308893 :                                         canditer_idx_dense(&ba[i].ci, o) :
    1062   198609730 :                                         canditer_idx(&ba[i].ci, o) : ba[i].t[o];
    1063             :                         }
    1064   123022842 :                         if (is_oid_nil(o)) {
    1065          15 :                                 assert(!stringtrick);
    1066          15 :                                 bn->tnil = true;
    1067          15 :                                 v = nil;
    1068   123022827 :                         } else if (o < ba[n].hlo || o >= ba[n].hhi) {
    1069           0 :                                 GDKerror("does not match always\n");
    1070           0 :                                 bat_iterator_end(&bi);
    1071           0 :                                 goto bunins_failed;
    1072             :                         } else {
    1073   123022827 :                                 o -= ba[n].hlo;
    1074   123022827 :                                 v = (const char *) bi.base + (o << bi.shift);
    1075             :                         }
    1076   123022842 :                         if (ATOMputFIX(tpe, d, v) != GDK_SUCCEED) {
    1077           0 :                                 bat_iterator_end(&bi);
    1078           0 :                                 goto bunins_failed;
    1079             :                         }
    1080   123079570 :                         d += bi.width;
    1081             :                 }
    1082       37891 :                 if (stringtrick) {
    1083        6375 :                         bn->tnil = false;
    1084        6375 :                         bn->tnonil = bi.nonil;
    1085        6375 :                         bn->tkey = false;
    1086        6375 :                         bn->tascii = bi.ascii;
    1087        6375 :                         assert(bn->tvheap == NULL);
    1088        6375 :                         bn->tvheap = bi.vh;
    1089        6375 :                         HEAPincref(bi.vh);
    1090        6373 :                         assert(bn->tvheap->parentid != bn->batCacheid);
    1091        6373 :                         BBPretain(bn->tvheap->parentid);
    1092        6381 :                         assert(bn->ttype == b->ttype);
    1093        6381 :                         assert(bn->twidth == bi.width);
    1094        6381 :                         assert(bn->tshift == bi.shift);
    1095             :                 }
    1096             :                 n++;            /* undo for debug print */
    1097             :         } else {
    1098         130 :                 const void *v;
    1099             : 
    1100         130 :                 assert(!stringtrick);
    1101         130 :                 bn->tnil = false;
    1102         130 :                 n--;    /* stop one before the end, also ba[n] is last */
    1103      670717 :                 TIMEOUT_LOOP_IDX_DECL(p, ba[0].cnt, qry_ctx) {
    1104      670419 :                         oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p];
    1105     1342178 :                         for (int i = 1; i < n; i++) {
    1106      671761 :                                 if (is_oid_nil(o)) {
    1107           0 :                                         bn->tnil = true;
    1108           0 :                                         break;
    1109             :                                 }
    1110      671761 :                                 if (o < ba[i].hlo || o >= ba[i].hhi) {
    1111           0 :                                         GDKerror("does not match always\n");
    1112           0 :                                         bat_iterator_end(&bi);
    1113           0 :                                         goto bunins_failed;
    1114             :                                 }
    1115      671761 :                                 o -= ba[i].hlo;
    1116      671761 :                                 o = ba[i].ci.s ?
    1117        5012 :                                     (ba[i].ci.tpe == cand_dense) ?
    1118        5012 :                                         canditer_idx_dense(&ba[i].ci, o) :
    1119      676537 :                                         canditer_idx(&ba[i].ci, o) : ba[i].t[o];
    1120             :                         }
    1121      670417 :                         if (is_oid_nil(o)) {
    1122           0 :                                 bn->tnil = true;
    1123           0 :                                 v = nil;
    1124      670417 :                         } else if (o < ba[n].hlo || o >= ba[n].hhi) {
    1125           0 :                                 GDKerror("does not match always\n");
    1126           0 :                                 bat_iterator_end(&bi);
    1127           0 :                                 goto bunins_failed;
    1128             :                         } else {
    1129      670417 :                                 o -= ba[n].hlo;
    1130      670417 :                                 v = BUNtail(bi, o);
    1131             :                         }
    1132      670417 :                         if (bunfastapp(bn, v) != GDK_SUCCEED) {
    1133           0 :                                 bat_iterator_end(&bi);
    1134           0 :                                 goto bunins_failed;
    1135             :                         }
    1136             :                 }
    1137             :                 n++;            /* undo for debug print */
    1138             :         }
    1139       38696 :         bat_iterator_end(&bi);
    1140       38684 :         TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
    1141       38662 :         BATsetcount(bn, ba[0].cnt);
    1142       38684 :         bn->tsorted = (ba[0].cnt <= 1) | issorted;
    1143       38684 :         bn->trevsorted = ba[0].cnt <= 1;
    1144       38684 :         bn->tnonil = nonil & b->tnonil;
    1145       38684 :         bn->tseqbase = oid_nil;
    1146       38684 :         bn->tkey = (ba[0].cnt <= 1);
    1147       38684 :         double est = 0;
    1148      167804 :         for (int i = 0; i < n; i++) {
    1149      129120 :                 double nest = ba[i].b->tunique_est?ba[i].b->tunique_est:BATcount(ba[i].b);
    1150      129120 :                 if (est)
    1151       90691 :                         est = MIN(est, nest);
    1152             :                 else
    1153             :                         est = nest;
    1154             :         }
    1155       38684 :         bn->tunique_est = est;
    1156             :         /* note, b may point to one of the bats in tobedeleted, so
    1157             :          * reclaim after the last use of b */
    1158       38684 :         while (ndelete-- > 0)
    1159       38658 :                 BBPreclaim(tobedeleted[ndelete]);
    1160       38636 :         GDKfree(tobedeleted);
    1161       38559 :         GDKfree(ba);
    1162       38754 :         TRC_DEBUG(ALGO, "with %d bats: " ALGOOPTBATFMT " " LLFMT " usec\n",
    1163             :                   n, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1164             :         return bn;
    1165             : 
    1166           0 :   bunins_failed:
    1167           0 :         while (ndelete-- > 0)
    1168           0 :                 BBPreclaim(tobedeleted[ndelete]);
    1169           0 :         GDKfree(tobedeleted);
    1170           0 :         GDKfree(ba);
    1171           0 :         BBPreclaim(bn);
    1172           0 :         TRC_DEBUG(ALGO, "failed " LLFMT "usec\n", GDKusec() - t0);
    1173             :         return NULL;
    1174             : }

Generated by: LCOV version 1.14