LCOV - code coverage report
Current view: top level - gdk - gdk_select.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 843 1057 79.8 %
Date: 2024-11-15 19:37:45 Functions: 23 23 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "gdk.h"
      15             : #include "gdk_private.h"
      16             : 
      17             : static inline oid *
      18   106017864 : buninsfix(BAT *bn, oid *a, BUN i, oid v, BUN g, BUN m)
      19             : {
      20   106017864 :         if (i == BATcapacity(bn)) {
      21          32 :                 BATsetcount(bn, i);
      22          32 :                 if (BATextend(bn, MIN(BATcapacity(bn) + g, m)) != GDK_SUCCEED)
      23             :                         return NULL;
      24          32 :                 a = (oid *) Tloc(bn, 0);
      25             :         }
      26   106017864 :         a[i] = v;
      27   106017864 :         return a;
      28             : }
      29             : 
      30             : BAT *
      31     2071973 : virtualize(BAT *bn)
      32             : {
      33             :         /* input must be a valid candidate list or NULL */
      34     2071973 :         if (bn == NULL)
      35             :                 return NULL;
      36     2071973 :         if ((bn->ttype != TYPE_void && bn->ttype != TYPE_oid) || !bn->tkey || !bn->tsorted) {
      37           0 :                 fprintf(stderr, "#bn type %d nil %d key %d sorted %d\n",
      38           0 :                         bn->ttype, is_oid_nil(bn->tseqbase),
      39           0 :                         bn->tkey, bn->tsorted);
      40           0 :                 fflush(stderr);
      41             :         }
      42     2072241 :         assert(((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) ||
      43             :                 bn->ttype == TYPE_oid) &&
      44             :                bn->tkey && bn->tsorted);
      45     2072241 :         assert(BBP_refs(bn->batCacheid) == 1);
      46     2072241 :         assert(BBP_lrefs(bn->batCacheid) == 0);
      47             :         /* since bn has unique and strictly ascending values, we can
      48             :          * easily check whether the column is dense */
      49     2072241 :         if (bn->ttype == TYPE_oid &&
      50     1435847 :             (BATcount(bn) <= 1 ||
      51      389979 :              * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 ==
      52      389979 :              * (const oid *) Tloc(bn, BATcount(bn) - 1))) {
      53             :                 /* column is dense, replace by virtual oid */
      54     1085179 :                 oid tseq;       /* work around bug in Intel compiler */
      55     1085179 :                 if (BATcount(bn) == 0)
      56             :                         tseq = 0;
      57             :                 else
      58      413378 :                         tseq = * (const oid *) Tloc(bn, 0);
      59     1085179 :                 TRC_DEBUG(ALGO, ALGOBATFMT ",seq=" OIDFMT "\n",
      60             :                           ALGOBATPAR(bn), tseq);
      61     1084979 :                 bn->tseqbase = tseq;
      62     1084979 :                 if (VIEWtparent(bn)) {
      63           7 :                         Heap *h = GDKmalloc(sizeof(Heap));
      64           7 :                         if (h == NULL) {
      65           0 :                                 BBPunfix(bn->batCacheid);
      66           0 :                                 return NULL;
      67             :                         }
      68           7 :                         *h = *bn->theap;
      69           7 :                         settailname(h, BBP_physical(bn->batCacheid), TYPE_oid, 0);
      70           7 :                         h->parentid = bn->batCacheid;
      71           7 :                         h->base = NULL;
      72           7 :                         h->hasfile = false;
      73           7 :                         ATOMIC_INIT(&h->refs, 1);
      74           7 :                         if (bn->theap->parentid != bn->batCacheid)
      75           7 :                                 BBPrelease(bn->theap->parentid);
      76           7 :                         HEAPdecref(bn->theap, false);
      77           7 :                         bn->theap = h;
      78             :                 } else {
      79     1084972 :                         HEAPfree(bn->theap, true);
      80             :                 }
      81     1085196 :                 bn->theap->storage = bn->theap->newstorage = STORE_MEM;
      82     1085196 :                 bn->theap->size = 0;
      83     1085196 :                 bn->ttype = TYPE_void;
      84     1085196 :                 bn->twidth = 0;
      85     1085196 :                 bn->tshift = 0;
      86             :         }
      87             : 
      88             :         return bn;
      89             : }
      90             : 
      91             : #define HASHloop_bound(bi, h, hb, v, lo, hi)            \
      92             :         for (hb = HASHget(h, HASHprobe((h), v));        \
      93             :              hb != BUN_NONE;                            \
      94             :              hb = HASHgetlink(h,hb))                    \
      95             :                 if (hb >= (lo) && hb < (hi) &&            \
      96             :                     (cmp == NULL ||                     \
      97             :                      (*cmp)(v, BUNtail(bi, hb)) == 0))
      98             : 
      99             : static BAT *
     100       57273 : hashselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
     101             :            const void *tl, BUN maximum, bool havehash, bool phash,
     102             :            const char **algo)
     103             : {
     104       57273 :         BUN i, cnt;
     105       57273 :         oid o, *restrict dst;
     106       57273 :         BUN l, h, d = 0;
     107       57273 :         oid seq;
     108       57273 :         int (*cmp)(const void *, const void *);
     109       57273 :         BAT *b2 = NULL;
     110       57273 :         BATiter pbi = {0};
     111             : 
     112       57273 :         size_t counter = 0;
     113       57273 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     114             : 
     115       57169 :         assert(bn->ttype == TYPE_oid);
     116       57169 :         seq = bi->b->hseqbase;
     117       57169 :         l = ci->seq - seq;
     118       57169 :         h = canditer_last(ci) + 1 - seq;
     119             : 
     120       57169 :         *algo = "hashselect";
     121       57169 :         if (phash && (b2 = BATdescriptor(VIEWtparent(bi->b))) != NULL) {
     122       56862 :                 *algo = "hashselect on parent";
     123       56862 :                 TRC_DEBUG(ALGO, ALGOBATFMT
     124             :                           " using parent(" ALGOBATFMT ") "
     125             :                           "for hash\n",
     126             :                           ALGOBATPAR(bi->b),
     127             :                           ALGOBATPAR(b2));
     128       56862 :                 d = bi->baseoff - b2->tbaseoff;
     129       56862 :                 l += d;
     130       56862 :                 h += d;
     131       56862 :                 pbi = bat_iterator(b2);
     132       56862 :                 bi = &pbi;
     133             :         } else {
     134             :                 phash = false;
     135             :         }
     136             : 
     137       57512 :         if (!havehash) {
     138           2 :                 if (BAThash(bi->b) != GDK_SUCCEED) {
     139           0 :                         BBPreclaim(bn);
     140           0 :                         BBPreclaim(b2);
     141           0 :                         if (phash)
     142           0 :                                 bat_iterator_end(&pbi);
     143           0 :                         return NULL;
     144             :                 }
     145           2 :                 MT_rwlock_rdlock(&bi->b->thashlock);
     146           2 :                 if (bi->b->thash == NULL) {
     147           0 :                         GDKerror("Hash destroyed before we could use it\n");
     148           0 :                         goto bailout;
     149             :                 }
     150             :         }
     151      115021 :         switch (ATOMbasetype(bi->type)) {
     152             :         case TYPE_bte:
     153             :         case TYPE_sht:
     154             :                 cmp = NULL;     /* no need to compare: "hash" is perfect */
     155             :                 break;
     156       57505 :         default:
     157       57505 :                 cmp = ATOMcompare(bi->type);
     158       57505 :                 break;
     159             :         }
     160       57512 :         dst = (oid *) Tloc(bn, 0);
     161       57512 :         cnt = 0;
     162       57512 :         if (ci->tpe != cand_dense) {
     163       23544 :                 HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
     164        9944 :                         GDK_CHECK_TIMEOUT(qry_ctx, counter,
     165             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     166        9944 :                         o = (oid) (i + seq - d);
     167        9944 :                         if (canditer_contains(ci, o)) {
     168       19080 :                                 dst = buninsfix(bn, dst, cnt, o,
     169        9541 :                                                 maximum - BATcapacity(bn),
     170             :                                                 maximum);
     171        9539 :                                 if (dst == NULL)
     172           0 :                                         goto bailout;
     173        9539 :                                 cnt++;
     174             :                         }
     175             :                 }
     176             :         } else {
     177      180578 :                 HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
     178       68323 :                         GDK_CHECK_TIMEOUT(qry_ctx, counter,
     179             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     180       68323 :                         o = (oid) (i + seq - d);
     181      136624 :                         dst = buninsfix(bn, dst, cnt, o,
     182       68323 :                                         maximum - BATcapacity(bn),
     183             :                                         maximum);
     184       68301 :                         if (dst == NULL)
     185           0 :                                 goto bailout;
     186       68301 :                         cnt++;
     187             :                 }
     188             :         }
     189       57519 :         MT_rwlock_rdunlock(&bi->b->thashlock);
     190       57541 :         BBPreclaim(b2);
     191       57510 :         BATsetcount(bn, cnt);
     192       57498 :         bn->tkey = true;
     193       57498 :         if (cnt > 1) {
     194             :                 /* hash chains produce results in the order high to
     195             :                  * low, so we just need to reverse */
     196       32837 :                 for (l = 0, h = BATcount(bn) - 1; l < h; l++, h--) {
     197       30050 :                         o = dst[l];
     198       30050 :                         dst[l] = dst[h];
     199       30050 :                         dst[h] = o;
     200             :                 }
     201             :         }
     202       57498 :         bn->tsorted = true;
     203       57498 :         bn->trevsorted = bn->batCount <= 1;
     204       57498 :         bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil;
     205       57498 :         if (phash)
     206       56798 :                 bat_iterator_end(&pbi);
     207             :         return bn;
     208             : 
     209           0 :   bailout:
     210           0 :         MT_rwlock_rdunlock(&bi->b->thashlock);
     211           0 :         if (phash)
     212           0 :                 bat_iterator_end(&pbi);
     213           0 :         BBPreclaim(b2);
     214           0 :         BBPreclaim(bn);
     215           0 :         return NULL;
     216             : }
     217             : 
     218             : /* core scan select loop with & without candidates */
     219             : #define scanloop(NAME,canditer_next,TEST)                               \
     220             :         do {                                                            \
     221             :                 BUN ncand = ci->ncand;                                       \
     222             :                 *algo = "select: " #NAME " " #TEST " (" #canditer_next ")"; \
     223             :                 if (BATcapacity(bn) < maximum) {                     \
     224             :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {           \
     225             :                                 o = canditer_next(ci);                  \
     226             :                                 v = src[o-hseq];                        \
     227             :                                 if (TEST) {                             \
     228             :                                         dst = buninsfix(bn, dst, cnt, o, \
     229             :                                                   (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
     230             :                                                          * (dbl) (ncand-p) * 1.1 + 1024), \
     231             :                                                         maximum);       \
     232             :                                         if (dst == NULL) {              \
     233             :                                                 goto bailout;           \
     234             :                                         }                               \
     235             :                                         cnt++;                          \
     236             :                                 }                                       \
     237             :                         }                                               \
     238             :                 } else {                                                \
     239             :                         TIMEOUT_LOOP(ncand, qry_ctx) {                  \
     240             :                                 o = canditer_next(ci);                  \
     241             :                                 v = src[o-hseq];                        \
     242             :                                 assert(cnt < BATcapacity(bn));               \
     243             :                                 dst[cnt] = o;                           \
     244             :                                 cnt += (TEST) != 0;                     \
     245             :                         }                                               \
     246             :                 }                                                       \
     247             :                 TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
     248             :         } while (false)
     249             : 
     250             : /* argument list for type-specific core scan select function call */
     251             : #define scanargs                                                        \
     252             :         bi, ci, bn, tl, th, li, hi, equi, anti, nil_matches, lval, hval, \
     253             :         lnil, cnt, bi->b->hseqbase, dst, maximum, algo
     254             : 
     255             : #define PREVVALUEbte(x) ((x) - 1)
     256             : #define PREVVALUEsht(x) ((x) - 1)
     257             : #define PREVVALUEint(x) ((x) - 1)
     258             : #define PREVVALUElng(x) ((x) - 1)
     259             : #ifdef HAVE_HGE
     260             : #define PREVVALUEhge(x) ((x) - 1)
     261             : #endif
     262             : #define PREVVALUEoid(x) ((x) - 1)
     263             : #define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max)
     264             : #define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max)
     265             : 
     266             : #define NEXTVALUEbte(x) ((x) + 1)
     267             : #define NEXTVALUEsht(x) ((x) + 1)
     268             : #define NEXTVALUEint(x) ((x) + 1)
     269             : #define NEXTVALUElng(x) ((x) + 1)
     270             : #ifdef HAVE_HGE
     271             : #define NEXTVALUEhge(x) ((x) + 1)
     272             : #endif
     273             : #define NEXTVALUEoid(x) ((x) + 1)
     274             : #define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max)
     275             : #define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max)
     276             : 
     277             : #define MINVALUEbte     GDK_bte_min
     278             : #define MINVALUEsht     GDK_sht_min
     279             : #define MINVALUEint     GDK_int_min
     280             : #define MINVALUElng     GDK_lng_min
     281             : #ifdef HAVE_HGE
     282             : #define MINVALUEhge     GDK_hge_min
     283             : #endif
     284             : #define MINVALUEoid     GDK_oid_min
     285             : #define MINVALUEflt     GDK_flt_min
     286             : #define MINVALUEdbl     GDK_dbl_min
     287             : 
     288             : #define MAXVALUEbte     GDK_bte_max
     289             : #define MAXVALUEsht     GDK_sht_max
     290             : #define MAXVALUEint     GDK_int_max
     291             : #define MAXVALUElng     GDK_lng_max
     292             : #ifdef HAVE_HGE
     293             : #define MAXVALUEhge     GDK_hge_max
     294             : #endif
     295             : #define MAXVALUEoid     GDK_oid_max
     296             : #define MAXVALUEflt     GDK_flt_max
     297             : #define MAXVALUEdbl     GDK_dbl_max
     298             : 
     299             : /* definition of type-specific core scan select function */
     300             : #define scanfunc(NAME, TYPE, ISDENSE)                                   \
     301             : static BUN                                                              \
     302             : NAME##_##TYPE(BATiter *bi, struct canditer *restrict ci, BAT *bn,       \
     303             :               const TYPE *tl, const TYPE *th, bool li, bool hi,         \
     304             :               bool equi, bool anti, bool nil_matches, bool lval,        \
     305             :               bool hval, bool lnil, BUN cnt, const oid hseq,            \
     306             :               oid *restrict dst, BUN maximum, const char **algo)        \
     307             : {                                                                       \
     308             :         TYPE vl = *tl;                                                  \
     309             :         TYPE vh = *th;                                                  \
     310             :         TYPE v;                                                         \
     311             :         const TYPE minval = MINVALUE##TYPE;                             \
     312             :         const TYPE maxval = MAXVALUE##TYPE;                             \
     313             :         const TYPE *src = (const TYPE *) bi->base;                   \
     314             :         oid o;                                                          \
     315             :         BUN p;                                                          \
     316             :         (void) li;                                                      \
     317             :         (void) hi;                                                      \
     318             :         (void) lval;                                                    \
     319             :         (void) hval;                                                    \
     320             :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();                      \
     321             :         /* Normalize the variables li, hi, lval, hval, possibly */      \
     322             :         /* changing anti in the process.  This works for all */         \
     323             :         /* (and only) numeric types. */                                 \
     324             :                                                                         \
     325             :         /* Note that the expression x < v is equivalent to x <= */        \
     326             :         /* v' where v' is the next smaller value in the domain */       \
     327             :         /* of v (similarly for x > v).  Also note that for */                \
     328             :         /* floating point numbers there actually is such a */           \
     329             :         /* value.  In fact, there is a function in standard C */        \
     330             :         /* that calculates that value. */                               \
     331             :                                                                         \
     332             :         /* The result is: */                                            \
     333             :         /* li == !anti, hi == !anti, lval == true, hval == true */      \
     334             :         /* This means that all ranges that we check for are */          \
     335             :         /* closed ranges.  If a range is one-sided, we fill in */       \
     336             :         /* the minimum resp. maximum value in the domain so that */     \
     337             :         /* we create a closed range. */                                 \
     338             :         if (anti && li) {                                               \
     339             :                 /* -inf < x < vl === -inf < x <= vl-1 */            \
     340             :                 if (vl == MINVALUE##TYPE) {                             \
     341             :                         /* -inf < x < MIN || *th <[=] x < +inf */   \
     342             :                         /* degenerates into half range */               \
     343             :                         /* *th <[=] x < +inf */                           \
     344             :                         anti = false;                                   \
     345             :                         vl = vh;                                        \
     346             :                         li = !hi;                                       \
     347             :                         hval = false;                                   \
     348             :                         /* further dealt with below */                  \
     349             :                 } else {                                                \
     350             :                         vl = PREVVALUE##TYPE(vl);                       \
     351             :                         li = false;                                     \
     352             :                 }                                                       \
     353             :         }                                                               \
     354             :         if (anti && hi) {                                               \
     355             :                 /* vl < x < +inf === vl+1 <= x < +inf */            \
     356             :                 if (vh == MAXVALUE##TYPE) {                             \
     357             :                         /* -inf < x <[=] *tl || MAX > x > +inf */   \
     358             :                         /* degenerates into half range */               \
     359             :                         /* -inf < x <[=] *tl */                           \
     360             :                         anti = false;                                   \
     361             :                         vh = vl;                                        \
     362             :                         hi = !li;                                       \
     363             :                         lval = false;                                   \
     364             :                         /* further dealt with below */                  \
     365             :                 } else {                                                \
     366             :                         vh = NEXTVALUE##TYPE(vh);                       \
     367             :                         hi = false;                                     \
     368             :                 }                                                       \
     369             :         }                                                               \
     370             :         if (!anti) {                                                    \
     371             :                 if (lval) {                                             \
     372             :                         /* range bounded on left */                     \
     373             :                         if (!li) {                                      \
     374             :                                 /* open range on left */                \
     375             :                                 if (vl == MAXVALUE##TYPE) {             \
     376             :                                         *algo = "select: empty range";        \
     377             :                                         return 0;                       \
     378             :                                 }                                       \
     379             :                                 /* vl < x === vl+1 <= x */                \
     380             :                                 vl = NEXTVALUE##TYPE(vl);               \
     381             :                                 li = true;                              \
     382             :                         }                                               \
     383             :                 } else {                                                \
     384             :                         /* -inf, i.e. smallest value */                 \
     385             :                         vl = MINVALUE##TYPE;                            \
     386             :                         li = true;                                      \
     387             :                         lval = true;                                    \
     388             :                 }                                                       \
     389             :                 if (hval) {                                             \
     390             :                         /* range bounded on right */                    \
     391             :                         if (!hi) {                                      \
     392             :                                 /* open range on right */               \
     393             :                                 if (vh == MINVALUE##TYPE) {             \
     394             :                                         *algo = "select: empty range";        \
     395             :                                         return 0;                       \
     396             :                                 }                                       \
     397             :                                 /* x < vh === x <= vh-1 */                \
     398             :                                 vh = PREVVALUE##TYPE(vh);               \
     399             :                                 hi = true;                              \
     400             :                         }                                               \
     401             :                 } else {                                                \
     402             :                         /* +inf, i.e. largest value */                  \
     403             :                         vh = MAXVALUE##TYPE;                            \
     404             :                         hi = true;                                      \
     405             :                         hval = true;                                    \
     406             :                 }                                                       \
     407             :                 if (vl > vh) {                                               \
     408             :                         *algo = "select: empty range";                        \
     409             :                         return 0;                                       \
     410             :                 }                                                       \
     411             :         }                                                               \
     412             :         /* if anti is set, we can now check */                          \
     413             :         /* (x <= vl || x >= vh) && x != nil */                            \
     414             :         /* if anti is not set, we can check just */                     \
     415             :         /* vl <= x && x <= vh */                                  \
     416             :         /* if equi==true, the check is x == vl */                       \
     417             :         /* note that this includes the check for != nil */              \
     418             :         assert(li == !anti);                                            \
     419             :         assert(hi == !anti);                                            \
     420             :         assert(lval);                                                   \
     421             :         assert(hval);                                                   \
     422             :         if (equi) {                                                     \
     423             :                 if (lnil)                                               \
     424             :                         scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v)); \
     425             :                 else                                                    \
     426             :                         scanloop(NAME, canditer_next##ISDENSE, v == vl); \
     427             :         } else if (anti) {                                              \
     428             :                 if (bi->nonil) {                                     \
     429             :                         scanloop(NAME, canditer_next##ISDENSE, (v <= vl || v >= vh)); \
     430             :                 } else if (nil_matches) {                               \
     431             :                         scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v) || v <= vl || v >= vh); \
     432             :                 } else {                                                \
     433             :                         scanloop(NAME, canditer_next##ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh)); \
     434             :                 }                                                       \
     435             :         } else if (bi->nonil && vl == minval) {                              \
     436             :                 scanloop(NAME, canditer_next##ISDENSE, v <= vh);                     \
     437             :         } else if (vh == maxval) {                                      \
     438             :                 scanloop(NAME, canditer_next##ISDENSE, v >= vl);                     \
     439             :         } else {                                                        \
     440             :                 scanloop(NAME, canditer_next##ISDENSE, v >= vl && v <= vh);       \
     441             :         }                                                               \
     442             :         return cnt;                                                     \
     443             :   bailout:                                                              \
     444             :         BBPreclaim(bn);                                                 \
     445             :         return BUN_NONE;                                                \
     446             : }
     447             : 
     448             : static BUN
     449         553 : fullscan_any(BATiter *bi, struct canditer *restrict ci, BAT *bn,
     450             :              const void *tl, const void *th,
     451             :              bool li, bool hi, bool equi, bool anti, bool nil_matches,
     452             :              bool lval, bool hval, bool lnil, BUN cnt, const oid hseq,
     453             :              oid *restrict dst, BUN maximum, const char **algo)
     454             : {
     455         553 :         const void *v;
     456         553 :         const void *restrict nil = ATOMnilptr(bi->type);
     457         553 :         int (*cmp)(const void *, const void *) = ATOMcompare(bi->type);
     458         553 :         oid o;
     459         553 :         BUN p, ncand = ci->ncand;
     460         553 :         int c;
     461             : 
     462         553 :         (void) maximum;
     463         553 :         (void) lnil;
     464         553 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     465             : 
     466         554 :         if (equi) {
     467         543 :                 *algo = "select: fullscan equi";
     468         543 :                 if (ci->tpe == cand_dense) {
     469     9322732 :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     470     9320959 :                                 o = canditer_next_dense(ci);
     471     9320959 :                                 v = BUNtail(*bi, o-hseq);
     472     9320959 :                                 if ((*cmp)(tl, v) == 0) {
     473     6626554 :                                         dst = buninsfix(bn, dst, cnt, o,
     474     2198929 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     475     2198929 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     476             :                                                         maximum);
     477     2228696 :                                         if (dst == NULL) {
     478           0 :                                                 BBPreclaim(bn);
     479           0 :                                                 return BUN_NONE;
     480             :                                         }
     481     2228696 :                                         cnt++;
     482             :                                 }
     483             :                         }
     484             :                 } else {
     485     1404435 :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     486     1403739 :                                 o = canditer_next(ci);
     487     1404317 :                                 v = BUNtail(*bi, o-hseq);
     488     1404317 :                                 if ((*cmp)(tl, v) == 0) {
     489       61624 :                                         dst = buninsfix(bn, dst, cnt, o,
     490       20534 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     491       20534 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     492             :                                                         maximum);
     493       20556 :                                         if (dst == NULL) {
     494           0 :                                                 BBPreclaim(bn);
     495           0 :                                                 return BUN_NONE;
     496             :                                         }
     497       20556 :                                         cnt++;
     498             :                                 }
     499             :                         }
     500             :                 }
     501          11 :         } else if (anti) {
     502           2 :                 *algo = "select: fullscan anti";
     503           2 :                 if (ci->tpe == cand_dense) {
     504          14 :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     505           8 :                                 o = canditer_next_dense(ci);
     506           8 :                                 v = BUNtail(*bi, o-hseq);
     507           8 :                                 bool isnil = nil != NULL && (*cmp)(v, nil) == 0;
     508           8 :                                 if ((nil_matches && isnil) ||
     509           8 :                                     (!isnil &&
     510           8 :                                      ((lval &&
     511           8 :                                        ((c = (*cmp)(tl, v)) > 0 ||
     512           6 :                                         (!li && c == 0))) ||
     513           6 :                                       (hval &&
     514           6 :                                        ((c = (*cmp)(th, v)) < 0 ||
     515           4 :                                         (!hi && c == 0)))))) {
     516          12 :                                         dst = buninsfix(bn, dst, cnt, o,
     517           4 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     518           4 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     519             :                                                         maximum);
     520           4 :                                         if (dst == NULL) {
     521           0 :                                                 BBPreclaim(bn);
     522           0 :                                                 return BUN_NONE;
     523             :                                         }
     524           4 :                                         cnt++;
     525             :                                 }
     526             :                         }
     527             :                 } else {
     528           0 :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     529           0 :                                 o = canditer_next(ci);
     530           0 :                                 v = BUNtail(*bi, o-hseq);
     531           0 :                                 bool isnil = nil != NULL && (*cmp)(v, nil) == 0;
     532           0 :                                 if ((nil_matches && isnil) ||
     533           0 :                                     (!isnil &&
     534           0 :                                      ((lval &&
     535           0 :                                        ((c = (*cmp)(tl, v)) > 0 ||
     536           0 :                                         (!li && c == 0))) ||
     537           0 :                                       (hval &&
     538           0 :                                        ((c = (*cmp)(th, v)) < 0 ||
     539           0 :                                         (!hi && c == 0)))))) {
     540           0 :                                         dst = buninsfix(bn, dst, cnt, o,
     541           0 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     542           0 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     543             :                                                         maximum);
     544           0 :                                         if (dst == NULL) {
     545           0 :                                                 BBPreclaim(bn);
     546           0 :                                                 return BUN_NONE;
     547             :                                         }
     548           0 :                                         cnt++;
     549             :                                 }
     550             :                         }
     551             :                 }
     552             :         } else {
     553           9 :                 *algo = "select: fullscan range";
     554           9 :                 if (ci->tpe == cand_dense) {
     555        4910 :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     556        4889 :                                 o = canditer_next_dense(ci);
     557        4889 :                                 v = BUNtail(*bi, o-hseq);
     558        4889 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     559        4027 :                                     ((!lval ||
     560        4027 :                                       (c = cmp(tl, v)) < 0 ||
     561        4889 :                                       (li && c == 0)) &&
     562        3906 :                                      (!hval ||
     563        3906 :                                       (c = cmp(th, v)) > 0 ||
     564        3662 :                                       (hi && c == 0)))) {
     565         798 :                                         dst = buninsfix(bn, dst, cnt, o,
     566         266 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     567         266 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     568             :                                                         maximum);
     569         266 :                                         if (dst == NULL) {
     570           0 :                                                 BBPreclaim(bn);
     571           0 :                                                 return BUN_NONE;
     572             :                                         }
     573         266 :                                         cnt++;
     574             :                                 }
     575             :                         }
     576             :                 } else {
     577         232 :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     578         226 :                                 o = canditer_next(ci);
     579         226 :                                 v = BUNtail(*bi, o-hseq);
     580         226 :                                 if ((nil == NULL || (*cmp)(v, nil) != 0) &&
     581         226 :                                     ((!lval ||
     582         226 :                                       (c = cmp(tl, v)) < 0 ||
     583         226 :                                       (li && c == 0)) &&
     584           0 :                                      (!hval ||
     585           0 :                                       (c = cmp(th, v)) > 0 ||
     586           0 :                                       (hi && c == 0)))) {
     587         474 :                                         dst = buninsfix(bn, dst, cnt, o,
     588         158 :                                                         (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     589         158 :                                                                * (dbl) (ncand-p) * 1.1 + 1024),
     590             :                                                         maximum);
     591         158 :                                         if (dst == NULL) {
     592           0 :                                                 BBPreclaim(bn);
     593           0 :                                                 return BUN_NONE;
     594             :                                         }
     595         158 :                                         cnt++;
     596             :                                 }
     597             :                         }
     598             :                 }
     599             :         }
     600         554 :         TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     601             :         return cnt;
     602           0 :   bailout:
     603           0 :         BBPreclaim(bn);
     604             :         return BUN_NONE;
     605             : }
     606             : 
     607             : static BUN
     608      189352 : fullscan_str(BATiter *bi, struct canditer *restrict ci, BAT *bn,
     609             :              const char *tl, const char *th,
     610             :              bool li, bool hi, bool equi, bool anti, bool nil_matches,
     611             :              bool lval, bool hval, bool lnil, BUN cnt, const oid hseq,
     612             :              oid *restrict dst, BUN maximum, const char **algo)
     613             : {
     614      189352 :         var_t pos;
     615      189352 :         BUN p, ncand = ci->ncand;
     616      189352 :         oid o;
     617      189352 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     618             : 
     619      189334 :         if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) &&
     620           0 :             strcmp(tl, str_nil) != 0 &&
     621           0 :             strLocate(bi->vh, str_nil) == (var_t) -2) {
     622             :                 /* anti-equi select for non-nil value, and there are no
     623             :                  * nils, so we can use fast path; trigger by setting
     624             :                  * nonil */
     625           0 :                 bi->nonil = true;
     626             :         }
     627      189334 :         if (!((equi ||
     628         314 :                (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) &&
     629      189323 :               GDK_ELIMDOUBLES(bi->vh)))
     630         451 :                 return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
     631             :                                     nil_matches, lval, hval, lnil, cnt, hseq,
     632             :                                     dst, maximum, algo);
     633      188883 :         if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) {
     634         830 :                 if (anti) {
     635             :                         /* return the whole shebang */
     636          18 :                         *algo = "select: fullscan anti-equi strelim (all)";
     637          18 :                         if (BATextend(bn, ncand) != GDK_SUCCEED) {
     638           0 :                                 BBPreclaim(bn);
     639           0 :                                 return BUN_NONE;
     640             :                         }
     641          18 :                         dst = Tloc(bn, 0);
     642         788 :                         TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     643         752 :                                 dst[p] = canditer_next(ci);
     644             :                         }
     645          18 :                         TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     646             :                         return ncand;
     647             :                 }
     648         812 :                 *algo = "select: fullscan equi strelim (nomatch)";
     649         812 :                 return 0;
     650             :         }
     651      188077 :         if (pos == (var_t) -1) {
     652           0 :                 *algo = NULL;
     653           0 :                 BBPreclaim(bn);
     654           0 :                 return BUN_NONE;
     655             :         }
     656      188077 :         *algo = anti ? "select: fullscan anti-equi strelim" : "select: fullscan equi strelim";
     657      188077 :         assert(pos >= GDK_VAROFFSET);
     658      188077 :         switch (bi->width) {
     659      160413 :         case 1: {
     660      160413 :                 const unsigned char *ptr = (const unsigned char *) bi->base;
     661      160413 :                 pos -= GDK_VAROFFSET;
     662      160413 :                 if (ci->tpe == cand_dense) {
     663      160042 :                         if (anti) {
     664        2334 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     665        2034 :                                         o = canditer_next_dense(ci);
     666        2034 :                                         if (ptr[o - hseq] != pos) {
     667        5604 :                                                 dst = buninsfix(bn, dst, cnt, o,
     668        1868 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     669        1868 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     670             :                                                                 maximum);
     671        1868 :                                                 if (dst == NULL) {
     672           0 :                                                         BBPreclaim(bn);
     673           0 :                                                         return BUN_NONE;
     674             :                                                 }
     675        1868 :                                                 cnt++;
     676             :                                         }
     677             :                                 }
     678             :                         } else {
     679    24774281 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     680    24292305 :                                         o = canditer_next_dense(ci);
     681    24292305 :                                         if (ptr[o - hseq] == pos) {
     682    24354521 :                                                 dst = buninsfix(bn, dst, cnt, o,
     683     8118167 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     684     8118167 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     685             :                                                                 maximum);
     686     8118187 :                                                 if (dst == NULL) {
     687           0 :                                                         BBPreclaim(bn);
     688           0 :                                                         return BUN_NONE;
     689             :                                                 }
     690     8118187 :                                                 cnt++;
     691             :                                         }
     692             :                                 }
     693             :                         }
     694             :                 } else {
     695         371 :                         if (anti) {
     696        1105 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     697        1093 :                                         o = canditer_next(ci);
     698        1093 :                                         if (ptr[o - hseq] != pos) {
     699          39 :                                                 dst = buninsfix(bn, dst, cnt, o,
     700          13 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     701          13 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     702             :                                                                 maximum);
     703          13 :                                                 if (dst == NULL) {
     704           0 :                                                         BBPreclaim(bn);
     705           0 :                                                         return BUN_NONE;
     706             :                                                 }
     707          13 :                                                 cnt++;
     708             :                                         }
     709             :                                 }
     710             :                         } else {
     711    11503415 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     712    11501163 :                                         o = canditer_next(ci);
     713    11496447 :                                         if (ptr[o - hseq] == pos) {
     714    11803893 :                                                 dst = buninsfix(bn, dst, cnt, o,
     715     3933058 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     716     3933058 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     717             :                                                                 maximum);
     718     3937777 :                                                 if (dst == NULL) {
     719           0 :                                                         BBPreclaim(bn);
     720           0 :                                                         return BUN_NONE;
     721             :                                                 }
     722     3937777 :                                                 cnt++;
     723             :                                         }
     724             :                                 }
     725             :                         }
     726             :                 }
     727             :                 break;
     728             :         }
     729       27661 :         case 2: {
     730       27661 :                 const unsigned short *ptr = (const unsigned short *) bi->base;
     731       27661 :                 pos -= GDK_VAROFFSET;
     732       27661 :                 if (ci->tpe == cand_dense) {
     733       25426 :                         if (anti) {
     734        1040 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     735         560 :                                         o = canditer_next_dense(ci);
     736         560 :                                         if (ptr[o - hseq] != pos) {
     737        1200 :                                                 dst = buninsfix(bn, dst, cnt, o,
     738         400 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     739         400 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     740             :                                                                 maximum);
     741         400 :                                                 if (dst == NULL) {
     742           0 :                                                         BBPreclaim(bn);
     743           0 :                                                         return BUN_NONE;
     744             :                                                 }
     745         400 :                                                 cnt++;
     746             :                                         }
     747             :                                 }
     748             :                         } else {
     749     2170207 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     750     2094142 :                                         o = canditer_next_dense(ci);
     751     2094142 :                                         if (ptr[o - hseq] == pos) {
     752      156542 :                                                 dst = buninsfix(bn, dst, cnt, o,
     753       52150 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     754       52150 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     755             :                                                                 maximum);
     756       52242 :                                                 if (dst == NULL) {
     757           0 :                                                         BBPreclaim(bn);
     758           0 :                                                         return BUN_NONE;
     759             :                                                 }
     760       52242 :                                                 cnt++;
     761             :                                         }
     762             :                                 }
     763             :                         }
     764             :                 } else {
     765        2235 :                         if (anti) {
     766         966 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     767         903 :                                         o = canditer_next(ci);
     768         903 :                                         if (ptr[o - hseq] != pos) {
     769        2280 :                                                 dst = buninsfix(bn, dst, cnt, o,
     770         760 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     771         760 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     772             :                                                                 maximum);
     773         760 :                                                 if (dst == NULL) {
     774           0 :                                                         BBPreclaim(bn);
     775           0 :                                                         return BUN_NONE;
     776             :                                                 }
     777         760 :                                                 cnt++;
     778             :                                         }
     779             :                                 }
     780             :                         } else {
     781     1454868 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     782     1448162 :                                         o = canditer_next(ci);
     783     1447393 :                                         if (ptr[o - hseq] == pos) {
     784      182550 :                                                 dst = buninsfix(bn, dst, cnt, o,
     785       60591 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     786       60591 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     787             :                                                                 maximum);
     788       61368 :                                                 if (dst == NULL) {
     789           0 :                                                         BBPreclaim(bn);
     790           0 :                                                         return BUN_NONE;
     791             :                                                 }
     792       61368 :                                                 cnt++;
     793             :                                         }
     794             :                                 }
     795             :                         }
     796             :                 }
     797             :                 break;
     798             :         }
     799             : #if SIZEOF_VAR_T == 8
     800           1 :         case 4: {
     801           1 :                 const unsigned int *ptr = (const unsigned int *) bi->base;
     802           1 :                 if (ci->tpe == cand_dense) {
     803           1 :                         if (anti) {
     804           0 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     805           0 :                                         o = canditer_next_dense(ci);
     806           0 :                                         if (ptr[o - hseq] != pos) {
     807           0 :                                                 dst = buninsfix(bn, dst, cnt, o,
     808           0 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     809           0 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     810             :                                                                 maximum);
     811           0 :                                                 if (dst == NULL) {
     812           0 :                                                         BBPreclaim(bn);
     813           0 :                                                         return BUN_NONE;
     814             :                                                 }
     815           0 :                                                 cnt++;
     816             :                                         }
     817             :                                 }
     818             :                         } else {
     819         973 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     820         970 :                                         o = canditer_next_dense(ci);
     821         970 :                                         if (ptr[o - hseq] == pos) {
     822         744 :                                                 dst = buninsfix(bn, dst, cnt, o,
     823         248 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     824         248 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     825             :                                                                 maximum);
     826         248 :                                                 if (dst == NULL) {
     827           0 :                                                         BBPreclaim(bn);
     828           0 :                                                         return BUN_NONE;
     829             :                                                 }
     830         248 :                                                 cnt++;
     831             :                                         }
     832             :                                 }
     833             :                         }
     834             :                 } else {
     835           0 :                         if (anti) {
     836           0 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     837           0 :                                         o = canditer_next(ci);
     838           0 :                                         if (ptr[o - hseq] != pos) {
     839           0 :                                                 dst = buninsfix(bn, dst, cnt, o,
     840           0 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     841           0 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     842             :                                                                 maximum);
     843           0 :                                                 if (dst == NULL) {
     844           0 :                                                         BBPreclaim(bn);
     845           0 :                                                         return BUN_NONE;
     846             :                                                 }
     847           0 :                                                 cnt++;
     848             :                                         }
     849             :                                 }
     850             :                         } else {
     851           0 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     852           0 :                                         o = canditer_next(ci);
     853           0 :                                         if (ptr[o - hseq] == pos) {
     854           0 :                                                 dst = buninsfix(bn, dst, cnt, o,
     855           0 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     856           0 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     857             :                                                                 maximum);
     858           0 :                                                 if (dst == NULL) {
     859           0 :                                                         BBPreclaim(bn);
     860           0 :                                                         return BUN_NONE;
     861             :                                                 }
     862           0 :                                                 cnt++;
     863             :                                         }
     864             :                                 }
     865             :                         }
     866             :                 }
     867             :                 break;
     868             :         }
     869             : #endif
     870           2 :         default: {
     871           2 :                 const var_t *ptr = (const var_t *) bi->base;
     872           2 :                 if (ci->tpe == cand_dense) {
     873           2 :                         if (anti) {
     874           0 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     875           0 :                                         o = canditer_next_dense(ci);
     876           0 :                                         if (ptr[o - hseq] != pos) {
     877           0 :                                                 dst = buninsfix(bn, dst, cnt, o,
     878           0 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     879           0 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     880             :                                                                 maximum);
     881           0 :                                                 if (dst == NULL) {
     882           0 :                                                         BBPreclaim(bn);
     883           0 :                                                         return BUN_NONE;
     884             :                                                 }
     885           0 :                                                 cnt++;
     886             :                                         }
     887             :                                 }
     888             :                         } else {
     889          26 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     890          20 :                                         o = canditer_next_dense(ci);
     891          20 :                                         if (ptr[o - hseq] == pos) {
     892           6 :                                                 dst = buninsfix(bn, dst, cnt, o,
     893           2 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     894           2 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     895             :                                                                 maximum);
     896           2 :                                                 if (dst == NULL) {
     897           0 :                                                         BBPreclaim(bn);
     898           0 :                                                         return BUN_NONE;
     899             :                                                 }
     900           2 :                                                 cnt++;
     901             :                                         }
     902             :                                 }
     903             :                         }
     904             :                 } else {
     905           0 :                         if (anti) {
     906           0 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     907           0 :                                         o = canditer_next(ci);
     908           0 :                                         if (ptr[o - hseq] != pos) {
     909           0 :                                                 dst = buninsfix(bn, dst, cnt, o,
     910           0 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     911           0 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     912             :                                                                 maximum);
     913           0 :                                                 if (dst == NULL) {
     914           0 :                                                         BBPreclaim(bn);
     915           0 :                                                         return BUN_NONE;
     916             :                                                 }
     917           0 :                                                 cnt++;
     918             :                                         }
     919             :                                 }
     920             :                         } else {
     921           0 :                                 TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
     922           0 :                                         o = canditer_next(ci);
     923           0 :                                         if (ptr[o - hseq] == pos) {
     924           0 :                                                 dst = buninsfix(bn, dst, cnt, o,
     925           0 :                                                                 (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
     926           0 :                                                                        * (dbl) (ncand-p) * 1.1 + 1024),
     927             :                                                                 maximum);
     928           0 :                                                 if (dst == NULL) {
     929           0 :                                                         BBPreclaim(bn);
     930           0 :                                                         return BUN_NONE;
     931             :                                                 }
     932           0 :                                                 cnt++;
     933             :                                         }
     934             :                                 }
     935             :                         }
     936             :                 }
     937             :                 break;
     938             :         }
     939             :         }
     940      188194 :         TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     941             :         return cnt;
     942           0 :   bailout:
     943           0 :         BBPreclaim(bn);
     944             :         return BUN_NONE;
     945             : }
     946             : 
     947             : /* scan select type switch */
     948             : #ifdef HAVE_HGE
     949             : #define scanfunc_hge(NAME, ISDENSE)             \
     950             :         scanfunc(NAME, hge, ISDENSE)
     951             : #else
     952             : #define scanfunc_hge(NAME, ISDENSE)
     953             : #endif
     954             : #define scan_sel(NAME, ISDENSE)                 \
     955             :         scanfunc(NAME, bte, ISDENSE)            \
     956             :         scanfunc(NAME, sht, ISDENSE)            \
     957             :         scanfunc(NAME, int, ISDENSE)            \
     958             :         scanfunc(NAME, flt, ISDENSE)            \
     959             :         scanfunc(NAME, dbl, ISDENSE)            \
     960             :         scanfunc(NAME, lng, ISDENSE)            \
     961             :         scanfunc_hge(NAME, ISDENSE)
     962             : 
     963             : /* scan select */
     964   177294211 : scan_sel(fullscan, )
     965  1272853389 : scan_sel(densescan, _dense)
     966             : 
     967             : #if 0
     968             : /* some programs that produce editor tags files don't recognize the
     969             :  * scanselect function because before it are the scan_del macro
     970             :  * calls that don't look like function definitions or variable
     971             :  * declarations, hence we have this hidden away function to realign the
     972             :  * tags program */
     973             : void
     974             : realign_tags(void)
     975             : {
     976             : }
     977             : #endif
     978             : 
     979             : static BAT *
     980     1256861 : scanselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
     981             :            const void *tl, const void *th,
     982             :            bool li, bool hi, bool equi, bool anti, bool nil_matches,
     983             :            bool lval, bool hval, bool lnil,
     984             :            BUN maximum, const char **algo)
     985             : {
     986             : #ifndef NDEBUG
     987     1256861 :         int (*cmp)(const void *, const void *);
     988             : #endif
     989     1256861 :         int t;
     990     1256861 :         BUN cnt = 0;
     991     1256861 :         oid *restrict dst;
     992             : 
     993     1256861 :         assert(bi->b != NULL);
     994     1256861 :         assert(bn != NULL);
     995     1256861 :         assert(bn->ttype == TYPE_oid);
     996     1256861 :         assert(!lval || tl != NULL);
     997     1256861 :         assert(!hval || th != NULL);
     998     1256861 :         assert(!equi || (li && hi && !anti));
     999     1256861 :         assert(!anti || lval || hval);
    1000     1256861 :         assert(bi->type != TYPE_void || equi || bi->nonil);
    1001             : 
    1002             : #ifndef NDEBUG
    1003     1256861 :         cmp = ATOMcompare(bi->type);
    1004             : #endif
    1005             : 
    1006     1256861 :         assert(!lval || !hval || tl == th || (*cmp)(tl, th) <= 0);
    1007             : 
    1008     1256863 :         dst = (oid *) Tloc(bn, 0);
    1009             : 
    1010     1256863 :         t = ATOMbasetype(bi->type);
    1011             : 
    1012             :         /* call type-specific core scan select function */
    1013     1256863 :         switch (t) {
    1014       33415 :         case TYPE_bte:
    1015       33415 :                 if (ci->tpe == cand_dense)
    1016       32155 :                         cnt = densescan_bte(scanargs);
    1017             :                 else
    1018        1260 :                         cnt = fullscan_bte(scanargs);
    1019             :                 break;
    1020        6599 :         case TYPE_sht:
    1021        6599 :                 if (ci->tpe == cand_dense)
    1022        6566 :                         cnt = densescan_sht(scanargs);
    1023             :                 else
    1024          33 :                         cnt = fullscan_sht(scanargs);
    1025             :                 break;
    1026     1024166 :         case TYPE_int:
    1027     1024166 :                 if (ci->tpe == cand_dense)
    1028      739425 :                         cnt = densescan_int(scanargs);
    1029             :                 else
    1030      284741 :                         cnt = fullscan_int(scanargs);
    1031             :                 break;
    1032          18 :         case TYPE_flt:
    1033          18 :                 if (ci->tpe == cand_dense)
    1034          17 :                         cnt = densescan_flt(scanargs);
    1035             :                 else
    1036           1 :                         cnt = fullscan_flt(scanargs);
    1037             :                 break;
    1038         129 :         case TYPE_dbl:
    1039         129 :                 if (ci->tpe == cand_dense)
    1040         121 :                         cnt = densescan_dbl(scanargs);
    1041             :                 else
    1042           8 :                         cnt = fullscan_dbl(scanargs);
    1043             :                 break;
    1044        2969 :         case TYPE_lng:
    1045        2969 :                 if (ci->tpe == cand_dense)
    1046        2957 :                         cnt = densescan_lng(scanargs);
    1047             :                 else
    1048          12 :                         cnt = fullscan_lng(scanargs);
    1049             :                 break;
    1050             : #ifdef HAVE_HGE
    1051         114 :         case TYPE_hge:
    1052         114 :                 if (ci->tpe == cand_dense)
    1053         105 :                         cnt = densescan_hge(scanargs);
    1054             :                 else
    1055           9 :                         cnt = fullscan_hge(scanargs);
    1056             :                 break;
    1057             : #endif
    1058      189351 :         case TYPE_str:
    1059      189351 :                 cnt = fullscan_str(scanargs);
    1060      189351 :                 break;
    1061         102 :         default:
    1062         102 :                 cnt = fullscan_any(scanargs);
    1063         102 :                 break;
    1064             :         }
    1065     1257324 :         if (cnt == BUN_NONE) {
    1066             :                 return NULL;
    1067             :         }
    1068     1257324 :         assert(bn->batCapacity >= cnt);
    1069             : 
    1070     1257324 :         BATsetcount(bn, cnt);
    1071     1257389 :         bn->tsorted = true;
    1072     1257389 :         bn->trevsorted = bn->batCount <= 1;
    1073     1257389 :         bn->tkey = true;
    1074     1257389 :         bn->tseqbase = cnt == 0 ? 0 : cnt == 1 || cnt == bi->count ? bi->b->hseqbase : oid_nil;
    1075             : 
    1076     1257389 :         return bn;
    1077             : }
    1078             : 
    1079             : #if SIZEOF_BUN == SIZEOF_INT
    1080             : #define CALC_ESTIMATE(TPE)                                              \
    1081             :         do {                                                            \
    1082             :                 if (*(TPE*)tl < 1) {                                 \
    1083             :                         if ((int) BUN_MAX + *(TPE*)tl >= *(TPE*)th)  \
    1084             :                                 estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
    1085             :                 } else {                                                \
    1086             :                         if (-(int) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
    1087             :                                 estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
    1088             :                 }                                                       \
    1089             :         } while (0)
    1090             : #else
    1091             : #define CALC_ESTIMATE(TPE)                                              \
    1092             :         do {                                                            \
    1093             :                 if (*(TPE*)tl < 1) {                                 \
    1094             :                         if ((lng) BUN_MAX + *(TPE*)tl >= *(TPE*)th)  \
    1095             :                                 estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
    1096             :                 } else {                                                \
    1097             :                         if (-(lng) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
    1098             :                                 estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
    1099             :                 }                                                       \
    1100             :         } while (0)
    1101             : #endif
    1102             : 
    1103             : static enum range_comp_t
    1104     1839406 : BATrange(BATiter *bi, const void *tl, const void *th, bool li, bool hi)
    1105             : {
    1106     1839406 :         enum range_comp_t range;
    1107     1839406 :         const ValRecord *minprop = NULL, *maxprop = NULL;
    1108     1839406 :         const void *minval = NULL, *maxval = NULL;
    1109     1839406 :         bool maxincl = true;
    1110     1839406 :         BAT *pb = NULL;
    1111     1839406 :         int c;
    1112     1839406 :         int (*atomcmp) (const void *, const void *) = ATOMcompare(bi->type);
    1113     1839406 :         BATiter bi2 = *bi;
    1114             : 
    1115     1839406 :         if (tl && (*atomcmp)(tl, ATOMnilptr(bi->type)) == 0)
    1116       12404 :                 tl = NULL;
    1117     1839800 :         if (th && (*atomcmp)(th, ATOMnilptr(bi->type)) == 0)
    1118        8769 :                 th = NULL;
    1119     1840962 :         if (tl == NULL && th == NULL)
    1120             :                 return range_contains; /* looking for everything */
    1121             : 
    1122     1835494 :         if (VIEWtparent(bi->b))
    1123     1660550 :                 pb = BATdescriptor(VIEWtparent(bi->b));
    1124             : 
    1125             :         /* keep locked while we look at the property values */
    1126     1835851 :         MT_lock_set(&bi->b->theaplock);
    1127     1835388 :         if (bi->sorted && (bi->nonil || atomcmp(BUNtail(*bi, 0), ATOMnilptr(bi->type)) != 0))
    1128      308235 :                 minval = BUNtail(*bi, 0);
    1129     1527156 :         else if (bi->revsorted && (bi->nonil || atomcmp(BUNtail(*bi, bi->count - 1), ATOMnilptr(bi->type)) != 0))
    1130      131784 :                 minval = BUNtail(*bi, bi->count - 1);
    1131     1395373 :         else if (bi->minpos != BUN_NONE)
    1132       27168 :                 minval = BUNtail(*bi, bi->minpos);
    1133     1368205 :         else if ((minprop = BATgetprop_nolock(bi->b, GDK_MIN_BOUND)) != NULL)
    1134           0 :                 minval = VALptr(minprop);
    1135     1835302 :         if (bi->sorted && (bi->nonil || atomcmp(BUNtail(bi2, bi->count - 1), ATOMnilptr(bi->type)) != 0)) {
    1136      308662 :                 maxval = BUNtail(bi2, bi->count - 1);
    1137             :                 maxincl = true;
    1138     1526243 :         } else if (bi->revsorted && (bi->nonil || atomcmp(BUNtail(bi2, 0), ATOMnilptr(bi->type)) != 0)) {
    1139      132438 :                 maxval = BUNtail(bi2, 0);
    1140             :                 maxincl = true;
    1141     1393805 :         } else if (bi->maxpos != BUN_NONE) {
    1142       27112 :                 maxval = BUNtail(bi2, bi->maxpos);
    1143             :                 maxincl = true;
    1144     1366693 :         } else if ((maxprop = BATgetprop_nolock(bi->b, GDK_MAX_BOUND)) != NULL) {
    1145           0 :                 maxval = VALptr(maxprop);
    1146           0 :                 maxincl = false;
    1147             :         }
    1148     1834933 :         bool keep = false;      /* keep lock on parent bat? */
    1149     1834933 :         if (minval == NULL || maxval == NULL) {
    1150     1367857 :                 if (pb != NULL) {
    1151     1279641 :                         MT_lock_set(&pb->theaplock);
    1152     1279987 :                         if (minval == NULL && (minprop = BATgetprop_nolock(pb, GDK_MIN_BOUND)) != NULL) {
    1153           6 :                                 keep = true;
    1154           6 :                                 minval = VALptr(minprop);
    1155             :                         }
    1156     1279971 :                         if (maxval == NULL && (maxprop = BATgetprop_nolock(pb, GDK_MAX_BOUND)) != NULL) {
    1157           2 :                                 keep = true;
    1158           2 :                                 maxval = VALptr(maxprop);
    1159           2 :                                 maxincl = true;
    1160             :                         }
    1161     1279898 :                         if (!keep) {
    1162     1279911 :                                 MT_lock_unset(&pb->theaplock);
    1163             :                         }
    1164             :                 }
    1165             :         }
    1166             : 
    1167     1835377 :         if (minval == NULL && maxval == NULL) {
    1168             :                 range = range_inside; /* strictly: unknown */
    1169      468072 :         } else if (maxval &&
    1170      461416 :                    tl &&
    1171      461348 :                    ((c = atomcmp(tl, maxval)) > 0 ||
    1172         553 :                     ((!maxincl || !li) && c == 0))) {
    1173             :                 range = range_after;
    1174      397912 :         } else if (minval &&
    1175      396716 :                    th &&
    1176      396733 :                    ((c = atomcmp(th, minval)) < 0 ||
    1177      356340 :                     (!hi && c == 0))) {
    1178             :                 range = range_before;
    1179      357451 :         } else if (tl == NULL) {
    1180        6536 :                 if (minval == NULL) {
    1181          16 :                         c = atomcmp(th, maxval);
    1182          16 :                         if (c < 0 || ((maxincl || !hi) && c == 0))
    1183             :                                 range = range_atstart;
    1184             :                         else
    1185        7097 :                                 range = range_contains;
    1186             :                 } else {
    1187        6520 :                         c = atomcmp(th, minval);
    1188        6521 :                         if (c < 0 || (!hi && c == 0))
    1189             :                                 range = range_before;
    1190        6520 :                         else if (maxval == NULL)
    1191             :                                 range = range_atstart;
    1192             :                         else {
    1193        6518 :                                 c = atomcmp(th, maxval);
    1194        6518 :                                 if (c < 0 || ((maxincl || !hi) && c == 0))
    1195             :                                         range = range_atstart;
    1196             :                                 else
    1197        7097 :                                         range = range_contains;
    1198             :                         }
    1199             :                 }
    1200      350915 :         } else if (th == NULL) {
    1201         748 :                 if (maxval == NULL) {
    1202           2 :                         c = atomcmp(tl, minval);
    1203           2 :                         if (c >= 0)
    1204             :                                 range = range_atend;
    1205             :                         else
    1206        7097 :                                 range = range_contains;
    1207             :                 } else {
    1208         746 :                         c = atomcmp(tl, maxval);
    1209         746 :                         if (c > 0 || ((!maxincl || !li) && c == 0))
    1210             :                                 range = range_after;
    1211         746 :                         else if (minval == NULL)
    1212             :                                 range = range_atend;
    1213             :                         else {
    1214         686 :                                 c = atomcmp(tl, minval);
    1215         686 :                                 if (c >= 0)
    1216             :                                         range = range_atend;
    1217             :                                 else
    1218        7097 :                                         range = range_contains;
    1219             :                         }
    1220             :                 }
    1221      350167 :         } else if (minval == NULL) {
    1222         479 :                 c = atomcmp(th, maxval);
    1223         479 :                 if (c < 0 || ((maxincl || !hi) && c == 0))
    1224             :                         range = range_inside;
    1225             :                 else
    1226       19909 :                         range = range_atend;
    1227      349688 :         } else if (maxval == NULL) {
    1228           6 :                 c = atomcmp(tl, minval);
    1229           6 :                 if (c >= 0)
    1230             :                         range = range_inside;
    1231             :                 else
    1232         248 :                         range = range_atstart;
    1233             :         } else {
    1234      349682 :                 c = atomcmp(tl, minval);
    1235      349704 :                 if (c >= 0) {
    1236      349148 :                         c = atomcmp(th, maxval);
    1237      349254 :                         if (c < 0 || ((maxincl || !hi) && c == 0))
    1238             :                                 range = range_inside;
    1239             :                         else
    1240       19909 :                                 range = range_atend;
    1241             :                 } else {
    1242         556 :                         c = atomcmp(th, maxval);
    1243         556 :                         if (c < 0 || ((maxincl || !hi) && c == 0))
    1244             :                                 range = range_atstart;
    1245             :                         else
    1246        7097 :                                 range = range_contains;
    1247             :                 }
    1248             :         }
    1249             : 
    1250     1835557 :         MT_lock_unset(&bi->b->theaplock);
    1251     1836019 :         if (pb) {
    1252     1660868 :                 if (keep)
    1253           6 :                         MT_lock_unset(&pb->theaplock);
    1254     1660868 :                 BBPreclaim(pb);
    1255             :         }
    1256             : 
    1257             :         return range;
    1258             : }
    1259             : 
    1260             : /* generic range select
    1261             :  *
    1262             :  * Return a BAT with the OID values of b for qualifying tuples.  The
    1263             :  * return BAT is sorted (i.e. in the same order as the input BAT).
    1264             :  *
    1265             :  * If s is non-NULL, it is a list of candidates.  s must be sorted.
    1266             :  *
    1267             :  * tl may not be NULL, li, hi, and anti must be either false or true.
    1268             :  *
    1269             :  * If th is NULL, hi is ignored.
    1270             :  *
    1271             :  * If anti is false, qualifying tuples are those whose value is between
    1272             :  * tl and th (as in x >[=] tl && x <[=] th, where equality depends on li
    1273             :  * and hi--so if tl > th, nothing will be returned).  If li or hi is
    1274             :  * true, the respective boundary is inclusive, otherwise exclusive.  If
    1275             :  * th is NULL it is taken to be equal to tl, turning this into an equi-
    1276             :  * or point-select.  Note that for a point select to return anything, li
    1277             :  * (and hi if th was not NULL) must be true.  There is a special case if
    1278             :  * tl is nil and th is NULL.  This is the only way to select for nil
    1279             :  * values.
    1280             :  *
    1281             :  * If anti is true, the result is the complement of what the result
    1282             :  * would be if anti were 0, except that nils are filtered out if
    1283             :  * nil_matches is false.
    1284             :  *
    1285             :  * If nil_matches is true, NIL is considered an ordinary value that
    1286             :  * can match, else NIL must be considered to never match.
    1287             :  *
    1288             :  * In brief:
    1289             :  * - if tl==nil and th==NULL and anti==false, return all nils (only way
    1290             :  *   to get nils);
    1291             :  * - it tl==nil and th==nil, return all but nils;
    1292             :  * - if tl==nil and th!=NULL, no lower bound;
    1293             :  * - if th==NULL or tl==th, point (equi) select;
    1294             :  * - if th==nil, no upper bound
    1295             :  *
    1296             :  * A complete breakdown of the various arguments follows.  Here, v, v1
    1297             :  * and v2 are values from the appropriate domain, and
    1298             :  * v != nil, v1 != nil, v2 != nil, v1 < v2.
    1299             :  * Note that if nil_matches is true, all the "x != nil" conditions fall
    1300             :  * away and for the "equi" or "point" selects, i.e. when tl is nil and
    1301             :  * th is either NULL or nil, there is no special handling of nil (so
    1302             :  * look at the rows with tl == v and th == v or NULL).
    1303             :  *      tl      th      li      hi      anti    result list of OIDs for values
    1304             :  *      -----------------------------------------------------------------
    1305             :  *      nil     NULL    true    ignored false   x == nil (only way to get nil)
    1306             :  *      nil     NULL    false   ignored false   NOTHING
    1307             :  *      nil     NULL    ignored ignored true    x != nil
    1308             :  *      nil     nil     ignored ignored false   x != nil
    1309             :  *      nil     nil     ignored ignored true    NOTHING
    1310             :  *      nil     v       ignored false   false   x < v
    1311             :  *      nil     v       ignored true    false   x <= v
    1312             :  *      nil     v       ignored false   true    x >= v
    1313             :  *      nil     v       ignored true    true    x > v
    1314             :  *      v       NULL    true    ignored false   x == v
    1315             :  *      v       NULL    false   ignored false   NOTHING
    1316             :  *      v       NULL    true    ignored true    x != v && x != nil
    1317             :  *      v       NULL    false   ignored true    x != nil
    1318             :  *      v       nil     true    ignored false   x >= v
    1319             :  *      v       nil     false   ignored false   x > v
    1320             :  *      v       nil     true    ignored true    x < v
    1321             :  *      v       nil     false   ignored true    x <= v
    1322             :  *      v       v       true    true    false   x == v
    1323             :  *      v       v       false   true    false   NOTHING
    1324             :  *      v       v       true    false   false   NOTHING
    1325             :  *      v       v       false   false   false   NOTHING
    1326             :  *      v       v       true    true    true    x != v && x != nil
    1327             :  *      v       v       false   true    true    x != nil
    1328             :  *      v       v       true    false   true    x != nil
    1329             :  *      v       v       false   false   true    x != nil
    1330             :  *      v1      v2      true    true    false   v1 <= x <= v2
    1331             :  *      v1      v2      false   true    false   v1 < x <= v2
    1332             :  *      v1      v2      true    false   false   v1 <= x < v2
    1333             :  *      v1      v2      false   false   false   v1 < x < v2
    1334             :  *      v1      v2      true    true    true    x < v1 or x > v2
    1335             :  *      v1      v2      false   true    true    x <= v1 or x > v2
    1336             :  *      v1      v2      true    false   true    x < v1 or x >= v2
    1337             :  *      v1      v2      false   false   true    x <= v1 or x >= v2
    1338             :  *      v2      v1      ignored ignored false   NOTHING
    1339             :  *      v2      v1      ignored ignored true    x != nil
    1340             :  */
    1341             : BAT *
    1342     3164703 : BATselect(BAT *b, BAT *s, const void *tl, const void *th,
    1343             :           bool li, bool hi, bool anti, bool nil_matches)
    1344             : {
    1345     3164703 :         bool lval;              /* low value used for comparison */
    1346     3164703 :         bool lnil;              /* low value is nil */
    1347     3164703 :         bool hval;              /* high value used for comparison */
    1348     3164703 :         bool equi;              /* select for single value (not range) */
    1349     3164703 :         bool antiequi = false;  /* select for all but single value */
    1350     3164703 :         bool wanthash = false;  /* use hash (equi must be true) */
    1351     3164703 :         bool havehash = false;  /* we have a hash (and the hashlock) */
    1352     3164703 :         bool phash = false;     /* use hash on parent BAT (if view) */
    1353     3164703 :         int t;                  /* data type */
    1354     3164703 :         bat parent;             /* b's parent bat (if b is a view) */
    1355     3164703 :         const void *nil;
    1356     3164703 :         BAT *bn;
    1357     3164703 :         struct canditer ci;
    1358     3164703 :         BUN estimate = BUN_NONE, maximum = BUN_NONE;
    1359     3164703 :         oid vwl = 0, vwh = 0;
    1360     3164703 :         lng vwo = 0;
    1361     3164703 :         Heap *oidxh = NULL;
    1362     3164703 :         const char *algo;
    1363     3164703 :         enum range_comp_t range;
    1364     3164703 :         const bool notnull = BATgetprop(b, GDK_NOT_NULL) != NULL;
    1365     3168405 :         lng t0 = GDKusec();
    1366             : 
    1367     3166624 :         BATcheck(b, NULL);
    1368     3166624 :         if (tl == NULL) {
    1369           0 :                 GDKerror("tl value required");
    1370           0 :                 return NULL;
    1371             :         }
    1372             : 
    1373     3166624 :         if (s && s->ttype != TYPE_msk && !s->tsorted) {
    1374           0 :                 GDKerror("invalid argument: s must be sorted.\n");
    1375           0 :                 return NULL;
    1376             :         }
    1377             : 
    1378     3166624 :         canditer_init(&ci, b, s);
    1379     3166936 :         if (ci.ncand == 0) {
    1380             :                 /* trivially empty result */
    1381     1305489 :                 MT_thread_setalgorithm("select: trivially empty");
    1382     1305354 :                 bn = BATdense(0, 0, 0);
    1383     1303288 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1384             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1385             :                           " (" LLFMT " usec): "
    1386             :                           "trivially empty\n",
    1387             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1388             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1389     1303288 :                 return bn;
    1390             :         }
    1391             : 
    1392     1861447 :         BATiter bi = bat_iterator(b);
    1393             : 
    1394     1862087 :         t = bi.type;
    1395     1862087 :         nil = ATOMnilptr(t);
    1396     1862087 :         if (nil == NULL)
    1397           0 :                 nil_matches = false;
    1398             :         /* can we use the base type? */
    1399     1862087 :         t = ATOMbasetype(t);
    1400     1862087 :         lnil = nil && ATOMcmp(t, tl, nil) == 0; /* low value == nil? */
    1401     1861488 :         lval = !lnil || th == NULL;      /* low value used for comparison */
    1402     1861488 :         equi = th == NULL || (lval && ATOMcmp(t, tl, th) == 0); /* point select? */
    1403     1861585 :         if (lnil && nil_matches && (th == NULL || ATOMcmp(t, th, nil) == 0)) {
    1404             :                 /* if nil_matches is set, tl==th==nil is just an equi select */
    1405             :                 equi = true;
    1406             :                 lval = true;
    1407             :         }
    1408             : 
    1409     1841857 :         if (equi) {
    1410     1819863 :                 assert(lval);
    1411     1819863 :                 if (th == NULL)
    1412     1635808 :                         hi = li;
    1413     1819863 :                 th = tl;
    1414     1819863 :                 hval = true;
    1415     1819863 :                 if (!anti && (!li || !hi)) {
    1416             :                         /* upper and lower bound of range are equal (or
    1417             :                          * upper is NULL) and we want an interval that's
    1418             :                          * open on at least one side (v <= x < v or v <
    1419             :                          * x <= v or v < x < v all result in nothing) */
    1420          59 :                         MT_thread_setalgorithm("select: empty interval");
    1421          59 :                         bn = BATdense(0, 0, 0);
    1422          59 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1423             :                                   ",s=" ALGOOPTBATFMT ",li=%d,hi=%d,anti=%d -> "
    1424             :                                   ALGOOPTBATFMT " (" LLFMT " usec): "
    1425             :                                   "empty interval\n",
    1426             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1427             :                                   li, hi, anti, ALGOOPTBATPAR(bn),
    1428             :                                   GDKusec() - t0);
    1429          59 :                         bat_iterator_end(&bi);
    1430          59 :                         return bn;
    1431             :                 }
    1432             :         } else {
    1433             :                 /* range select: we only care about nil_matches in
    1434             :                  * (anti-)equi-select */
    1435       41722 :                 nil_matches = false;
    1436       41722 :                 if (nil == NULL) {
    1437           0 :                         assert(th != NULL);
    1438             :                         hval = true;
    1439             :                 } else {
    1440       41722 :                         hval = ATOMcmp(t, th, nil) != 0;
    1441             :                 }
    1442             :         }
    1443     1861533 :         if (anti) {
    1444       73646 :                 if (lval != hval) {
    1445             :                         /* one of the end points is nil and the other
    1446             :                          * isn't: swap sub-ranges */
    1447          47 :                         const void *tv;
    1448          47 :                         bool ti;
    1449          47 :                         assert(!equi);
    1450          47 :                         ti = li;
    1451          47 :                         li = !hi;
    1452          47 :                         hi = !ti;
    1453          47 :                         tv = tl;
    1454          47 :                         tl = th;
    1455          47 :                         th = tv;
    1456          47 :                         ti = lval;
    1457          47 :                         lval = hval;
    1458          47 :                         hval = ti;
    1459          47 :                         lnil = ATOMcmp(t, tl, nil) == 0;
    1460          47 :                         anti = false;
    1461          47 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1462             :                                   ",s=" ALGOOPTBATFMT ",anti=%d "
    1463             :                                   "anti: switch ranges...\n",
    1464             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1465             :                                   anti);
    1466       73599 :                 } else if (!lval && !hval) {
    1467             :                         /* antiselect for nil-nil range: all non-nil
    1468             :                          * values are in range; we must return all
    1469             :                          * other non-nil values, i.e. nothing */
    1470          26 :                         MT_thread_setalgorithm("select: anti: nil-nil range, nonil");
    1471          26 :                         bn = BATdense(0, 0, 0);
    1472          26 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1473             :                                   ",s=" ALGOOPTBATFMT ",anti=%d -> "
    1474             :                                   ALGOOPTBATFMT " (" LLFMT " usec): "
    1475             :                                   "anti: nil-nil range, nonil\n",
    1476             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1477             :                                   anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1478          26 :                         bat_iterator_end(&bi);
    1479          26 :                         return bn;
    1480       73573 :                 } else if ((equi && (lnil || !(li && hi))) || ATOMcmp(t, tl, th) > 0) {
    1481             :                         /* various ways to select for everything except nil */
    1482        6466 :                         if (equi && !lnil && nil_matches && !(li && hi)) {
    1483             :                                 /* nil is not special, so return
    1484             :                                  * everything */
    1485           0 :                                 bn = canditer_slice(&ci, 0, ci.ncand);
    1486           0 :                                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1487             :                                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1488             :                                           " (" LLFMT " usec): "
    1489             :                                           "anti, equi, open, nil_matches\n",
    1490             :                                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1491             :                                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1492           0 :                                 bat_iterator_end(&bi);
    1493           0 :                                 return bn;
    1494             :                         }
    1495        6466 :                         bn = BATselect(b, s, nil, NULL, true, true, false, false);
    1496        6464 :                         if (bn == NULL) {
    1497           0 :                                 bat_iterator_end(&bi);
    1498           0 :                                 return NULL;
    1499             :                         }
    1500        6464 :                         BAT *bn2;
    1501        6464 :                         if (s) {
    1502        3274 :                                 bn2 = BATdiffcand(s, bn);
    1503             :                         } else {
    1504        3190 :                                 bn2 = BATnegcands(ci.seq, bi.count, bn);
    1505             :                         }
    1506        6465 :                         bat_iterator_end(&bi);
    1507        6467 :                         BBPreclaim(bn);
    1508        6466 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1509             :                                   ",s=" ALGOOPTBATFMT ",anti=1,equi=%d -> "
    1510             :                                   ALGOOPTBATFMT " (" LLFMT " usec): "
    1511             :                                   "everything except nil\n",
    1512             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    1513             :                                   equi, ALGOOPTBATPAR(bn2), GDKusec() - t0);
    1514        6466 :                         return bn2; /* also if NULL */
    1515             :                 } else {
    1516             :                         antiequi = equi;
    1517             :                         equi = false;
    1518             :                 }
    1519             :         }
    1520             : 
    1521             :         /* if equi set, then so are both lval and hval */
    1522     1855073 :         assert(!equi || (lval && hval));
    1523             : 
    1524     1855073 :         if (hval && (equi ? !li || !hi : ATOMcmp(t, tl, th) > 0)) {
    1525             :                 /* empty range */
    1526          31 :                 MT_thread_setalgorithm("select: empty range");
    1527          31 :                 bn = BATdense(0, 0, 0);
    1528          31 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1529             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1530             :                           " (" LLFMT " usec) "
    1531             :                           "empty range\n",
    1532             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1533             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1534          31 :                 bat_iterator_end(&bi);
    1535          31 :                 return bn;
    1536             :         }
    1537     1855039 :         if (equi && lnil && (notnull || bi.nonil)) {
    1538             :                 /* return all nils, but there aren't any */
    1539       14615 :                 MT_thread_setalgorithm("select: equi-nil, nonil");
    1540       14617 :                 bn = BATdense(0, 0, 0);
    1541       14618 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1542             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1543             :                           " (" LLFMT " usec): "
    1544             :                           "equi-nil, nonil\n",
    1545             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1546             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1547       14618 :                 bat_iterator_end(&bi);
    1548       14618 :                 return bn;
    1549             :         }
    1550             : 
    1551     1840424 :         if (!equi && !lval && !hval && lnil && (notnull || bi.nonil)) {
    1552             :                 /* return all non-nils from a BAT that doesn't have
    1553             :                  * any: i.e. return everything */
    1554           0 :                 MT_thread_setalgorithm("select: everything, nonil");
    1555           0 :                 bn = canditer_slice(&ci, 0, ci.ncand);
    1556           0 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1557             :                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1558             :                           " (" LLFMT " usec): "
    1559             :                           "everything, nonil\n",
    1560             :                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1561             :                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1562           0 :                 bat_iterator_end(&bi);
    1563           0 :                 return bn;
    1564             :         }
    1565             : 
    1566             :         /* figure out how the searched for range compares with the known
    1567             :          * minimum and maximum values */
    1568     1850685 :         range = BATrange(&bi, lval ? tl : NULL, hval ? th : NULL, li, hi);
    1569     1840259 :         if (anti) {
    1570       67101 :                 switch (range) {
    1571         218 :                 case range_contains:
    1572             :                         /* MIN..MAX range of values in BAT fully inside
    1573             :                          * tl..th range, so nothing left over for
    1574             :                          * anti */
    1575         218 :                         MT_thread_setalgorithm("select: nothing, out of range");
    1576         218 :                         bn = BATdense(0, 0, 0);
    1577         218 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1578             :                                   ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1579             :                                   " (" LLFMT " usec): "
    1580             :                                   "nothing, out of range\n",
    1581             :                                   ALGOBATPAR(b), ALGOOPTBATPAR(s), anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
    1582         218 :                         bat_iterator_end(&bi);
    1583         218 :                         return bn;
    1584        6169 :                 case range_before:
    1585             :                 case range_after:
    1586        6169 :                         if (notnull || b->tnonil || nil_matches) {
    1587             :                                 /* search range does not overlap with
    1588             :                                  * BAT range, and there are no nils (or
    1589             :                                  * we want to include nils), so we can
    1590             :                                  * return everything */
    1591        6030 :                                 MT_thread_setalgorithm("select: everything, anti, nonil");
    1592        6030 :                                 bn = canditer_slice(&ci, 0, ci.ncand);
    1593        6027 :                                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1594             :                                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1595             :                                           " (" LLFMT " usec): "
    1596             :                                           "everything, nonil\n",
    1597             :                                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1598             :                                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1599        6027 :                                 bat_iterator_end(&bi);
    1600        6027 :                                 return bn;
    1601             :                         }
    1602             :                         break;
    1603             :                 default:
    1604             :                         break;
    1605             :                 }
    1606     1773158 :         } else if (!equi || !lnil) {
    1607     1767894 :                 switch (range) {
    1608      104528 :                 case range_before:
    1609             :                 case range_after:
    1610             :                         /* range we're looking for either completely
    1611             :                          * before or complete after the range of values
    1612             :                          * in the BAT */
    1613      104528 :                         MT_thread_setalgorithm("select: nothing, out of range");
    1614      104554 :                         bn = BATdense(0, 0, 0);
    1615      104441 :                         TRC_DEBUG(ALGO, "b="
    1616             :                                   ALGOBATFMT ",s="
    1617             :                                   ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1618             :                                   " (" LLFMT " usec): "
    1619             :                                   "nothing, out of range\n",
    1620             :                                   ALGOBATPAR(b),
    1621             :                                   ALGOOPTBATPAR(s), anti,
    1622             :                                   ALGOOPTBATPAR(bn), GDKusec() - t0);
    1623      104441 :                         bat_iterator_end(&bi);
    1624      104441 :                         return bn;
    1625        6893 :                 case range_contains:
    1626        6893 :                         if (notnull || b->tnonil) {
    1627             :                                 /* search range contains BAT range, and
    1628             :                                  * there are no nils, so we can return
    1629             :                                  * everything */
    1630        6825 :                                 MT_thread_setalgorithm("select: everything, nonil");
    1631        6828 :                                 bn = canditer_slice(&ci, 0, ci.ncand);
    1632        6827 :                                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT
    1633             :                                           ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
    1634             :                                           " (" LLFMT " usec): "
    1635             :                                           "everything, nonil\n",
    1636             :                                           ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
    1637             :                                           ALGOOPTBATPAR(bn), GDKusec() - t0);
    1638        6827 :                                 bat_iterator_end(&bi);
    1639        6827 :                                 return bn;
    1640             :                         }
    1641             :                         break;
    1642             :                 default:
    1643             :                         break;
    1644             :                 }
    1645             :         }
    1646             : 
    1647     1722658 :         parent = VIEWtparent(b);
    1648     1569477 :         assert(parent >= 0);
    1649     1722658 :         BAT *pb;
    1650     1722658 :         BATiter pbi;
    1651     1722658 :         if (parent > 0)
    1652     1569497 :                 pb = BATdescriptor(parent);
    1653             :         else
    1654             :                 pb = NULL;
    1655     1723247 :         pbi = bat_iterator(pb);
    1656             :         /* use hash only for equi-join if the bat is not sorted, but
    1657             :          * only if b or its parent already has a hash, or if b or its
    1658             :          * parent is persistent and the total size wouldn't be too
    1659             :          * large; check for existence of hash last since that may
    1660             :          * involve I/O */
    1661     1722700 :         if ((equi || antiequi) && !bi.sorted && !bi.revsorted) {
    1662     1382666 :                 double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
    1663     1382020 :                 if (cost > 0 && cost < ci.ncand) {
    1664       82176 :                         wanthash = true;
    1665       82176 :                         if (havehash) {
    1666       57519 :                                 if (phash) {
    1667       56859 :                                         MT_rwlock_rdlock(&pb->thashlock);
    1668       56870 :                                         if (pb->thash == NULL) {
    1669           0 :                                                 MT_rwlock_rdunlock(&pb->thashlock);
    1670           0 :                                                 havehash = false;
    1671             :                                         }
    1672             :                                 } else {
    1673         660 :                                         MT_rwlock_rdlock(&b->thashlock);
    1674         660 :                                         if (b->thash == NULL) {
    1675           0 :                                                 MT_rwlock_rdunlock(&b->thashlock);
    1676           0 :                                                 havehash = false;
    1677             :                                         }
    1678             :                                 }
    1679             :                         }
    1680       82187 :                         if (wanthash && !havehash && b->batRole != PERSISTENT) {
    1681           0 :                                 MT_lock_set(&b->theaplock);
    1682           0 :                                 if (++b->selcnt > 1000)
    1683           0 :                                         b->selcnt = 1000; /* limit value */
    1684             :                                 else
    1685             :                                         wanthash = false;
    1686           0 :                                 MT_lock_unset(&b->theaplock);
    1687             :                         }
    1688             :                 } else {
    1689     1299844 :                         wanthash = havehash = false;
    1690             :                 }
    1691             :         }
    1692             : 
    1693             :         /* at this point, if havehash is set, we have the hash lock
    1694             :          * the lock is on the parent if phash is set, on b itself if not
    1695             :          * also, if havehash is set, then so is wanthash (but not v.v.) */
    1696             : 
    1697     1722065 :         if (!havehash) {
    1698             :                 /* make sure tsorted and trevsorted flags are set, but
    1699             :                  * we only need to know if we're not yet sure that we're
    1700             :                  * going for the hash (i.e. it already exists) */
    1701     1665375 :                 (void) BATordered(b);
    1702     1666244 :                 (void) BATordered_rev(b);
    1703             :                 /* reinitialize iterator since properties may have changed */
    1704     1666381 :                 bat_iterator_end(&bi);
    1705     1666198 :                 bi = bat_iterator(b);
    1706             :         }
    1707             : 
    1708             :         /* If there is an order index or it is a view and the parent
    1709             :          * has an ordered index, and the bat is not tsorted or
    1710             :          * trevstorted then use the order index.  And there is no cand
    1711             :          * list or if there is one, it is dense.
    1712             :          * TODO: we do not support anti-select with order index */
    1713     1722872 :         bool poidx = false;
    1714     1722872 :         if (!anti &&
    1715     1662438 :             !havehash &&
    1716     1608511 :             !bi.sorted &&
    1717     1360591 :             !bi.revsorted &&
    1718     1238583 :             ci.tpe == cand_dense) {
    1719      962713 :                 BAT *view = NULL;
    1720      962713 :                 (void) BATcheckorderidx(b);
    1721      962822 :                 MT_lock_set(&b->batIdxLock);
    1722      962742 :                 if ((oidxh = b->torderidx) != NULL)
    1723          39 :                         HEAPincref(oidxh);
    1724      962742 :                 MT_lock_unset(&b->batIdxLock);
    1725      962750 :                 if (oidxh == NULL && pb != NULL) {
    1726      901853 :                         (void) BATcheckorderidx(pb);
    1727      901859 :                         MT_lock_set(&pb->batIdxLock);
    1728      901832 :                         if ((oidxh = pb->torderidx) != NULL) {
    1729           9 :                                 HEAPincref(oidxh);
    1730           9 :                                 view = b;
    1731           9 :                                 b = pb;
    1732             :                         }
    1733      901832 :                         MT_lock_unset(&pb->batIdxLock);
    1734             :                 }
    1735      962746 :                 if (oidxh) {
    1736             :                         /* Is query selective enough to use the ordered
    1737             :                          * index?  Finding the boundaries is 2*log(n)
    1738             :                          * where n is the size of the bat, sorting is
    1739             :                          * N*log(N) where N is the number of results.
    1740             :                          * If the sum is less than n (cost of scan),
    1741             :                          * it's cheaper.  However, to find out how large
    1742             :                          * N is, we'd have to do the two boundary
    1743             :                          * searches.  If we do that, we might as well do
    1744             :                          * it all. */
    1745          48 :                         if (view) {
    1746           9 :                                 bat_iterator_end(&bi);
    1747           9 :                                 bi = bat_iterator(b);
    1748           9 :                                 poidx = true; /* using parent oidx */
    1749           9 :                                 vwo = (lng) (view->tbaseoff - bi.baseoff);
    1750           9 :                                 vwl = b->hseqbase + (oid) vwo + ci.seq - view->hseqbase;
    1751           9 :                                 vwh = vwl + canditer_last(&ci) - ci.seq;
    1752           9 :                                 vwo = (lng) view->hseqbase - (lng) b->hseqbase - vwo;
    1753           9 :                                 TRC_DEBUG(ALGO, "Switch from " ALGOBATFMT " to " ALGOBATFMT " " OIDFMT "-" OIDFMT " hseq " LLFMT "\n", ALGOBATPAR(view), ALGOBATPAR(b), vwl, vwh, vwo);
    1754             :                         } else {
    1755          39 :                                 vwl = ci.seq;
    1756          39 :                                 vwh = canditer_last(&ci);
    1757             :                         }
    1758             :                 }
    1759             :         }
    1760             : 
    1761     1722905 :         if (bi.sorted || bi.revsorted || (!havehash && oidxh != NULL)) {
    1762      407437 :                 BUN low = 0;
    1763      407437 :                 BUN high = bi.count;
    1764             : 
    1765      407437 :                 if (BATtdensebi(&bi)) {
    1766             :                         /* positional */
    1767             :                         /* we expect nonil to be set, in which case we
    1768             :                          * already know that we're not dealing with a
    1769             :                          * nil equiselect (dealt with above) */
    1770       22990 :                         assert(bi.nonil);
    1771       22990 :                         assert(bi.sorted);
    1772       22990 :                         assert(oidxh == NULL);
    1773       22990 :                         algo = "select: dense";
    1774       22990 :                         if (hval) {
    1775       23160 :                                 oid h = * (oid *) th + hi;
    1776       23160 :                                 if (h > bi.tseq)
    1777       23167 :                                         h -= bi.tseq;
    1778             :                                 else
    1779             :                                         h = 0;
    1780       23160 :                                 if ((BUN) h < high)
    1781       22990 :                                         high = (BUN) h;
    1782             :                         }
    1783             : 
    1784       22990 :                         if (lval) {
    1785       23171 :                                 oid l = *(oid *) tl + !li;
    1786       23171 :                                 if (l > bi.tseq)
    1787        4525 :                                         l -= bi.tseq;
    1788             :                                 else
    1789             :                                         l = 0;
    1790        4525 :                                 if ((BUN) l > low)
    1791        4525 :                                         low = (BUN) l;
    1792        4525 :                                 if (low > high)
    1793             :                                         low = high;
    1794             :                         }
    1795      384447 :                 } else if (bi.sorted) {
    1796      262065 :                         assert(oidxh == NULL);
    1797      262065 :                         algo = "select: sorted";
    1798      262065 :                         if (lval) {
    1799      261949 :                                 if (li)
    1800      261532 :                                         low = SORTfndfirst(b, tl);
    1801             :                                 else
    1802         417 :                                         low = SORTfndlast(b, tl);
    1803             :                         } else {
    1804             :                                 /* skip over nils at start of column */
    1805         116 :                                 low = SORTfndlast(b, nil);
    1806             :                         }
    1807      262348 :                         if (hval) {
    1808      261435 :                                 if (hi)
    1809      260867 :                                         high = SORTfndlast(b, th);
    1810             :                                 else
    1811         568 :                                         high = SORTfndfirst(b, th);
    1812             :                         }
    1813      122382 :                 } else if (bi.revsorted) {
    1814      122334 :                         assert(oidxh == NULL);
    1815      122334 :                         algo = "select: reverse sorted";
    1816      122334 :                         if (lval) {
    1817      122313 :                                 if (li)
    1818      122280 :                                         high = SORTfndlast(b, tl);
    1819             :                                 else
    1820          33 :                                         high = SORTfndfirst(b, tl);
    1821             :                         } else {
    1822             :                                 /* skip over nils at end of column */
    1823          21 :                                 high = SORTfndfirst(b, nil);
    1824             :                         }
    1825      122359 :                         if (hval) {
    1826      122268 :                                 if (hi)
    1827      122220 :                                         low = SORTfndfirst(b, th);
    1828             :                                 else
    1829          48 :                                         low = SORTfndlast(b, th);
    1830             :                         }
    1831             :                 } else {
    1832          48 :                         assert(oidxh != NULL);
    1833          48 :                         algo = poidx ? "select: parent orderidx" : "select: orderidx";
    1834          48 :                         if (lval) {
    1835          30 :                                 if (li)
    1836          24 :                                         low = ORDERfndfirst(b, oidxh, tl);
    1837             :                                 else
    1838           6 :                                         low = ORDERfndlast(b, oidxh, tl);
    1839             :                         } else {
    1840             :                                 /* skip over nils at start of column */
    1841          18 :                                 low = ORDERfndlast(b, oidxh, nil);
    1842             :                         }
    1843          48 :                         if (hval) {
    1844          34 :                                 if (hi)
    1845          22 :                                         high = ORDERfndlast(b, oidxh, th);
    1846             :                                 else
    1847          12 :                                         high = ORDERfndfirst(b, oidxh, th);
    1848             :                         }
    1849             :                 }
    1850      407770 :                 if (anti) {
    1851       37744 :                         assert(oidxh == NULL);
    1852       37744 :                         if (bi.sorted) {
    1853       37429 :                                 BUN first = nil_matches ? 0 : SORTfndlast(b, nil);
    1854             :                                 /* match: [first..low) + [high..last) */
    1855       37448 :                                 bn = canditer_slice2val(&ci,
    1856             :                                                         first + b->hseqbase,
    1857             :                                                         low + b->hseqbase,
    1858       37448 :                                                         high + b->hseqbase,
    1859             :                                                         oid_nil);
    1860             :                         } else {
    1861         315 :                                 BUN last = nil_matches ? bi.count : SORTfndfirst(b, nil);
    1862             :                                 /* match: [first..low) + [high..last) */
    1863         315 :                                 bn = canditer_slice2val(&ci,
    1864             :                                                         oid_nil,
    1865             :                                                         low + b->hseqbase,
    1866             :                                                         high + b->hseqbase,
    1867         315 :                                                         last + b->hseqbase);
    1868             :                         }
    1869             :                 } else {
    1870      370026 :                         if (bi.sorted || bi.revsorted) {
    1871      369978 :                                 assert(oidxh == NULL);
    1872             :                                 /* match: [low..high) */
    1873      369978 :                                 bn = canditer_sliceval(&ci,
    1874             :                                                        low + b->hseqbase,
    1875      369978 :                                                        high + b->hseqbase);
    1876             :                         } else {
    1877          48 :                                 BUN i;
    1878          48 :                                 BUN cnt = 0;
    1879          48 :                                 const oid *rs;
    1880          48 :                                 oid *rbn;
    1881             : 
    1882          48 :                                 rs = (const oid *) oidxh->base + ORDERIDXOFF;
    1883          48 :                                 rs += low;
    1884          48 :                                 bn = COLnew(0, TYPE_oid, high-low, TRANSIENT);
    1885          48 :                                 if (bn == NULL) {
    1886           0 :                                         HEAPdecref(oidxh, false);
    1887           0 :                                         bat_iterator_end(&bi);
    1888           0 :                                         bat_iterator_end(&pbi);
    1889           0 :                                         BBPreclaim(pb);
    1890           0 :                                         return NULL;
    1891             :                                 }
    1892             : 
    1893          48 :                                 rbn = (oid *) Tloc((bn), 0);
    1894             : 
    1895         251 :                                 for (i = low; i < high; i++) {
    1896         203 :                                         if (vwl <= *rs && *rs <= vwh) {
    1897         176 :                                                 *rbn++ = (oid) ((lng) *rs + vwo);
    1898         176 :                                                 cnt++;
    1899             :                                         }
    1900         203 :                                         rs++;
    1901             :                                 }
    1902          48 :                                 HEAPdecref(oidxh, false);
    1903          48 :                                 BATsetcount(bn, cnt);
    1904             : 
    1905             :                                 /* output must be sorted */
    1906          48 :                                 GDKqsort(Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid, false, false);
    1907          48 :                                 bn->tsorted = true;
    1908          48 :                                 bn->trevsorted = bn->batCount <= 1;
    1909          48 :                                 bn->tkey = true;
    1910          48 :                                 bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
    1911          48 :                                 bn->tnil = false;
    1912          48 :                                 bn->tnonil = true;
    1913          48 :                                 if (s) {
    1914           9 :                                         s = BATintersectcand(bn, s);
    1915           9 :                                         BBPunfix(bn->batCacheid);
    1916           9 :                                         bn = s;
    1917             :                                 }
    1918             :                         }
    1919             :                 }
    1920             : 
    1921      407656 :                 bn = virtualize(bn);
    1922      407097 :                 MT_thread_setalgorithm(algo);
    1923      407790 :                 TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",anti=%s -> "
    1924             :                           ALGOOPTBATFMT " %s (" LLFMT " usec)\n",
    1925             :                           ALGOBATPAR(b), anti ? "true" : "false",
    1926             :                           ALGOOPTBATPAR(bn), algo,
    1927             :                           GDKusec() - t0);
    1928             : 
    1929      407790 :                 bat_iterator_end(&bi);
    1930      407603 :                 bat_iterator_end(&pbi);
    1931      407751 :                 BBPreclaim(pb);
    1932      407820 :                 return bn;
    1933             :         }
    1934             : 
    1935       57400 :         assert(oidxh == NULL);
    1936             :         /* upper limit for result size */
    1937     1315468 :         maximum = ci.ncand;
    1938     1315468 :         if ((equi || antiequi) && havehash) {
    1939             :                 /* we can look in the hash struct to see whether all
    1940             :                  * values are distinct and set estimate accordingly */
    1941       57506 :                 if (phash) {
    1942       56846 :                         if (pb->thash->nunique == pbi.count)
    1943             :                                 estimate = 1;
    1944         660 :                 } else if (b->thash->nunique == bi.count)
    1945             :                         estimate = 1;
    1946             :         }
    1947     1310522 :         if (estimate == BUN_NONE && (bi.key || (pb != NULL && pbi.key))) {
    1948             :                 /* exact result size in special cases */
    1949         303 :                 if (equi || (antiequi && wanthash)) {
    1950             :                         estimate = 1;
    1951          13 :                 } else if (!anti && lval && hval) {
    1952           0 :                         switch (t) {
    1953           0 :                         case TYPE_bte:
    1954           0 :                                 estimate = (BUN) (*(bte *) th - *(bte *) tl);
    1955           0 :                                 break;
    1956           0 :                         case TYPE_sht:
    1957           0 :                                 estimate = (BUN) (*(sht *) th - *(sht *) tl);
    1958           0 :                                 break;
    1959           0 :                         case TYPE_int:
    1960           0 :                                 CALC_ESTIMATE(int);
    1961             :                                 break;
    1962           0 :                         case TYPE_lng:
    1963           0 :                                 CALC_ESTIMATE(lng);
    1964             :                                 break;
    1965             : #ifdef HAVE_HGE
    1966           0 :                         case TYPE_hge:
    1967           0 :                                 CALC_ESTIMATE(hge);
    1968             :                                 break;
    1969             : #endif
    1970             :                         }
    1971           0 :                         if (estimate == BUN_NONE)
    1972           0 :                                 estimate += li + hi - 1;
    1973             :                 }
    1974             :         }
    1975             :         /* refine upper limit by exact size (if known) */
    1976     1315468 :         maximum = MIN(maximum, estimate);
    1977     1315468 :         if (wanthash && !havehash && estimate == BUN_NONE) {
    1978             :                 /* no exact result size, but we need estimate to
    1979             :                  * choose between hash- & scan-select (if we already
    1980             :                  * have a hash, it's a no-brainer: we use it) */
    1981       24631 :                 if (ci.ncand <= 10000) {
    1982             :                         /* "small" input: don't bother about more accurate
    1983             :                          * estimate */
    1984             :                         estimate = maximum;
    1985             :                 } else {
    1986             :                         /* layman's quick "pseudo-sample" of 1000 tuples,
    1987             :                          * i.e., 333 from begin, middle & end of BAT */
    1988           2 :                         BUN smpl_cnt = 0, slct_cnt = 0, pos, skip, delta;
    1989           2 :                         BAT *smpl, *slct;
    1990             : 
    1991           2 :                         delta = 1000 / 3 / 2;
    1992           2 :                         skip = (bi.count - (2 * delta)) / 2;
    1993           8 :                         for (pos = delta; pos < bi.count; pos += skip) {
    1994           6 :                                 smpl = BATslice(b, pos - delta, pos + delta);
    1995           6 :                                 if (smpl) {
    1996           6 :                                         slct = BATselect(smpl, NULL, tl,
    1997             :                                                          th, li, hi, anti, nil_matches);
    1998           6 :                                         if (slct) {
    1999           6 :                                                 smpl_cnt += BATcount(smpl);
    2000           6 :                                                 slct_cnt += BATcount(slct);
    2001           6 :                                                 BBPreclaim(slct);
    2002             :                                         }
    2003           6 :                                         BBPreclaim(smpl);
    2004             :                                 }
    2005             :                         }
    2006           2 :                         if (smpl_cnt > 0 && slct_cnt > 0) {
    2007             :                                 /* linear extrapolation plus 10% margin */
    2008           1 :                                 estimate = (BUN) ((dbl) slct_cnt / (dbl) smpl_cnt
    2009           1 :                                                   * (dbl) bi.count * 1.1);
    2010           1 :                         } else if (smpl_cnt > 0 && slct_cnt == 0) {
    2011             :                                 /* estimate low enough to trigger hash select */
    2012           1 :                                 estimate = (ci.ncand / 100) - 1;
    2013             :                         }
    2014             :                 }
    2015       24631 :                 wanthash = estimate < ci.ncand / 100;
    2016             :         }
    2017     1315468 :         if (estimate == BUN_NONE) {
    2018             :                 /* no better estimate possible/required:
    2019             :                  * (pre-)allocate 1M tuples, i.e., avoid/delay extend
    2020             :                  * without too much overallocation */
    2021     1285594 :                 estimate = 1000000;
    2022             :         }
    2023             :         /* limit estimation by upper limit */
    2024     1315468 :         estimate = MIN(estimate, maximum);
    2025             : 
    2026     1315468 :         bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
    2027     1315322 :         if (bn == NULL) {
    2028           0 :                 if (havehash) {
    2029           0 :                         if (phash)
    2030           0 :                                 MT_rwlock_rdunlock(&pb->thashlock);
    2031             :                         else
    2032           0 :                                 MT_rwlock_rdunlock(&b->thashlock);
    2033             :                 }
    2034           0 :                 bat_iterator_end(&bi);
    2035           0 :                 bat_iterator_end(&pbi);
    2036           0 :                 BBPreclaim(pb);
    2037           0 :                 return NULL;
    2038             :         }
    2039             : 
    2040     1315322 :         if (wanthash) {
    2041             :                 /* hashselect unlocks the hash lock */
    2042       57455 :                 bn = hashselect(&bi, &ci, bn, tl, maximum, havehash, phash, &algo);
    2043       57518 :                 if (bn && antiequi) {
    2044        3484 :                         BAT *bn2;
    2045        3484 :                         if (s) {
    2046        3484 :                                 bn2 = BATdiffcand(s, bn);
    2047             :                         } else {
    2048           0 :                                 bn2 = BATnegcands(ci.seq, bi.count, bn);
    2049             :                         }
    2050        3482 :                         BBPreclaim(bn);
    2051        3482 :                         bn = bn2;
    2052        3482 :                         if (!bi.nonil) {
    2053           0 :                                 bn2 = BATselect(b, s, nil, NULL, true, true, false, false);
    2054           0 :                                 if (bn2 == NULL) {
    2055           0 :                                         BBPreclaim(bn);
    2056           0 :                                         return NULL;
    2057             :                                 }
    2058           0 :                                 BAT *bn3 = BATdiffcand(bn, bn2);
    2059           0 :                                 BBPreclaim(bn2);
    2060           0 :                                 BBPreclaim(bn);
    2061             :                                 bn = bn3;
    2062             :                         }
    2063             :                 }
    2064             :         } else {
    2065     1257867 :                 assert(!havehash);
    2066     1257867 :                 bn = scanselect(&bi, &ci, bn, tl, th, li, hi, equi, anti,
    2067             :                                 nil_matches, lval, hval, lnil, maximum,
    2068             :                                 &algo);
    2069             :         }
    2070     1314734 :         bat_iterator_end(&bi);
    2071     1315733 :         bat_iterator_end(&pbi);
    2072     1315663 :         BBPreclaim(pb);
    2073             : 
    2074     1315707 :         bn = virtualize(bn);
    2075     1315341 :         MT_thread_setalgorithm(algo);
    2076     1315164 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT",anti=%s -> " ALGOOPTBATFMT
    2077             :                   " %s (" LLFMT " usec)\n",
    2078             :                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    2079             :                   anti ? "true" : "false",
    2080             :                   ALGOOPTBATPAR(bn), algo,
    2081             :                   GDKusec() - t0);
    2082             : 
    2083             :         return bn;
    2084             : }
    2085             : 
    2086             : /* theta select
    2087             :  *
    2088             :  * Returns a BAT with the OID values of b for qualifying tuples.  The
    2089             :  * return BAT is sorted (i.e. in the same order as the input BAT).
    2090             :  *
    2091             :  * If s is not NULL, it is a list of candidates.  s must be sorted.
    2092             :  *
    2093             :  * Theta select returns all values from b which are less/greater than
    2094             :  * or (not) equal to the provided value depending on the value of op.
    2095             :  * Op is a string with one of the values: "=", "==", "<", "<=", ">",
    2096             :  * ">=", "<>", "!=" (the first two are equivalent and the last two are
    2097             :  * equivalent).  Theta select never returns nils.
    2098             :  *
    2099             :  * If value is nil, the result is empty, except when using eq/ne as
    2100             :  * operator.
    2101             :  */
    2102             : BAT *
    2103      471536 : BATthetaselect(BAT *b, BAT *s, const void *val, const char *op)
    2104             : {
    2105      471536 :         const void *nil;
    2106             : 
    2107      471536 :         BATcheck(b, NULL);
    2108      471536 :         BATcheck(val, NULL);
    2109      471536 :         BATcheck(op, NULL);
    2110             : 
    2111             :         /* eq/ne are can be used for "is" nil-handling */
    2112      471536 :         if (strcmp(op, "eq") == 0)
    2113       15027 :                 return BATselect(b, s, val, NULL, true, true, false, true);
    2114      456509 :         if (strcmp(op, "ne") == 0)
    2115        8777 :                 return BATselect(b, s, val, NULL, true, true, true, true);
    2116             : 
    2117      447732 :         nil = ATOMnilptr(b->ttype);
    2118      447732 :         if (ATOMcmp(b->ttype, val, nil) == 0)
    2119          29 :                 return BATdense(0, 0, 0);
    2120      447838 :         if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
    2121             :                 /* "=" or "==" */
    2122      368515 :                 return BATselect(b, s, val, NULL, true, true, false, false);
    2123             :         }
    2124       79323 :         if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
    2125             :                 /* "!=" (equivalent to "<>") */
    2126       74330 :                 return BATselect(b, s, val, NULL, true, true, true, false);
    2127             :         }
    2128        4993 :         if (op[0] == '<') {
    2129         696 :                 if (op[1] == 0) {
    2130             :                         /* "<" */
    2131         594 :                         return BATselect(b, s, nil, val, false, false, false, false);
    2132             :                 }
    2133         102 :                 if (op[1] == '=' && op[2] == 0) {
    2134             :                         /* "<=" */
    2135          97 :                         return BATselect(b, s, nil, val, false, true, false, false);
    2136             :                 }
    2137           5 :                 if (op[1] == '>' && op[2] == 0) {
    2138             :                         /* "<>" (equivalent to "!=") */
    2139           6 :                         return BATselect(b, s, val, NULL, true, true, true, false);
    2140             :                 }
    2141             :         }
    2142        4296 :         if (op[0] == '>') {
    2143        4296 :                 if (op[1] == 0) {
    2144             :                         /* ">" */
    2145        3745 :                         return BATselect(b, s, val, nil, false, false, false, false);
    2146             :                 }
    2147         551 :                 if (op[1] == '=' && op[2] == 0) {
    2148             :                         /* ">=" */
    2149         551 :                         return BATselect(b, s, val, nil, true, false, false, false);
    2150             :                 }
    2151             :         }
    2152           0 :         GDKerror("unknown operator.\n");
    2153           0 :         return NULL;
    2154             : }

Generated by: LCOV version 1.14