LCOV - code coverage report
Current view: top level - gdk - gdk_cand.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 764 994 76.9 %
Date: 2024-12-19 23:10:26 Functions: 24 25 96.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "gdk.h"
      15             : #include "gdk_private.h"
      16             : 
      17             : bool
      18       11433 : BATiscand(BAT *b)
      19             : {
      20       11433 :         if (ATOMtype(b->ttype) != TYPE_oid)
      21             :                 return false;
      22       11433 :         if (complex_cand(b))
      23             :                 return true;
      24       10934 :         if (b->ttype == TYPE_void && is_oid_nil(b->tseqbase))
      25             :                 return false;
      26       21869 :         return b->tsorted && BATtkey(b);
      27             : }
      28             : 
      29             : /* create a new, dense candidate list with values from `first' up to,
      30             :  * but not including, `last' */
      31             : static inline BAT *
      32       10433 : newdensecand(oid first, oid last)
      33             : {
      34       10433 :         if (last <= first)
      35           0 :                 first = last = 0; /* empty range */
      36       10433 :         return BATdense(0, first, last - first);
      37             : }
      38             : 
      39             : /* merge two candidate lists and produce a new one
      40             :  *
      41             :  * candidate lists are VOID-headed BATs with an OID tail which is
      42             :  * sorted and unique.
      43             :  */
      44             : BAT *
      45      116741 : BATmergecand(BAT *a, BAT *b)
      46             : {
      47      116741 :         BAT *bn;
      48      116741 :         oid *restrict p, i;
      49      116741 :         struct canditer cia, cib;
      50             : 
      51      116741 :         BATcheck(a, NULL);
      52      116741 :         BATcheck(b, NULL);
      53             : 
      54      116741 :         canditer_init(&cia, NULL, a);
      55      116560 :         canditer_init(&cib, NULL, b);
      56             : 
      57             :         /* we can return a if b is empty (and v.v.) */
      58      116799 :         if (cia.ncand == 0) {
      59       37421 :                 bn = canditer_slice(&cib, 0, cib.ncand);
      60       37398 :                 goto doreturn;
      61             :         }
      62       79378 :         if (cib.ncand == 0) {
      63       19323 :                 bn = canditer_slice(&cia, 0, cia.ncand);
      64       19322 :                 goto doreturn;
      65             :         }
      66             :         /* we can return a if a fully covers b (and v.v) */
      67       60055 :         if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
      68             :                 /* both are dense */
      69       18392 :                 if (cia.seq <= cib.seq && cib.seq <= cia.seq + cia.ncand) {
      70             :                         /* partial overlap starting with a, or b is
      71             :                          * smack bang after a */
      72        7655 :                         bn = newdensecand(cia.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand);
      73        7661 :                         goto doreturn;
      74             :                 }
      75       10737 :                 if (cib.seq <= cia.seq && cia.seq <= cib.seq + cib.ncand) {
      76             :                         /* partial overlap starting with b, or a is
      77             :                          * smack bang after b */
      78        2788 :                         bn = newdensecand(cib.seq, cia.seq + cia.ncand < cib.seq + cib.ncand ? cib.seq + cib.ncand : cia.seq + cia.ncand);
      79        2789 :                         goto doreturn;
      80             :                 }
      81             :         }
      82       49612 :         if (cia.tpe == cand_dense
      83       10258 :             && cia.seq <= cib.seq
      84        4401 :             && canditer_last(&cia) >= canditer_last(&cib)) {
      85         220 :                 bn = canditer_slice(&cia, 0, cia.ncand);
      86         220 :                 goto doreturn;
      87             :         }
      88       49392 :         if (cib.tpe == cand_dense
      89       36388 :             && cib.seq <= cia.seq
      90       10423 :             && canditer_last(&cib) >= canditer_last(&cia)) {
      91         172 :                 bn = canditer_slice(&cib, 0, cib.ncand);
      92         172 :                 goto doreturn;
      93             :         }
      94             : 
      95       49220 :         bn = COLnew(0, TYPE_oid, cia.ncand + cib.ncand, TRANSIENT);
      96       49214 :         if (bn == NULL)
      97           0 :                 goto doreturn;
      98       49214 :         p = (oid *) Tloc(bn, 0);
      99       49214 :         if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
     100             :                 /* both lists are dense */
     101        7951 :                 if (cia.seq > cib.seq) {
     102        4215 :                         struct canditer ci;
     103        4215 :                         ci = cia;
     104        4215 :                         cia = cib;
     105        4215 :                         cib = ci;
     106             :                 }
     107             :                 /* cia completely before cib */
     108        7951 :                 assert(cia.seq + cia.ncand < cib.seq);
     109       30388 :                 for (i = cia.seq; i < cia.seq + cia.ncand; i++)
     110       22437 :                         *p++ = i;
     111             :                 /* there is a gap */
     112       26825 :                 for (i = cib.seq; i < cib.seq + cib.ncand; i++)
     113       18874 :                         *p++ = i;
     114       41263 :         } else if (cia.tpe == cand_dense || cib.tpe == cand_dense) {
     115       30352 :                 if (cib.tpe == cand_dense) {
     116       28323 :                         struct canditer ci;
     117       28323 :                         ci = cia;
     118       28323 :                         cia = cib;
     119       28323 :                         cib = ci;
     120             :                 }
     121             :                 /* only cia is dense */
     122             : 
     123             :                 /* copy part of cib that's before the start of cia */
     124      126302 :                 while (canditer_peek(&cib) < cia.seq) {
     125       95862 :                         *p++ = canditer_next(&cib);
     126             :                 }
     127             :                 /* copy all of cia */
     128       68926 :                 for (i = cia.seq; i < cia.seq + cia.ncand; i++)
     129       38486 :                         *p++ = i;
     130             :                 /* skip over part of cib that overlaps with cia */
     131       30440 :                 canditer_setidx(&cib, canditer_search(&cib, cia.seq + cia.ncand, true));
     132             :                 /* copy rest of cib */
     133      116400 :                 while (cib.next < cib.ncand) {
     134       85910 :                         *p++ = canditer_next(&cib);
     135             :                 }
     136             :         } else {
     137             :                 /* a and b are both not dense */
     138       10911 :                 oid ao = canditer_next(&cia);
     139       10905 :                 oid bo = canditer_next(&cib);
     140    22803821 :                 while (!is_oid_nil(ao) && !is_oid_nil(bo)) {
     141    22792914 :                         if (ao < bo) {
     142    13065758 :                                 *p++ = ao;
     143    13065758 :                                 ao = canditer_next(&cia);
     144     9727156 :                         } else if (bo < ao) {
     145     8693740 :                                 *p++ = bo;
     146     8693740 :                                 bo = canditer_next(&cib);
     147             :                         } else {
     148     1033416 :                                 *p++ = ao;
     149     1033416 :                                 ao = canditer_next(&cia);
     150     1033462 :                                 bo = canditer_next(&cib);
     151             :                         }
     152             :                 }
     153      544456 :                 while (!is_oid_nil(ao)) {
     154      533545 :                         *p++ = ao;
     155      533545 :                         ao = canditer_next(&cia);
     156             :                 }
     157      415485 :                 while (!is_oid_nil(bo)) {
     158      404572 :                         *p++ = bo;
     159      404572 :                         bo = canditer_next(&cib);
     160             :                 }
     161             :         }
     162             : 
     163             :         /* properties */
     164       49292 :         BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0)));
     165       49228 :         bn->trevsorted = BATcount(bn) <= 1;
     166       49228 :         bn->tsorted = true;
     167       49228 :         bn->tkey = true;
     168       49228 :         bn->tnil = false;
     169       49228 :         bn->tnonil = true;
     170       49228 :         bn = virtualize(bn);
     171      116803 :   doreturn:
     172      116803 :         TRC_DEBUG(ALGO, ALGOBATFMT "," ALGOBATFMT " -> " ALGOOPTBATFMT "\n",
     173             :                   ALGOBATPAR(a), ALGOBATPAR(b), ALGOOPTBATPAR(bn));
     174             :         return bn;
     175             : }
     176             : 
     177             : /* intersect two candidate lists and produce a new one
     178             :  *
     179             :  * candidate lists are VOID-headed BATs with an OID tail which is
     180             :  * sorted and unique.
     181             :  */
     182             : BAT *
     183           9 : BATintersectcand(BAT *a, BAT *b)
     184             : {
     185           9 :         BAT *bn;
     186           9 :         oid *restrict p;
     187           9 :         struct canditer cia, cib;
     188             : 
     189           9 :         BATcheck(a, NULL);
     190           9 :         BATcheck(b, NULL);
     191             : 
     192           9 :         canditer_init(&cia, NULL, a);
     193           9 :         canditer_init(&cib, NULL, b);
     194             : 
     195           9 :         if (cia.ncand == 0 || cib.ncand == 0) {
     196           2 :                 bn = BATdense(0, 0, 0);
     197           2 :                 goto doreturn;
     198             :         }
     199             : 
     200           7 :         if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
     201             :                 /* both lists are dense */
     202           0 :                 bn = newdensecand(MAX(cia.seq, cib.seq), MIN(cia.seq + cia.ncand, cib.seq + cib.ncand));
     203           0 :                 goto doreturn;
     204             :         }
     205             : 
     206           7 :         bn = COLnew(0, TYPE_oid, MIN(cia.ncand, cib.ncand), TRANSIENT);
     207           7 :         if (bn == NULL)
     208           0 :                 goto doreturn;
     209           7 :         p = (oid *) Tloc(bn, 0);
     210           7 :         if (cia.tpe == cand_dense || cib.tpe == cand_dense) {
     211           7 :                 if (cib.tpe == cand_dense) {
     212           7 :                         struct canditer ci;
     213           7 :                         ci = cia;
     214           7 :                         cia = cib;
     215           7 :                         cib = ci;
     216             :                 }
     217             :                 /* only cia is dense */
     218             : 
     219             :                 /* search for first value in cib that is contained in cia */
     220           7 :                 canditer_setidx(&cib, canditer_search(&cib, cia.seq, true));
     221           7 :                 oid bo;
     222          33 :                 while (!is_oid_nil(bo = canditer_next(&cib)) && bo < cia.seq + cia.ncand)
     223          26 :                         *p++ = bo;
     224             :         } else {
     225             :                 /* a and b are both not dense */
     226           0 :                 oid ao = canditer_next(&cia);
     227           0 :                 oid bo = canditer_next(&cib);
     228           0 :                 while (!is_oid_nil(ao) && !is_oid_nil(bo)) {
     229           0 :                         if (ao < bo)
     230           0 :                                 ao = canditer_next(&cia);
     231           0 :                         else if (bo < ao)
     232           0 :                                 bo = canditer_next(&cib);
     233             :                         else {
     234           0 :                                 *p++ = ao;
     235           0 :                                 ao = canditer_next(&cia);
     236           0 :                                 bo = canditer_next(&cib);
     237             :                         }
     238             :                 }
     239             :         }
     240             : 
     241             :         /* properties */
     242           7 :         BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0)));
     243           7 :         bn->trevsorted = BATcount(bn) <= 1;
     244           7 :         bn->tsorted = true;
     245           7 :         bn->tkey = true;
     246           7 :         bn->tnil = false;
     247           7 :         bn->tnonil = true;
     248           7 :         bn = virtualize(bn);
     249           9 :   doreturn:
     250           9 :         TRC_DEBUG(ALGO, ALGOBATFMT "," ALGOBATFMT " -> " ALGOOPTBATFMT "\n",
     251             :                   ALGOBATPAR(a), ALGOBATPAR(b), ALGOOPTBATPAR(bn));
     252             :         return bn;
     253             : }
     254             : 
     255             : /* calculate the difference of two candidate lists and produce a new one
     256             :  */
     257             : BAT *
     258        8329 : BATdiffcand(BAT *a, BAT *b)
     259             : {
     260        8329 :         BAT *bn;
     261        8329 :         oid *restrict p;
     262        8329 :         struct canditer cia, cib;
     263             : 
     264        8329 :         BATcheck(a, NULL);
     265        8329 :         BATcheck(b, NULL);
     266             : 
     267        8329 :         canditer_init(&cia, NULL, a);
     268        8332 :         canditer_init(&cib, NULL, b);
     269             : 
     270        8329 :         if (cia.ncand == 0) {
     271           0 :                 bn = BATdense(0, 0, 0);
     272           0 :                 goto doreturn;
     273             :         }
     274        8329 :         if (cib.ncand == 0) {
     275         712 :                 bn = canditer_slice(&cia, 0, cia.ncand);
     276         712 :                 goto doreturn;
     277             :         }
     278             : 
     279        7617 :         if (cib.seq > canditer_last(&cia) || canditer_last(&cib) < cia.seq) {
     280             :                 /* no overlap, return a */
     281           0 :                 bn = canditer_slice(&cia, 0, cia.ncand);
     282           0 :                 goto doreturn;
     283             :         }
     284             : 
     285        7617 :         if (cia.tpe == cand_dense && cib.tpe == cand_dense) {
     286             :                 /* both a and b are dense */
     287        3524 :                 if (cia.seq < cib.seq) {
     288             :                         /* a starts before b */
     289         678 :                         if (cia.seq + cia.ncand <= cib.seq + cib.ncand) {
     290             :                                 /* b overlaps with end of a */
     291          99 :                                 bn = canditer_slice(&cia, 0, cib.seq - cia.seq);
     292          99 :                                 goto doreturn;
     293             :                         }
     294             :                         /* b is subset of a */
     295         579 :                         bn = canditer_slice2(&cia, 0, cib.seq - cia.seq,
     296             :                                              cib.seq + cib.ncand - cia.seq,
     297             :                                              cia.ncand);
     298         582 :                         goto doreturn;
     299             :                 } else {
     300             :                         /* cia.seq >= cib.seq */
     301        2846 :                         if (cia.seq + cia.ncand > cib.seq + cib.ncand) {
     302             :                                 /* b overlaps with beginning of a */
     303         198 :                                 bn = canditer_slice(&cia, cib.seq + cib.ncand - cia.seq, cia.ncand);
     304         198 :                                 goto doreturn;
     305             :                         }
     306             :                         /* a is subset f b */
     307        2648 :                         bn = BATdense(0, 0, 0);
     308        2647 :                         goto doreturn;
     309             :                 }
     310             :         }
     311        4093 :         if (cib.tpe == cand_dense) {
     312             :                 /* b is dense and a is not: we can copy the part of a
     313             :                  * that is before the start of b and the part of a
     314             :                  * that is after the end of b */
     315        2496 :                 bn = canditer_slice2val(&cia, oid_nil, cib.seq,
     316             :                                         cib.seq + cib.ncand, oid_nil);
     317        2497 :                 goto doreturn;
     318             :         }
     319             : 
     320             :         /* b is not dense */
     321        1597 :         bn = COLnew(0, TYPE_oid, BATcount(a), TRANSIENT);
     322        1597 :         if (bn == NULL)
     323           0 :                 goto doreturn;
     324        1597 :         p = Tloc(bn, 0);
     325             :         /* find first position in b that is in range of a */
     326        1597 :         canditer_setidx(&cib, canditer_search(&cib, cia.seq, true));
     327             :         {
     328             :                 /* because we may jump over this declaration, we put
     329             :                  * it inside a block */
     330        1595 :                 oid ob = canditer_next(&cib);
     331     6956031 :                 for (BUN i = 0; i < cia.ncand; i++) {
     332     6954434 :                         oid oa = canditer_next(&cia);
     333     9373238 :                         while (!is_oid_nil(ob) && ob < oa) {
     334     2418159 :                                 ob = canditer_next(&cib);
     335             :                         }
     336     6954436 :                         if (is_oid_nil(ob) || oa < ob)
     337     4551014 :                                 *p++ = oa;
     338             :                 }
     339             :         }
     340             : 
     341             :         /* properties */
     342        1597 :         BATsetcount(bn, (BUN) (p - (oid *) Tloc(bn, 0)));
     343        1597 :         bn->trevsorted = BATcount(bn) <= 1;
     344        1597 :         bn->tsorted = true;
     345        1597 :         bn->tkey = true;
     346        1597 :         bn->tnil = false;
     347        1597 :         bn->tnonil = true;
     348        1597 :         bn = virtualize(bn);
     349        8326 :   doreturn:
     350        8326 :         TRC_DEBUG(ALGO, ALGOBATFMT "," ALGOBATFMT " -> " ALGOOPTBATFMT "\n",
     351             :                   ALGOBATPAR(a), ALGOBATPAR(b), ALGOOPTBATPAR(bn));
     352             :         return bn;
     353             : }
     354             : 
     355             : /* return offset of first value in cand that is >= o */
     356             : static inline BUN
     357      964360 : binsearchcand(const oid *cand, BUN hi, oid o)
     358             : {
     359      964360 :         BUN lo = 0;
     360             : 
     361      964360 :         if (o <= cand[lo])
     362             :                 return 0;
     363      465027 :         if (o > cand[hi])
     364      412771 :                 return hi + 1;
     365             :         /* loop invariant: cand[lo] < o <= cand[hi] */
     366      289935 :         while (hi > lo + 1) {
     367      258437 :                 BUN mid = (lo + hi) / 2;
     368      258437 :                 if (cand[mid] == o)
     369       20758 :                         return mid;
     370      237679 :                 if (cand[mid] < o)
     371             :                         lo = mid;
     372             :                 else
     373      167097 :                         hi = mid;
     374             :         }
     375             :         return hi;
     376             : }
     377             : 
     378             : /* count number of 1 bits in ci->mask between bit positions lo
     379             :  * (inclusive) and hi (not inclusive) */
     380             : static BUN
     381       41010 : count_mask_bits(const struct canditer *ci, BUN lo, BUN hi)
     382             : {
     383       41010 :         BUN n;
     384       41010 :         assert(lo <= hi);
     385       41010 :         assert(ci->tpe == cand_mask);
     386       41010 :         if (lo == hi)
     387             :                 return 0;
     388       41010 :         lo += ci->firstbit;
     389       41010 :         hi += ci->firstbit;
     390       41010 :         BUN loi = lo / 32;
     391       41010 :         BUN hii = hi / 32;
     392       41010 :         lo %= 32;
     393       41010 :         hi %= 32;
     394       41010 :         if (loi == hii)
     395        9054 :                 return (BUN) candmask_pop((ci->mask[loi] & ((1U << hi) - 1)) >> lo);
     396       31956 :         n = (BUN) candmask_pop(ci->mask[loi++] >> lo);
     397     2324949 :         while (loi < hii)
     398     2292993 :                 n += (BUN) candmask_pop(ci->mask[loi++]);
     399       31956 :         if (hi != 0)
     400        5226 :                 n += (BUN) candmask_pop(ci->mask[loi] & ((1U << hi) - 1));
     401             :         return n;
     402             : }
     403             : 
     404             : /* initialize a candidate iterator */
     405             : void
     406    10633688 : canditer_init(struct canditer *ci, BAT *b, BAT *s)
     407             : {
     408    10633688 :         assert(ci != NULL);
     409    10633688 :         BUN batcount = 0;
     410    10633688 :         oid hseq = 0;
     411    10633688 :         if (b) {
     412     9667668 :                 MT_lock_set(&b->theaplock);
     413     9677868 :                 batcount = BATcount(b);
     414     9677868 :                 hseq = b->hseqbase;
     415     9677868 :                 MT_lock_unset(&b->theaplock);
     416             :         }
     417             : 
     418    10658785 :         if (s == NULL) {
     419     5948413 :                 if (b == NULL) {
     420             :                         /* trivially empty candidate list */
     421           0 :                         *ci = (struct canditer) {
     422             :                                 .tpe = cand_dense,
     423             :                         };
     424           0 :                         return;
     425             :                 }
     426             :                 /* every row is a candidate */
     427     5948413 :                 *ci = (struct canditer) {
     428             :                         .tpe = cand_dense,
     429             :                         .seq = hseq,
     430             :                         .hseq = hseq,
     431             :                         .ncand = batcount,
     432             :                 };
     433     5948413 :                 return;
     434             :         }
     435             : 
     436     4710372 :         assert(ATOMtype(s->ttype) == TYPE_oid || s->ttype == TYPE_msk);
     437     4710372 :         assert(s->ttype == TYPE_msk|| s->tsorted);
     438     4710372 :         assert(s->ttype == TYPE_msk|| s->tkey);
     439     4710372 :         assert(s->ttype == TYPE_msk|| s->tnonil);
     440     4710372 :         assert(s->ttype == TYPE_void || s->tvheap == NULL);
     441             : 
     442     4710372 :         BUN cnt = BATcount(s);
     443             : 
     444     4710372 :         if (cnt == 0 || (b != NULL && batcount == 0)) {
     445             :                 /* candidate list for empty BAT or empty candidate list */
     446     1306158 :                 *ci = (struct canditer) {
     447             :                         .tpe = cand_dense,
     448     1306158 :                         .hseq = s->hseqbase,
     449             :                         .s = s,
     450             :                 };
     451     1306158 :                 return;
     452             :         }
     453             : 
     454     3404214 :         *ci = (struct canditer) {
     455     3404214 :                 .seq = s->tseqbase,
     456     3404214 :                 .hseq = s->hseqbase,
     457             :                 .s = s,
     458             :         };
     459             : 
     460     3404214 :         if (mask_cand(s)) {
     461       41078 :                 ci->tpe = cand_mask;
     462       41078 :                 ci->mask = (const uint32_t *) ccand_first(s);
     463       41078 :                 ci->seq = s->tseqbase - (oid) CCAND(s)->firstbit;
     464       41078 :                 ci->hseq = s->hseqbase;
     465       41078 :                 ci->nvals = ccand_free(s) / sizeof(uint32_t);
     466       41078 :                 cnt = ci->nvals * 32;
     467     3363136 :         } else if (s->ttype == TYPE_msk) {
     468           0 :                 assert(0);
     469             :                 ci->tpe = cand_mask;
     470             :                 ci->mask = (const uint32_t *) s->theap->base;
     471             :                 ci->seq = s->hseqbase;
     472             :                 ci->nvals = (cnt + 31U) / 32U;
     473     3363136 :         } else if (s->ttype == TYPE_void) {
     474     2823974 :                 assert(!is_oid_nil(ci->seq));
     475     2823974 :                 if (s->tvheap) {
     476       94942 :                         assert(ccand_free(s) % SIZEOF_OID == 0);
     477       94942 :                         ci->nvals = ccand_free(s) / SIZEOF_OID;
     478       94942 :                         if (ci->nvals > 0) {
     479       94969 :                                 ci->tpe = cand_except;
     480       94969 :                                 ci->oids = (const oid *) ccand_first(s);
     481             :                         } else {
     482             :                                 /* why the vheap? */
     483             :                                 ci->tpe = cand_dense;
     484             :                         }
     485             :                 } else {
     486             :                         ci->tpe = cand_dense;
     487             :                 }
     488      539162 :         } else if (is_oid_nil(ci->seq)) {
     489      500256 :                 ci->tpe = cand_materialized;
     490      500256 :                 ci->oids = (const oid *) Tloc(s, 0);
     491      500256 :                 ci->seq = ci->oids[0];
     492      500256 :                 ci->nvals = cnt;
     493             :         } else {
     494             :                 /* materialized dense: no exceptions */
     495             :                 ci->tpe = cand_dense;
     496             :         }
     497     3404214 :         switch (ci->tpe) {
     498      500260 :         case cand_materialized:
     499      500260 :                 if (b != NULL) {
     500      389676 :                         BUN p = binsearchcand(ci->oids, cnt - 1U, hseq);
     501             :                         /* p == cnt means candidate list is completely
     502             :                          * before b */
     503      389679 :                         ci->offset = p;
     504      389679 :                         ci->oids += p;
     505      389679 :                         cnt -= p;
     506      389679 :                         if (cnt > 0) {
     507      389679 :                                 cnt = binsearchcand(ci->oids, cnt  - 1U,
     508             :                                                     hseq + batcount);
     509             :                                 /* cnt == 0 means candidate list is
     510             :                                  * completely after b */
     511             :                         }
     512      389682 :                         if (cnt == 0) {
     513             :                                 /* no overlap */
     514           0 :                                 *ci = (struct canditer) {
     515             :                                         .tpe = cand_dense,
     516             :                                         .s = s,
     517             :                                 };
     518           0 :                                 return;
     519             :                         }
     520      389682 :                         ci->seq = ci->oids[0];
     521      389682 :                         ci->nvals = cnt;
     522      389682 :                         if (ci->oids[cnt - 1U] - ci->seq == cnt - 1U) {
     523             :                                 /* actually dense */
     524          61 :                                 ci->tpe = cand_dense;
     525          61 :                                 ci->oids = NULL;
     526          61 :                                 ci->nvals = 0;
     527             :                         }
     528             :                 }
     529             :                 break;
     530       94991 :         case cand_except:
     531             :                 /* exceptions must all be within range of s */
     532       94991 :                 assert(ci->oids[0] >= ci->seq);
     533       94991 :                 assert(ci->oids[ci->nvals - 1U] < ci->seq + cnt + ci->nvals);
     534             :                 /* prune exceptions at either end of range of s */
     535     3623224 :                 while (ci->nvals > 0 && ci->oids[0] == ci->seq) {
     536     3528233 :                         ci->nvals--;
     537     3528233 :                         ci->oids++;
     538     3528233 :                         ci->seq++;
     539             :                 }
     540       95199 :                 while (ci->nvals > 0 &&
     541       93950 :                        ci->oids[ci->nvals - 1U] == ci->seq + cnt + ci->nvals - 1U)
     542         208 :                         ci->nvals--;
     543       94991 :                 if (b != NULL) {
     544       87525 :                         if (ci->seq + cnt + ci->nvals <= hseq ||
     545       87525 :                             ci->seq >= hseq + batcount) {
     546             :                                 /* candidate list does not overlap with b */
     547           0 :                                 *ci = (struct canditer) {
     548             :                                         .tpe = cand_dense,
     549             :                                         .s = s,
     550             :                                 };
     551           0 :                                 return;
     552             :                         }
     553             :                 }
     554       94991 :                 if (ci->nvals > 0) {
     555       93745 :                         if (b == NULL)
     556             :                                 break;
     557       86621 :                         BUN p;
     558       86621 :                         p = binsearchcand(ci->oids, ci->nvals - 1U, hseq);
     559       86621 :                         if (p == ci->nvals) {
     560             :                                 /* all exceptions before start of b */
     561           0 :                                 ci->offset = hseq - ci->seq - ci->nvals;
     562           0 :                                 cnt = ci->seq + cnt + ci->nvals - hseq;
     563           0 :                                 ci->seq = hseq;
     564           0 :                                 ci->nvals = 0;
     565           0 :                                 ci->tpe = cand_dense;
     566           0 :                                 ci->oids = NULL;
     567           0 :                                 break;
     568             :                         }
     569       86621 :                         assert(hseq > ci->seq || p == 0);
     570       86621 :                         if (hseq > ci->seq) {
     571             :                                 /* skip candidates, possibly including
     572             :                                  * exceptions */
     573           0 :                                 ci->oids += p;
     574           0 :                                 ci->nvals -= p;
     575           0 :                                 p = hseq - ci->seq - p;
     576           0 :                                 cnt -= p;
     577           0 :                                 ci->offset += p;
     578           0 :                                 ci->seq = hseq;
     579             :                         }
     580       86621 :                         if (ci->seq + cnt + ci->nvals > hseq + batcount) {
     581           0 :                                 p = binsearchcand(ci->oids, ci->nvals - 1U,
     582             :                                                   hseq + batcount);
     583           0 :                                 ci->nvals = p;
     584           0 :                                 cnt = hseq + batcount - ci->seq - ci->nvals;
     585             :                         }
     586       86621 :                         while (ci->nvals > 0 && ci->oids[0] == ci->seq) {
     587           0 :                                 ci->nvals--;
     588           0 :                                 ci->oids++;
     589           0 :                                 ci->seq++;
     590             :                         }
     591       86621 :                         while (ci->nvals > 0 &&
     592       86621 :                                ci->oids[ci->nvals - 1U] == ci->seq + cnt + ci->nvals - 1U)
     593           0 :                                 ci->nvals--;
     594       86621 :                         if (ci->nvals > 0)
     595             :                                 break;
     596             :                 }
     597        1246 :                 ci->tpe = cand_dense;
     598        1246 :                 ci->oids = NULL;
     599             :                 /* fall through */
     600     2769172 :         case cand_dense:
     601     2769172 :                 if (b != NULL) {
     602     2398060 :                         if (ci->seq + cnt <= hseq ||
     603     2398060 :                             ci->seq >= hseq + batcount) {
     604             :                                 /* no overlap */
     605           0 :                                 *ci = (struct canditer) {
     606             :                                         .tpe = cand_dense,
     607             :                                         .s = s,
     608             :                                 };
     609           0 :                                 return;
     610             :                         }
     611     2398060 :                         if (hseq > ci->seq) {
     612           0 :                                 cnt -= hseq - ci->seq;
     613           0 :                                 ci->offset += hseq - ci->seq;
     614           0 :                                 ci->seq = hseq;
     615             :                         }
     616     2398060 :                         if (ci->seq + cnt > hseq + batcount)
     617           0 :                                 cnt = hseq + batcount - ci->seq;
     618             :                 }
     619             :                 break;
     620       41037 :         case cand_mask:
     621       41037 :                 assert(s->tseqbase != oid_nil);
     622       41037 :                 if (b != NULL) {
     623       15104 :                         if (ci->seq + cnt <= hseq ||
     624       15104 :                             ci->seq >= hseq + batcount) {
     625             :                                 /* no overlap */
     626           0 :                                 *ci = (struct canditer) {
     627             :                                         .tpe = cand_dense,
     628             :                                         .s = s,
     629             :                                 };
     630           0 :                                 return;
     631             :                         }
     632       15104 :                         if (hseq > ci->seq) {
     633           0 :                                 cnt = hseq - ci->seq;
     634           0 :                                 ci->mask += cnt / 32U;
     635           0 :                                 ci->firstbit = (uint8_t) (cnt % 32U);
     636           0 :                                 cnt = BATcount(s) - cnt;
     637           0 :                                 ci->seq = hseq;
     638             :                         }
     639       15104 :                         if (ci->seq + cnt > hseq + batcount) {
     640       14748 :                                 cnt = hseq + batcount - ci->seq;
     641             :                         }
     642       15104 :                         ci->nvals = (ci->firstbit + cnt + 31U) / 32U;
     643             :                 }
     644             :                 /* if first value is only partially used, check
     645             :                  * whether there are any 1 bits in the used part */
     646       41037 :                 if (ci->firstbit > 0 && (ci->mask[0] >> ci->firstbit) == 0) {
     647           0 :                         if (cnt <= 32U - ci->firstbit) {
     648             :                                 cnt = 0;
     649             :                                 /* returns below */
     650             :                         } else {
     651           0 :                                 cnt -= 32U - ci->firstbit;
     652           0 :                                 ci->firstbit = 0;
     653           0 :                                 ci->mask++;
     654           0 :                                 ci->nvals--;
     655             :                         }
     656             :                 }
     657             :                 /* skip over any zero mask words that are used completely */
     658       41037 :                 if (ci->firstbit == 0) {
     659       88620 :                         while (cnt >= 32U && ci->mask[0] == 0) {
     660       47574 :                                 cnt -= 32U;
     661       47574 :                                 ci->mask++;
     662       47574 :                                 ci->nvals--;
     663             :                         }
     664             :                 }
     665             :                 /* check whether there are any 1 bits in the last word */
     666       41037 :                 if (cnt == 0 ||
     667       41037 :                     (cnt < 32U - ci->firstbit &&
     668        9073 :                      ((ci->mask[0] >> ci->firstbit) & ((1U << cnt) - 1U)) == 0)) {
     669             :                         /* no one bits in the whole relevant portion
     670             :                          * of the bat */
     671           0 :                         *ci = (struct canditer) {
     672             :                                 .tpe = cand_dense,
     673             :                                 .s = s,
     674             :                         };
     675           0 :                         return;
     676             :                 }
     677             :                 /* here we know there are 1 bits in the first mask
     678             :                  * word */
     679       41037 :                 int i = candmask_lobit(ci->mask[0] >> ci->firstbit);
     680       41037 :                 assert(i >= 0);      /* there should be a set bit */
     681       41037 :                 ci->firstbit += i;
     682       41037 :                 cnt -= i;
     683       41037 :                 if (mask_cand(s))
     684       41026 :                         ci->mskoff = s->tseqbase - (oid) CCAND(s)->firstbit + (ci->mask - (const uint32_t *) ccand_first(s)) * 32U;
     685             :                 else
     686          11 :                         ci->mskoff = s->tseqbase + (ci->mask - (const uint32_t *) s->theap->base) * 32U;
     687       41037 :                 ci->seq = ci->mskoff + ci->firstbit;
     688       41037 :                 ci->nextbit = ci->firstbit;
     689             :                 /* at this point we know that bit ci->firstbit is set
     690             :                  * in ci->mask[0] */
     691       41037 :                 ci->lastbit = (ci->firstbit + cnt - 1U) % 32U + 1U;
     692       41037 :                 if (ci->lastbit < 32 &&
     693       14737 :                     (ci->mask[ci->nvals - 1] & ((1U << ci->lastbit) - 1)) == 0) {
     694             :                         /* last partial word is all zero */
     695         434 :                         cnt -= ci->lastbit;
     696         434 :                         ci->lastbit = 32;
     697         434 :                         ci->nvals--;
     698             :                 }
     699       41037 :                 if (ci->lastbit == 32) {
     700             :                         /* "remove" zero words at the end */
     701       41356 :                         while (cnt >= 32 && ci->mask[ci->nvals - 1] == 0) {
     702       14620 :                                 ci->nvals--;
     703       14620 :                                 cnt -= 32;
     704             :                         }
     705             :                 }
     706       41037 :                 ci->ncand = count_mask_bits(ci, 0, cnt);
     707       41015 :                 return;
     708             :         default:
     709           0 :                 MT_UNREACHABLE();
     710             :         }
     711     3363183 :         ci->ncand = cnt;
     712     3363183 :         ci->hseq += ci->offset;
     713             : }
     714             : 
     715             : /* return the next candidate without advancing */
     716             : oid
     717     2757347 : canditer_peek(const struct canditer *ci)
     718             : {
     719     2757347 :         oid o = oid_nil;
     720     2757347 :         if (ci->next == ci->ncand)
     721        6078 :                 return oid_nil;
     722     2751269 :         switch (ci->tpe) {
     723     2451314 :         case cand_dense:
     724     2451314 :                 o = ci->seq + ci->next;
     725     2451314 :                 break;
     726      280560 :         case cand_materialized:
     727      280560 :                 assert(ci->next < ci->nvals);
     728      280560 :                 o = ci->oids[ci->next];
     729      280560 :                 break;
     730       19395 :         case cand_except:
     731       19395 :                 o = ci->seq + ci->add + ci->next;
     732       19437 :                 for (oid a = ci->add; a < ci->nvals && o == ci->oids[a]; a++)
     733          42 :                         o++;
     734             :                 break;
     735           0 :         case cand_mask: {
     736           0 :                 BUN m = ci->nextmsk;
     737           0 :                 uint8_t b = ci->nextbit;
     738           0 :                 while ((ci->mask[m] >> b) == 0) {
     739           0 :                         m++;
     740           0 :                         b = 0;
     741             :                 }
     742           0 :                 b += candmask_lobit(ci->mask[m] >> b);
     743           0 :                 o = ci->mskoff + m * 32 + b;
     744           0 :                 break;
     745             :         }
     746             :         default:
     747           0 :                 MT_UNREACHABLE();
     748             :         }
     749             :         return o;
     750             : }
     751             : 
     752             : /* return the previous candidate */
     753             : oid
     754        2291 : canditer_prev(struct canditer *ci)
     755             : {
     756        2291 :         if (ci->next == 0)
     757           0 :                 return oid_nil;
     758        2291 :         switch (ci->tpe) {
     759        2291 :         case cand_dense:
     760        2291 :                 return ci->seq + --ci->next;
     761           0 :         case cand_materialized:
     762           0 :                 return ci->oids[--ci->next];
     763             :         case cand_except:
     764           0 :                 break;
     765           0 :         case cand_mask:
     766           0 :                 for (;;) {
     767           0 :                         if (ci->nextbit == 0) {
     768           0 :                                 ci->nextbit = 32;
     769           0 :                                 while (ci->mask[--ci->nextmsk] == 0)
     770             :                                         ;
     771             :                         }
     772           0 :                         if (ci->mask[ci->nextmsk] & (1U << --ci->nextbit))
     773             :                                 break;
     774             :                 }
     775           0 :                 ci->next--;
     776           0 :                 return ci->mskoff + ci->nextmsk * 32 + ci->nextbit;
     777             :         default:
     778           0 :                 MT_UNREACHABLE();
     779             :         }
     780           0 :         oid o = ci->seq + ci->add + --ci->next;
     781           0 :         while (ci->add > 0 && o == ci->oids[ci->add - 1]) {
     782           0 :                 ci->add--;
     783           0 :                 o--;
     784             :         }
     785             :         return o;
     786             : }
     787             : 
     788             : /* return the previous candidate without retreating */
     789             : oid
     790        5131 : canditer_peekprev(const struct canditer *ci)
     791             : {
     792        5131 :         oid o = oid_nil;
     793             : 
     794        5131 :         if (ci->next == 0)
     795           0 :                 return oid_nil;
     796        5131 :         switch (ci->tpe) {
     797        5131 :         case cand_dense:
     798        5131 :                 return ci->seq + ci->next - 1;
     799           0 :         case cand_materialized:
     800           0 :                 return ci->oids[ci->next - 1];
     801           0 :         case cand_except:
     802           0 :                 o = ci->seq + ci->add + ci->next - 1;
     803           0 :                 for (oid a = ci->add; a > 0 && o == ci->oids[a - 1]; a--)
     804           0 :                         o--;
     805             :                 break;
     806           0 :         case cand_mask: {
     807           0 :                 BUN m = ci->nextmsk;
     808           0 :                 uint8_t b = ci->nextbit;
     809           0 :                 do {
     810           0 :                         if (b == 0) {
     811           0 :                                 b = 32;
     812           0 :                                 while (ci->mask[--m] == 0)
     813             :                                         ;
     814             :                         }
     815           0 :                 } while ((ci->mask[m] & (1U << --b)) == 0);
     816           0 :                 o = ci->mskoff + m * 32 + b;
     817           0 :                 if (++b == 32) {
     818             :                         b = 0;
     819             :                         m++;
     820             :                 }
     821             :                 break;
     822             :         }
     823             :         default:
     824           0 :                 MT_UNREACHABLE();
     825             :         }
     826             :         return o;
     827             : }
     828             : 
     829             : /* if o is a candidate, return it, else return the next candidate from
     830             :  * the cand_mask iterator ci after (if next is set) or before (if next
     831             :  * is unset) o; if there are no more candidates, return oid_nil */
     832             : oid
     833           0 : canditer_mask_next(const struct canditer *ci, oid o, bool next)
     834             : {
     835           0 :         if (o < ci->mskoff)
     836           0 :                 return next ? ci->mskoff + ci->firstbit : oid_nil;
     837           0 :         o -= ci->mskoff;
     838           0 :         BUN p = o / 32;
     839           0 :         o %= 32;
     840           0 :         if (p >= ci->nvals || (p == ci->nvals - 1 && o >= ci->lastbit))
     841           0 :                 return next ? oid_nil : canditer_last(ci);
     842           0 :         if (next) {
     843           0 :                 while ((ci->mask[p] & (1U << o)) == 0) {
     844           0 :                         if (++o == 32) {
     845           0 :                                 o = 0;
     846           0 :                                 if (++p == ci->nvals)
     847           0 :                                         return oid_nil;
     848             :                         }
     849             :                 }
     850           0 :                 if (p == ci->nvals - 1 && o >= ci->lastbit)
     851           0 :                         return oid_nil;
     852             :         } else {
     853           0 :                 while ((ci->mask[p] & (1U << o)) == 0) {
     854           0 :                         if (o == 0) {
     855           0 :                                 o = 31;
     856           0 :                                 if (p == 0)
     857           0 :                                         return oid_nil;
     858           0 :                                 p--;
     859             :                         } else {
     860           0 :                                 o--;
     861             :                         }
     862             :                 }
     863           0 :                 if (p == 0 && o < ci->firstbit)
     864           0 :                         return oid_nil;
     865             :         }
     866           0 :         return ci->mskoff + 32 * p + o;
     867             : }
     868             : 
     869             : /* return the last candidate */
     870             : oid
     871      269054 : canditer_last(const struct canditer *ci)
     872             : {
     873      269054 :         if (ci->ncand == 0)
     874           0 :                 return oid_nil;
     875      269054 :         switch (ci->tpe) {
     876      238519 :         case cand_dense:
     877      238519 :                 return ci->seq + ci->ncand - 1;
     878       22806 :         case cand_materialized:
     879       22806 :                 return ci->oids[ci->ncand - 1];
     880        6338 :         case cand_except:
     881        6338 :                 return ci->seq + ci->ncand + ci->nvals - 1;
     882        1391 :         case cand_mask:
     883       25573 :                 for (uint8_t i = ci->lastbit; i > 0; ) {
     884       25573 :                         if (ci->mask[ci->nvals - 1] & (1U << --i))
     885        1391 :                                 return ci->mskoff + (ci->nvals - 1) * 32 + i;
     886             :                 }
     887           0 :                 break;          /* cannot happen */
     888             :         default:
     889           0 :                 MT_UNREACHABLE();
     890             :         }
     891           0 :         return oid_nil;         /* cannot happen */
     892             : }
     893             : 
     894             : /* return the candidate at the given index */
     895             : oid
     896   337354957 : canditer_idx(const struct canditer *ci, BUN p)
     897             : {
     898   337354957 :         if (p >= ci->ncand)
     899           0 :                 return oid_nil;
     900   337354957 :         switch (ci->tpe) {
     901   172877506 :         case cand_dense:
     902   172877506 :                 return ci->seq + p;
     903   164189351 :         case cand_materialized:
     904   164189351 :                 return ci->oids[p];
     905      288005 :         case cand_except: {
     906      288005 :                 oid o = ci->seq + p;
     907      288005 :                 if (o < ci->oids[0])
     908             :                         return o;
     909      197071 :                 if (o + ci->nvals > ci->oids[ci->nvals - 1])
     910             :                         return o + ci->nvals;
     911      124777 :                 BUN lo = 0, hi = ci->nvals - 1;
     912      857812 :                 while (hi - lo > 1) {
     913      608258 :                         BUN mid = (hi + lo) / 2;
     914      608258 :                         if (ci->oids[mid] - mid > o)
     915             :                                 hi = mid;
     916             :                         else
     917      378222 :                                 lo = mid;
     918             :                 }
     919      124777 :                 return o + hi;
     920             :         }
     921          95 :         case cand_mask: {
     922          95 :                 BUN x;
     923          95 :                 if ((x = candmask_pop(ci->mask[0] >> ci->firstbit)) > p) {
     924          24 :                         for (uint8_t i = ci->firstbit; ; i++) {
     925         107 :                                 if (ci->mask[0] & (1U << i)) {
     926         107 :                                         if (p == 0)
     927          83 :                                                 return ci->mskoff + i;
     928          24 :                                         p--;
     929             :                                 }
     930             :                         }
     931             :                 }
     932          63 :                 for (BUN n = 1; n < ci->nvals; n++) {
     933          63 :                         uint32_t mask = ci->mask[n];
     934          63 :                         p -= x;
     935          63 :                         x = candmask_pop(mask);
     936          63 :                         if (x > p) {
     937         122 :                                 for (uint8_t i = 0; ; i++) {
     938         134 :                                         if (mask & (1U << i)) {
     939         134 :                                                 if (p == 0)
     940          12 :                                                         return ci->mskoff + n * 32 + i;
     941         122 :                                                 p--;
     942             :                                         }
     943             :                                 }
     944             :                         }
     945             :                 }
     946           0 :                 break;          /* cannot happen */
     947             :         }
     948             :         default:
     949           0 :                 MT_UNREACHABLE();
     950             :         }
     951           0 :         return oid_nil;         /* cannot happen */
     952             : }
     953             : 
     954             : /* set the index for the next candidate to be returned */
     955             : void
     956      690031 : canditer_setidx(struct canditer *ci, BUN p)
     957             : {
     958      690031 :         if (p != ci->next) {
     959      653717 :                 if (p >= ci->ncand) {
     960       67211 :                         ci->next = ci->ncand;
     961       67211 :                         switch (ci->tpe) {
     962          28 :                         case cand_except:
     963          28 :                                 ci->add = ci->nvals;
     964          28 :                                 break;
     965           0 :                         case cand_mask:
     966           0 :                                 ci->nextbit = ci->lastbit;
     967           0 :                                 ci->nextmsk = ci->nvals;
     968           0 :                                 if (ci->nextbit == 32)
     969           0 :                                         ci->nextbit = 0;
     970             :                                 else
     971           0 :                                         ci->nextmsk--;
     972             :                                 break;
     973             :                         default:
     974             :                                 break;
     975             :                         }
     976             :                 } else {
     977      586506 :                         ci->next = p;
     978      586506 :                         switch (ci->tpe) {
     979        2442 :                         case cand_except:
     980        2442 :                                 ci->add = canditer_idx(ci, p) - ci->seq - p;
     981        2442 :                                 break;
     982           0 :                         case cand_mask: {
     983           0 :                                 oid o = canditer_idx(ci, p) - ci->mskoff;
     984           0 :                                 ci->nextmsk = o / 32;
     985           0 :                                 ci->nextbit = (uint8_t) (o % 32);
     986           0 :                                 break;
     987             :                         }
     988             :                         default:
     989             :                                 break;
     990             :                         }
     991             :                 }
     992             :         }
     993      690031 : }
     994             : 
     995             : /* reset */
     996             : void
     997      806182 : canditer_reset(struct canditer *ci)
     998             : {
     999      806182 :         if (ci->tpe == cand_mask) {
    1000           0 :                 ci->nextbit = ci->firstbit;
    1001           0 :                 ci->nextmsk = 0;
    1002             :         } else {
    1003      806182 :                 ci->add = 0;
    1004             :         }
    1005      806182 :         ci->next = 0;
    1006      806182 : }
    1007             : 
    1008             : /* return index of given candidate if it occurs; if the candidate does
    1009             :  * not occur, if next is set, return index of next larger candidate,
    1010             :  * if next is not set, return BUN_NONE */
    1011             : BUN
    1012     1241720 : canditer_search(const struct canditer *ci, oid o, bool next)
    1013             : {
    1014     1241720 :         BUN p;
    1015             : 
    1016     1241720 :         switch (ci->tpe) {
    1017     1142473 :         case cand_dense:
    1018     1142473 :                 if (o < ci->seq)
    1019        1059 :                         return next ? 0 : BUN_NONE;
    1020     1141414 :                 if (o >= ci->seq + ci->ncand)
    1021      206873 :                         return next ? ci->ncand : BUN_NONE;
    1022      934541 :                 return o - ci->seq;
    1023       52846 :         case cand_materialized:
    1024       52846 :                 if (ci->nvals == 0)
    1025             :                         return 0;
    1026       52873 :                 p = binsearchcand(ci->oids, ci->nvals - 1, o);
    1027       52892 :                 if (next || (p != ci->nvals && ci->oids[p] == o))
    1028       52654 :                         return p;
    1029             :                 break;
    1030       46401 :         case cand_except:
    1031       46401 :                 if (o < ci->seq)
    1032           0 :                         return next ? 0 : BUN_NONE;
    1033       46401 :                 if (o >= ci->seq + ci->ncand + ci->nvals)
    1034         942 :                         return next ? ci->ncand : BUN_NONE;
    1035       45459 :                 p = binsearchcand(ci->oids, ci->nvals - 1, o);
    1036       45459 :                 if (next || p == ci->nvals || ci->oids[p] != o)
    1037       45304 :                         return o - ci->seq - p;
    1038             :                 break;
    1039           0 :         case cand_mask:
    1040           0 :                 if (o < ci->mskoff) {
    1041           0 :                         return next ? 0 : BUN_NONE;
    1042             :                 }
    1043           0 :                 o -= ci->mskoff;
    1044           0 :                 p = o / 32;
    1045           0 :                 if (p >= ci->nvals)
    1046           0 :                         return next ? ci->ncand : BUN_NONE;
    1047           0 :                 o %= 32;
    1048           0 :                 if (p == ci->nvals - 1 && o >= ci->lastbit)
    1049           0 :                         return next ? ci->ncand : BUN_NONE;
    1050           0 :                 if (next || ci->mask[p] & (1U << o))
    1051           0 :                         return count_mask_bits(ci, 0, p * 32 + o) + !(ci->mask[p] & (1U << o));
    1052             :                 break;
    1053             :         default:
    1054           0 :                 MT_UNREACHABLE();
    1055             :         }
    1056             :         return BUN_NONE;
    1057             : }
    1058             : 
    1059             : static BAT *
    1060        1847 : canditer_sliceval_mask(const struct canditer *ci, oid lo1, oid hi1, BUN cnt1,
    1061             :                        oid lo2, oid hi2, BUN cnt2)
    1062             : {
    1063        1847 :         assert(cnt2 == 0 || !is_oid_nil(lo2));
    1064        1847 :         assert(cnt2 == 0 || lo2 >= hi1);
    1065        1847 :         if (is_oid_nil(lo1) || lo1 < ci->mskoff + ci->firstbit)
    1066         397 :                 lo1 = ci->mskoff + ci->firstbit;
    1067        1847 :         if (is_oid_nil(hi1) || hi1 >= ci->mskoff + (ci->nvals - 1) * 32 + ci->lastbit)
    1068        1557 :                 hi1 = ci->mskoff + (ci->nvals - 1) * 32 + ci->lastbit;
    1069             : 
    1070        1847 :         BAT *bn = COLnew(0, TYPE_oid, cnt1 + cnt2, TRANSIENT);
    1071        1847 :         if (bn == NULL)
    1072             :                 return NULL;
    1073        1847 :         bn->tkey = true;
    1074             : 
    1075        1847 :         if (hi1 > ci->mskoff) {
    1076        1604 :                 if (lo1 < ci->mskoff)
    1077             :                         lo1 = 0;
    1078             :                 else
    1079        1604 :                         lo1 -= ci->mskoff;
    1080        1604 :                 hi1 -= ci->mskoff;
    1081      214356 :                 for (oid o = lo1; o < hi1 && cnt1 > 0; o++) {
    1082      212754 :                         if (ci->mask[o / 32] & (1U << (o % 32))) {
    1083      186720 :                                 if (BUNappend(bn, &(oid){o + ci->mskoff}, false) != GDK_SUCCEED) {
    1084           0 :                                         BBPreclaim(bn);
    1085           0 :                                         return NULL;
    1086             :                                 }
    1087      186718 :                                 cnt1--;
    1088             :                         }
    1089             :                 }
    1090             :         }
    1091        1845 :         if (cnt2 > 0) {
    1092         234 :                 if (lo2 < ci->mskoff + ci->firstbit)
    1093             :                         lo2 = ci->mskoff + ci->firstbit;
    1094         234 :                 if (is_oid_nil(hi2) || hi2 >= ci->mskoff + (ci->nvals - 1) * 32 + ci->lastbit)
    1095         234 :                         hi2 = ci->mskoff + (ci->nvals - 1) * 32 + ci->lastbit;
    1096             : 
    1097         234 :                 lo2 -= ci->mskoff;
    1098         234 :                 hi2 -= ci->mskoff;
    1099         636 :                 for (oid o = lo2; o < hi2 && cnt2 > 0; o++) {
    1100         402 :                         if (ci->mask[o / 32] & (1U << (o % 32))) {
    1101          64 :                                 if (BUNappend(bn, &(oid){o + ci->mskoff}, false) != GDK_SUCCEED) {
    1102           0 :                                         BBPreclaim(bn);
    1103           0 :                                         return NULL;
    1104             :                                 }
    1105          64 :                                 cnt2--;
    1106             :                         }
    1107             :                 }
    1108             :         }
    1109             :         return bn;
    1110             : }
    1111             : 
    1112             : /* return either an actual BATslice or a new BAT that contains the
    1113             :  * "virtual" slice of the input candidate list BAT; except, unlike
    1114             :  * BATslice, the hseqbase of the returned BAT is 0; note for cand_mask
    1115             :  * candidate iterators, the BUN values refer to number of 1 bits */
    1116             : BAT *
    1117      593722 : canditer_slice(const struct canditer *ci, BUN lo, BUN hi)
    1118             : {
    1119      593722 :         BAT *bn;
    1120      593722 :         oid o;
    1121      593722 :         BUN add;
    1122             : 
    1123      593722 :         assert(ci != NULL);
    1124             : 
    1125      593722 :         if (lo >= ci->ncand || lo >= hi)
    1126      138208 :                 return BATdense(0, 0, 0);
    1127      455514 :         if (hi > ci->ncand)
    1128             :                 hi = ci->ncand;
    1129      455514 :         if (hi - lo == 1)
    1130      371572 :                 return BATdense(0, canditer_idx(ci, lo), 1);
    1131       83942 :         switch (ci->tpe) {
    1132        4696 :         case cand_materialized:
    1133        4696 :                 if (ci->s) {
    1134        4696 :                         bn = BATslice(ci->s, lo + ci->offset, hi + ci->offset);
    1135        4696 :                         BAThseqbase(bn, 0);
    1136        4696 :                         return bn;
    1137             :                 }
    1138           0 :                 bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT);
    1139           0 :                 if (bn == NULL)
    1140             :                         return NULL;
    1141           0 :                 BATsetcount(bn, hi - lo);
    1142           0 :                 memcpy(Tloc(bn, 0), ci->oids + lo, (hi - lo) * sizeof(oid));
    1143           0 :                 break;
    1144       78948 :         case cand_dense:
    1145       78948 :                 return BATdense(0, ci->seq + lo, hi - lo);
    1146         287 :         case cand_except:
    1147         287 :                 o = canditer_idx(ci, lo);
    1148         287 :                 add = o - ci->seq - lo;
    1149         287 :                 assert(add <= ci->nvals);
    1150         287 :                 if (add == ci->nvals || o + hi - lo < ci->oids[add]) {
    1151             :                         /* after last exception or before next
    1152             :                          * exception: return dense sequence */
    1153         243 :                         return BATdense(0, o, hi - lo);
    1154             :                 }
    1155          44 :                 bn = COLnew(0, TYPE_oid, hi - lo, TRANSIENT);
    1156          44 :                 if (bn == NULL)
    1157             :                         return NULL;
    1158          44 :                 BATsetcount(bn, hi - lo);
    1159         390 :                 for (oid *dst = Tloc(bn, 0); lo < hi; lo++) {
    1160         500 :                         while (add < ci->nvals && o == ci->oids[add]) {
    1161         154 :                                 o++;
    1162         154 :                                 add++;
    1163             :                         }
    1164         346 :                         *dst++ = o++;
    1165             :                 }
    1166             :                 break;
    1167          11 :         case cand_mask:
    1168          11 :                 return canditer_sliceval_mask(ci, canditer_idx(ci, lo),
    1169             :                                               oid_nil, hi - lo,
    1170             :                                               oid_nil, oid_nil, 0);
    1171             :         default:
    1172           0 :                 MT_UNREACHABLE();
    1173             :         }
    1174          44 :         bn->tsorted = true;
    1175          44 :         bn->trevsorted = BATcount(bn) <= 1;
    1176          44 :         bn->tkey = true;
    1177          44 :         bn->tseqbase = oid_nil;
    1178          44 :         bn->tnil = false;
    1179          44 :         bn->tnonil = true;
    1180          44 :         bn->tminpos = 0;
    1181          44 :         bn->tmaxpos = BATcount(bn) - 1;
    1182          44 :         return virtualize(bn);
    1183             : }
    1184             : 
    1185             : /* like the above, except the bounds are given by values instead of
    1186             :  * indexes */
    1187             : BAT *
    1188      389710 : canditer_sliceval(const struct canditer *ci, oid lo, oid hi)
    1189             : {
    1190      389710 :         if (ci->tpe != cand_mask) {
    1191     1164314 :                 return canditer_slice(
    1192             :                         ci,
    1193      388098 :                         is_oid_nil(lo) ? 0 : canditer_search(ci, lo, true),
    1194      388108 :                         is_oid_nil(hi) ? ci->ncand : canditer_search(ci, hi, true));
    1195             :         }
    1196             : 
    1197        1602 :         return canditer_sliceval_mask(ci, lo, hi, ci->ncand,
    1198             :                                       oid_nil, oid_nil, 0);
    1199             : }
    1200             : 
    1201             : /* return the combination of two slices */
    1202             : BAT *
    1203       43348 : canditer_slice2(const struct canditer *ci, BUN lo1, BUN hi1, BUN lo2, BUN hi2)
    1204             : {
    1205       43348 :         BAT *bn;
    1206       43348 :         oid o;
    1207       43348 :         BUN add;
    1208             : 
    1209       43348 :         assert(lo1 <= hi1);
    1210       43348 :         assert(lo2 <= hi2);
    1211       43348 :         assert(hi1 <= lo2 || (lo2 == 0 && hi2 == 0));
    1212             : 
    1213       43348 :         if (hi1 == lo2)         /* consecutive slices: combine into one */
    1214        2999 :                 return canditer_slice(ci, lo1, hi2);
    1215       40349 :         if (lo2 == hi2 || hi1 >= ci->ncand || lo2 >= ci->ncand) {
    1216             :                 /* empty second slice */
    1217       33258 :                 return canditer_slice(ci, lo1, hi1);
    1218             :         }
    1219        7091 :         if (lo1 == hi1)         /* empty first slice */
    1220        2121 :                 return canditer_slice(ci, lo2, hi2);
    1221        4970 :         if (lo1 >= ci->ncand)     /* out of range */
    1222             :                 return BATdense(0, 0, 0);
    1223             : 
    1224        4970 :         if (hi2 >= ci->ncand)
    1225             :                 hi2 = ci->ncand;
    1226             : 
    1227        4970 :         bn = COLnew(0, TYPE_oid, hi1 - lo1 + hi2 - lo2, TRANSIENT);
    1228        4978 :         if (bn == NULL)
    1229             :                 return NULL;
    1230        4978 :         BATsetcount(bn, hi1 - lo1 + hi2 - lo2);
    1231        4978 :         bn->tsorted = true;
    1232        4978 :         bn->trevsorted = BATcount(bn) <= 1;
    1233        4978 :         bn->tkey = true;
    1234        4978 :         bn->tseqbase = oid_nil;
    1235        4978 :         bn->tnil = false;
    1236        4978 :         bn->tnonil = true;
    1237             : 
    1238        4978 :         oid *dst = Tloc(bn, 0);
    1239             : 
    1240        4978 :         switch (ci->tpe) {
    1241        2424 :         case cand_materialized:
    1242        2424 :                 memcpy(dst, ci->oids + lo1, (hi1 - lo1) * sizeof(oid));
    1243        2424 :                 memcpy(dst + hi1 - lo1, ci->oids + lo2, (hi2 - lo2) * sizeof(oid));
    1244        2424 :                 break;
    1245             :         case cand_dense:
    1246      944162 :                 while (lo1 < hi1)
    1247      941609 :                         *dst++ = ci->seq + lo1++;
    1248      276472 :                 while (lo2 < hi2)
    1249      273919 :                         *dst++ = ci->seq + lo2++;
    1250             :                 break;
    1251           1 :         case cand_except:
    1252           1 :                 o = canditer_idx(ci, lo1);
    1253           1 :                 add = o - ci->seq - lo1;
    1254           1 :                 assert(add <= ci->nvals);
    1255           1 :                 if (add == ci->nvals) {
    1256             :                         /* after last exception: return dense sequence */
    1257           0 :                         while (lo1 < hi1)
    1258           0 :                                 *dst++ = ci->seq + add + lo1++;
    1259             :                 } else {
    1260           2 :                         while (lo1 < hi1) {
    1261           1 :                                 while (add < ci->nvals && o == ci->oids[add]) {
    1262           0 :                                         o++;
    1263           0 :                                         add++;
    1264             :                                 }
    1265           1 :                                 *dst++ = o++;
    1266           1 :                                 lo1++;
    1267             :                         }
    1268             :                 }
    1269           1 :                 o = canditer_idx(ci, lo2);
    1270           1 :                 add = o - ci->seq - lo2;
    1271           1 :                 assert(add <= ci->nvals);
    1272           1 :                 if (add == ci->nvals) {
    1273             :                         /* after last exception: return dense sequence */
    1274           0 :                         while (lo2 < hi2)
    1275           0 :                                 *dst++ = ci->seq + add + lo2++;
    1276             :                 } else {
    1277           4 :                         while (lo2 < hi2) {
    1278           6 :                                 while (add < ci->nvals && o == ci->oids[add]) {
    1279           3 :                                         o++;
    1280           3 :                                         add++;
    1281             :                                 }
    1282           3 :                                 *dst++ = o++;
    1283           3 :                                 lo2++;
    1284             :                         }
    1285             :                 }
    1286             :                 break;
    1287           0 :         case cand_mask:
    1288           0 :                 return canditer_sliceval_mask(ci, canditer_idx(ci, lo1),
    1289             :                                               oid_nil, hi1 - lo1,
    1290             :                                               canditer_idx(ci, lo2),
    1291             :                                               oid_nil, hi2 - lo2);
    1292             :         default:
    1293           0 :                 MT_UNREACHABLE();
    1294             :         }
    1295        4978 :         return virtualize(bn);
    1296             : }
    1297             : 
    1298             : BAT *
    1299       43014 : canditer_slice2val(const struct canditer *ci, oid lo1, oid hi1, oid lo2, oid hi2)
    1300             : {
    1301       43014 :         if (ci->tpe != cand_mask) {
    1302      168696 :                 return canditer_slice2(
    1303             :                         ci,
    1304       39959 :                         is_oid_nil(lo1) ? 0 : canditer_search(ci, lo1, true),
    1305       42780 :                         is_oid_nil(hi1) ? ci->ncand : canditer_search(ci, hi1, true),
    1306       42818 :                         is_oid_nil(lo2) ? 0 : canditer_search(ci, lo2, true),
    1307         359 :                         is_oid_nil(hi2) ? ci->ncand : canditer_search(ci, hi2, true));
    1308             :         }
    1309             : 
    1310         234 :         return canditer_sliceval_mask(ci, lo1, hi1, ci->ncand,
    1311         234 :                                       lo2, hi2, ci->ncand);
    1312             : }
    1313             : 
    1314             : BAT *
    1315        3214 : BATnegcands(oid tseq, BUN nr, BAT *odels)
    1316             : {
    1317        3214 :         const char *nme;
    1318        3214 :         Heap *dels;
    1319        3214 :         BUN lo, hi;
    1320        3214 :         ccand_t *c;
    1321        3214 :         BAT *bn;
    1322             : 
    1323        3214 :         bn = BATdense(0, tseq, nr);
    1324        3214 :         if (bn == NULL)
    1325             :                 return NULL;
    1326        3214 :         if (BATcount(odels) == 0)
    1327        2791 :                 goto doreturn;
    1328             : 
    1329         423 :         lo = SORTfndfirst(odels, &bn->tseqbase);
    1330         423 :         hi = SORTfndfirst(odels, &(oid) {bn->tseqbase + BATcount(bn)});
    1331         423 :         if (lo == hi)
    1332             :                 return bn;
    1333         423 :         if (lo + nr == hi) {
    1334         134 :                 BATsetcount(bn, 0);
    1335         134 :                 goto doreturn;
    1336             :         }
    1337             : 
    1338         289 :         nme = BBP_physical(bn->batCacheid);
    1339         289 :         if ((dels = GDKmalloc(sizeof(Heap))) == NULL){
    1340           0 :                 BBPreclaim(bn);
    1341           0 :                 return NULL;
    1342             :         }
    1343         578 :         *dels = (Heap) {
    1344         289 :                 .farmid = BBPselectfarm(bn->batRole, bn->ttype, varheap),
    1345         289 :                 .parentid = bn->batCacheid,
    1346             :                 .dirty = true,
    1347             :                 .refs = ATOMIC_VAR_INIT(1),
    1348             :         };
    1349         289 :         strconcat_len(dels->filename, sizeof(dels->filename),
    1350             :                       nme, ".theap", NULL);
    1351             : 
    1352         578 :         if (dels->farmid < 0 ||
    1353         289 :             HEAPalloc(dels, hi - lo + (sizeof(ccand_t)/sizeof(oid)), sizeof(oid)) != GDK_SUCCEED) {
    1354           0 :                 GDKfree(dels);
    1355           0 :                 BBPreclaim(bn);
    1356           0 :                 return NULL;
    1357             :         }
    1358         289 :         c = (ccand_t *) dels->base;
    1359         289 :         *c = (ccand_t) {
    1360             :                 .type = CAND_NEGOID,
    1361             :         };
    1362         289 :         dels->free = sizeof(ccand_t) + sizeof(oid) * (hi - lo);
    1363         289 :         BATiter bi = bat_iterator(odels);
    1364         289 :         if (bi.type == TYPE_void) {
    1365         166 :                 oid *r = (oid *) (dels->base + sizeof(ccand_t));
    1366       93673 :                 for (BUN x = lo; x < hi; x++)
    1367       93507 :                         r[x - lo] = x + odels->tseqbase;
    1368             :         } else {
    1369         123 :                 oid *r = (oid *) (dels->base + sizeof(ccand_t));
    1370         123 :                 memcpy(r, (const oid *) bi.base + lo, sizeof(oid) * (hi - lo));
    1371             :         }
    1372         289 :         bat_iterator_end(&bi);
    1373         289 :         assert(bn->tvheap == NULL);
    1374         289 :         bn->tvheap = dels;
    1375         289 :         BATsetcount(bn, bn->batCount - (hi - lo));
    1376        3214 :   doreturn:
    1377        3214 :         TRC_DEBUG(ALGO, "nr=" BUNFMT ", odels=" ALGOBATFMT
    1378             :                   " -> " ALGOBATFMT "\n",
    1379             :                   nr, ALGOBATPAR(odels),
    1380             :                   ALGOBATPAR(bn));
    1381             :         return bn;
    1382             : }
    1383             : 
    1384             : BAT *
    1385      130485 : BATmaskedcands(oid hseq, BUN nr, BAT *masked, bool selected)
    1386             : {
    1387      130485 :         const char *nme;
    1388      130485 :         Heap *msks;
    1389      130485 :         ccand_t *c;
    1390      130485 :         BUN nmask;
    1391      130485 :         BAT *bn;
    1392             : 
    1393      130485 :         assert(masked->ttype == TYPE_msk);
    1394             : 
    1395      130485 :         bn = COLnew(hseq, TYPE_void, 0, TRANSIENT);
    1396      130486 :         if (bn == NULL)
    1397             :                 return NULL;
    1398      130486 :         BATtseqbase(bn, hseq);
    1399             : 
    1400      130483 :         if (BATcount(masked) == 0)
    1401             :                 return bn;
    1402             : 
    1403      130485 :         nme = BBP_physical(bn->batCacheid);
    1404      130485 :         if ((msks = GDKmalloc(sizeof(Heap))) == NULL){
    1405           0 :                 BBPreclaim(bn);
    1406           0 :                 return NULL;
    1407             :         }
    1408      260971 :         *msks = (Heap) {
    1409      130485 :                 .farmid = BBPselectfarm(bn->batRole, bn->ttype, varheap),
    1410      130486 :                 .parentid = bn->batCacheid,
    1411             :                 .dirty = true,
    1412             :                 .refs = ATOMIC_VAR_INIT(1),
    1413             :         };
    1414      130486 :         strconcat_len(msks->filename, sizeof(msks->filename),
    1415             :                       nme, ".theap", NULL);
    1416             : 
    1417      130485 :         nmask = (nr + 31) / 32;
    1418      260971 :         if (msks->farmid < 0 ||
    1419      130485 :             HEAPalloc(msks, nmask + (sizeof(ccand_t)/sizeof(uint32_t)), sizeof(uint32_t)) != GDK_SUCCEED) {
    1420           0 :                 GDKfree(msks);
    1421           0 :                 BBPreclaim(bn);
    1422           0 :                 return NULL;
    1423             :         }
    1424      130486 :         c = (ccand_t *) msks->base;
    1425      130486 :         *c = (ccand_t) {
    1426             :                 .type = CAND_MSK,
    1427             : //              .mask = true,
    1428             :         };
    1429      130486 :         msks->free = sizeof(ccand_t) + nmask * sizeof(uint32_t);
    1430      130486 :         uint32_t *r = (uint32_t*)(msks->base + sizeof(ccand_t));
    1431      130486 :         BATiter bi = bat_iterator(masked);
    1432      130486 :         if (selected) {
    1433      130442 :                 if (nr <= bi.count)
    1434      130442 :                         memcpy(r, bi.base, nmask * sizeof(uint32_t));
    1435             :                 else
    1436           0 :                         memcpy(r, bi.base, (bi.count + 31) / 32 * sizeof(uint32_t));
    1437             :         } else {
    1438          44 :                 const uint32_t *s = (const uint32_t *) bi.base;
    1439          44 :                 BUN nmask_ = (bi.count + 31) / 32;
    1440        6498 :                 for (BUN i = 0; i < nmask_; i++)
    1441        6454 :                         r[i] = ~s[i];
    1442             :         }
    1443      130486 :         if (nr > bi.count) {
    1444           0 :                 BUN rest = bi.count & 31;
    1445           0 :                 BUN nmask_ = (bi.count + 31) / 32;
    1446           0 :                 if (rest > 0)
    1447           0 :                         r[nmask_ -1] |= ((1U << (32 - rest)) - 1) << rest;
    1448           0 :                 for (BUN j = nmask_; j < nmask; j++)
    1449           0 :                         r[j] = ~0;
    1450             :         }
    1451      130486 :         bat_iterator_end(&bi);
    1452             :         /* make sure last word doesn't have any spurious bits set */
    1453      130486 :         BUN cnt = nr % 32;
    1454      130486 :         if (cnt > 0)
    1455      128202 :                 r[nmask - 1] &= (1U << cnt) - 1;
    1456             :         cnt = 0;
    1457    12369616 :         for (BUN i = 0; i < nmask; i++) {
    1458    12239130 :                 if (cnt == 0 && r[i] != 0)
    1459      129125 :                         c->firstbit = candmask_lobit(r[i]) + i * 32;
    1460    12239130 :                 cnt += candmask_pop(r[i]);
    1461             :         }
    1462      130486 :         if (cnt > 0) {
    1463      129134 :                 assert(bn->tvheap == NULL);
    1464      129134 :                 bn->tvheap = msks;
    1465      129134 :                 bn->tseqbase += (oid) c->firstbit;
    1466             :         } else {
    1467             :                 /* no point having a mask if it's empty */
    1468        1352 :                 HEAPfree(msks, true);
    1469        1351 :                 GDKfree(msks);
    1470             :         }
    1471      130486 :         BATsetcount(bn, cnt);
    1472      130484 :         TRC_DEBUG(ALGO, "hseq=" OIDFMT ", masked=" ALGOBATFMT ", selected=%s"
    1473             :                   " -> " ALGOBATFMT "\n",
    1474             :                   hseq, ALGOBATPAR(masked),
    1475             :                   selected ? "true" : "false",
    1476             :                   ALGOBATPAR(bn));
    1477      130484 :         assert(bn->tseqbase != oid_nil);
    1478             :         return bn;
    1479             : }
    1480             : 
    1481             : /* convert a masked candidate list to a positive or negative candidate list */
    1482             : BAT *
    1483      121210 : BATunmask(BAT *b)
    1484             : {
    1485             : //      assert(!mask_cand(b) || CCAND(b)->mask); /* todo handle negmask case */
    1486      121210 :         BUN cnt;
    1487      121210 :         uint32_t rem;
    1488      121210 :         uint32_t val;
    1489      121210 :         const uint32_t *src;
    1490      121210 :         oid *dst;
    1491      121210 :         BUN n = 0;
    1492      121210 :         oid tseq = b->hseqbase;
    1493      121210 :         bool negcand = false;
    1494             : 
    1495      121210 :         BATiter bi = bat_iterator(b);
    1496      121288 :         if (mask_cand(b)) {
    1497      116101 :                 cnt = ccand_free(b) / sizeof(uint32_t);
    1498      116101 :                 rem = 0;
    1499      116101 :                 src = (const uint32_t *) ccand_first(b);
    1500      116101 :                 tseq = b->tseqbase;
    1501      116101 :                 tseq -= (oid) CCAND(b)->firstbit;
    1502             :                 /* create negative candidate list if more than half the
    1503             :                  * bits are set */
    1504      116101 :                 negcand = BATcount(b) > cnt * 16;
    1505             :         } else {
    1506        5187 :                 cnt = bi.count / 32;
    1507        5187 :                 rem = bi.count % 32;
    1508        5187 :                 src = (const uint32_t *) bi.base;
    1509             :         }
    1510      121288 :         BAT *bn;
    1511             : 
    1512      121288 :         if (negcand) {
    1513       97954 :                 bn = COLnew(b->hseqbase, TYPE_void, 0, TRANSIENT);
    1514       97951 :                 if (bn == NULL) {
    1515           0 :                         bat_iterator_end(&bi);
    1516           0 :                         return NULL;
    1517             :                 }
    1518       97951 :                 Heap *dels;
    1519       97951 :                 if ((dels = GDKmalloc(sizeof(Heap))) == NULL) {
    1520           0 :                         BBPreclaim(bn);
    1521           0 :                         bat_iterator_end(&bi);
    1522           0 :                         return NULL;
    1523             :                 }
    1524      195893 :                 *dels = (Heap) {
    1525       97948 :                         .farmid = BBPselectfarm(TRANSIENT, TYPE_void, varheap),
    1526       97945 :                         .parentid = bn->batCacheid,
    1527             :                         .dirty = true,
    1528             :                         .refs = ATOMIC_VAR_INIT(1),
    1529             :                 };
    1530       97945 :                 strconcat_len(dels->filename, sizeof(dels->filename),
    1531       97945 :                               BBP_physical(bn->batCacheid), ".theap", NULL);
    1532             : 
    1533      195810 :                 if (dels->farmid < 0 ||
    1534       97886 :                     HEAPalloc(dels, cnt * 32 - bi.count
    1535             :                               + sizeof(ccand_t) / sizeof(oid), sizeof(oid)) != GDK_SUCCEED) {
    1536           0 :                         GDKfree(dels);
    1537           0 :                         BBPreclaim(bn);
    1538           0 :                         bat_iterator_end(&bi);
    1539           0 :                         return NULL;
    1540             :                 }
    1541       97924 :                 * (ccand_t *) dels->base = (ccand_t) {
    1542             :                         .type = CAND_NEGOID,
    1543             :                 };
    1544       97924 :                 dst = (oid *) (dels->base + sizeof(ccand_t));
    1545     7917173 :                 for (BUN p = 0, v = 0; p < cnt; p++, v += 32) {
    1546     7819249 :                         if ((val = src[p]) == ~UINT32_C(0))
    1547     5909069 :                                 continue;
    1548    61593275 :                         for (uint32_t i = 0; i < 32; i++) {
    1549    59778866 :                                 if ((val & (1U << i)) == 0) {
    1550    49201924 :                                         if (v + i >= b->batCount + n)
    1551             :                                                 break;
    1552    49106153 :                                         dst[n++] = tseq + v + i;
    1553             :                                 }
    1554             :                         }
    1555             :                 }
    1556       97924 :                 if (n == 0) {
    1557             :                         /* didn't need it after all */
    1558         242 :                         HEAPfree(dels, true);
    1559         241 :                         GDKfree(dels);
    1560             :                 } else {
    1561       97682 :                         dels->free = sizeof(ccand_t) + n * sizeof(oid);
    1562       97682 :                         dels->dirty = true;
    1563       97682 :                         assert(bn->tvheap == NULL);
    1564       97682 :                         bn->tvheap = dels;
    1565             :                 }
    1566       97926 :                 BATsetcount(bn, n=bi.count);
    1567       97922 :                 bn->tseqbase = tseq;
    1568             :         } else {
    1569       23334 :                 bn = COLnew(b->hseqbase, TYPE_oid, mask_cand(b) ? bi.count : 1024, TRANSIENT);
    1570       23352 :                 if (bn == NULL) {
    1571           0 :                         bat_iterator_end(&bi);
    1572           0 :                         return NULL;
    1573             :                 }
    1574       23352 :                 dst = (oid *) Tloc(bn, 0);
    1575     2695294 :                 for (BUN p = 0; p < cnt; p++) {
    1576     2671941 :                         if ((val = src[p]) == 0)
    1577     1894578 :                                 continue;
    1578    25633653 :                         for (uint32_t i = 0; i < 32; i++) {
    1579    24856289 :                                 if (val & (1U << i)) {
    1580    21541507 :                                         if (n == BATcapacity(bn)) {
    1581          79 :                                                 BATsetcount(bn, n);
    1582          80 :                                                 if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
    1583           0 :                                                         BBPreclaim(bn);
    1584           0 :                                                         bat_iterator_end(&bi);
    1585           0 :                                                         return NULL;
    1586             :                                                 }
    1587          80 :                                                 dst = (oid *) Tloc(bn, 0);
    1588             :                                         }
    1589    21541508 :                                         dst[n++] = tseq + p * 32 + i;
    1590             :                                 }
    1591             :                         }
    1592             :                 }
    1593             :                 /* the last partial mask word */
    1594       23353 :                 if (rem > 0 && (val = src[cnt]) != 0) {
    1595       26601 :                         for (uint32_t i = 0; i < rem; i++) {
    1596       23802 :                                 if (val & (1U << i)) {
    1597        1263 :                                         if (n == BATcapacity(bn)) {
    1598           0 :                                                 BATsetcount(bn, n);
    1599           0 :                                                 if (BATextend(bn, BATgrows(bn)) != GDK_SUCCEED) {
    1600           0 :                                                         BBPreclaim(bn);
    1601           0 :                                                         bat_iterator_end(&bi);
    1602           0 :                                                         return NULL;
    1603             :                                                 }
    1604           0 :                                                 dst = (oid *) Tloc(bn, 0);
    1605             :                                         }
    1606        1263 :                                         dst[n++] = tseq + cnt * 32 + i;
    1607             :                                 }
    1608             :                         }
    1609             :                 }
    1610       23353 :                 BATsetcount(bn, n);
    1611             :         }
    1612      121259 :         bat_iterator_end(&bi);
    1613      121306 :         bn->tkey = true;
    1614      121306 :         bn->tsorted = true;
    1615      121306 :         bn->trevsorted = n <= 1;
    1616      121306 :         bn->tnil = false;
    1617      121306 :         bn->tnonil = true;
    1618      121306 :         bn = virtualize(bn);
    1619      121246 :         TRC_DEBUG(ALGO, ALGOBATFMT " -> " ALGOBATFMT "\n", ALGOBATPAR(b), ALGOBATPAR(bn));
    1620             :         return bn;
    1621             : }

Generated by: LCOV version 1.14