LCOV - code coverage report
Current view: top level - gdk - gdk_join.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1831 2755 66.5 %
Date: 2024-04-26 00:35:57 Functions: 26 29 89.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "gdk.h"
      15             : #include "gdk_private.h"
      16             : #include "gdk_calc_private.h"
      17             : 
      18             : /*
      19             :  * All join variants produce some sort of join on two input BATs,
      20             :  * optionally subject to up to two candidate lists.  Only values in
      21             :  * the input BATs that are mentioned in the associated candidate list
      22             :  * (if provided) are eligible.  They all return two output BATs in the
      23             :  * first two arguments.  The join operations differ in the way in
      24             :  * which tuples from the two inputs are matched.
      25             :  *
      26             :  * The outputs consist of two aligned BATs (i.e. same length and same
      27             :  * hseqbase (0@0)) that contain the OIDs of the input BATs that match.
      28             :  * The candidate lists, if given, contain the OIDs of the associated
      29             :  * input BAT which must be considered for matching.  The input BATs
      30             :  * must have the same type.
      31             :  *
      32             :  * All functions also have a parameter nil_matches which indicates
      33             :  * whether NIL must be considered an ordinary value that can match, or
      34             :  * whether NIL must be considered to never match.
      35             :  *
      36             :  * The join functions that are provided here are:
      37             :  * BATjoin
      38             :  *      normal equi-join
      39             :  * BATleftjoin
      40             :  *      normal equi-join, but the left output is sorted
      41             :  * BATouterjoin
      42             :  *      equi-join, but the left output is sorted, and if there is no
      43             :  *      match for a value in the left input, there is still an output
      44             :  *      with NIL in the right output
      45             :  * BATsemijoin
      46             :  *      equi-join, but the left output is sorted, and if there are
      47             :  *      multiple matches, only one is returned (i.e., the left output
      48             :  *      is also key, making it a candidate list)
      49             :  * BATmarkjoin
      50             :  *      equi-join, but the left output is sorted, if there is no
      51             :  *      match for a value in the left input, there is still an output
      52             :  *      with NIL in the right output, and there is a third output column
      53             :  *      containing a flag that indicates the "certainty" of the match: 1
      54             :  *      there is a match, 0, there is no match and there are no NIL
      55             :  *      values, NIL, there is no match but there are NIL values
      56             :  * BATthetajoin
      57             :  *      theta-join: an extra operator must be provided encoded as an
      58             :  *      integer (macros JOIN_EQ, JOIN_NE, JOIN_LT, JOIN_LE, JOIN_GT,
      59             :  *      JOIN_GE); values match if the left input has the given
      60             :  *      relationship with the right input; order of the outputs is not
      61             :  *      guaranteed
      62             :  * BATbandjoin
      63             :  *      band-join: two extra input values (c1, c2) must be provided as
      64             :  *      well as Booleans (li, hi) that indicate whether the value
      65             :  *      ranges are inclusive or not; values in the left and right
      66             :  *      inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
      67             :  *      c2 is NIL, there are no matches
      68             :  * BATrangejoin
      69             :  *      range-join: the right input consists of two aligned BATs,
      70             :  *      values match if the left value is between two corresponding
      71             :  *      right values; two extra Boolean parameters, li and hi,
      72             :  *      indicate whether equal values match
      73             :  *
      74             :  * In addition to these functions, there are two more functions that
      75             :  * are closely related:
      76             :  * BATintersect
      77             :  *      intersection: return a candidate list with OIDs of tuples in
      78             :  *      the left input whose value occurs in the right input
      79             :  * BATdiff
      80             :  *      difference: return a candidate list with OIDs of tuples in the
      81             :  *      left input whose value does not occur in the right input
      82             :  */
      83             : 
      84             : /* Perform a bunch of sanity checks on the inputs to a join. */
      85             : static gdk_return
      86      489606 : joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
      87             : {
      88     1305819 :         if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
      89         278 :             (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
      90           0 :                 GDKerror("%s: inputs not compatible.\n", func);
      91           0 :                 return GDK_FAIL;
      92             :         }
      93         139 :         if (r2 &&
      94         139 :             (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
      95           0 :                 GDKerror("%s: right inputs not aligned.\n", func);
      96           0 :                 return GDK_FAIL;
      97             :         }
      98      489606 :         if ((sl && !BATiscand(sl)) || (sr && !BATiscand(sr))) {
      99           0 :                 GDKerror("%s: argument not a candidate list.\n", func);
     100           0 :                 return GDK_FAIL;
     101             :         }
     102             :         return GDK_SUCCEED;
     103             : }
     104             : 
     105             : #define INCRSIZELOG     (8 + (SIZEOF_OID / 2))
     106             : #define INCRSIZE        (1 << INCRSIZELOG)
     107             : 
     108             : /* Create the result bats for a join, returns the absolute maximum
     109             :  * number of outputs that could possibly be generated. */
     110             : static BUN
     111       79173 : joininitresults(BAT **r1p, BAT **r2p, BAT **r3p, BUN lcnt, BUN rcnt,
     112             :                 bool lkey, bool rkey, bool semi, bool nil_on_miss,
     113             :                 bool only_misses, bool min_one, BUN estimate)
     114             : {
     115       79173 :         BAT *r1 = NULL, *r2 = NULL, *r3 = NULL;
     116       79173 :         BUN maxsize, size;
     117             : 
     118             :         /* if nil_on_miss is set, we really need a right output */
     119       79173 :         assert(!nil_on_miss || r2p != NULL || r3p != NULL);
     120             : 
     121       79173 :         lkey |= lcnt <= 1;
     122       79173 :         rkey |= rcnt <= 1;
     123             : 
     124       79173 :         *r1p = NULL;
     125       79173 :         if (r2p)
     126       27255 :                 *r2p = NULL;
     127       79173 :         if (r3p)
     128         232 :                 *r3p = NULL;
     129       79173 :         if (lcnt == 0) {
     130             :                 /* there is nothing to match */
     131             :                 maxsize = 0;
     132       64423 :         } else if (!only_misses && !nil_on_miss && rcnt == 0) {
     133             :                 /* if right is empty, we have no hits, so if we don't
     134             :                  * want misses, the result is empty */
     135             :                 maxsize = 0;
     136       64521 :         } else if (rkey | semi | only_misses) {
     137             :                 /* each entry left matches at most one on right, in
     138             :                  * case nil_on_miss is also set, each entry matches
     139             :                  * exactly one (see below) */
     140             :                 maxsize = lcnt;
     141       43422 :         } else if (lkey) {
     142             :                 /* each entry on right is matched at most once */
     143       10219 :                 if (nil_on_miss) {
     144             :                         /* one entry left could match all right, and
     145             :                          * all other entries left match nil */
     146          15 :                         maxsize = lcnt + rcnt - 1;
     147             :                 } else {
     148             :                         maxsize = rcnt;
     149             :                 }
     150       33203 :         } else if (rcnt == 0) {
     151             :                 /* nil_on_miss must be true due to previous checks, so
     152             :                  * all values on left miss */
     153             :                 maxsize = lcnt;
     154       33200 :         } else if (BUN_MAX / lcnt >= rcnt) {
     155             :                 /* in the worst case we have a full cross product */
     156       33303 :                 maxsize = lcnt * rcnt;
     157             :         } else {
     158             :                 /* a BAT cannot grow larger than BUN_MAX */
     159             :                 maxsize = BUN_MAX;
     160             :         }
     161       79173 :         size = estimate == BUN_NONE ? lcnt < rcnt ? lcnt : rcnt : estimate;
     162       79173 :         if (size < INCRSIZE)
     163             :                 size = INCRSIZE;
     164       79173 :         if (size > maxsize)
     165             :                 size = maxsize;
     166       79173 :         if ((rkey | semi | only_misses) & nil_on_miss) {
     167             :                 /* see comment above: each entry left matches exactly
     168             :                  * once */
     169         116 :                 size = maxsize;
     170             :         }
     171       79173 :         if (min_one && size < lcnt)
     172           0 :                 size = lcnt;
     173             : 
     174       79173 :         if (maxsize == 0) {
     175       14680 :                 r1 = BATdense(0, 0, 0);
     176       14499 :                 if (r1 == NULL) {
     177             :                         return BUN_NONE;
     178             :                 }
     179       14499 :                 if (r2p) {
     180         629 :                         r2 = BATdense(0, 0, 0);
     181         628 :                         if (r2 == NULL) {
     182           0 :                                 BBPreclaim(r1);
     183           0 :                                 return BUN_NONE;
     184             :                         }
     185         628 :                         *r2p = r2;
     186             :                 }
     187       14498 :                 if (r3p) {
     188           0 :                         r3 = COLnew(0, TYPE_bit, 0, TRANSIENT);
     189           0 :                         if (r3 == NULL) {
     190           0 :                                 BBPreclaim(r1);
     191           0 :                                 BBPreclaim(r2);
     192           0 :                                 if (r2p)
     193           0 :                                         *r2p = NULL;
     194           0 :                                 return BUN_NONE;
     195             :                         }
     196           0 :                         *r3p = r3;
     197             :                 }
     198       14498 :                 *r1p = r1;
     199       14498 :                 return 0;
     200             :         }
     201             : 
     202       64493 :         r1 = COLnew(0, TYPE_oid, size, TRANSIENT);
     203       63403 :         if (r1 == NULL) {
     204             :                 return BUN_NONE;
     205             :         }
     206       63403 :         r1->tnil = false;
     207       63403 :         r1->tnonil = true;
     208       63403 :         r1->tkey = true;
     209       63403 :         r1->tsorted = true;
     210       63403 :         r1->trevsorted = true;
     211       63403 :         r1->tseqbase = 0;
     212       63403 :         r1->theap->dirty = true;
     213       63403 :         *r1p = r1;
     214       63403 :         if (r2p) {
     215       26284 :                 r2 = COLnew(0, TYPE_oid, size, TRANSIENT);
     216       26543 :                 if (r2 == NULL) {
     217           0 :                         BBPreclaim(r1);
     218           0 :                         return BUN_NONE;
     219             :                 }
     220       26543 :                 r2->tnil = false;
     221       26543 :                 r2->tnonil = true;
     222       26543 :                 r2->tkey = true;
     223       26543 :                 r2->tsorted = true;
     224       26543 :                 r2->trevsorted = true;
     225       26543 :                 r2->tseqbase = 0;
     226       26543 :                 r2->theap->dirty = true;
     227       26543 :                 *r2p = r2;
     228             :         }
     229       63662 :         if (r3p) {
     230         109 :                 BAT *r3 = COLnew(0, TYPE_bit, size, TRANSIENT);
     231         111 :                 if (r3 == NULL) {
     232           0 :                         BBPreclaim(r1);
     233           0 :                         BBPreclaim(r2);
     234           0 :                         return BUN_NONE;
     235             :                 }
     236         111 :                 r3->tnil = false;
     237         111 :                 r3->tnonil = true;
     238         111 :                 r3->tkey = false;
     239         111 :                 r3->tsorted = false;
     240         111 :                 r3->trevsorted = false;
     241         111 :                 r3->tseqbase = oid_nil;
     242         111 :                 r3->theap->dirty = true;
     243         111 :                 *r3p = r3;
     244             :         }
     245             :         return maxsize;
     246             : }
     247             : 
     248             : #define VALUE(s, x)     (s##vars ?                                      \
     249             :                          s##vars + VarHeapVal(s##vals, (x), s##i.width) : \
     250             :                          s##vals ? (const char *) s##vals + ((x) * s##i.width) : \
     251             :                          (s##val = BUNtoid(s, (x)), (const char *) &s##val))
     252             : #define FVALUE(s, x)    ((const char *) s##vals + ((x) * s##i.width))
     253             : 
     254             : #define APPEND(b, o)            (((oid *) b->theap->base)[b->batCount++] = (o))
     255             : 
     256             : static inline gdk_return
     257   153726553 : maybeextend(BAT *restrict r1, BAT *restrict r2, BAT *restrict r3,
     258             :             BUN cnt, BUN lcur, BUN lcnt, BUN maxsize)
     259             : {
     260   153726553 :         if (BATcount(r1) + cnt > BATcapacity(r1)) {
     261             :                 /* make some extra space by extrapolating how much more
     262             :                  * we need (fraction of l we've seen so far is used to
     263             :                  * estimate a new size but with a shallow slope so that
     264             :                  * a skewed join doesn't overwhelm, whilst making sure
     265             :                  * there is somewhat significant progress) */
     266        2041 :                 BUN newcap = (BUN) (lcnt / (lcnt / 4.0 + lcur * .75) * (BATcount(r1) + cnt));
     267        2041 :                 newcap = (newcap + INCRSIZE - 1) & ~(((BUN) 1 << INCRSIZELOG) - 1);
     268        2041 :                 if (newcap < cnt + BATcount(r1))
     269           0 :                         newcap = cnt + BATcount(r1) + INCRSIZE;
     270             :                 /* if close to maxsize, then just use maxsize */
     271        2041 :                 if (newcap + INCRSIZE > maxsize)
     272         147 :                         newcap = maxsize;
     273             :                 /* make sure heap.free is set properly before
     274             :                  * extending */
     275        2041 :                 BATsetcount(r1, BATcount(r1));
     276        2042 :                 if (BATextend(r1, newcap) != GDK_SUCCEED)
     277             :                         return GDK_FAIL;
     278        2039 :                 if (r2) {
     279        1213 :                         BATsetcount(r2, BATcount(r2));
     280        1213 :                         if (BATextend(r2, newcap) != GDK_SUCCEED)
     281             :                                 return GDK_FAIL;
     282        1213 :                         assert(BATcapacity(r1) == BATcapacity(r2));
     283             :                 }
     284        2039 :                 if (r3) {
     285           0 :                         BATsetcount(r3, BATcount(r3));
     286           0 :                         if (BATextend(r3, newcap) != GDK_SUCCEED)
     287             :                                 return GDK_FAIL;
     288           0 :                         assert(BATcapacity(r1) == BATcapacity(r3));
     289             :                 }
     290             :         }
     291             :         return GDK_SUCCEED;
     292             : }
     293             : 
     294             : /* Return BATs through r1p, r2p, and r3p for the case that there is no
     295             :  * match between l and r, taking all flags into consideration.
     296             :  *
     297             :  * This means, if nil_on_miss is set or only_misses is set, *r1p is a
     298             :  * copy of the left candidate list or a dense list of all "head"
     299             :  * values of l, and *r2p (if r2p is not NULL) is all nil.  If neither
     300             :  * of those flags is set, the result is two empty BATs. */
     301             : static gdk_return
     302      342903 : nomatch(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     303             :         struct canditer *restrict lci, bit defmark,
     304             :         bool nil_on_miss, bool only_misses, const char *func, lng t0)
     305             : {
     306      342903 :         BAT *r1, *r2 = NULL, *r3 = NULL;
     307             : 
     308      342903 :         MT_thread_setalgorithm(__func__);
     309      343710 :         if (lci->ncand == 0 || !(nil_on_miss | only_misses)) {
     310             :                 /* return empty BATs */
     311      325622 :                 if ((r1 = BATdense(0, 0, 0)) == NULL)
     312             :                         return GDK_FAIL;
     313      323687 :                 if (r2p) {
     314      249094 :                         if ((r2 = BATdense(0, 0, 0)) == NULL) {
     315           0 :                                 BBPreclaim(r1);
     316           0 :                                 return GDK_FAIL;
     317             :                         }
     318      250876 :                         *r2p = r2;
     319             :                 }
     320      325469 :                 if (r3p) {
     321        3600 :                         if ((r3 = COLnew(0, TYPE_bit, 0, TRANSIENT)) == NULL) {
     322           0 :                                 BBPreclaim(r1);
     323           0 :                                 BBPreclaim(r2);
     324           0 :                                 return GDK_FAIL;
     325             :                         }
     326        3607 :                         *r3p = r3;
     327             :                 }
     328             :         } else {
     329       18088 :                 r1 = canditer_slice(lci, 0, lci->ncand);
     330       18089 :                 if (r2p) {
     331           5 :                         if ((r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand, TRANSIENT)) == NULL) {
     332           0 :                                 BBPreclaim(r1);
     333           0 :                                 return GDK_FAIL;
     334             :                         }
     335           5 :                         *r2p = r2;
     336             :                 }
     337       18089 :                 if (r3p) {
     338          85 :                         if ((r3 = BATconstant(0, TYPE_bit, &defmark, lci->ncand, TRANSIENT)) == NULL) {
     339           0 :                                 BBPreclaim(r1);
     340           0 :                                 BBPreclaim(r2);
     341           0 :                                 return GDK_FAIL;
     342             :                         }
     343          85 :                         *r3p = r3;
     344             :                 }
     345             :         }
     346      343565 :         *r1p = r1;
     347      343565 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ",r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT
     348             :                   ",nil_on_miss=%s,only_misses=%s"
     349             :                   " - > " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
     350             :                   " (%s -- " LLFMT "usec)\n",
     351             :                   ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(lci->s),
     352             :                   nil_on_miss ? "true" : "false",
     353             :                   only_misses ? "true" : "false",
     354             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
     355             :                   func, GDKusec() - t0);
     356             :         return GDK_SUCCEED;
     357             : }
     358             : 
     359             : /* Implementation of join where there is a single value (possibly
     360             :  * repeated multiple times) on the left.  This means we can use a
     361             :  * point select to find matches in the right column. */
     362             : static gdk_return
     363       48983 : selectjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     364             :            struct canditer *lci, struct canditer *rci,
     365             :            bool nil_matches, bool nil_on_miss, bool semi, bool max_one, bool min_one,
     366             :            lng t0, bool swapped, const char *reason)
     367             : {
     368       48983 :         BATiter li = bat_iterator(l);
     369       49031 :         const void *v;
     370       49031 :         BAT *bn = NULL;
     371       49031 :         BAT *r1 = NULL;
     372       49031 :         BAT *r2 = NULL;
     373       49031 :         BUN bncount;
     374             : 
     375       49031 :         assert(lci->ncand > 0);
     376       49031 :         assert(lci->ncand == 1 || (li.sorted && li.revsorted));
     377             : 
     378       49031 :         size_t counter = 0;
     379       49031 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     380             : 
     381       49033 :         MT_thread_setalgorithm(__func__);
     382       49040 :         oid o = canditer_next(lci);
     383       49028 :         v = BUNtail(li, o - l->hseqbase);
     384             : 
     385       95999 :         if (!nil_matches &&
     386       46991 :             (*ATOMcompare(li.type))(v, ATOMnilptr(li.type)) == 0) {
     387             :                 /* NIL doesn't match anything */
     388         174 :                 bat_iterator_end(&li);
     389         174 :                 gdk_return rc = nomatch(r1p, r2p, r3p, l, r, lci, bit_nil, nil_on_miss,
     390             :                                         false, reason, t0);
     391         174 :                 return rc;
     392             :         }
     393             : 
     394       48834 :         bn = BATselect(r, rci->s, v, NULL, true, true, false);
     395       48719 :         bat_iterator_end(&li);
     396       48780 :         if (bn == NULL) {
     397             :                 return GDK_FAIL;
     398             :         }
     399       48780 :         bncount = BATcount(bn);
     400       48780 :         if (bncount == 0) {
     401       11648 :                 BBPreclaim(bn);
     402       11681 :                 if (min_one) {
     403           0 :                         GDKerror("not enough matches");
     404           0 :                         return GDK_FAIL;
     405             :                 }
     406       11681 :                 if (!nil_on_miss) {
     407       11541 :                         assert(r3p == NULL);
     408       11541 :                         return nomatch(r1p, r2p, r3p, l, r, lci, 0, nil_on_miss,
     409             :                                        false, reason, t0);
     410             :                 }
     411             :                 /* special case: return nil on RHS */
     412             :                 bncount = 1;
     413             :                 bn = NULL;
     414             :         }
     415       37132 :         if (bncount > 1) {
     416        1866 :                 if (semi)
     417         703 :                         bncount = 1;
     418        1866 :                 if (max_one) {
     419          15 :                         GDKerror("more than one match");
     420          15 :                         goto bailout;
     421             :                 }
     422             :         }
     423       37257 :         r1 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
     424       37216 :         if (r1 == NULL)
     425           0 :                 goto bailout;
     426       37216 :         r1->tsorted = true;
     427       37216 :         r1->trevsorted = lci->ncand == 1;
     428       37216 :         r1->tseqbase = bncount == 1 && lci->tpe == cand_dense ? o : oid_nil;
     429       37216 :         r1->tkey = bncount == 1;
     430       37216 :         r1->tnil = false;
     431       37216 :         r1->tnonil = true;
     432       37216 :         if (bn == NULL) {
     433             :                 /* left outer join, no match, we're returning nil in r2 */
     434         139 :                 oid *o1p = (oid *) Tloc(r1, 0);
     435         139 :                 BUN p, q = bncount;
     436             : 
     437         139 :                 if (r2p) {
     438           1 :                         r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand * bncount, TRANSIENT);
     439           1 :                         if (r2 == NULL)
     440           0 :                                 goto bailout;
     441           1 :                         *r2p = r2;
     442             :                 }
     443         289 :                 do {
     444         289 :                         GDK_CHECK_TIMEOUT(qry_ctx, counter,
     445             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     446         579 :                         for (p = 0; p < q; p++) {
     447         290 :                                 *o1p++ = o;
     448             :                         }
     449         289 :                         o = canditer_next(lci);
     450         290 :                 } while (!is_oid_nil(o));
     451             :         } else {
     452       37077 :                 oid *o1p = (oid *) Tloc(r1, 0);
     453       37077 :                 oid *o2p;
     454       37077 :                 BUN p, q = bncount;
     455             : 
     456       37077 :                 if (r2p) {
     457       31874 :                         r2 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
     458       31906 :                         if (r2 == NULL)
     459           0 :                                 goto bailout;
     460       31906 :                         r2->tsorted = lci->ncand == 1 || bncount == 1;
     461       31906 :                         r2->trevsorted = bncount == 1;
     462       31906 :                         r2->tseqbase = lci->ncand == 1 && BATtdense(bn) ? bn->tseqbase : oid_nil;
     463       31906 :                         r2->tkey = lci->ncand == 1;
     464       31906 :                         r2->tnil = false;
     465       31906 :                         r2->tnonil = true;
     466       31906 :                         *r2p = r2;
     467       31906 :                         o2p = (oid *) Tloc(r2, 0);
     468             :                 } else {
     469             :                         o2p = NULL;
     470             :                 }
     471             : 
     472       37109 :                 if (BATtdense(bn)) {
     473             :                         oid bno = bn->tseqbase;
     474             : 
     475     1294411 :                         do {
     476     1294411 :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
     477             :                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     478     2782860 :                                 for (p = 0; p < q; p++) {
     479     1488449 :                                         *o1p++ = o;
     480             :                                 }
     481     1294411 :                                 if (o2p) {
     482      550557 :                                         for (p = 0; p < q; p++) {
     483      372275 :                                                 *o2p++ = bno + p;
     484             :                                         }
     485             :                                 }
     486     1294411 :                                 o = canditer_next(lci);
     487     1294411 :                         } while (!is_oid_nil(o));
     488             :                 } else {
     489         314 :                         const oid *bnp = (const oid *) Tloc(bn, 0);
     490             : 
     491      115453 :                         do {
     492      115453 :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
     493             :                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     494    32372106 :                                 for (p = 0; p < q; p++) {
     495    32256653 :                                         *o1p++ = o;
     496             :                                 }
     497      115453 :                                 if (o2p) {
     498    32521701 :                                         for (p = 0; p < q; p++) {
     499    32396157 :                                                 *o2p++ = bnp[p];
     500             :                                         }
     501             :                                 }
     502      115453 :                                 o = canditer_next(lci);
     503      115450 :                         } while (!is_oid_nil(o));
     504             :                 }
     505       37106 :                 if (r2)
     506       31896 :                         BATsetcount(r2, lci->ncand * bncount);
     507             :         }
     508       37246 :         BATsetcount(r1, lci->ncand * bncount);
     509       37265 :         *r1p = r1;
     510       37265 :         BAT *r3 = NULL;
     511       37265 :         if (r3p) {
     512         213 :                 bit mark;
     513         213 :                 if (bn) {
     514             :                         /* there is a match */
     515          73 :                         mark = 1;
     516         140 :                 } else if (r->tnonil) {
     517             :                         /* no match, no NIL in r */
     518           2 :                         mark = 0;
     519             :                 } else {
     520             :                         /* no match, search for NIL in r */
     521         138 :                         BAT *n = BATselect(r, rci->s, ATOMnilptr(r->ttype), NULL, true, true, false);
     522         138 :                         if (n == NULL)
     523           0 :                                 goto bailout;
     524         138 :                         mark = BATcount(n) == 0 ? 0 : bit_nil;
     525         138 :                         BBPreclaim(n);
     526             :                 }
     527         213 :                 r3 = BATconstant(0, TYPE_bit, &mark, lci->ncand, TRANSIENT);
     528         212 :                 if (r3 == NULL)
     529           0 :                         goto bailout;
     530         212 :                 *r3p = r3;
     531             :         }
     532       37264 :         BBPreclaim(bn);
     533       37259 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
     534             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
     535             :                   "sr=" ALGOOPTBATFMT ",nil_matches=%s;%s %s "
     536             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
     537             :                   " (" LLFMT "usec)\n",
     538             :                   ALGOBATPAR(l), ALGOBATPAR(r),
     539             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
     540             :                   nil_matches ? "true" : "false",
     541             :                   swapped ? " swapped" : "", reason,
     542             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
     543             :                   GDKusec() - t0);
     544             : 
     545             :         return GDK_SUCCEED;
     546             : 
     547          15 :   bailout:
     548          15 :         BBPreclaim(bn);
     549          15 :         BBPreclaim(r1);
     550          15 :         BBPreclaim(r2);
     551          15 :         if (r2p)
     552          14 :                 *r2p = NULL;
     553             :         return GDK_FAIL;
     554             : }
     555             : 
     556             : #if SIZEOF_OID == SIZEOF_INT /* oid is int-sized: binsearch_oid forwards to binsearch_int, casting vals to (const int *) and v to (int) */
     557             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_int(indir, offset, (const int *) vals, lo, hi, (int) (v), ordering, last)
     558             : #endif /* SIZEOF_OID == SIZEOF_INT */
     559             : #if SIZEOF_OID == SIZEOF_LNG /* oid is lng-sized: binsearch_oid forwards to binsearch_lng, casting vals to (const lng *) and v to (lng) */
     560             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, last)
     561             : #endif /* SIZEOF_OID == SIZEOF_LNG */
     562             : 
     563             : /* Implementation of join where the right-hand side is dense, and if
     564             :  * there is a right candidate list, it too is dense.  This means there
     565             :  * are no NIL values in r.  In case nil_on_miss is not set, we use a
     566             :  * range select (BATselect) to find the matching values in the left
     567             :  * column and then calculate the corresponding matches from the right.
     568             :  * If nil_on_miss is set, we need to do some more work. The latter is
     569             :  * also the only case in which r3p can be set. */
     570             : static gdk_return
     571       24056 : mergejoin_void(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     572             :                struct canditer *restrict lci, struct canditer *restrict rci,
     573             :                bool nil_on_miss, bool only_misses, lng t0, bool swapped,
     574             :                const char *reason)
     575             : {
     576       24056 :         oid lo, hi;
     577       24056 :         BUN i;
     578       24056 :         oid o, *o1p = NULL, *o2p = NULL;
     579       24056 :         bit *m3p = NULL;
     580       24056 :         BAT *r1 = NULL, *r2 = NULL, *r3 = NULL;
     581       24056 :         bool ltsorted = false, ltrevsorted = false, ltkey = false;
     582             : 
     583             :         /* r is dense, and if there is a candidate list, it too is
     584             :          * dense.  This means we don't have to do any searches, we
     585             :          * only need to compare ranges to know whether a value from l
     586             :          * has a match in r */
     587       34448 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
     588       24056 :         assert(r->tsorted || r->trevsorted);
     589       24056 :         assert(BATcount(l) > 0);
     590       24056 :         assert(rci->tpe == cand_dense);
     591       24056 :         assert(BATcount(r) > 0);
     592             : 
     593       24056 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     594             : 
     595       24069 :         MT_thread_setalgorithm(__func__);
     596             :         /* figure out range [lo..hi) of values in r that we need to match */
     597       24091 :         lo = r->tseqbase;
     598       24091 :         hi = lo + BATcount(r);
     599             :         /* restrict [lo..hi) range further using candidate list */
     600       24091 :         if (rci->seq > r->hseqbase)
     601           0 :                 lo += rci->seq - r->hseqbase;
     602       24091 :         if (rci->seq + rci->ncand < r->hseqbase + BATcount(r))
     603           0 :                 hi -= r->hseqbase + BATcount(r) - rci->seq - rci->ncand;
     604             : 
     605             :         /* at this point, the matchable values in r are [lo..hi) */
     606       24091 :         if (!nil_on_miss) {
     607       24091 :                 assert(r3p == NULL);
     608       24091 :                 r1 = BATselect(l, lci->s, &lo, &hi, true, false, only_misses);
     609       23995 :                 if (r1 == NULL)
     610             :                         return GDK_FAIL;
     611       23995 :                 if (only_misses && !l->tnonil) {
     612             :                         /* also look for NILs */
     613           0 :                         r2 = BATselect(l, lci->s, &oid_nil, NULL, true, false, false);
     614           0 :                         if (r2 == NULL) {
     615           0 :                                 BBPreclaim(r1);
     616           0 :                                 return GDK_FAIL;
     617             :                         }
     618           0 :                         if (BATcount(r2) > 0) {
     619           0 :                                 BAT *mg = BATmergecand(r1, r2);
     620           0 :                                 BBPunfix(r1->batCacheid);
     621           0 :                                 BBPunfix(r2->batCacheid);
     622           0 :                                 r1 = mg;
     623           0 :                                 if (r1 == NULL)
     624             :                                         return GDK_FAIL;
     625             :                         } else {
     626           0 :                                 BBPunfix(r2->batCacheid);
     627             :                         }
     628             :                         r2 = NULL;
     629             :                 }
     630       23995 :                 *r1p = r1;
     631       23995 :                 if (r2p == NULL)
     632       22871 :                         goto doreturn2;
     633        1124 :                 if (BATcount(r1) == 0) {
     634          44 :                         r2 = BATdense(0, 0, 0);
     635          42 :                         if (r2 == NULL) {
     636           0 :                                 BBPreclaim(r1);
     637           0 :                                 return GDK_FAIL;
     638             :                         }
     639        1080 :                 } else if (BATtdense(r1) && BATtdense(l)) {
     640          75 :                         r2 = BATdense(0, l->tseqbase + r1->tseqbase - l->hseqbase + r->hseqbase - r->tseqbase, BATcount(r1));
     641          75 :                         if (r2 == NULL) {
     642           0 :                                 BBPreclaim(r1);
     643           0 :                                 return GDK_FAIL;
     644             :                         }
     645             :                 } else {
     646        1005 :                         r2 = COLnew(0, TYPE_oid, BATcount(r1), TRANSIENT);
     647        1033 :                         if (r2 == NULL) {
     648           0 :                                 BBPreclaim(r1);
     649           0 :                                 return GDK_FAIL;
     650             :                         }
     651        1033 :                         BATiter li = bat_iterator(l);
     652        1011 :                         const oid *lp = (const oid *) li.base;
     653        1011 :                         const oid *o1p = (const oid *) Tloc(r1, 0);
     654        1011 :                         oid *o2p = (oid *) Tloc(r2, 0);
     655        1011 :                         hi = BATcount(r1);
     656        1011 :                         if (complex_cand(l)) {
     657             :                                 /* this is actually generic code */
     658           0 :                                 for (o = 0; o < hi; o++)
     659           0 :                                         o2p[o] = BUNtoid(l, BUNtoid(r1, o) - l->hseqbase) - r->tseqbase + r->hseqbase;
     660        1011 :                         } else if (BATtdense(r1)) {
     661         462 :                                 lo = r1->tseqbase - l->hseqbase;
     662         462 :                                 if (r->tseqbase == r->hseqbase) {
     663         449 :                                         memcpy(o2p, lp + lo, hi * SIZEOF_OID);
     664             :                                 } else {
     665          13 :                                         hi += lo;
     666     5085027 :                                         for (o = 0; lo < hi; o++, lo++) {
     667     5085014 :                                                 o2p[o] = lp[lo] - r->tseqbase + r->hseqbase;
     668             :                                         }
     669             :                                 }
     670         549 :                         } else if (BATtdense(l)) {
     671           0 :                                 for (o = 0; o < hi; o++) {
     672           0 :                                         o2p[o] = o1p[o] - l->hseqbase + li.tseq - r->tseqbase + r->hseqbase;
     673             :                                 }
     674             :                         } else {
     675    34399229 :                                 for (o = 0; o < hi; o++) {
     676    34398680 :                                         o2p[o] = lp[o1p[o] - l->hseqbase] - r->tseqbase + r->hseqbase;
     677             :                                 }
     678             :                         }
     679        1011 :                         r2->tkey = li.key;
     680        1011 :                         r2->tsorted = li.sorted;
     681        1011 :                         r2->trevsorted = li.revsorted;
     682        1011 :                         bat_iterator_end(&li);
     683        1031 :                         r2->tnil = false;
     684        1031 :                         r2->tnonil = true;
     685        1031 :                         BATsetcount(r2, BATcount(r1));
     686             :                 }
     687        1151 :                 *r2p = r2;
     688        1151 :                 goto doreturn2;
     689             :         }
     690             :         /* nil_on_miss is set, this means we must have a second or third
     691             :          * output */
     692           0 :         assert(r2p || r3p);
     693           0 :         if (BATtdense(l)) {
     694             :                 /* if l is dense, we can further restrict the [lo..hi)
     695             :                  * range to values in l that match with values in r */
     696           0 :                 o = lo;
     697           0 :                 i = lci->seq - l->hseqbase;
     698           0 :                 if (l->tseqbase + i > lo)
     699           0 :                         lo = l->tseqbase + i;
     700           0 :                 i = canditer_last(lci) + 1 - l->hseqbase;
     701           0 :                 if (l->tseqbase + i < hi)
     702           0 :                         hi = l->tseqbase + i;
     703           0 :                 if (lci->tpe == cand_dense) {
     704             :                         /* l is dense, and so is the left candidate
     705             :                          * list (if it exists); this means we don't
     706             :                          * have to actually look at any values in l:
     707             :                          * we can just do some arithmetic; it also
     708             :                          * means that r1 will be dense, and if
     709             :                          * nil_on_miss is not set, or if all values in
     710             :                          * l match, r2 will too */
     711           0 :                         if (hi <= lo) {
     712           0 :                                 return nomatch(r1p, r2p, r3p, l, r, lci, 0,
     713             :                                                nil_on_miss, only_misses,
     714             :                                                __func__, t0);
     715             :                         }
     716             : 
     717             :                         /* at this point, the matched values in l and
     718             :                          * r (taking candidate lists into account) are
     719             :                          * [lo..hi) which we can translate back to the
     720             :                          * respective OID values that we can store in
     721             :                          * r1 and r2; note that r1 will be dense since
     722             :                          * all values in l will match something (even
     723             :                          * if nil since nil_on_miss is set) */
     724           0 :                         *r1p = r1 = BATdense(0, lci->seq, lci->ncand);
     725           0 :                         if (r1 == NULL)
     726             :                                 return GDK_FAIL;
     727           0 :                         if (r2p) {
     728           0 :                                 if (hi - lo < lci->ncand) {
     729             :                                         /* we need to fill in nils in r2 for
     730             :                                          * missing values */
     731           0 :                                         *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     732           0 :                                         if (r2 == NULL) {
     733           0 :                                                 BBPreclaim(*r1p);
     734           0 :                                                 return GDK_FAIL;
     735             :                                         }
     736           0 :                                         o2p = (oid *) Tloc(r2, 0);
     737           0 :                                         i = l->tseqbase + lci->seq - l->hseqbase;
     738           0 :                                         lo -= i;
     739           0 :                                         hi -= i;
     740           0 :                                         i += r->hseqbase - r->tseqbase;
     741           0 :                                         for (o = 0; o < lo; o++)
     742           0 :                                                 *o2p++ = oid_nil;
     743           0 :                                         for (o = lo; o < hi; o++)
     744           0 :                                                 *o2p++ = o + i;
     745           0 :                                         for (o = hi; o < lci->ncand; o++)
     746           0 :                                                 *o2p++ = oid_nil;
     747           0 :                                         r2->tnonil = false;
     748           0 :                                         r2->tnil = true;
     749             :                                         /* sorted if no nils at end */
     750           0 :                                         r2->tsorted = hi == lci->ncand;
     751             :                                         /* reverse sorted if single non-nil at start */
     752           0 :                                         r2->trevsorted = lo == 0 && hi == 1;
     753           0 :                                         r2->tseqbase = oid_nil;
     754             :                                         /* (hi - lo) different OIDs in r2,
     755             :                                          * plus one for nil */
     756           0 :                                         r2->tkey = hi - lo + 1 == lci->ncand;
     757           0 :                                         BATsetcount(r2, lci->ncand);
     758             :                                 } else {
     759             :                                         /* no missing values */
     760           0 :                                         *r2p = r2 = BATdense(0, r->hseqbase + lo - r->tseqbase, lci->ncand);
     761           0 :                                         if (r2 == NULL) {
     762           0 :                                                 BBPreclaim(*r1p);
     763           0 :                                                 return GDK_FAIL;
     764             :                                         }
     765             :                                 }
     766             :                         }
     767           0 :                         if (r3p) {
     768           0 :                                 if (hi - lo < lci->ncand) {
     769           0 :                                         *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     770           0 :                                         if (r3 == NULL) {
     771           0 :                                                 BBPreclaim(*r1p);
     772           0 :                                                 BBPreclaim(r2);
     773           0 :                                                 return GDK_FAIL;
     774             :                                         }
     775           0 :                                         m3p = (bit *) Tloc(r3, 0);
     776           0 :                                         for (o = 0; o < lo; o++)
     777           0 :                                                 *m3p++ = 0;
     778           0 :                                         for (o = lo; o < hi; o++)
     779           0 :                                                 *m3p++ = 1;
     780           0 :                                         for (o = hi; o < lci->ncand; o++)
     781           0 :                                                 *m3p++ = 0;
     782           0 :                                         r3->tnonil = true;
     783           0 :                                         r3->tnil = false;
     784           0 :                                         r3->tsorted = hi == lci->ncand;
     785           0 :                                         r3->trevsorted = lo == 0;
     786           0 :                                         r3->tkey = false;
     787           0 :                                         BATsetcount(r3, lci->ncand);
     788             :                                 }
     789             :                         }
     790           0 :                         goto doreturn;
     791             :                 }
     792             :                 /* l is dense, but the candidate list exists and is
     793             :                  * not dense; we can, by manipulating the range
     794             :                  * [lo..hi), just look at the candidate list values */
     795             : 
     796             :                 /* translate lo and hi to l's OID values that now need
     797             :                  * to match */
     798           0 :                 lo = lo - l->tseqbase + l->hseqbase;
     799           0 :                 hi = hi - l->tseqbase + l->hseqbase;
     800             : 
     801           0 :                 *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     802           0 :                 if (r2p)
     803           0 :                         *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     804           0 :                 if (r3p)
     805           0 :                         *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     806           0 :                 if (r1 == NULL || (r2p != NULL && r2 == NULL) || (r3p != NULL && r3 == NULL)) {
     807           0 :                         BBPreclaim(r1);
     808           0 :                         BBPreclaim(r2);
     809           0 :                         BBPreclaim(r3);
     810           0 :                         return GDK_FAIL;
     811             :                 }
     812           0 :                 o1p = (oid *) Tloc(r1, 0);
     813           0 :                 if (r2) {
     814           0 :                         o2p = (oid *) Tloc(r2, 0);
     815           0 :                         r2->tnil = false;
     816           0 :                         r2->tnonil = true;
     817           0 :                         r2->tkey = true;
     818           0 :                         r2->tsorted = true;
     819             :                 }
     820           0 :                 if (r3) {
     821           0 :                         m3p = (bit *) Tloc(r3, 0);
     822           0 :                         r3->tnil = false;
     823           0 :                         r3->tnonil = true;
     824           0 :                         r3->tkey = false;
     825           0 :                         r3->tsorted = false;
     826             :                 }
     827           0 :                 o = canditer_next(lci);
     828           0 :                 for (i = 0; i < lci->ncand && o < lo; i++) {
     829           0 :                         *o1p++ = o;
     830           0 :                         if (r2)
     831           0 :                                 *o2p++ = oid_nil;
     832           0 :                         if (r3)
     833           0 :                                 *m3p++ = 0;
     834           0 :                         o = canditer_next(lci);
     835             :                 }
     836           0 :                 if (i > 0 && r2) {
     837           0 :                         r2->tnil = true;
     838           0 :                         r2->tnonil = false;
     839           0 :                         r2->tkey = i == 1;
     840             :                 }
     841           0 :                 for (; i < lci->ncand && o < hi; i++) {
     842           0 :                         *o1p++ = o;
     843           0 :                         if (r2)
     844           0 :                                 *o2p++ = o - l->hseqbase + l->tseqbase - r->tseqbase + r->hseqbase;
     845           0 :                         if (r3)
     846           0 :                                 *m3p++ = 1;
     847           0 :                         o = canditer_next(lci);
     848             :                 }
     849           0 :                 if (i < lci->ncand) {
     850           0 :                         if (r2) {
     851           0 :                                 r2->tkey = !r2->tnil && lci->ncand - i == 1;
     852           0 :                                 r2->tnil = true;
     853           0 :                                 r2->tnonil = false;
     854           0 :                                 r2->tsorted = false;
     855             :                         }
     856           0 :                         for (; i < lci->ncand; i++) {
     857           0 :                                 *o1p++ = o;
     858           0 :                                 if (r2)
     859           0 :                                         *o2p++ = oid_nil;
     860           0 :                                 if (r1)
     861           0 :                                         *m3p++ = 0;
     862           0 :                                 o = canditer_next(lci);
     863             :                         }
     864             :                 }
     865           0 :                 BATsetcount(r1, lci->ncand);
     866           0 :                 r1->tseqbase = BATcount(r1) == 1 ? *(oid*)Tloc(r1, 0) : oid_nil;
     867           0 :                 r1->tsorted = true;
     868           0 :                 r1->trevsorted = BATcount(r1) <= 1;
     869           0 :                 r1->tnil = false;
     870           0 :                 r1->tnonil = true;
     871           0 :                 r1->tkey = true;
     872           0 :                 if (r2) {
     873           0 :                         BATsetcount(r2, BATcount(r1));
     874           0 :                         r2->tseqbase = r2->tnil || BATcount(r2) > 1 ? oid_nil : BATcount(r2) == 1 ? *(oid*)Tloc(r2, 0) : 0;
     875           0 :                         r2->trevsorted = BATcount(r2) <= 1;
     876             :                 }
     877           0 :                 if (r3) {
     878           0 :                         BATsetcount(r3, BATcount(r1));
     879             :                 }
     880           0 :                 goto doreturn;
     881             :         }
     882             :         /* l is not dense, so we need to look at the values and check
     883             :          * whether they are in the range [lo..hi) */
     884             : 
     885             :         /* do indirection through the candidate list to look at the
     886             :          * value */
     887             : 
     888           0 :         *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     889           0 :         if (r2p)
     890           0 :                 *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     891           0 :         if (r3p)
     892           0 :                 *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     893           0 :         if (r1 == NULL || (r2p != NULL && r2 == NULL) || (r3p != NULL && r3 == NULL)) {
     894           0 :                 BBPreclaim(r1);
     895           0 :                 BBPreclaim(r2);
     896           0 :                 BBPreclaim(r3);
     897           0 :                 return GDK_FAIL;
     898             :         }
     899           0 :         o1p = (oid *) Tloc(r1, 0);
     900           0 :         if (r2) {
     901           0 :                 o2p = (oid *) Tloc(r2, 0);
     902           0 :                 r2->tnil = false;
     903           0 :                 r2->tnonil = true;
     904             :         }
     905           0 :         if (r3) {
     906           0 :                 m3p = (bit *) Tloc(r3, 0);
     907           0 :                 r3->tnil = false;
     908           0 :                 r3->tnonil = true;
     909             :         }
     910           0 :         if (complex_cand(l)) {
     911           0 :                 ltsorted = l->tsorted;
     912           0 :                 ltrevsorted = l->trevsorted;
     913           0 :                 ltkey = l->tkey;
     914           0 :                 TIMEOUT_LOOP(lci->ncand, qry_ctx) {
     915           0 :                         oid c = canditer_next(lci);
     916             : 
     917           0 :                         o = BUNtoid(l, c - l->hseqbase);
     918           0 :                         *o1p++ = c;
     919           0 :                         if (r2) {
     920           0 :                                 if (o >= lo && o < hi) {
     921           0 :                                         *o2p++ = o - r->tseqbase + r->hseqbase;
     922             :                                 } else {
     923           0 :                                         *o2p++ = oid_nil;
     924           0 :                                         r2->tnil = true;
     925           0 :                                         r2->tnonil = false;
     926             :                                 }
     927             :                         }
     928           0 :                         if (r3) {
     929           0 :                                 if (is_oid_nil(o)) {
     930           0 :                                         *m3p++ = bit_nil;
     931           0 :                                         r3->tnil = true;
     932           0 :                                         r3->tnonil = false;
     933             :                                 } else {
     934           0 :                                         *m3p++ = (o >= lo && o < hi);
     935             :                                 }
     936             :                         }
     937             :                 }
     938           0 :                 TIMEOUT_CHECK(qry_ctx,
     939             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     940             :         } else {
     941           0 :                 BATiter li = bat_iterator(l);
     942           0 :                 const oid *lvals = (const oid *) li.base;
     943           0 :                 ltsorted = li.sorted;
     944           0 :                 ltrevsorted = li.revsorted;
     945           0 :                 ltkey = li.key;
     946           0 :                 TIMEOUT_LOOP(lci->ncand, qry_ctx) {
     947           0 :                         oid c = canditer_next(lci);
     948             : 
     949           0 :                         o = lvals[c - l->hseqbase];
     950           0 :                         *o1p++ = c;
     951           0 :                         if (r2) {
     952           0 :                                 if (o >= lo && o < hi) {
     953           0 :                                         *o2p++ = o - r->tseqbase + r->hseqbase;
     954             :                                 } else {
     955           0 :                                         *o2p++ = oid_nil;
     956           0 :                                         r2->tnil = true;
     957           0 :                                         r2->tnonil = false;
     958             :                                 }
     959             :                         }
     960           0 :                         if (r3) {
     961           0 :                                 if (is_oid_nil(o)) {
     962           0 :                                         *m3p++ = bit_nil;
     963           0 :                                         r3->tnil = true;
     964           0 :                                         r3->tnonil = false;
     965             :                                 } else {
     966           0 :                                         *m3p++ = (o >= lo && o < hi);
     967             :                                 }
     968             :                         }
     969             :                 }
     970           0 :                 bat_iterator_end(&li);
     971           0 :                 TIMEOUT_CHECK(qry_ctx,
     972             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     973             :         }
     974           0 :         r1->tsorted = true;
     975           0 :         r1->trevsorted = BATcount(r1) <= 1;
     976           0 :         r1->tkey = true;
     977           0 :         r1->tseqbase = oid_nil;
     978           0 :         r1->tnil = false;
     979           0 :         r1->tnonil = true;
     980           0 :         BATsetcount(r1, lci->ncand);
     981           0 :         if (r2) {
     982           0 :                 BATsetcount(r2, lci->ncand);
     983           0 :                 r2->tsorted = ltsorted || BATcount(r2) <= 1;
     984           0 :                 r2->trevsorted = ltrevsorted || BATcount(r2) <= 1;
     985           0 :                 r2->tkey = ltkey || BATcount(r2) <= 1;
     986           0 :                 r2->tseqbase = oid_nil;
     987             :         }
     988           0 :         if (r3) {
     989           0 :                 BATsetcount(r3, lci->ncand);
     990             :         }
     991             : 
     992           0 :   doreturn:
     993           0 :         if (r1->tkey)
     994           0 :                 virtualize(r1);
     995           0 :         if (r2 && r2->tkey && r2->tsorted)
     996           0 :                 virtualize(r2);
     997           0 :   doreturn2:
     998       24022 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
     999             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    1000             :                   "sr=" ALGOOPTBATFMT ","
    1001             :                   "nil_on_miss=%s,only_misses=%s;%s %s "
    1002             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
    1003             :                   " (" LLFMT "usec)\n",
    1004             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1005             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    1006             :                   nil_on_miss ? "true" : "false",
    1007             :                   only_misses ? "true" : "false",
    1008             :                   swapped ? " swapped" : "", reason,
    1009             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
    1010             :                   GDKusec() - t0);
    1011             : 
    1012             :         return GDK_SUCCEED;
    1013             : 
    1014           0 :   bailout:
    1015           0 :         BBPreclaim(r1);
    1016           0 :         BBPreclaim(r2);
    1017             :         return GDK_FAIL;
    1018             : }
    1019             : 
    1020             : /* Implementation of mergejoin (see below) for the special case that
    1021             :  * the values are of type int, and some more conditions are met.
                     :  *
                     :  * l and r are scanned from start to end as runs of equal values;
                     :  * the comparisons assume ascending order (only "r is sorted or
                     :  * reverse sorted" is asserted here, the rest is presumably
                     :  * guaranteed by the caller -- TODO confirm).
                     :  *
                     :  * r1p, r2p: result BATs, created through joininitresults(); for
                     :  *      every match the OID of the l row is appended to *r1p and,
                     :  *      when r2p is not NULL, the OID of the r row to *r2p.
                     :  * nil_matches: when false, nils at the start of both columns are
                     :  *      skipped before matching starts.
                     :  * estimate: passed on to joininitresults().
                     :  * t0, swapped, reason: only used for the debug trace (t0 is also
                     :  *      handed to nomatch()).
                     :  *
                     :  * Returns the result of nomatch() if either input is empty,
                     :  * GDK_SUCCEED after a completed scan, and GDK_FAIL if
                     :  * joininitresults() or maybeextend() fails or (presumably) the
                     :  * timeout check jumps to bailout; on bailout r1 and r2 are passed
                     :  * to BBPreclaim(). */
    1022             : static gdk_return
    1023        7399 : mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1024             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
    1025             :               const char *reason)
    1026             : {
    1027        7399 :         BAT *r1, *r2;
    1028        7399 :         BUN lstart, lend, lcnt;
    1029        7399 :         BUN rstart, rend;
    1030        7399 :         BUN lscan, rscan;       /* opportunistic scan window */
    1031        7399 :         BUN maxsize;
    1032        7399 :         const int *lvals, *rvals;
    1033        7399 :         int v;
    1034        7399 :         BUN nl, nr;
    1035        7399 :         oid lv;
    1036        7399 :         BUN i;
    1037        7399 :         BATiter li = bat_iterator(l);
    1038        7427 :         BATiter ri = bat_iterator(r);
    1039             : 
    1040       22297 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1041        7432 :         assert(ri.sorted || ri.revsorted);
    1042             : 
    1043        7432 :         MT_thread_setalgorithm(__func__);
    1044        7432 :         lstart = rstart = 0;
    1045        7432 :         lend = BATcount(l);
    1046        7432 :         lcnt = lend - lstart;
    1047        7432 :         rend = BATcount(r);
    1048        7432 :         lvals = (const int *) li.base;
    1049        7432 :         rvals = (const int *) ri.base;
    1050        7432 :         size_t counter = 0;
    1051        7432 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1052             : 
    1053             :         /* basic properties will be adjusted if necessary later on,
    1054             :          * they were initially set by joininitresults() */
    1055             : 
    1056        7429 :         if (lend == 0 || rend == 0) {
    1057             :                 /* there are no matches */
    1058           0 :                 bat_iterator_end(&li);
    1059           0 :                 bat_iterator_end(&ri);
    1060           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1061           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1062             :                                0, false, false, __func__, t0);
    1063             :         }
    1064             : 
    1065        7352 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1066        7429 :                                        li.key, ri.key, false, false,
    1067             :                                        false, false, estimate)) == BUN_NONE) {
    1068           0 :                 bat_iterator_end(&li);
    1069           0 :                 bat_iterator_end(&ri);
    1070           0 :                 return GDK_FAIL;
    1071             :         }
    1072        7352 :         r1 = *r1p;
    1073        7352 :         r2 = r2p ? *r2p : NULL;
    1074             : 
    1075             :         /* determine opportunistic scan window for l and r */
    1076       40521 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1077       33169 :                 nl >>= 1;
    1078       54015 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
    1079       46663 :                 nr >>= 1;
    1080             : 
    1081        7352 :         if (!nil_matches) {
    1082             :                 /* skip over nils at the start of the columns */
                     :                 /* the linear scans are bounded by lend/rend: a column
                     :                  * no longer than the scan window may hold only nils,
                     :                  * and an unbounded scan would then run off its end */
    1083        4344 :                 if (lscan < lend - lstart && is_int_nil(lvals[lstart + lscan])) {
    1084           0 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1085             :                                                lend - 1, int_nil, 1, 1);
    1086             :                 } else {
    1087        4344 :                         while (lstart < lend && is_int_nil(lvals[lstart]))
    1088           0 :                                 lstart++;
    1089             :                 }
    1090        4344 :                 if (rscan < rend - rstart && is_int_nil(rvals[rstart + rscan])) {
    1091           0 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1092             :                                                rend - 1, int_nil, 1, 1);
    1093             :                 } else {
    1094        4344 :                         while (rstart < rend && is_int_nil(rvals[rstart]))
    1095           0 :                                 rstart++;
    1096             :                 }
    1097             :         }
    1098             :         /* from here on we don't have to worry about nil values */
    1099             : 
    1100      380570 :         while (lstart < lend && rstart < rend) {
    1101      375180 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    1102             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    1103             : 
    1104      375180 :                 v = rvals[rstart];
    1105             : 
    1106      375180 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
    1107        1418 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1108             :                                                lend - 1, v, 1, 0);
    1109             :                 } else {
    1110             :                         /* scan l for v */
    1111      379052 :                         while (lstart < lend && lvals[lstart] < v)
    1112        5290 :                                 lstart++;
    1113             :                 }
    1114      373347 :                 if (lstart >= lend) {
    1115             :                         /* nothing found */
    1116             :                         break;
    1117             :                 }
    1118             : 
    1119             :                 /* Here we determine the next value in l that we are
    1120             :                  * going to try to match in r.  We will also count the
    1121             :                  * number of occurrences in l of that value.
    1122             :                  * Afterwards, v holds the value and nl is the
    1123             :                  * number of times it occurs.  Also, lstart will
    1124             :                  * point to the next value to be considered (ready for
    1125             :                  * the next iteration).
    1126             :                  * If there are many equal values in l (more than
    1127             :                  * lscan), we will use binary search to find the end
    1128             :                  * of the sequence.  This relies on l being sorted
    1129             :                  * (lscan is always at least 4 in this function). */
    1130      372491 :                 nl = 1;         /* we'll match (at least) one in l */
    1131      372491 :                 nr = 0;         /* maybe we won't match anything in r */
    1132      372491 :                 v = lvals[lstart];
    1133      372491 :                 if (li.key) {
    1134             :                         /* if l is key, there is a single value */
    1135       57768 :                         lstart++;
    1136      314723 :                 } else if (lscan < lend - lstart &&
    1137      309956 :                            v == lvals[lstart + lscan]) {
    1138             :                         /* lots of equal values: use binary search to
    1139             :                          * find end */
    1140       25038 :                         nl = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1141             :                                            lend - 1, v, 1, 1);
    1142       25053 :                         nl -= lstart;
    1143       25053 :                         lstart += nl;
    1144             :                 } else {
    1145             :                         /* just scan */
    1146     1385432 :                         while (++lstart < lend && v == lvals[lstart])
    1147     1095747 :                                 nl++;
    1148             :                 }
    1149             :                 /* lstart points one beyond the value we're
    1150             :                  * going to match: ready for the next iteration. */
    1151             : 
    1152             :                 /* First we find the first value in r that is at
    1153             :                  * least as large as v, then we find the first
    1154             :                  * value in r that is larger than v.  The difference
    1155             :                  * is the number of values equal to v and is stored in
    1156             :                  * nr.
    1157             :                  * We will use binary search on r to find both ends of
    1158             :                  * the sequence of values that are equal to v in case
    1159             :                  * the position is "too far" (more than rscan
    1160             :                  * away). */
    1161             : 
    1162             :                 /* first find the location of the first value in r
    1163             :                  * that is >= v, then find the location of the first
    1164             :                  * value in r that is > v; the difference is the
    1165             :                  * number of values equal to v */
    1166             : 
    1167             :                 /* look ahead a little (rscan) in r to see whether
    1168             :                  * we're better off doing a binary search */
    1169      372506 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
    1170             :                         /* value too far away in r: use binary
    1171             :                          * search */
    1172       18157 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1173             :                                                rend - 1, v, 1, 0);
    1174             :                 } else {
    1175             :                         /* scan r for v */
    1176      379688 :                         while (rstart < rend && rvals[rstart] < v)
    1177       25339 :                                 rstart++;
    1178             :                 }
    1179      374410 :                 if (rstart == rend) {
    1180             :                         /* nothing found */
    1181             :                         break;
    1182             :                 }
    1183             : 
    1184             :                 /* now find the end of the sequence of equal values v */
    1185             : 
    1186             :                 /* if r is key, there is zero or one match, otherwise
    1187             :                  * look ahead a little (rscan) in r to see whether
    1188             :                  * we're better off doing a binary search */
    1189      373281 :                 if (ri.key) {
    1190      174305 :                         if (rstart < rend && v == rvals[rstart]) {
    1191      173921 :                                 nr = 1;
    1192      173921 :                                 rstart++;
    1193             :                         }
    1194      198976 :                 } else if (rscan < rend - rstart &&
    1195      198243 :                            v == rvals[rstart + rscan]) {
    1196             :                         /* range too large: use binary search */
    1197       67741 :                         nr = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1198             :                                            rend - 1, v, 1, 1);
    1199       69255 :                         nr -= rstart;
    1200       69255 :                         rstart += nr;
    1201             :                 } else {
    1202             :                         /* scan r for end of range */
    1203     1056438 :                         while (rstart < rend && v == rvals[rstart]) {
    1204      925203 :                                 nr++;
    1205      925203 :                                 rstart++;
    1206             :                         }
    1207             :                 }
    1208             :                 /* rstart points to first value > v or end of
    1209             :                  * r, and nr is the number of values in r that
    1210             :                  * are equal to v */
    1211      374411 :                 if (nr == 0) {
    1212             :                         /* no entries in r found */
    1213           0 :                         continue;
    1214             :                 }
    1215             :                 /* make space: nl values in l match nr values in r, so
    1216             :                  * we need to add nl * nr values in the results */
    1217      375277 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1218           0 :                         goto bailout;
    1219             : 
    1220             :                 /* maintain properties */
    1221      373700 :                 if (nl > 1) {
    1222             :                         /* value occurs multiple times in l, so entry
    1223             :                          * in r will be repeated multiple times: hence
    1224             :                          * r2 is not key and not dense */
    1225      251757 :                         if (r2) {
    1226      218701 :                                 r2->tkey = false;
    1227      218701 :                                 r2->tseqbase = oid_nil;
    1228             :                         }
    1229             :                         /* multiple different values will be inserted
    1230             :                          * in r1 (always in order), so not reverse
    1231             :                          * ordered anymore */
    1232      251757 :                         r1->trevsorted = false;
    1233             :                 }
    1234      373700 :                 if (nr > 1) {
    1235             :                         /* value occurs multiple times in r, so entry
    1236             :                          * in l will be repeated multiple times: hence
    1237             :                          * r1 is not key and not dense */
    1238      161619 :                         r1->tkey = false;
    1239      161619 :                         r1->tseqbase = oid_nil;
    1240             :                         /* multiple different values will be inserted
    1241             :                          * in r2 (in order), so not reverse ordered
    1242             :                          * anymore */
    1243      161619 :                         if (r2) {
    1244      112093 :                                 r2->trevsorted = false;
    1245      112093 :                                 if (nl > 1) {
    1246             :                                         /* multiple values in l match
    1247             :                                          * multiple values in r, so an
    1248             :                                          * ordered sequence will be
    1249             :                                          * inserted multiple times in
    1250             :                                          * r2, so r2 is not ordered
    1251             :                                          * anymore */
    1252       82955 :                                         r2->tsorted = false;
    1253             :                                 }
    1254             :                         }
    1255             :                 }
    1256      373700 :                 if (BATcount(r1) > 0) {
    1257             :                         /* a new, higher value will be inserted into
    1258             :                          * r1, so r1 is not reverse ordered anymore */
    1259      367941 :                         r1->trevsorted = false;
    1260             :                         /* a new higher value will be added to r2 */
    1261      367941 :                         if (r2) {
    1262      304674 :                                 r2->trevsorted = false;
    1263             :                         }
    1264      367941 :                         if (BATtdense(r1) &&
    1265      201540 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1266          61 :                                 r1->tseqbase = oid_nil;
    1267             :                         }
    1268             :                 }
    1269             : 
    1270      373700 :                 if (r2 &&
    1271      309500 :                     BATcount(r2) > 0 &&
    1272      304871 :                     BATtdense(r2) &&
    1273       76805 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1274         501 :                         r2->tseqbase = oid_nil;
    1275             :                 }
    1276             : 
    1277             :                 /* insert values */
    1278      373700 :                 lv = l->hseqbase + lstart - nl;
    1279    15731261 :                 for (i = 0; i < nl; i++) {
    1280             :                         BUN j;
    1281             : 
    1282   109116916 :                         for (j = 0; j < nr; j++) {
    1283    93759355 :                                 APPEND(r1, lv);
    1284             :                         }
    1285    15357561 :                         if (r2) {
    1286    15205793 :                                 oid rv = r->hseqbase + rstart - nr;
    1287             : 
    1288   103250536 :                                 for (j = 0; j < nr; j++) {
    1289    88044743 :                                         APPEND(r2, rv);
    1290    88044743 :                                         rv++;
    1291             :                                 }
    1292             :                         }
    1293    15357561 :                         lv++;
    1294             :                 }
    1295             :         }
    1296             :         /* also set other bits of heap to correct value to indicate size */
    1297        7375 :         BATsetcount(r1, BATcount(r1));
    1298        7351 :         if (r2) {
    1299        5329 :                 BATsetcount(r2, BATcount(r2));
    1300        5328 :                 assert(BATcount(r1) == BATcount(r2));
    1301             :         }
    1302        7350 :         if (BATcount(r1) > 0) {
    1303        5685 :                 if (BATtdense(r1))
    1304        4573 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1305        5685 :                 if (r2 && BATtdense(r2))
    1306        3475 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1307             :         } else {
    1308        1665 :                 r1->tseqbase = 0;
    1309        1665 :                 if (r2) {
    1310         641 :                         r2->tseqbase = 0;
    1311             :                 }
    1312             :         }
    1313        7350 :         bat_iterator_end(&li);
    1314        7394 :         bat_iterator_end(&ri);
    1315        7359 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1316             :                   "nil_matches=%s;%s %s "
    1317             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1318             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1319             :                   nil_matches ? "true" : "false",
    1320             :                   swapped ? " swapped" : "", reason,
    1321             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1322             :                   GDKusec() - t0);
    1323             : 
    1324             :         return GDK_SUCCEED;
    1325             : 
    1326           0 :   bailout:
    1327           0 :         bat_iterator_end(&li);
    1328           0 :         bat_iterator_end(&ri);
    1329           0 :         BBPreclaim(r1);
    1330           0 :         BBPreclaim(r2);
    1331             :         return GDK_FAIL;
    1332             : }
    1333             : 
    1334             : /* Implementation of mergejoin (see below) for the special case that
    1335             :  * the values are of type lng, and some more conditions are met. */
    1336             : static gdk_return
    1337         245 : mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1338             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
    1339             :               const char *reason)
    1340             : {
    1341         245 :         BAT *r1, *r2;
    1342         245 :         BUN lstart, lend, lcnt;
    1343         245 :         BUN rstart, rend;
    1344         245 :         BUN lscan, rscan;       /* opportunistic scan window */
    1345         245 :         BUN maxsize;
    1346         245 :         const lng *lvals, *rvals;
    1347         245 :         lng v;
    1348         245 :         BUN nl, nr;
    1349         245 :         oid lv;
    1350         245 :         BUN i;
    1351         245 :         BATiter li = bat_iterator(l);
    1352         246 :         BATiter ri = bat_iterator(r);
    1353             : 
    1354         738 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1355         246 :         assert(ri.sorted || ri.revsorted);
    1356             : 
    1357         246 :         MT_thread_setalgorithm(__func__);
    1358         246 :         lstart = rstart = 0;
    1359         246 :         lend = BATcount(l);
    1360         246 :         lcnt = lend - lstart;
    1361         246 :         rend = BATcount(r);
    1362         246 :         lvals = (const lng *) li.base;
    1363         246 :         rvals = (const lng *) ri.base;
    1364         246 :         size_t counter = 0;
    1365         246 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1366             : 
    1367             :         /* basic properties will be adjusted if necessary later on,
    1368             :          * they were initially set by joininitresults() */
    1369             : 
    1370         246 :         if (lend == 0 || rend == 0) {
    1371             :                 /* there are no matches */
    1372           0 :                 bat_iterator_end(&li);
    1373           0 :                 bat_iterator_end(&ri);
    1374           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1375           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1376             :                                0, false, false, __func__, t0);
    1377             :         }
    1378             : 
    1379         245 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1380         246 :                                        li.key, ri.key, false, false,
    1381             :                                        false, false, estimate)) == BUN_NONE) {
    1382           0 :                 bat_iterator_end(&li);
    1383           0 :                 bat_iterator_end(&ri);
    1384           0 :                 return GDK_FAIL;
    1385             :         }
    1386         245 :         r1 = *r1p;
    1387         245 :         r2 = r2p ? *r2p : NULL;
    1388             : 
    1389             :         /* determine opportunistic scan window for l and r */
    1390        1760 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1391        1515 :                 nl >>= 1;
    1392        1739 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
    1393        1494 :                 nr >>= 1;
    1394             : 
    1395         245 :         if (!nil_matches) {
    1396             :                 /* skip over nils at the start of the columns */
    1397         139 :                 if (lscan < lend - lstart && is_lng_nil(lvals[lstart + lscan])) {
    1398           0 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1399             :                                                lend - 1, lng_nil, 1, 1);
    1400             :                 } else {
    1401         139 :                         while (is_lng_nil(lvals[lstart]))
    1402           0 :                                 lstart++;
    1403             :                 }
    1404         139 :                 if (rscan < rend - rstart && is_lng_nil(rvals[rstart + rscan])) {
    1405           0 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1406             :                                                rend - 1, lng_nil, 1, 1);
    1407             :                 } else {
    1408         139 :                         while (is_lng_nil(rvals[rstart]))
    1409           0 :                                 rstart++;
    1410             :                 }
    1411             :         }
    1412             :         /* from here on we don't have to worry about nil values */
    1413             : 
    1414      415737 :         while (lstart < lend && rstart < rend) {
    1415      415599 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    1416             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    1417      415598 :                 v = rvals[rstart];
    1418             : 
    1419      415598 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
    1420         962 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1421             :                                                lend - 1, v, 1, 0);
    1422             :                 } else {
    1423             :                         /* scan l for v */
    1424      496357 :                         while (lstart < lend && lvals[lstart] < v)
    1425       81721 :                                 lstart++;
    1426             :                 }
    1427      415143 :                 if (lstart >= lend) {
    1428             :                         /* nothing found */
    1429             :                         break;
    1430             :                 }
    1431             : 
    1432             :                 /* Here we determine the next value in l that we are
    1433             :                  * going to try to match in r.  We will also count the
    1434             :                  * number of occurrences in l of that value.
    1435             :                  * Afterwards, v points to the value and nl is the
    1436             :                  * number of times it occurs.  Also, lstart will
    1437             :                  * point to the next value to be considered (ready for
    1438             :                  * the next iteration).
    1439             :                  * If there are many equal values in l (more than
    1440             :                  * lscan), we will use binary search to find the end
    1441             :                  * of the sequence.  Obviously, we can do this only if
    1442             :                  * l is actually sorted (lscan > 0). */
    1443      415064 :                 nl = 1;         /* we'll match (at least) one in l */
    1444      415064 :                 nr = 0;         /* maybe we won't match anything in r */
    1445      415064 :                 v = lvals[lstart];
    1446      415064 :                 if (li.key) {
    1447             :                         /* if l is key, there is a single value */
    1448      369914 :                         lstart++;
    1449       45150 :                 } else if (lscan < lend - lstart &&
    1450       45047 :                            v == lvals[lstart + lscan]) {
    1451             :                         /* lots of equal values: use binary search to
    1452             :                          * find end */
    1453         395 :                         nl = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1454             :                                            lend - 1, v, 1, 1);
    1455         395 :                         nl -= lstart;
    1456         395 :                         lstart += nl;
    1457             :                 } else {
    1458             :                         /* just scan */
    1459       71033 :                         while (++lstart < lend && v == lvals[lstart])
    1460       26278 :                                 nl++;
    1461             :                 }
    1462             :                 /* lstart points one beyond the value we're
    1463             :                  * going to match: ready for the next iteration. */
    1464             : 
    1465             :                 /* First we find the first value in r that is at
    1466             :                  * least as large as v, then we find the first
    1467             :                  * value in r that is larger than v.  The difference
    1468             :                  * is the number of values equal to v and is stored in
    1469             :                  * nr.
    1470             :                  * We will use binary search on r to find both ends of
    1471             :                  * the sequence of values that are equal to v in case
    1472             :                  * the position is "too far" (more than rscan
    1473             :                  * away). */
    1474             : 
    1475             :                 /* first find the location of the first value in r
    1476             :                  * that is >= v, then find the location of the first
    1477             :                  * value in r that is > v; the difference is the
    1478             :                  * number of values equal to v */
    1479             : 
    1480             :                 /* look ahead a little (rscan) in r to see whether
    1481             :                  * we're better off doing a binary search */
    1482      415064 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
    1483             :                         /* value too far away in r: use binary
    1484             :                          * search */
    1485        2262 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1486             :                                                rend - 1, v, 1, 0);
    1487             :                 } else {
    1488             :                         /* scan r for v */
    1489     1489985 :                         while (rstart < rend && rvals[rstart] < v)
    1490     1077183 :                                 rstart++;
    1491             :                 }
    1492      415233 :                 if (rstart == rend) {
    1493             :                         /* nothing found */
    1494             :                         break;
    1495             :                 }
    1496             : 
    1497             :                 /* now find the end of the sequence of equal values v */
    1498             : 
    1499             :                 /* if r is key, there is zero or one match, otherwise
    1500             :                  * look ahead a little (rscan) in r to see whether
    1501             :                  * we're better off doing a binary search */
    1502      415206 :                 if (ri.key) {
    1503      377481 :                         if (rstart < rend && v == rvals[rstart]) {
    1504       82287 :                                 nr = 1;
    1505       82287 :                                 rstart++;
    1506             :                         }
    1507       37725 :                 } else if (rscan < rend - rstart &&
    1508       37677 :                            v == rvals[rstart + rscan]) {
    1509             :                         /* range too large: use binary search */
    1510           0 :                         nr = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1511             :                                            rend - 1, v, 1, 1);
    1512           0 :                         nr -= rstart;
    1513           0 :                         rstart += nr;
    1514             :                 } else {
    1515             :                         /* scan r for end of range */
    1516       90901 :                         while (rstart < rend && v == rvals[rstart]) {
    1517       53176 :                                 nr++;
    1518       53176 :                                 rstart++;
    1519             :                         }
    1520             :                 }
    1521             :                 /* rstart points to first value > v or end of
    1522             :                  * r, and nr is the number of values in r that
    1523             :                  * are equal to v */
    1524      120012 :                 if (nr == 0) {
    1525             :                         /* no entries in r found */
    1526      295169 :                         continue;
    1527             :                 }
    1528             :                 /* make space: nl values in l match nr values in r, so
    1529             :                  * we need to add nl * nr values in the results */
    1530      120037 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1531           0 :                         goto bailout;
    1532             : 
    1533             :                 /* maintain properties */
    1534      120323 :                 if (nl > 1) {
    1535             :                         /* value occurs multiple times in l, so entry
    1536             :                          * in r will be repeated multiple times: hence
    1537             :                          * r2 is not key and not dense */
    1538        6772 :                         if (r2) {
    1539        1768 :                                 r2->tkey = false;
    1540        1768 :                                 r2->tseqbase = oid_nil;
    1541             :                         }
    1542             :                         /* multiple different values will be inserted
    1543             :                          * in r1 (always in order), so not reverse
    1544             :                          * ordered anymore */
    1545        6772 :                         r1->trevsorted = false;
    1546             :                 }
    1547      120323 :                 if (nr > 1) {
    1548             :                         /* value occurs multiple times in r, so entry
    1549             :                          * in l will be repeated multiple times: hence
    1550             :                          * r1 is not key and not dense */
    1551        4529 :                         r1->tkey = false;
    1552        4529 :                         r1->tseqbase = oid_nil;
    1553             :                         /* multiple different values will be inserted
    1554             :                          * in r2 (in order), so not reverse ordered
    1555             :                          * anymore */
    1556        4529 :                         if (r2) {
    1557        4529 :                                 r2->trevsorted = false;
    1558        4529 :                                 if (nl > 1) {
    1559             :                                         /* multiple values in l match
    1560             :                                          * multiple values in r, so an
    1561             :                                          * ordered sequence will be
    1562             :                                          * inserted multiple times in
    1563             :                                          * r2, so r2 is not ordered
    1564             :                                          * anymore */
    1565           0 :                                         r2->tsorted = false;
    1566             :                                 }
    1567             :                         }
    1568             :                 }
    1569      120323 :                 if (BATcount(r1) > 0) {
    1570             :                         /* a new, higher value will be inserted into
    1571             :                          * r1, so r1 is not reverse ordered anymore */
    1572      119733 :                         r1->trevsorted = false;
    1573             :                         /* a new higher value will be added to r2 */
    1574      119733 :                         if (r2) {
    1575      113029 :                                 r2->trevsorted = false;
    1576             :                         }
    1577      119733 :                         if (BATtdense(r1) &&
    1578       52800 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1579          68 :                                 r1->tseqbase = oid_nil;
    1580             :                         }
    1581             :                 }
    1582             : 
    1583      120323 :                 if (r2 &&
    1584      113607 :                     BATcount(r2) > 0 &&
    1585      112783 :                     BATtdense(r2) &&
    1586       51655 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1587          36 :                         r2->tseqbase = oid_nil;
    1588             :                 }
    1589             : 
    1590             :                 /* insert values */
    1591      120323 :                 lv = l->hseqbase + lstart - nl;
    1592      270015 :                 for (i = 0; i < nl; i++) {
    1593             :                         BUN j;
    1594             : 
    1595      311950 :                         for (j = 0; j < nr; j++) {
    1596      162258 :                                 APPEND(r1, lv);
    1597             :                         }
    1598      149692 :                         if (r2) {
    1599      131645 :                                 oid rv = r->hseqbase + rstart - nr;
    1600             : 
    1601      275882 :                                 for (j = 0; j < nr; j++) {
    1602      144237 :                                         APPEND(r2, rv);
    1603      144237 :                                         rv++;
    1604             :                                 }
    1605             :                         }
    1606      149692 :                         lv++;
    1607             :                 }
    1608             :         }
    1609             :         /* also set other bits of heap to correct value to indicate size */
    1610         244 :         BATsetcount(r1, BATcount(r1));
    1611         244 :         if (r2) {
    1612         219 :                 BATsetcount(r2, BATcount(r2));
    1613         219 :                 assert(BATcount(r1) == BATcount(r2));
    1614             :         }
    1615         244 :         if (BATcount(r1) > 0) {
    1616         212 :                 if (BATtdense(r1))
    1617         120 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1618         212 :                 if (r2 && BATtdense(r2))
    1619         119 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1620             :         } else {
    1621          32 :                 r1->tseqbase = 0;
    1622          32 :                 if (r2) {
    1623          18 :                         r2->tseqbase = 0;
    1624             :                 }
    1625             :         }
    1626         244 :         bat_iterator_end(&li);
    1627         245 :         bat_iterator_end(&ri);
    1628         245 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1629             :                   "nil_matches=%s;%s %s "
    1630             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1631             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1632             :                   nil_matches ? "true" : "false",
    1633             :                   swapped ? " swapped" : "", reason,
    1634             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1635             :                   GDKusec() - t0);
    1636             : 
    1637             :         return GDK_SUCCEED;
    1638             : 
    1639           1 :   bailout:
    1640           1 :         bat_iterator_end(&li);
    1641           1 :         bat_iterator_end(&ri);
    1642           1 :         BBPreclaim(r1);
    1643           1 :         BBPreclaim(r2);
    1644             :         return GDK_FAIL;
    1645             : }
    1646             : 
    1647             : /* Implementation of mergejoin (see below) for the special case that
    1648             :  * the values are of type oid, r is a candidate list with exceptions
    1649             :  * (see the complex_cand assert), and some more conditions are met. */
    1650             : static gdk_return
    1651           0 : mergejoin_cand(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1652             :                bool nil_matches, BUN estimate, lng t0, bool swapped,
    1653             :                const char *reason)
    1654             : {
    1655             : /* adapted from mergejoin below: r is walked with a canditer (rci), and
    1656             :  * l is either an oid array (lvals) or a void column walked with lci */
    1657           0 :         BAT *r1, *r2;
    1658           0 :         BUN lstart, lend, lcnt;
    1659           0 :         struct canditer lci, rci;
    1660           0 :         BUN lscan;              /* opportunistic scan window */
    1661           0 :         BUN maxsize;
    1662           0 :         const oid *lvals;
    1663           0 :         oid v;
    1664           0 :         BUN nl, nr;
    1665           0 :         oid lv;
    1666           0 :         BUN i;
    1667           0 :         BATiter li = bat_iterator(l);
    1668           0 :         BATiter ri = bat_iterator(r);
    1669             : 
    1670           0 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1671             : 
    1672           0 :         MT_thread_setalgorithm(__func__);
    1673           0 :         lstart = 0;
    1674           0 :         lend = BATcount(l);
    1675           0 :         lcnt = lend - lstart;
    1676           0 :         if (li.type == TYPE_void) {
    1677           0 :                 assert(!is_oid_nil(l->tseqbase));
    1678           0 :                 canditer_init(&lci, NULL, l);
    1679           0 :                 lcnt = lci.ncand;
    1680           0 :                 lvals = NULL;
    1681             :         } else {
    1682           0 :                 lci = (struct canditer) {.tpe = cand_dense}; /* not used */
    1683           0 :                 lvals = (const oid *) li.base;
    1684           0 :                 assert(lvals != NULL);
    1685             :         }
    1686             : 
    1687           0 :         assert(complex_cand(r));
    1688           0 :         canditer_init(&rci, NULL, r);
    1689           0 :         size_t counter = 0;
    1690           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1691             : 
    1692             :         /* basic properties will be adjusted if necessary later on,
    1693             :          * they were initially set by joininitresults() */
    1694             : 
    1695           0 :         if (lend == 0 || rci.ncand == 0) {
    1696             :                 /* there are no matches */
    1697           0 :                 bat_iterator_end(&li);
    1698           0 :                 bat_iterator_end(&ri);
    1699           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1700           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1701             :                                0, false, false, __func__, t0);
    1702             :         }
    1703             : 
    1704           0 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1705           0 :                                        li.key, ri.key, false, false,
    1706             :                                        false, false, estimate)) == BUN_NONE) {
    1707           0 :                 bat_iterator_end(&li);
    1708           0 :                 bat_iterator_end(&ri);
    1709           0 :                 return GDK_FAIL;
    1710             :         }
    1711           0 :         r1 = *r1p;
    1712           0 :         r2 = r2p ? *r2p : NULL;
    1713             : 
    1714             :         /* determine opportunistic scan window for l */
    1715           0 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1716           0 :                 nl >>= 1;
    1717             : 
    1718           0 :         if (!nil_matches) {
    1719             :                 /* skip over nils at the start of l (never past lend) */
    1720           0 :                 if (lscan < lend - lstart && lvals && is_oid_nil(lvals[lstart + lscan])) {
    1721           0 :                         lstart = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1722             :                                                lend - 1, oid_nil, 1, 1);
    1723           0 :                 } else if (lvals) {
    1724           0 :                         while (lstart < lend && is_oid_nil(lvals[lstart]))
    1725           0 :                                 lstart++;
    1726             :                 } /* else l is candidate list: no nils */
    1727             :         }
    1728             :         /* from here on we don't have to worry about nil values */
    1729             : 
    1730           0 :         while (lstart < lend && rci.next < rci.ncand) {
    1731           0 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    1732             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    1733           0 :                 v = canditer_peek(&rci);
    1734             : 
    1735           0 :                 if (lvals) {
    1736           0 :                         if (lscan < lend - lstart &&
    1737           0 :                             lvals[lstart + lscan] < v) {
    1738           0 :                                 lstart = binsearch_oid(NULL, 0, lvals,
    1739             :                                                        lstart + lscan,
    1740             :                                                        lend - 1, v, 1, 0);
    1741             :                         } else {
    1742             :                                 /* scan l for v */
    1743           0 :                                 while (lstart < lend && lvals[lstart] < v)
    1744           0 :                                         lstart++;
    1745             :                         }
    1746             :                 } else {
    1747           0 :                         lstart = canditer_search(&lci, v, true);
    1748           0 :                         canditer_setidx(&lci, lstart);
    1749             :                 }
    1750           0 :                 if (lstart >= lend) {
    1751             :                         /* nothing found */
    1752             :                         break;
    1753             :                 }
    1754             : 
    1755             :                 /* Here we determine the next value in l that we are
    1756             :                  * going to try to match in r.  We will also count the
    1757             :                  * number of occurrences in l of that value.
    1758             :                  * Afterwards, v holds the value and nl is the
    1759             :                  * number of times it occurs.  Also, lstart will
    1760             :                  * point to the next value to be considered (ready for
    1761             :                  * the next iteration).
    1762             :                  * If there are many equal values in l (more than
    1763             :                  * lscan), we will use binary search to find the end
    1764             :                  * of the sequence.  If l is a void column (lvals is
    1765             :                  * NULL) or l is key, the value is counted once. */
    1766           0 :                 nl = 1;         /* we'll match (at least) one in l */
    1767           0 :                 nr = 0;         /* maybe we won't match anything in r */
    1768           0 :                 v = lvals ? lvals[lstart] : canditer_next(&lci);
    1769           0 :                 if (li.key || lvals == NULL) {
    1770             :                         /* l is key or void: there is a single value */
    1771           0 :                         lstart++;
    1772           0 :                 } else if (lscan < lend - lstart &&
    1773           0 :                            v == lvals[lstart + lscan]) {
    1774             :                         /* lots of equal values: use binary search to
    1775             :                          * find end */
    1776           0 :                         nl = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1777             :                                            lend - 1, v, 1, 1);
    1778           0 :                         nl -= lstart;
    1779           0 :                         lstart += nl;
    1780             :                 } else {
    1781             :                         /* just scan */
    1782           0 :                         while (++lstart < lend && v == lvals[lstart])
    1783           0 :                                 nl++;
    1784             :                 }
    1785             :                 /* lstart points one beyond the value we're
    1786             :                  * going to match: ready for the next iteration. */
    1787             : 
    1788             :                 /* Next we look up v in r.  canditer_search gives the
    1789             :                  * position in r of the first candidate that is at
    1790             :                  * least as large as v, and rci is moved to that
    1791             :                  * position.  nr temporarily holds the position: if
    1792             :                  * it equals rci.ncand there is no such candidate and
    1793             :                  * we are done.  Afterwards nr is the number of
    1794             :                  * matches for v in r; this code counts at most one
    1795             :                  * (nr is either set to 1 or we move on to the next
    1796             :                  * value in l). */
    1797             : 
    1798             :                 /* find the location of the first value in r that is
    1799             :                  * >= v and move rci there; nr is reused below as the
    1800             :                  * match count, so it only holds the location until
    1801             :                  * the peek */
    1802           0 :                 nr = canditer_search(&rci, v, true);
    1803           0 :                 canditer_setidx(&rci, nr);
    1804           0 :                 if (nr == rci.ncand) {
    1805             :                         /* nothing found */
    1806             :                         break;
    1807             :                 }
    1808             : 
    1809             :                 /* now check whether the candidate at rci equals v */
    1810             : 
    1811             :                 /* there is zero or one match: on a match we step rci
    1812             :                  * past the matching candidate so that rci.next - nr
    1813             :                  * is its position when we compute the r2 value */
    1814           0 :                 if (canditer_peek(&rci) == v) {
    1815           0 :                         nr = 1;
    1816           0 :                         canditer_next(&rci);
    1817             :                 } else {
    1818             :                         /* rci points to the first value > v in r; nr
    1819             :                          * still holds its position (not a count), but
    1820             :                          * it is reset at the top of the loop */
    1821             :                         /* no entries in r found */
    1822           0 :                         continue;
    1823             :                 }
    1824             :                 /* make space: nl values in l match nr values in r, so
    1825             :                  * we need to add nl * nr values in the results */
    1826           0 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1827           0 :                         goto bailout;
    1828             : 
    1829             :                 /* maintain properties */
    1830           0 :                 if (nl > 1) {
    1831             :                         /* value occurs multiple times in l, so entry
    1832             :                          * in r will be repeated multiple times: hence
    1833             :                          * r2 is not key and not dense */
    1834           0 :                         if (r2) {
    1835           0 :                                 r2->tkey = false;
    1836           0 :                                 r2->tseqbase = oid_nil;
    1837             :                         }
    1838             :                         /* multiple different values will be inserted
    1839             :                          * in r1 (always in order), so not reverse
    1840             :                          * ordered anymore */
    1841           0 :                         r1->trevsorted = false;
    1842             :                 }
    1843           0 :                 if (BATcount(r1) > 0) {
    1844             :                         /* a new, higher value will be inserted into
    1845             :                          * r1, so r1 is not reverse ordered anymore */
    1846           0 :                         r1->trevsorted = false;
    1847             :                         /* a new higher value will be added to r2 */
    1848           0 :                         if (r2) {
    1849           0 :                                 r2->trevsorted = false;
    1850             :                         }
    1851           0 :                         if (BATtdense(r1) &&
    1852           0 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1853           0 :                                 r1->tseqbase = oid_nil;
    1854             :                         }
    1855             :                 }
    1856             : 
    1857           0 :                 if (r2 &&
    1858           0 :                     BATcount(r2) > 0 &&
    1859           0 :                     BATtdense(r2) &&
    1860           0 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rci.next - nr) {
    1861           0 :                         r2->tseqbase = oid_nil;
    1862             :                 }
    1863             : 
    1864             :                 /* insert values */
    1865           0 :                 lv = l->hseqbase + lstart - nl;
    1866           0 :                 for (i = 0; i < nl; i++) {
    1867             :                         BUN j;
    1868             : 
    1869           0 :                         for (j = 0; j < nr; j++) {
    1870           0 :                                 APPEND(r1, lv);
    1871             :                         }
    1872           0 :                         if (r2) {
    1873           0 :                                 oid rv = r->hseqbase + rci.next - nr;
    1874             : 
    1875           0 :                                 for (j = 0; j < nr; j++) {
    1876           0 :                                         APPEND(r2, rv);
    1877           0 :                                         rv++;
    1878             :                                 }
    1879             :                         }
    1880           0 :                         lv++;
    1881             :                 }
    1882             :         }
    1883             :         /* also set other bits of heap to correct value to indicate size */
    1884           0 :         BATsetcount(r1, BATcount(r1));
    1885           0 :         if (r2) {
    1886           0 :                 BATsetcount(r2, BATcount(r2));
    1887           0 :                 assert(BATcount(r1) == BATcount(r2));
    1888             :         }
    1889           0 :         if (BATcount(r1) > 0) {
    1890           0 :                 if (BATtdense(r1))
    1891           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1892           0 :                 if (r2 && BATtdense(r2))
    1893           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1894             :         } else {
    1895           0 :                 r1->tseqbase = 0;
    1896           0 :                 if (r2) {
    1897           0 :                         r2->tseqbase = 0;
    1898             :                 }
    1899             :         }
    1900           0 :         bat_iterator_end(&li);
    1901           0 :         bat_iterator_end(&ri);
    1902           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1903             :                   "nil_matches=%s;%s %s "
    1904             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1905             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1906             :                   nil_matches ? "true" : "false",
    1907             :                   swapped ? " swapped" : "", reason,
    1908             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1909             :                   GDKusec() - t0);
    1910             : 
    1911             :         return GDK_SUCCEED;
    1912             : 
    1913           0 :   bailout:
    1914           0 :         bat_iterator_end(&li);
    1915           0 :         bat_iterator_end(&ri);
    1916           0 :         BBPreclaim(r1);
    1917           0 :         BBPreclaim(r2);
    1918             :         return GDK_FAIL;
    1919             : }
    1920             : 
    1921             : /* Perform a "merge" join on l and r (if both are sorted) with
    1922             :  * optional candidate lists, or join using binary search on r if l is
    1923             :  * not sorted.
    1924             :  *
    1925             :  * If nil_matches is set, nil values are treated as ordinary values
    1926             :  * that can match; otherwise nil values never match.
    1927             :  *
    1928             :  * If nil_on_miss is set, a nil value is returned in r2 if there is no
    1929             :  * match in r for a particular value in l (left outer join).
    1930             :  *
    1931             :  * If semi is set, only a single set of values in r1/r2 is returned if
    1932             :  * there is a match of l in r, no matter how many matches there are in
    1933             :  * r; otherwise all matches are returned.
    1934             :  *
    1935             :  * If max_one is set, only a single match is allowed.  This is like
    1936             :  * semi, but enforces the single match.
    1937             :  *
    1938             :  * t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
    1939             :  */
    1940             : static gdk_return
    1941       15695 : mergejoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
    1942             :           struct canditer *restrict lci, struct canditer *restrict rci,
    1943             :           bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    1944             :           bool not_in, bool max_one, bool min_one, BUN estimate,
    1945             :           lng t0, bool swapped,
    1946             :           const char *reason)
    1947             : {
    1948             :         /* [lr]scan determine how far we look ahead in l/r in order to
    1949             :          * decide whether we want to do a binary search or a scan */
    1950       15695 :         BUN lscan, rscan;
    1951       15695 :         const void *lvals, *rvals; /* the values of l/r (NULL if dense) */
    1952       15695 :         const char *lvars, *rvars; /* the indirect values (NULL if fixed size) */
    1953       15695 :         const void *nil = ATOMnilptr(l->ttype);
    1954       15695 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    1955       15695 :         const void *v;          /* points to value under consideration */
    1956       15695 :         const void *prev = NULL;
    1957       15695 :         BUN nl, nr;
    1958       15695 :         bool insert_nil;
    1959             :         /* equal_order is set if we can scan both BATs in the same
    1960             :          * order, so when both are sorted or both are reverse sorted
    1961             :          * -- important to know in order to skip over values; if l is
    1962             :          * not sorted, this must be set to true and we will always do a
    1963             :          * binary search on all of r */
    1964       15695 :         bool equal_order;
    1965             :         /* [lr]ordering is either 1 or -1 depending on the order of
    1966             :          * l/r: it determines the comparison function used */
    1967       15695 :         int lordering, rordering;
    1968       15695 :         oid lv;
    1969       15695 :         BUN i, j;               /* counters */
    1970       15695 :         oid lval = oid_nil, rval = oid_nil; /* temporary space to point v to */
    1971       15695 :         struct canditer llci, rrci;
    1972       15695 :         struct canditer *mlci, xlci;
    1973       15695 :         struct canditer *mrci, xrci;
    1974             : 
    1975       15695 :         if (lci->tpe == cand_dense && lci->ncand == BATcount(l) &&
    1976       15642 :             rci->tpe == cand_dense && rci->ncand == BATcount(r) &&
    1977       14944 :             !nil_on_miss && !semi && !max_one && !min_one && !only_misses &&
    1978        9970 :             !not_in &&
    1979        7797 :             l->tsorted && r->tsorted) {
    1980             :                 /* special cases with far fewer options */
    1981        7747 :                 if (complex_cand(r))
    1982           0 :                         return mergejoin_cand(r1p, r2p, l, r, nil_matches,
    1983             :                                               estimate, t0, swapped, __func__);
    1984       15417 :                 switch (ATOMbasetype(l->ttype)) {
    1985        7412 :                 case TYPE_int:
    1986        7412 :                         return mergejoin_int(r1p, r2p, l, r, nil_matches,
    1987             :                                              estimate, t0, swapped, __func__);
    1988         245 :                 case TYPE_lng:
    1989         245 :                         return mergejoin_lng(r1p, r2p, l, r, nil_matches,
    1990             :                                              estimate, t0, swapped, __func__);
    1991             :                 }
    1992             :         }
    1993             : 
    1994       23921 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    1995        8038 :         assert(r->tsorted || r->trevsorted);
    1996             : 
    1997        8038 :         size_t counter = 0;
    1998        8038 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1999             : 
    2000        8038 :         BATiter li = bat_iterator(l);
    2001        8038 :         BATiter ri = bat_iterator(r);
    2002        8039 :         MT_thread_setalgorithm(__func__);
    2003        8035 :         if (BATtvoid(l)) {
    2004             :                 /* l->ttype == TYPE_void && is_oid_nil(l->tseqbase) is
    2005             :                  * handled by selectjoin */
    2006          76 :                 assert(!is_oid_nil(l->tseqbase));
    2007          76 :                 canditer_init(&llci, NULL, l);
    2008          76 :                 lvals = NULL;
    2009             :         } else {
    2010        7959 :                 lvals = li.base;                              /* non NULL */
    2011        7959 :                 llci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2012             :         }
    2013        8032 :         rrci = (struct canditer) {.tpe = cand_dense};
    2014        8032 :         if (BATtvoid(r)) {
    2015         105 :                 if (!is_oid_nil(r->tseqbase))
    2016         105 :                         canditer_init(&rrci, NULL, r);
    2017             :                 rvals = NULL;
    2018             :         } else {
    2019        7927 :                 rvals = ri.base;
    2020             :         }
    2021        8030 :         if (li.vh && li.type) {
    2022         173 :                 assert(ri.vh && ri.type);
    2023         173 :                 lvars = li.vh->base;
    2024         173 :                 rvars = ri.vh->base;
    2025             :         } else {
    2026        7857 :                 assert(ri.vh == NULL || ri.type == TYPE_void);
    2027             :                 lvars = rvars = NULL;
    2028             :         }
    2029             :         /* if the var pointer is not NULL, then so is the val pointer */
    2030        8030 :         assert(lvars == NULL || lvals != NULL);
    2031        8030 :         assert(rvars == NULL || rvals != NULL);
    2032             : 
    2033        8030 :         const bool rhasnil = !ri.nonil &&
    2034        4560 :                 ((BATtvoid(r) && r->tseqbase == oid_nil) ||
    2035        4560 :                  (rvals && cmp(nil, VALUE(r, (ri.sorted ? rci->seq : canditer_last(rci)) - r->hseqbase)) == 0));
    2036          11 :         const bit defmark = rhasnil ? bit_nil : 0;
    2037             : 
    2038        8030 :         if (not_in && (rhasnil || (BATtvoid(l) && l->tseqbase == oid_nil))) {
    2039           0 :                 bat_iterator_end(&li);
    2040           0 :                 bat_iterator_end(&ri);
    2041           0 :                 return nomatch(r1p, r2p, r3p, l, r, lci, defmark, false, false,
    2042             :                                __func__, t0);
    2043             :         }
    2044             : 
    2045        8030 :         if ((!nil_matches &&
    2046        7947 :              ((li.type == TYPE_void && is_oid_nil(l->tseqbase)) ||
    2047        7947 :               (ri.type == TYPE_void && is_oid_nil(r->tseqbase)))) ||
    2048        8030 :             (li.type == TYPE_void && is_oid_nil(l->tseqbase) &&
    2049           0 :              (ri.nonil ||
    2050           0 :               (ri.type == TYPE_void && !is_oid_nil(r->tseqbase)))) ||
    2051        8030 :             (ri.type == TYPE_void && is_oid_nil(r->tseqbase) &&
    2052           0 :              (li.nonil ||
    2053           0 :               (li.type == TYPE_void && !is_oid_nil(l->tseqbase))))) {
    2054             :                 /* there are no matches */
    2055           0 :                 bat_iterator_end(&li);
    2056           0 :                 bat_iterator_end(&ri);
    2057           0 :                 return nomatch(r1p, r2p, r3p, l, r, lci, defmark,
    2058             :                                nil_on_miss, only_misses, __func__, t0);
    2059             :         }
    2060             : 
    2061       16024 :         BUN maxsize = joininitresults(r1p, r2p, r3p, lci->ncand, rci->ncand,
    2062        8030 :                                       li.key, ri.key, semi | max_one,
    2063             :                                       nil_on_miss, only_misses, min_one,
    2064             :                                       estimate);
    2065        7994 :         if (maxsize == BUN_NONE) {
    2066           0 :                 bat_iterator_end(&li);
    2067           0 :                 bat_iterator_end(&ri);
    2068           0 :                 return GDK_FAIL;
    2069             :         }
    2070        7994 :         BAT *r1 = *r1p;
    2071        7994 :         BAT *r2 = r2p ? *r2p : NULL;
    2072        7994 :         BAT *r3 = r3p ? *r3p : NULL;
    2073             : 
    2074        7994 :         if (lci->tpe == cand_mask) {
    2075           4 :                 mlci = lci;
    2076           4 :                 canditer_init(&xlci, l, NULL);
    2077           4 :                 lci = &xlci;
    2078             :         } else {
    2079        7990 :                 mlci = NULL;
    2080        7990 :                 xlci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2081             :         }
    2082        7994 :         if (rci->tpe == cand_mask) {
    2083           0 :                 mrci = rci;
    2084           0 :                 canditer_init(&xrci, r, NULL);
    2085           0 :                 rci = &xrci;
    2086             :         } else {
    2087        7994 :                 mrci = NULL;
    2088        7994 :                 xrci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2089             :         }
    2090             : 
    2091        7994 :         if (li.sorted || li.revsorted) {
    2092        5675 :                 equal_order = (li.sorted && ri.sorted) ||
    2093         279 :                         (li.revsorted && ri.revsorted &&
    2094         100 :                          !BATtvoid(l) && !BATtvoid(r));
    2095        5675 :                 lordering = li.sorted && (ri.sorted || !equal_order) ? 1 : -1;
    2096        5594 :                 rordering = equal_order ? lordering : -lordering;
    2097        5675 :                 if (!li.nonil && !nil_matches && !nil_on_miss && lvals != NULL) {
    2098             :                         /* find first non-nil */
    2099        2544 :                         nl = binsearch(NULL, 0, li.type, lvals, lvars, li.width, 0, BATcount(l), nil, li.sorted ? 1 : -1, li.sorted ? 1 : 0);
    2100        2453 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    2101        2451 :                         if (li.sorted) {
    2102        2364 :                                 canditer_setidx(lci, nl);
    2103          87 :                         } else if (li.revsorted) {
    2104          87 :                                 lci->ncand = nl;
    2105             :                         }
    2106             :                 }
    2107             :                 /* determine opportunistic scan window for l */
    2108       11334 :                 lscan = 4 + ilog2(lci->ncand);
    2109             :         } else {
    2110             :                 /* if l not sorted, we will always use binary search
    2111             :                  * on r */
    2112        2319 :                 assert(!BATtvoid(l)); /* void is always sorted */
    2113        2319 :                 lscan = 0;
    2114        2319 :                 equal_order = true;
    2115        2319 :                 lordering = 1;
    2116        2319 :                 rordering = ri.sorted ? 1 : -1;
    2117             :         }
    2118             :         /* determine opportunistic scan window for r; if l is not
    2119             :          * sorted this is only used to find range of equal values */
    2120        7986 :         rscan = 4 + ilog2(rci->ncand);
    2121             : 
    2122        7986 :         if (!equal_order) {
    2123             :                 /* we go through r backwards */
    2124         183 :                 canditer_setidx(rci, rci->ncand);
    2125             :         }
    2126             :         /* At this point the various variables that help us through
    2127             :          * the algorithm have been set.  The table explains them.  The
    2128             :          * first two columns are the inputs, the next three columns
    2129             :          * are the variables, the final two columns indicate how the
    2130             :          * variables can be used.
    2131             :          *
    2132             :          * l/r    sl/sr | vals  cand  off | result   value being matched
    2133             :          * -------------+-----------------+----------------------------------
    2134             :          * dense  NULL  | NULL  NULL  set | i        off==nil?nil:i+off
    2135             :          * dense  dense | NULL  NULL  set | i        off==nil?nil:i+off
    2136             :          * dense  set   | NULL  set   set | cand[i]  off==nil?nil:cand[i]+off
    2137             :          * set    NULL  | set   NULL  0   | i        vals[i]
    2138             :          * set    dense | set   NULL  0   | i        vals[i]
    2139             :          * set    set   | set   set   0   | cand[i]  vals[cand[i]]
    2140             :          *
    2141             :          * If {l,r}off is lng_nil, all values in the corresponding bat
    2142             :          * are oid_nil because the bat has type VOID and the tseqbase
    2143             :          * is nil.
    2144             :          */
    2145             : 
    2146             : 
    2147             :         /* Before we start adding values to r1 and r2, the properties
    2148             :          * are as follows:
    2149             :          * tseqbase - 0
    2150             :          * tkey - true
    2151             :          * tsorted - true
    2152             :          * trevsorted - true
    2153             :          * tnil - false
    2154             :          * tnonil - true
    2155             :          * We will modify these as we go along.
    2156             :          */
    2157      597979 :         while (lci->next < lci->ncand) {
    2158      592873 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    2159             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    2160      592873 :                 bit mark = defmark;
    2161      592873 :                 if (lscan == 0) {
    2162             :                         /* always search r completely */
    2163      299937 :                         assert(equal_order);
    2164      299937 :                         canditer_reset(rci);
    2165             :                 } else {
    2166             :                         /* If l is sorted (lscan > 0), we look at the
    2167             :                          * next value in r to see whether we can jump
    2168             :                          * over a large section of l using binary
    2169             :                          * search.  We do this by looking ahead in l
    2170             :                          * (lscan far, to be precise) and seeing if
    2171             :                          * the value there is still too "small"
    2172             :                          * (definition depends on sort order of l).
    2173             :                          * If it is, we use binary search on l,
    2174             :                          * otherwise we scan l for the next position
    2175             :                          * with a value greater than or equal to the
    2176             :                          * value in r.
    2177             :                          * The next value to match in r is the first
    2178             :                          * if equal_order is set, the last
    2179             :                          * otherwise.
    2180             :                          * When skipping over values in l, we count
    2181             :                          * how many we skip in nlx.  We need this in
    2182             :                          * case only_misses or nil_on_miss is set, and
    2183             :                          * to properly set the dense property in the
    2184             :                          * first output BAT. */
    2185      292936 :                         BUN nlx = 0; /* number of non-matching values in l */
    2186             : 
    2187      292936 :                         if (equal_order) {
    2188      291985 :                                 if (rci->next == rci->ncand)
    2189             :                                         v = NULL; /* no more values */
    2190      289674 :                                 else if (mrci) {
    2191           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peek(rci), true);
    2192           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    2193             :                                 } else
    2194      289674 :                                         v = VALUE(r, canditer_peek(rci) - r->hseqbase);
    2195             :                         } else {
    2196         951 :                                 if (rci->next == 0)
    2197             :                                         v = NULL; /* no more values */
    2198         910 :                                 else if (mrci) {
    2199           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peekprev(rci), false);
    2200           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    2201             :                                 } else
    2202         910 :                                         v = VALUE(r, canditer_peekprev(rci) - r->hseqbase);
    2203             :                         }
    2204             :                         /* here, v points to next value in r, or if
    2205             :                          * we're at the end of r, v is NULL */
    2206       20858 :                         if (v == NULL) {
    2207        2352 :                                 nlx = lci->ncand - lci->next;
    2208             :                         } else {
    2209      290646 :                                 if (lscan < lci->ncand - lci->next) {
    2210      266906 :                                         lv = canditer_idx(lci, lci->next + lscan);
    2211      266979 :                                         lv -= l->hseqbase;
    2212      266979 :                                         if (lvals) {
    2213      255259 :                                                 if (lordering * cmp(VALUE(l, lv), v) < 0) {
    2214        3608 :                                                         nlx = binsearch(NULL, 0, li.type, lvals, lvars, li.width, lv, BATcount(l), v, lordering, 0);
    2215        3608 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    2216        3608 :                                                         nlx -= lci->next;
    2217             :                                                 }
    2218             :                                         } else {
    2219       11720 :                                                 assert(lordering == 1);
    2220       11720 :                                                 if (canditer_idx(&llci, lv) < *(const oid *)v) {
    2221          29 :                                                         nlx = canditer_search(&llci, *(const oid *)v, true);
    2222          29 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    2223          29 :                                                         nlx -= lci->next;
    2224             :                                                 }
    2225             :                                         }
    2226      266895 :                                         if (mlci) {
    2227        6254 :                                                 lv = canditer_mask_next(mlci, lci->seq + lci->next + nlx, true);
    2228        6254 :                                                 if (lv == oid_nil)
    2229           0 :                                                         nlx = lci->ncand - lci->next;
    2230             :                                                 else
    2231        6254 :                                                         nlx = lv - lci->seq - lci->next;
    2232             :                                         }
    2233      266895 :                                         if (lci->next + nlx == lci->ncand)
    2234         468 :                                                 v = NULL;
    2235             :                                 }
    2236             :                         }
    2237      269247 :                         if (nlx > 0) {
    2238        6006 :                                 if (only_misses) {
    2239        3862 :                                         if (maybeextend(r1, r2, r3, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2240           0 :                                                 goto bailout;
    2241      456403 :                                         while (nlx > 0) {
    2242      452540 :                                                 lv = canditer_next(lci);
    2243      452540 :                                                 if (mlci == NULL || canditer_contains(mlci, lv))
    2244      452540 :                                                         APPEND(r1, lv);
    2245      452540 :                                                 nlx--;
    2246             :                                         }
    2247        3863 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2248        1090 :                                                 r1->trevsorted = false;
    2249        2144 :                                 } else if (nil_on_miss) {
    2250          18 :                                         if (r2 && r2->tnonil) {
    2251           2 :                                                 r2->tnil = true;
    2252           2 :                                                 r2->tnonil = false;
    2253           2 :                                                 r2->tseqbase = oid_nil;
    2254           2 :                                                 r2->tsorted = false;
    2255           2 :                                                 r2->trevsorted = false;
    2256           2 :                                                 r2->tkey = false;
    2257             :                                         }
    2258          18 :                                         if (maybeextend(r1, r2, r3, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2259           0 :                                                 goto bailout;
    2260          18 :                                         if (r3)
    2261          17 :                                                 r3->tnil = false;
    2262        2052 :                                         while (nlx > 0) {
    2263        2034 :                                                 lv = canditer_next(lci);
    2264        2034 :                                                 if (mlci == NULL || canditer_contains(mlci, lv)) {
    2265        2034 :                                                         APPEND(r1, lv);
    2266        2034 :                                                         if (r2)
    2267           2 :                                                                 APPEND(r2, oid_nil);
    2268        2034 :                                                         if (r3) {
    2269        2033 :                                                                 if (rhasnil || cmp(VALUE(l, lv - l->hseqbase), nil) == 0) {
    2270           0 :                                                                         ((bit *) r3->theap->base)[r3->batCount++] = bit_nil;
    2271           0 :                                                                         r3->tnil = true;
    2272             :                                                                 } else {
    2273        2033 :                                                                         ((bit *) r3->theap->base)[r3->batCount++] = 0;
    2274             :                                                                 }
    2275             :                                                         }
    2276             :                                                 }
    2277        2034 :                                                 nlx--;
    2278             :                                         }
    2279          18 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2280           5 :                                                 r1->trevsorted = false;
    2281             :                                 } else {
    2282        2126 :                                         canditer_setidx(lci, lci->next + nlx);
    2283             :                                 }
    2284             :                         }
    2285      292988 :                         if (v == NULL) {
    2286             :                                 /* we have exhausted the inputs */
    2287             :                                 break;
    2288             :                         }
    2289             :                 }
    2290             : 
    2291             :                 /* Here we determine the next value in l that we are
    2292             :                  * going to try to match in r.  We will also count the
    2293             :                  * number of occurrences in l of that value.
    2294             :                  * Afterwards, v points to the value and nl is the
    2295             :                  * number of times it occurs.  Also, lci will point to
    2296             :                  * the next value to be considered (ready for the next
    2297             :                  * iteration).
    2298             :                  * If there are many equal values in l (more than
    2299             :                  * lscan), we will use binary search to find the end
    2300             :                  * of the sequence.  Obviously, we can do this only if
    2301             :                  * l is actually sorted (lscan > 0). */
    2302      583933 :                 nl = 1;         /* we'll match (at least) one in l */
    2303      583933 :                 nr = 0;         /* maybe we won't match anything in r */
    2304      583933 :                 lv = canditer_peek(lci);
    2305      583757 :                 if (mlci) {
    2306        6314 :                         lv = canditer_mask_next(mlci, lv, true);
    2307        6314 :                         if (lv == oid_nil)
    2308             :                                 break;
    2309        6314 :                         canditer_setidx(lci, canditer_search(lci, lv, true));
    2310             :                 }
    2311      584012 :                 v = VALUE(l, lv - l->hseqbase);
    2312      587208 :                 if (li.key) {
    2313             :                         /* if l is key, there is a single value */
    2314      335133 :                 } else if (lscan > 0 &&
    2315      128688 :                            lscan < lci->ncand - lci->next &&
    2316       63685 :                            cmp(v, VALUE(l, canditer_idx(lci, lci->next + lscan) - l->hseqbase)) == 0) {
    2317             :                         /* lots of equal values: use binary search to
    2318             :                          * find end */
    2319        1029 :                         assert(lvals != NULL);
    2320        2057 :                         nl = binsearch(NULL, 0,
    2321        1029 :                                        li.type, lvals, lvars,
    2322        1029 :                                        li.width, lci->next + lscan,
    2323             :                                        BATcount(l),
    2324             :                                        v, lordering, 1);
    2325        1028 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    2326        1026 :                         nl -= lci->next;
    2327             :                 } else {
    2328      334056 :                         struct canditer ci = *lci; /* work on copy */
    2329      334056 :                         nl = 0; /* it will be incremented again */
    2330      530057 :                         do {
    2331      530057 :                                 canditer_next(&ci);
    2332      527683 :                                 nl++;
    2333     1051189 :                         } while (ci.next < ci.ncand &&
    2334      526100 :                                  cmp(v, VALUE(l, canditer_peek(&ci) - l->hseqbase)) == 0);
    2335             :                 }
    2336             :                 /* lci->next + nl is the position for the next iteration */
    2337             : 
    2338      582189 :                 if ((!nil_matches || not_in) && !li.nonil && cmp(v, nil) == 0) {
    2339         636 :                         if (not_in) {
    2340             :                                 /* just skip the whole thing: nils
    2341             :                                  * don't cause any output */
    2342           1 :                                 canditer_setidx(lci, lci->next + nl);
    2343           1 :                                 continue;
    2344             :                         }
    2345             :                         /* v is nil and nils don't match anything, set
    2346             :                          * to NULL to indicate nil */
    2347         635 :                         v = NULL;
    2348         635 :                         mark = bit_nil;
    2349         635 :                         if (r3)
    2350          54 :                                 r3->tnil = true;
    2351             :                 }
    2352             : 
    2353             :                 /* First we find the "first" value in r that is "at
    2354             :                  * least as large" as v, then we find the "first"
    2355             :                  * value in r that is "larger" than v.  The difference
    2356             :                  * is the number of values equal to v and is stored in
    2357             :                  * nr.  The definitions of "larger" and "first" depend
    2358             :                  * on the orderings of l and r.  If equal_order is
    2359             :                  * set, we go through r from low to high (this
    2360             :                  * includes the case that l is not sorted); otherwise
    2361             :                  * we go through r from high to low.
    2362             :                  * In either case, we will use binary search on r to
    2363             :                  * find both ends of the sequence of values that are
    2364             :                  * equal to v in case the position is "too far" (more
    2365             :                  * than rscan away). */
    2366          54 :                 if (v == NULL) {
    2367             :                         nr = 0; /* nils don't match anything */
    2368      579978 :                 } else if (ri.type == TYPE_void && is_oid_nil(r->tseqbase)) {
    2369           0 :                         if (is_oid_nil(*(const oid *) v)) {
    2370             :                                 /* all values in r match */
    2371           0 :                                 nr = rci->ncand;
    2372             :                         } else {
    2373             :                                 /* no value in r matches */
    2374             :                                 nr = 0;
    2375             :                         }
    2376             :                         /* in either case, we're done after this */
    2377           0 :                         canditer_setidx(rci, equal_order ? rci->ncand : 0);
    2378      579978 :                 } else if (equal_order) {
    2379             :                         /* first find the location of the first value
    2380             :                          * in r that is >= v, then find the location
    2381             :                          * of the first value in r that is > v; the
    2382             :                          * difference is the number of values equal
    2383             :                          * v; we change rci */
    2384             : 
    2385             :                         /* look ahead a little (rscan) in r to
    2386             :                          * see whether we're better off doing
    2387             :                          * a binary search */
    2388      579067 :                         if (rvals) {
    2389      558209 :                                 if (rscan < rci->ncand - rci->next &&
    2390      513575 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) > 0) {
    2391             :                                         /* value too far away in r:
    2392             :                                          * use binary search */
    2393      115392 :                                         lv = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 0);
    2394      127465 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2395      126283 :                                         canditer_setidx(rci, lv);
    2396             :                                 } else {
    2397             :                                         /* scan r for v */
    2398      470545 :                                         while (rci->next < rci->ncand) {
    2399      470450 :                                                 if (rordering * cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) <= 0)
    2400             :                                                         break;
    2401       28897 :                                                 canditer_next(rci);
    2402             :                                         }
    2403             :                                 }
    2404     1097957 :                                 if (rci->next < rci->ncand &&
    2405      533385 :                                     cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0) {
    2406             :                                         /* if we found an equal value,
    2407             :                                          * look for the last equal
    2408             :                                          * value */
    2409      297573 :                                         if (ri.key) {
    2410             :                                                 /* r is key, there can
    2411             :                                                  * only be a single
    2412             :                                                  * equal value */
    2413      169470 :                                                 nr = 1;
    2414      169470 :                                                 canditer_next(rci);
    2415      254280 :                                         } else if (rscan < rci->ncand - rci->next &&
    2416      126176 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) == 0) {
    2417             :                                                 /* many equal values:
    2418             :                                                  * use binary search
    2419             :                                                  * to find the end */
    2420       65791 :                                                 nr = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 1);
    2421       65791 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2422       65791 :                                                 nr -= rci->next;
    2423       65791 :                                                 canditer_setidx(rci, rci->next + nr);
    2424             :                                         } else {
    2425             :                                                 /* scan r for end of
    2426             :                                                  * range */
    2427      170378 :                                                 do {
    2428      170378 :                                                         nr++;
    2429      170378 :                                                         canditer_next(rci);
    2430      340300 :                                                 } while (rci->next < rci->ncand &&
    2431      169931 :                                                          cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0);
    2432             :                                         }
    2433             :                                 }
    2434             :                         } else {
    2435       20858 :                                 assert(rordering == 1);
    2436       20858 :                                 rval = canditer_search(&rrci, *(const oid*)v, true) + r->hseqbase;
    2437       20858 :                                 lv = canditer_search(rci, rval, true);
    2438       20858 :                                 canditer_setidx(rci, lv);
    2439       20858 :                                 nr = (canditer_idx(&rrci, canditer_peek(rci) - r->hseqbase) == *(oid*)v);
    2440       20858 :                                 if (nr == 1)
    2441       20858 :                                         canditer_next(rci);
    2442             :                         }
    2443             :                         /* rci points to first value > v or end of r,
    2444             :                          * and nr is the number of values in r that
    2445             :                          * are equal to v */
    2446             :                 } else {
    2447             :                         /* first find the location of the first value
    2448             :                          * in r that is > v, then find the location
    2449             :                          * of the first value in r that is >= v; the
    2450             :                          * difference is the number of values equal
    2451             :                          * v; we change rci */
    2452             : 
    2453             :                         /* look back from the end a little
    2454             :                          * (rscan) in r to see whether we're
    2455             :                          * better off doing a binary search */
    2456         911 :                         if (rvals) {
    2457         911 :                                 if (rci->next > rscan &&
    2458         593 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) < 0) {
    2459             :                                         /* value too far away
    2460             :                                          * in r: use binary
    2461             :                                          * search */
    2462          37 :                                         lv = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 1);
    2463          37 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2464          37 :                                         canditer_setidx(rci, lv);
    2465             :                                 } else {
    2466             :                                         /* scan r for v */
    2467        1538 :                                         while (rci->next > 0 &&
    2468        1532 :                                                rordering * cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) < 0)
    2469         664 :                                                 canditer_prev(rci);
    2470             :                                 }
    2471        1801 :                                 if (rci->next > 0 &&
    2472         889 :                                     cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0) {
    2473             :                                         /* if we found an equal value,
    2474             :                                          * look for the last equal
    2475             :                                          * value */
    2476         815 :                                         if (ri.key) {
    2477             :                                                 /* r is key, there can only be a single equal value */
    2478         249 :                                                 nr = 1;
    2479         249 :                                                 canditer_prev(rci);
    2480        1051 :                                         } else if (rci->next > rscan &&
    2481         485 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) == 0) {
    2482             :                                                 /* use binary search to find the start */
    2483          38 :                                                 nr = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 0);
    2484          38 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2485          38 :                                                 nr = rci->next - nr;
    2486          38 :                                                 canditer_setidx(rci, rci->next - nr);
    2487             :                                         } else {
    2488             :                                                 /* scan r for start of range */
    2489         638 :                                                 do {
    2490         638 :                                                         canditer_prev(rci);
    2491         638 :                                                         nr++;
    2492        1236 :                                                 } while (rci->next > 0 &&
    2493         598 :                                                          cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0);
    2494             :                                         }
    2495             :                                 }
    2496             :                         } else {
    2497           0 :                                 lv = canditer_search(&rrci, *(const oid *)v, true);
    2498           0 :                                 lv = canditer_search(rci, lv + r->hseqbase, true);
    2499           0 :                                 nr = (canditer_idx(rci, lv) == *(const oid*)v);
    2500           0 :                                 canditer_setidx(rci, lv);
    2501             :                         }
    2502             :                         /* rci points to first value > v
    2503             :                          * or end of r, and nr is the number of values
    2504             :                          * in r that are equal to v */
    2505             :                 }
    2506             : 
    2507      323248 :                 if (nr == 0) {
    2508             :                         /* no entries in r found */
    2509      271652 :                         if (!(nil_on_miss | only_misses)) {
    2510      213265 :                                 if (min_one) {
    2511           0 :                                         GDKerror("not enough matches");
    2512           0 :                                         goto bailout;
    2513             :                                 }
    2514      223128 :                                 if (lscan > 0 &&
    2515        9863 :                                     (equal_order ? rci->next == rci->ncand : rci->next == 0)) {
    2516             :                                         /* nothing more left to match
    2517             :                                          * in r */
    2518             :                                         break;
    2519             :                                 }
    2520      213201 :                                 canditer_setidx(lci, lci->next + nl);
    2521      212749 :                                 continue;
    2522             :                         }
    2523             :                         /* insert a nil to indicate a non-match */
    2524       58387 :                         insert_nil = true;
    2525       58387 :                         nr = 1;
    2526       58387 :                         if (r2) {
    2527           4 :                                 r2->tnil = true;
    2528           4 :                                 r2->tnonil = false;
    2529           4 :                                 r2->tsorted = false;
    2530           4 :                                 r2->trevsorted = false;
    2531           4 :                                 r2->tseqbase = oid_nil;
    2532           4 :                                 r2->tkey = false;
    2533             :                         }
    2534      319044 :                 } else if (nr > 1 && max_one) {
    2535          20 :                         GDKerror("more than one match");
    2536          20 :                         goto bailout;
    2537      319024 :                 } else if (only_misses) {
    2538             :                         /* we had a match, so we're not interested */
    2539      124629 :                         canditer_setidx(lci, lci->next + nl);
    2540      124721 :                         continue;
    2541             :                 } else {
    2542      194395 :                         insert_nil = false;
    2543      194395 :                         if (semi) {
    2544             :                                 /* for semi-join, only insert single
    2545             :                                  * value */
    2546       52671 :                                 nr = 1;
    2547             :                         }
    2548             :                 }
    2549             :                 /* make space: nl values in l match nr values in r, so
    2550             :                  * we need to add nl * nr values in the results */
    2551      252782 :                 if (maybeextend(r1, r2, r3, nl * nr, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2552           0 :                         goto bailout;
    2553             : 
    2554             :                 /* maintain properties */
    2555      252966 :                 if (nl > 1) {
    2556       62932 :                         if (r2) {
    2557             :                                 /* value occurs multiple times in l,
    2558             :                                  * so entry in r will be repeated
    2559             :                                  * multiple times: hence r2 is not key
    2560             :                                  * and not dense */
    2561       14634 :                                 r2->tkey = false;
    2562       14634 :                                 r2->tseqbase = oid_nil;
    2563             :                         }
    2564             :                         /* multiple different values will be inserted
    2565             :                          * in r1 (always in order), so not reverse
    2566             :                          * ordered anymore */
    2567       62932 :                         r1->trevsorted = false;
    2568             :                 }
    2569      252966 :                 if (nr > 1) {
    2570             :                         /* value occurs multiple times in r, so entry
    2571             :                          * in l will be repeated multiple times: hence
    2572             :                          * r1 is not key and not dense */
    2573       65726 :                         r1->tkey = false;
    2574       65726 :                         if (r2) {
    2575             :                                 /* multiple different values will be
    2576             :                                  * inserted in r2 (in order), so not
    2577             :                                  * reverse ordered anymore */
    2578       65271 :                                 r2->trevsorted = false;
    2579       65271 :                                 if (nl > 1) {
    2580             :                                         /* multiple values in l match
    2581             :                                          * multiple values in r, so an
    2582             :                                          * ordered sequence will be
    2583             :                                          * inserted multiple times in
    2584             :                                          * r2, so r2 is not ordered
    2585             :                                          * anymore */
    2586        7117 :                                         r2->tsorted = false;
    2587             :                                 }
    2588             :                         }
    2589             :                 }
    2590      252966 :                 if (lscan == 0) {
    2591             :                         /* deduce relative positions of r matches for
    2592             :                          * this and previous value in v */
    2593       87644 :                         if (prev && r2) {
    2594             :                                 /* keyness or r2 can only be assured
    2595             :                                  * as long as matched values are
    2596             :                                  * ordered */
    2597       86492 :                                 int ord = rordering * cmp(prev, v ? v : nil);
    2598       87295 :                                 if (ord < 0) {
    2599             :                                         /* previous value in l was
    2600             :                                          * less than current */
    2601       29272 :                                         r2->trevsorted = false;
    2602       29272 :                                         r2->tkey &= r2->tsorted;
    2603       58023 :                                 } else if (ord > 0) {
    2604             :                                         /* previous value was
    2605             :                                          * greater */
    2606       28747 :                                         r2->tsorted = false;
    2607       28747 :                                         r2->tkey &= r2->trevsorted;
    2608             :                                 } else {
    2609             :                                         /* value can be equal if
    2610             :                                          * intervening values in l
    2611             :                                          * didn't match anything; if
    2612             :                                          * multiple values match in r,
    2613             :                                          * r2 won't be sorted */
    2614       29276 :                                         r2->tkey = false;
    2615       29276 :                                         if (nr > 1) {
    2616       29247 :                                                 r2->tsorted = false;
    2617       29247 :                                                 r2->trevsorted = false;
    2618             :                                         }
    2619             :                                 }
    2620             :                         }
    2621       88447 :                         prev = v ? v : nil;
    2622             :                 }
    2623      253769 :                 if (BATcount(r1) > 0) {
    2624             :                         /* a new, higher value will be inserted into
    2625             :                          * r1, so r1 is not reverse ordered anymore */
    2626      249066 :                         r1->trevsorted = false;
    2627      249066 :                         if (r2) {
    2628             :                                 /* depending on whether l and r are
    2629             :                                  * ordered the same or not, a new
    2630             :                                  * higher or lower value will be added
    2631             :                                  * to r2 */
    2632       89327 :                                 if (equal_order)
    2633       89209 :                                         r2->trevsorted = false;
    2634             :                                 else {
    2635         118 :                                         r2->tsorted = false;
    2636         118 :                                         r2->tseqbase = oid_nil;
    2637             :                                 }
    2638             :                         }
    2639             :                 }
    2640             : 
    2641             :                 /* insert values: first the left output */
    2642             :                 BUN nladded = 0;
    2643      649338 :                 for (i = 0; i < nl; i++) {
    2644      396745 :                         lv = canditer_next(lci);
    2645      395569 :                         if (mlci == NULL || canditer_contains(mlci, lv)) {
    2646      395569 :                                 nladded++;
    2647    32264259 :                                 for (j = 0; j < nr; j++)
    2648    31868690 :                                         APPEND(r1, lv);
    2649             :                         }
    2650             :                 }
    2651      252593 :                 nl = nladded;
    2652             :                 /* then the right output, various different ways of
    2653             :                  * doing it */
    2654      252593 :                 if (r2) {
    2655       89512 :                         if (insert_nil) {
    2656          11 :                                 for (i = 0; i < nl; i++) {
    2657          14 :                                         for (j = 0; j < nr; j++) {
    2658           7 :                                                 APPEND(r2, oid_nil);
    2659             :                                         }
    2660             :                                 }
    2661       89508 :                         } else if (equal_order) {
    2662       89275 :                                 struct canditer ci = *rci; /* work on copy */
    2663       89275 :                                 if (r2->batCount > 0 &&
    2664       91567 :                                     BATtdense(r2) &&
    2665        2739 :                                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_idx(&ci, ci.next - nr))
    2666          79 :                                         r2->tseqbase = oid_nil;
    2667      212574 :                                 for (i = 0; i < nl; i++) {
    2668      123370 :                                         canditer_setidx(&ci, ci.next - nr);
    2669    31832824 :                                         for (j = 0; j < nr; j++) {
    2670    31586155 :                                                 APPEND(r2, canditer_next(&ci));
    2671             :                                         }
    2672             :                                 }
    2673             :                         } else {
    2674         233 :                                 if (r2->batCount > 0 &&
    2675         118 :                                     BATtdense(r2) &&
    2676           0 :                                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_peek(rci))
    2677           0 :                                         r2->tseqbase = oid_nil;
    2678        4106 :                                 for (i = 0; i < nl; i++) {
    2679        3873 :                                         struct canditer ci = *rci; /* work on copy */
    2680       12362 :                                         for (j = 0; j < nr; j++) {
    2681        8489 :                                                 APPEND(r2, canditer_next(&ci));
    2682             :                                         }
    2683             :                                 }
    2684             :                         }
    2685             :                 }
    2686             :                 /* finally the mark output */
    2687      252522 :                 if (r3) {
    2688        2687 :                         if (insert_nil) {
    2689         325 :                                 r3->tnil |= rhasnil;
    2690         811 :                                 for (i = 0; i < nl; i++) {
    2691         972 :                                         for (j = 0; j < nr; j++) {
    2692         486 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark;
    2693             :                                         }
    2694             :                                 }
    2695             :                         } else {
    2696        8127 :                                 for (i = 0; i < nl; i++) {
    2697       11500 :                                         for (j = 0; j < nr; j++) {
    2698        5735 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = 1;
    2699             :                                         }
    2700             :                                 }
    2701             :                         }
    2702             :                 }
    2703             :         }
    2704             :         /* also set other bits of heap to correct value to indicate size */
    2705        7990 :         BATsetcount(r1, BATcount(r1));
    2706        7985 :         r1->tseqbase = oid_nil;
    2707        7985 :         if (r1->tkey)
    2708        7903 :                 r1 = virtualize(r1);
    2709        7994 :         if (r2) {
    2710        2356 :                 BATsetcount(r2, BATcount(r2));
    2711        2357 :                 assert(BATcount(r1) == BATcount(r2));
    2712        2357 :                 r2->tseqbase = oid_nil;
    2713        2357 :                 if (BATcount(r2) <= 1) {
    2714        1762 :                         r2->tkey = true;
    2715        1762 :                         r2 = virtualize(r2);
    2716             :                 }
    2717             :         }
    2718        7996 :         if (r3) {
    2719          61 :                 BATsetcount(r3, BATcount(r3));
    2720          61 :                 assert(BATcount(r1) == BATcount(r3));
    2721          61 :                 r3->tseqbase = oid_nil;
    2722          61 :                 r3->tnonil = !r3->tnil;
    2723          61 :                 if (BATcount(r3) <= 1) {
    2724           0 :                         r3->tkey = true;
    2725           0 :                         r3->tsorted = true;
    2726           0 :                         r3->trevsorted = true;
    2727             :                 }
    2728             :         }
    2729        7996 :         bat_iterator_end(&li);
    2730        8003 :         bat_iterator_end(&ri);
    2731        7926 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    2732             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    2733             :                   "sr=" ALGOOPTBATFMT ","
    2734             :                   "nil_on_miss=%s,semi=%s,only_misses=%s,not_in=%s;%s %s "
    2735             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    2736             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    2737             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    2738             :                   nil_on_miss ? "true" : "false",
    2739             :                   semi ? "true" : "false",
    2740             :                   only_misses ? "true" : "false",
    2741             :                   not_in ? "true" : "false",
    2742             :                   swapped ? " swapped" : "", reason,
    2743             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    2744             :                   GDKusec() - t0);
    2745             : 
    2746             :         return GDK_SUCCEED;
    2747             : 
    2748          20 :   bailout:
    2749          20 :         bat_iterator_end(&li);
    2750          20 :         bat_iterator_end(&ri);
    2751          20 :         BBPreclaim(r1);
    2752          20 :         BBPreclaim(r2);
    2753          20 :         BBPreclaim(r3);
    2754             :         return GDK_FAIL;
    2755             : }
    2756             : /* HASHLOOPBODY(): statement macro that records one matching pair.
                     :  *
                     :  * The macro takes no arguments; it expands in place and relies on
                     :  * the expansion site providing nr, max_one, r1, r2, r3, lci,
                     :  * maxsize, lo, ro and a label named bailout.
                     :  *
                     :  * Steps, in order:
                     :  * 1. if max_one is set and nr is already >= 1, this would be a
                     :  *    second match: report "more than one match" through GDKerror
                     :  *    and jump to bailout;
                     :  * 2. call maybeextend() asking for one more entry and jump to
                     :  *    bailout unless it returns GDK_SUCCEED;
                     :  * 3. append lo to r1; if r2 is non-NULL append ro to it; if r3 is
                     :  *    non-NULL store 1 in its next bit slot (r3->batCount is
                     :  *    post-incremented);
                     :  * 4. increment nr.
                     :  *
                     :  * The do { ... } while (false) wrapper makes the expansion behave
                     :  * as a single statement that is terminated by the caller's
                     :  * semicolon.
                     :  *
                     :  * NOTE(review): nr is presumably the number of matches found so
                     :  * far for the current left value and is reset by the code around
                     :  * the expansion -- confirm at the expansion sites. */
    2757             : #define HASHLOOPBODY()                                                  \
    2758             :         do {                                                            \
    2759             :                 if (nr >= 1 && max_one) {                            \
    2760             :                         GDKerror("more than one match");              \
    2761             :                         goto bailout;                                   \
    2762             :                 }                                                       \
    2763             :                 if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2764             :                         goto bailout;                                   \
    2765             :                 APPEND(r1, lo);                                         \
    2766             :                 if (r2)                                                 \
    2767             :                         APPEND(r2, ro);                                 \
    2768             :                 if (r3)                                                 \
    2769             :                         ((bit *) r3->theap->base)[r3->batCount++] = 1; \
    2770             :                 nr++;                                                   \
    2771             :         } while (false)
    2772             : 
    2773             : #define EQ_int(a, b)    ((a) == (b)) /* EQ_<type>(a, b): value equality, selected as EQ_##TYPE inside HASHJOIN(TYPE) */
    2774             : #define EQ_lng(a, b)    ((a) == (b)) /* same comparison as EQ_int, for lng */
    2775             : #ifdef HAVE_HGE /* choose how two uuid values are compared */
    2776             : #define EQ_uuid(a, b)   ((a).h == (b).h) /* compares the .h members with a single == */
    2777             : #else /* HAVE_HGE not defined */
    2778             : #define EQ_uuid(a, b)   (memcmp((a).u, (b).u, UUID_SIZE) == 0) /* compares UUID_SIZE bytes of the .u members */
    2779             : #endif
    2780             : 
    2781             : #define HASHJOIN(TYPE) /* probe loop of hashjoin() for value type TYPE: looks up the value of every remaining left candidate in hsh; uses the locals and the bailout label of the expansion site */ \
    2782             :         do {                                                            \
    2783             :                 TYPE *rvals = ri.base;                                  \
    2784             :                 TYPE *lvals = li.base;                                  \
    2785             :                 TYPE v;                                                 \
    2786             :                 while (lci->next < lci->ncand) {                       \
    2787             :                         GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    2788             :                         lo = canditer_next(lci);                        \
    2789             :                         v = lvals[lo - l->hseqbase];                 \
    2790             :                         nr = 0; /* number of matches found for this lo */ \
    2791             :                         bit mark = defmark; /* value stored in r3 if lo turns out to have no match */ \
    2792             :                         if ((!nil_matches || not_in) && is_##TYPE##_nil(v)) { \
    2793             :                                 /* no match */                          \
    2794             :                                 if (not_in) {                           \
    2795             :                                         lskipped = BATcount(r1) > 0; \
    2796             :                                         continue;                       \
    2797             :                                 }                                       \
    2798             :                                 mark = bit_nil;                         \
    2799             :                         } else if (hash_cand) {                         \
    2800             :                                 /* private hash: no locks */            \
    2801             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2802             :                                      rb != BUN_NONE;                    \
    2803             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2804             :                                         ro = canditer_idx(rci, rb); /* translate hash entry rb to a right OID through the candidate list */ \
    2805             :                                         if (!EQ_##TYPE(v, rvals[ro - r->hseqbase])) \
    2806             :                                                 continue;               \
    2807             :                                         if (only_misses) { /* one match is enough to know lo is not a miss */ \
    2808             :                                                 nr++;                   \
    2809             :                                                 break;                  \
    2810             :                                         }                               \
    2811             :                                         HASHLOOPBODY();                 \
    2812             :                                         if (semi && !max_one) /* first match suffices; with max_one the scan goes on so HASHLOOPBODY can report a second match */ \
    2813             :                                                 break;                  \
    2814             :                                 }                                       \
    2815             :                         } else if (rci->tpe != cand_dense) { /* hash not specific to rci and rci not dense: each hit is also checked with canditer_contains() */ \
    2816             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2817             :                                      rb != BUN_NONE;                    \
    2818             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2819             :                                         if (rb >= rl && rb < rh && /* only hash entries inside the bounds [rl, rh) */ \
    2820             :                                             EQ_##TYPE(v, rvals[rb]) &&  \
    2821             :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) { \
    2822             :                                                 if (only_misses) {      \
    2823             :                                                         nr++;           \
    2824             :                                                         break;          \
    2825             :                                                 }                       \
    2826             :                                                 HASHLOOPBODY();         \
    2827             :                                                 if (semi && !max_one)   \
    2828             :                                                         break;          \
    2829             :                                         }                               \
    2830             :                                 }                                       \
    2831             :                         } else { /* dense rci: only the [rl, rh) bounds are checked, no per-hit candidate lookup */ \
    2832             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2833             :                                      rb != BUN_NONE;                    \
    2834             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2835             :                                         if (rb >= rl && rb < rh &&        \
    2836             :                                             EQ_##TYPE(v, rvals[rb])) {  \
    2837             :                                                 if (only_misses) {      \
    2838             :                                                         nr++;           \
    2839             :                                                         break;          \
    2840             :                                                 }                       \
    2841             :                                                 ro = (oid) (rb - roff + rseq); /* convert the BUN position to a right OID */ \
    2842             :                                                 HASHLOOPBODY();         \
    2843             :                                                 if (semi && !max_one)   \
    2844             :                                                         break;          \
    2845             :                                         }                               \
    2846             :                                 }                                       \
    2847             :                         }                                               \
    2848             :                         if (nr == 0) { /* lo matched nothing (or its lookup was skipped because v is nil) */ \
    2849             :                                 if (only_misses) { /* the unmatched lo is the output */ \
    2850             :                                         nr = 1;                         \
    2851             :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2852             :                                                 goto bailout;           \
    2853             :                                         APPEND(r1, lo);                 \
    2854             :                                         if (lskipped)                   \
    2855             :                                                 r1->tseqbase = oid_nil;      \
    2856             :                                 } else if (nil_on_miss) { /* keep lo and pair it with a nil right OID */ \
    2857             :                                         nr = 1;                         \
    2858             :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2859             :                                                 goto bailout;           \
    2860             :                                         APPEND(r1, lo);                 \
    2861             :                                         if (r2) {                       \
    2862             :                                                 r2->tnil = true;     \
    2863             :                                                 r2->tnonil = false;  \
    2864             :                                                 r2->tkey = false;    \
    2865             :                                                 APPEND(r2, oid_nil);    \
    2866             :                                         }                               \
    2867             :                                         if (r3) {                       \
    2868             :                                                 r3->tnil |= mark == bit_nil; \
    2869             :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark; \
    2870             :                                         }                               \
    2871             :                                 } else if (min_one) { /* a match was required for every lo */ \
    2872             :                                         GDKerror("not enough matches");       \
    2873             :                                         goto bailout;                   \
    2874             :                                 } else {                                \
    2875             :                                         lskipped = BATcount(r1) > 0; \
    2876             :                                 }                                       \
    2877             :                         } else if (only_misses) { /* lo has a match, so it is left out of the result */ \
    2878             :                                 lskipped = BATcount(r1) > 0;         \
    2879             :                         } else {                                        \
    2880             :                                 if (lskipped) {                         \
    2881             :                                         /* note, we only get here in an \
    2882             :                                          * iteration *after* lskipped was \
    2883             :                                          * first set to true, i.e. we did \
    2884             :                                          * indeed skip values in l */   \
    2885             :                                         r1->tseqbase = oid_nil;              \
    2886             :                                 }                                       \
    2887             :                                 if (nr > 1) { /* lo was appended to r1 once per match, so r1 holds duplicates */ \
    2888             :                                         r1->tkey = false;            \
    2889             :                                         r1->tseqbase = oid_nil;              \
    2890             :                                 }                                       \
    2891             :                         }                                               \
    2892             :                         if (nr > 0 && BATcount(r1) > nr) /* r1 also holds rows added for an earlier lo */ \
    2893             :                                 r1->trevsorted = false;                      \
    2894             :                 }                                                       \
    2895             :         } while (0)
    2896             : 
    2897             : /* Implementation of join using a hash lookup of values in the right
    2898             :  * column. */
    2899             : static gdk_return
    2900       12816 : hashjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
    2901             :          struct canditer *restrict lci, struct canditer *restrict rci,
    2902             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    2903             :          bool not_in, bool max_one, bool min_one,
    2904             :          BUN estimate, lng t0, bool swapped,
    2905             :          bool hash, bool phash, bool hash_cand,
    2906             :          const char *reason)
    2907             : {
    2908       12816 :         oid lo, ro;
    2909       12816 :         BATiter li, ri;
    2910       12816 :         BUN rb, roff = 0;
    2911             :         /* rl, rh: lower and higher bounds for BUN values in hash table */
    2912       12816 :         BUN rl, rh;
    2913       12816 :         oid rseq;
    2914       12816 :         BUN nr;
    2915       12816 :         const char *lvals;
    2916       12816 :         const char *lvars;
    2917       12816 :         const void *nil = ATOMnilptr(l->ttype);
    2918       12816 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    2919       12816 :         oid lval = oid_nil;     /* hold value if l is dense */
    2920       12816 :         const char *v = (const char *) &lval;
    2921       12816 :         bool lskipped = false;  /* whether we skipped values in l */
    2922       12816 :         Hash *restrict hsh = NULL;
    2923       12816 :         bool locked = false;
    2924       12816 :         BUN maxsize;
    2925       12816 :         BAT *r1 = NULL;
    2926       12816 :         BAT *r2 = NULL;
    2927       12816 :         BAT *r3 = NULL;
    2928       12816 :         BAT *b = NULL;
    2929             : 
    2930       38414 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2931             : 
    2932       12816 :         size_t counter = 0;
    2933       12816 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2934             : 
    2935       12815 :         li = bat_iterator(l);
    2936       12818 :         ri = bat_iterator(r);
    2937             : 
    2938       12820 :         int t = ATOMbasetype(ri.type);
    2939       12820 :         if (BATtvoid(r) || BATtvoid(l))
    2940           9 :                 t = TYPE_void;
    2941             : 
    2942       12820 :         lvals = (const char *) li.base;
    2943       12820 :         if (li.vh && li.type) {
    2944        1112 :                 assert(ri.vh && ri.type);
    2945        1112 :                 lvars = li.vh->base;
    2946             :         } else {
    2947       11708 :                 assert(ri.vh == NULL);
    2948             :                 lvars = NULL;
    2949             :         }
    2950             :         /* offset to convert BUN to OID for value in right column */
    2951       12820 :         rseq = r->hseqbase;
    2952             : 
    2953       12820 :         rl = rci->seq - r->hseqbase;
    2954       12820 :         rh = canditer_last(rci) + 1 - r->hseqbase;
    2955       12815 :         if (hash_cand) {
    2956             :                 /* we need to create a hash on r specific for the
    2957             :                  * candidate list */
    2958         138 :                 char ext[32];
    2959         138 :                 assert(rci->s);
    2960         177 :                 MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash");
    2961         138 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating "
    2962             :                           "hash for candidate list " ALGOBATFMT "%s%s\n",
    2963             :                           ALGOBATPAR(r), ALGOBATPAR(rci->s),
    2964             :                           r->thash ? " ignoring existing hash" : "",
    2965             :                           swapped ? " (swapped)" : "");
    2966         138 :                 if (snprintf(ext, sizeof(ext), "thshjn%x",
    2967         138 :                              (unsigned) MT_getpid()) >= (int) sizeof(ext))
    2968           0 :                         goto bailout;
    2969         138 :                 if ((hsh = BAThash_impl(r, rci, ext)) == NULL) {
    2970           0 :                         goto bailout;
    2971             :                 }
    2972       12677 :         } else if (phash) {
    2973             :                 /* there is a hash on the parent which we should use */
    2974        1184 :                 MT_thread_setalgorithm(swapped ? "hashjoin using parent hash (swapped)" : "hashjoin using parent hash");
    2975         922 :                 b = BATdescriptor(VIEWtparent(r));
    2976         922 :                 if (b == NULL)
    2977           0 :                         goto bailout;
    2978         922 :                 TRC_DEBUG(ALGO, "%s(%s): using "
    2979             :                           "parent(" ALGOBATFMT ") for hash%s\n",
    2980             :                           __func__,
    2981             :                           BATgetId(r), ALGOBATPAR(b),
    2982             :                           swapped ? " (swapped)" : "");
    2983         922 :                 roff = r->tbaseoff - b->tbaseoff;
    2984         922 :                 rl += roff;
    2985         922 :                 rh += roff;
    2986         922 :                 r = b;
    2987         922 :                 bat_iterator_end(&ri);
    2988         922 :                 ri = bat_iterator(r);
    2989         922 :                 MT_rwlock_rdlock(&r->thashlock);
    2990         922 :                 hsh = r->thash;
    2991         922 :                 locked = true;
    2992       11755 :         } else if (hash) {
    2993             :                 /* there is a hash on r which we should use */
    2994        9428 :                 MT_thread_setalgorithm(swapped ? "hashjoin using existing hash (swapped)" : "hashjoin using existing hash");
    2995        5629 :                 MT_rwlock_rdlock(&r->thashlock);
    2996        5628 :                 hsh = r->thash;
    2997        5628 :                 locked = true;
    2998        5628 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": using "
    2999             :                           "existing hash%s\n",
    3000             :                           ALGOBATPAR(r),
    3001             :                           swapped ? " (swapped)" : "");
    3002        6130 :         } else if (BATtdensebi(&ri)) {
    3003             :                 /* no hash, just dense lookup */
    3004           1 :                 MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)" : "hashjoin on dense");
    3005             :         } else {
    3006             :                 /* we need to create a hash on r */
    3007        8676 :                 MT_thread_setalgorithm(swapped ? "hashjoin using new hash (swapped)" : "hashjoin using new hash");
    3008        6129 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating hash%s\n",
    3009             :                           ALGOBATPAR(r),
    3010             :                           swapped ? " (swapped)" : "");
    3011        6129 :                 if (BAThash(r) != GDK_SUCCEED)
    3012           0 :                         goto bailout;
    3013        6095 :                 MT_rwlock_rdlock(&r->thashlock);
    3014        6127 :                 hsh = r->thash;
    3015        6127 :                 locked = true;
    3016             :         }
    3017       12816 :         if (locked && hsh == NULL) {
    3018           0 :                 GDKerror("Hash disappeared for "ALGOBATFMT"\n", ALGOBATPAR(r));
    3019           0 :                 goto bailout;
    3020             :         }
    3021       12816 :         assert(hsh != NULL || BATtdensebi(&ri));
    3022             :         if (hsh) {
    3023       12815 :                 TRC_DEBUG(ALGO, "hash for " ALGOBATFMT ": nbucket " BUNFMT ", nunique " BUNFMT ", nheads " BUNFMT "\n", ALGOBATPAR(r), hsh->nbucket, hsh->nunique, hsh->nheads);
    3024             :         }
    3025             : 
    3026       12816 :         bit defmark = 0;
    3027       12816 :         if ((not_in || r3p) && !ri.nonil) {
    3028             :                 /* check whether there is a nil on the right, since if
    3029             :                  * so, we should return an empty result if not_in is
    3030             :                  * set, or use a NIL mark for non-matches if r3p is
    3031             :                  * set */
    3032         443 :                 if (hash_cand) {
    3033           0 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    3034           0 :                              rb != BUN_NONE;
    3035           0 :                              rb = HASHgetlink(hsh, rb)) {
    3036           0 :                                 ro = canditer_idx(rci, rb);
    3037           0 :                                 if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
    3038           0 :                                         assert(!locked);
    3039           0 :                                         if (r3p) {
    3040           0 :                                                 defmark = bit_nil;
    3041           0 :                                                 break;
    3042             :                                         }
    3043           0 :                                         HEAPfree(&hsh->heaplink, true);
    3044           0 :                                         HEAPfree(&hsh->heapbckt, true);
    3045           0 :                                         GDKfree(hsh);
    3046           0 :                                         bat_iterator_end(&li);
    3047           0 :                                         bat_iterator_end(&ri);
    3048           0 :                                         BBPreclaim(b);
    3049           0 :                                         return nomatch(r1p, r2p, r3p, l, r, lci,
    3050             :                                                        bit_nil, false, false,
    3051             :                                                        __func__, t0);
    3052             :                                 }
    3053             :                         }
    3054         443 :                 } else if (!BATtdensebi(&ri)) {
    3055         443 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    3056         500 :                              rb != BUN_NONE;
    3057          55 :                              rb = HASHgetlink(hsh, rb)) {
    3058          87 :                                 if (rb >= rl && rb < rh &&
    3059          82 :                                     (cmp == NULL ||
    3060          85 :                                      (*cmp)(nil, BUNtail(ri, rb)) == 0)) {
    3061          29 :                                         if (r3p) {
    3062          28 :                                                 defmark = bit_nil;
    3063          28 :                                                 break;
    3064             :                                         }
    3065           1 :                                         if (locked)
    3066           1 :                                                 MT_rwlock_rdunlock(&r->thashlock);
    3067           1 :                                         bat_iterator_end(&li);
    3068           1 :                                         bat_iterator_end(&ri);
    3069           1 :                                         BBPreclaim(b);
    3070           1 :                                         return nomatch(r1p, r2p, r3p, l, r, lci,
    3071             :                                                        bit_nil, false, false,
    3072             :                                                        __func__, t0);
    3073             :                                 }
    3074             :                         }
    3075             :                 }
    3076             :         }
    3077             : 
    3078       25553 :         maxsize = joininitresults(r1p, r2p, r3p, lci->ncand, rci->ncand,
    3079       12814 :                                   li.key, ri.key, semi | max_one,
    3080             :                                   nil_on_miss, only_misses, min_one,
    3081             :                                   estimate);
    3082       12739 :         if (maxsize == BUN_NONE) {
    3083           0 :                 goto bailout;
    3084             :         }
    3085             : 
    3086       12739 :         r1 = *r1p;
    3087       12739 :         r2 = r2p ? *r2p : NULL;
    3088       12739 :         r3 = r3p ? *r3p : NULL;
    3089             : 
    3090             :         /* basic properties will be adjusted if necessary later on,
    3091             :          * they were initially set by joininitresults() */
    3092             : 
    3093       12739 :         if (r2) {
    3094       10719 :                 r2->tkey = li.key;
    3095             :                 /* r2 is not likely to be sorted (although it is
    3096             :                  * certainly possible) */
    3097       10719 :                 r2->tsorted = false;
    3098       10719 :                 r2->trevsorted = false;
    3099       10719 :                 r2->tseqbase = oid_nil;
    3100             :         }
    3101             : 
    3102       12739 :         if (lci->tpe != cand_dense)
    3103         328 :                 r1->tseqbase = oid_nil;
    3104             : 
    3105             : 
    3106       12739 :         switch (t) {
    3107        9702 :         case TYPE_int:
    3108   302606512 :                 HASHJOIN(int);
    3109             :                 break;
    3110        1481 :         case TYPE_lng:
    3111   105067083 :                 HASHJOIN(lng);
    3112             :                 break;
    3113           0 :         case TYPE_uuid:
    3114           0 :                 HASHJOIN(uuid);
    3115           0 :                 break;
    3116             :         default:
    3117     2227859 :                 while (lci->next < lci->ncand) {
    3118     2226309 :                         GDK_CHECK_TIMEOUT(qry_ctx, counter,
    3119             :                                         GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    3120     2226309 :                         lo = canditer_next(lci);
    3121     2237973 :                         if (BATtdensebi(&li))
    3122         323 :                                 lval = lo - l->hseqbase + l->tseqbase;
    3123     2237650 :                         else if (li.type != TYPE_void)
    3124     2234141 :                                 v = VALUE(l, lo - l->hseqbase);
    3125     2269446 :                         nr = 0;
    3126     2269446 :                         bit mark = defmark;
    3127     2269446 :                         if ((!nil_matches || not_in) && cmp(v, nil) == 0) {
    3128             :                                 /* no match */
    3129        2944 :                                 if (not_in) {
    3130          10 :                                         lskipped = BATcount(r1) > 0;
    3131          10 :                                         continue;
    3132             :                                 }
    3133        2934 :                                 mark = bit_nil;
    3134     2267177 :                         } else if (hash_cand) {
    3135           0 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3136           0 :                                      rb != BUN_NONE;
    3137           0 :                                      rb = HASHgetlink(hsh, rb)) {
    3138           0 :                                         ro = canditer_idx(rci, rb);
    3139           0 :                                         if ((*cmp)(v, BUNtail(ri, ro - r->hseqbase)) != 0)
    3140           0 :                                                 continue;
    3141           0 :                                         if (only_misses) {
    3142           0 :                                                 nr++;
    3143           0 :                                                 break;
    3144             :                                         }
    3145           0 :                                         HASHLOOPBODY();
    3146           0 :                                         if (semi && !max_one)
    3147             :                                                 break;
    3148             :                                 }
    3149     2267177 :                         } else if (hsh == NULL) {
    3150           4 :                                 assert(BATtdensebi(&ri));
    3151           4 :                                 ro = *(const oid *) v;
    3152           4 :                                 if (ro >= r->tseqbase &&
    3153           4 :                                     ro < r->tseqbase + r->batCount) {
    3154           4 :                                         ro -= r->tseqbase;
    3155           4 :                                         ro += rseq;
    3156           4 :                                         if (canditer_contains(rci, ro)) {
    3157           4 :                                                 if (only_misses) {
    3158       12779 :                                                         nr++;
    3159             :                                                         break;
    3160             :                                                 }
    3161           4 :                                                 HASHLOOPBODY();
    3162           4 :                                                 if (semi && !max_one)
    3163             :                                                         break;
    3164             :                                         }
    3165             :                                 }
    3166     2267173 :                         } else if (rci->tpe != cand_dense) {
    3167           2 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3168          84 :                                      rb != BUN_NONE;
    3169          82 :                                      rb = HASHgetlink(hsh, rb)) {
    3170         164 :                                         if (rb >= rl && rb < rh &&
    3171         164 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0 &&
    3172          82 :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) {
    3173           4 :                                                 if (only_misses) {
    3174           0 :                                                         nr++;
    3175           0 :                                                         break;
    3176             :                                                 }
    3177           4 :                                                 HASHLOOPBODY();
    3178           4 :                                                 if (semi && !max_one)
    3179             :                                                         break;
    3180             :                                         }
    3181             :                                 }
    3182             :                         } else {
    3183     2267171 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3184     4407180 :                                      rb != BUN_NONE;
    3185     2085928 :                                      rb = HASHgetlink(hsh, rb)) {
    3186     4486573 :                                         if (rb >= rl && rb < rh &&
    3187     2295916 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0) {
    3188     1779960 :                                                 if (only_misses) {
    3189       36122 :                                                         nr++;
    3190       36122 :                                                         break;
    3191             :                                                 }
    3192     1743838 :                                                 ro = (oid) (rb - roff + rseq);
    3193     1743838 :                                                 HASHLOOPBODY();
    3194     1676086 :                                                 if (semi && !max_one)
    3195             :                                                         break;
    3196             :                                         }
    3197             :                                 }
    3198             :                         }
    3199     2226501 :                         if (nr == 0) {
    3200      421308 :                                 if (only_misses) {
    3201         260 :                                         nr = 1;
    3202         260 :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    3203           0 :                                                 goto bailout;
    3204         260 :                                         APPEND(r1, lo);
    3205         260 :                                         if (lskipped)
    3206         231 :                                                 r1->tseqbase = oid_nil;
    3207      421048 :                                 } else if (nil_on_miss) {
    3208       10630 :                                         nr = 1;
    3209       10630 :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    3210           0 :                                                 goto bailout;
    3211       10418 :                                         APPEND(r1, lo);
    3212       10418 :                                         if (r2) {
    3213           0 :                                                 r2->tnil = true;
    3214           0 :                                                 r2->tnonil = false;
    3215           0 :                                                 r2->tkey = false;
    3216           0 :                                                 APPEND(r2, oid_nil);
    3217             :                                         }
    3218       10418 :                                         if (r3) {
    3219       10418 :                                                 r3->tnil |= mark == bit_nil;
    3220       10418 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark;
    3221             :                                         }
    3222      410418 :                                 } else if (min_one) {
    3223           0 :                                         GDKerror("not enough matches");
    3224           0 :                                         goto bailout;
    3225             :                                 } else {
    3226      410418 :                                         lskipped = BATcount(r1) > 0;
    3227             :                                 }
    3228     1805197 :                         } else if (only_misses) {
    3229       36166 :                                 lskipped = BATcount(r1) > 0;
    3230             :                         } else {
    3231     1769031 :                                 if (lskipped) {
    3232             :                                         /* note, we only get here in an
    3233             :                                          * iteration *after* lskipped was
    3234             :                                          * first set to true, i.e. we did
    3235             :                                          * indeed skip values in l */
    3236     1646321 :                                         r1->tseqbase = oid_nil;
    3237             :                                 }
    3238     1769031 :                                 if (nr > 1) {
    3239        3087 :                                         r1->tkey = false;
    3240        3087 :                                         r1->tseqbase = oid_nil;
    3241             :                                 }
    3242             :                         }
    3243     2226293 :                         if (nr > 0 && BATcount(r1) > nr)
    3244     1823374 :                                 r1->trevsorted = false;
    3245             :                 }
    3246             :                 break;
    3247             :         }
    3248       12779 :         if (locked) {
    3249       12609 :                 locked = false;
    3250       12609 :                 MT_rwlock_rdunlock(&r->thashlock);
    3251             :         }
    3252       12843 :         bat_iterator_end(&li);
    3253       12809 :         bat_iterator_end(&ri);
    3254             : 
    3255       12807 :         if (hash_cand) {
    3256         138 :                 HEAPfree(&hsh->heaplink, true);
    3257         138 :                 HEAPfree(&hsh->heapbckt, true);
    3258         138 :                 GDKfree(hsh);
    3259             :         }
    3260             :         /* also set other bits of heap to correct value to indicate size */
    3261       12807 :         BATsetcount(r1, BATcount(r1));
    3262       12742 :         if (BATcount(r1) <= 1) {
    3263        4880 :                 r1->tsorted = true;
    3264        4880 :                 r1->trevsorted = true;
    3265        4880 :                 r1->tkey = true;
    3266        4880 :                 r1->tseqbase = 0;
    3267             :         }
    3268       12742 :         if (r2) {
    3269       10739 :                 BATsetcount(r2, BATcount(r2));
    3270       10746 :                 assert(BATcount(r1) == BATcount(r2));
    3271       10746 :                 if (BATcount(r2) <= 1) {
    3272        4265 :                         r2->tsorted = true;
    3273        4265 :                         r2->trevsorted = true;
    3274        4265 :                         r2->tkey = true;
    3275        4265 :                         r2->tseqbase = 0;
    3276             :                 }
    3277             :         }
    3278       12749 :         if (r3) {
    3279          51 :                 r3->tnonil = !r3->tnil;
    3280          51 :                 BATsetcount(r3, BATcount(r3));
    3281          51 :                 assert(BATcount(r1) == BATcount(r3));
    3282             :         }
    3283       12749 :         if (BATcount(r1) > 0) {
    3284        9017 :                 if (BATtdense(r1))
    3285        4636 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    3286        9017 :                 if (r2 && BATtdense(r2))
    3287        1102 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    3288             :         } else {
    3289        3732 :                 r1->tseqbase = 0;
    3290        3732 :                 if (r2) {
    3291        3149 :                         r2->tseqbase = 0;
    3292             :                 }
    3293             :         }
    3294       12749 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3295             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3296             :                   "nil_matches=%s,nil_on_miss=%s,semi=%s,only_misses=%s,"
    3297             :                   "not_in=%s,max_one=%s,min_one=%s;%s %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3298             :                   " (" LLFMT "usec)\n",
    3299             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3300             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    3301             :                   nil_matches ? "true" : "false",
    3302             :                   nil_on_miss ? "true" : "false",
    3303             :                   semi ? "true" : "false",
    3304             :                   only_misses ? "true" : "false",
    3305             :                   not_in ? "true" : "false",
    3306             :                   max_one ? "true" : "false",
    3307             :                   min_one ? "true" : "false",
    3308             :                   swapped ? " swapped" : "", reason,
    3309             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3310             :                   GDKusec() - t0);
    3311             : 
    3312       12749 :         BBPreclaim(b);
    3313             :         return GDK_SUCCEED;
    3314             : 
    3315           6 :   bailout:
    3316           6 :         bat_iterator_end(&li);
    3317           6 :         bat_iterator_end(&ri);
    3318           6 :         if (locked)
    3319           6 :                 MT_rwlock_rdunlock(&r->thashlock);
    3320           6 :         if (hash_cand && hsh) {
    3321           0 :                 HEAPfree(&hsh->heaplink, true);
    3322           0 :                 HEAPfree(&hsh->heapbckt, true);
    3323           0 :                 GDKfree(hsh);
    3324             :         }
    3325           6 :         BBPreclaim(r1);
    3326           6 :         BBPreclaim(r2);
    3327           6 :         BBPreclaim(b);
    3328             :         return GDK_FAIL;
    3329             : }
    3330             : 
    3331             : /* Count the number of unique values for the first half and the complete
    3332             :  * set (the sample s of b) and return the two values in *cnt1 and
    3333             :  * *cnt2. In case of error, both values are 0.
                        *
                        * Shortcuts taken before any counting is done:
                        * - b is marked key or dense, or there is at most one candidate:
                        *   *cnt1 is half the candidate count, *cnt2 the candidate count;
                        * - b is both sorted and reverse sorted, or is a void column with a
                        *   nil seqbase: both counts are 1.
                        * Otherwise one of four strategies is used: a single scan comparing
                        * each value with its predecessor (sorted or reverse sorted input),
                        * a 256-bit bitmap (byte-sized atoms), a heap-allocated 65536-bit
                        * bitmap (short-sized atoms), or a temporary hash table (all other
                        * types).
                        *
                        * Returns GDK_SUCCEED, or GDK_FAIL when the bitmap or the hash table
                        * cannot be allocated. */
    3334             : static gdk_return
    3335     1035194 : count_unique(BAT *b, BAT *s, BUN *cnt1, BUN *cnt2)
    3336             : {
    3337     1035194 :         struct canditer ci;
    3338     1035194 :         BUN half;
    3339     1035194 :         BUN cnt = 0;
    3340     1035194 :         const void *v;
    3341     1035194 :         const char *bvals;
    3342     1035194 :         const char *bvars;
    3343     1035194 :         oid bval;
    3344     1035194 :         oid i, o;
    3345     1035194 :         const char *nme;
    3346     1035194 :         BUN hb;
    3347     1035194 :         BATiter bi;
    3348     1035194 :         int (*cmp)(const void *, const void *);
    3349     1035194 :         const char *algomsg = "";
    3350     1035194 :         lng t0 = 0;
    3351             : 
    3352     1035194 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    3353     1035194 :         canditer_init(&ci, b, s);
                               /* position at which the "first half" count is snapshotted */
    3354     1035702 :         half = ci.ncand / 2;
    3355             : 
    3356     1035702 :         MT_lock_set(&b->theaplock);
    3357     1036092 :         if (b->tkey || ci.ncand <= 1 || BATtdense(b)) {
    3358             :                 /* trivial: already unique */
    3359        1306 :                 MT_lock_unset(&b->theaplock);
    3360        1306 :                 *cnt1 = half;
    3361        1306 :                 *cnt2 = ci.ncand;
    3362        1306 :                 return GDK_SUCCEED;
    3363             :         }
    3364     1034786 :         MT_lock_unset(&b->theaplock);
    3365             : 
                               /* NOTE(review): the results are deliberately ignored;
                                * presumably these calls are made so that the sorted and
                                * revsorted properties read through the iterator below are
                                * up to date -- confirm */
    3366     1034964 :         (void) BATordered(b);
    3367     1034936 :         (void) BATordered_rev(b);
    3368     1034954 :         bi = bat_iterator(b);
    3369     1034630 :         if ((bi.sorted && bi.revsorted) ||
    3370      974742 :             (bi.type == TYPE_void && is_oid_nil(bi.tseq))) {
    3371             :                 /* trivial: all values are the same */
    3372       59888 :                 *cnt1 = *cnt2 = 1;
    3373       59888 :                 bat_iterator_end(&bi);
    3374       59888 :                 return GDK_SUCCEED;
    3375             :         }
    3376             : 
    3377      974742 :         assert(bi.type != TYPE_void);
    3378             : 
                               /* bvals, bvars (and bval) are presumably what the VALUE()
                                * macro reads when fetching a value of b -- confirm against
                                * the macro definition */
    3379      974742 :         bvals = bi.base;
    3380      974742 :         if (bi.vh && bi.type)
    3381       71603 :                 bvars = bi.vh->base;
    3382             :         else
    3383             :                 bvars = NULL;
    3384      974742 :         cmp = ATOMcompare(bi.type);
    3385             : 
                               /* from here on a failure leaves both results at 0 */
    3386      974742 :         *cnt1 = *cnt2 = 0;
    3387             : 
    3388      974742 :         if (bi.sorted || bi.revsorted) {
                                       /* ordered input: count a value only when it differs
                                        * from the value visited just before it */
    3389             :                 const void *prev = NULL;
    3390     9895899 :                 algomsg = "sorted";
    3391     9895899 :                 for (i = 0; i < ci.ncand; i++) {
    3392     9811221 :                         if (i == half)
    3393       84610 :                                 *cnt1 = cnt;
    3394     9811221 :                         o = canditer_next(&ci);
    3395     9811366 :                         v = VALUE(b, o - b->hseqbase);
    3396     9811374 :                         if (prev == NULL || (*cmp)(v, prev) != 0) {
    3397     4504067 :                                 cnt++;
    3398             :                         }
    3399     9811391 :                         prev = v;
    3400             :                 }
    3401       84678 :                 *cnt2 = cnt;
    3402      890234 :         } else if (ATOMbasetype(bi.type) == TYPE_bte) {
    3403       38636 :                 unsigned char val;
                                       /* one bit per possible byte value */
    3404       38636 :                 uint32_t seen[256 / 32];
    3405             : 
    3406       38636 :                 algomsg = "byte-sized atoms";
    3407       38636 :                 assert(bvars == NULL);
    3408       38636 :                 memset(seen, 0, sizeof(seen));
    3409     5921612 :                 for (i = 0; i < ci.ncand; i++) {
                                               /* halfway: the number of set bits is the
                                                * number of distinct values seen so far.
                                                * NOTE(review): the other branches test
                                                * against `half`, which was computed as
                                                * ci.ncand / 2 above; identical as long as
                                                * canditer_next leaves ci.ncand unchanged
                                                * -- confirm */
    3410     5882915 :                         if (i == ci.ncand/ 2) {
    3411             :                                 cnt = 0;
    3412      346506 :                                 for (int j = 0; j < 256 / 32; j++)
    3413      307808 :                                         cnt += candmask_pop(seen[j]);
    3414       38698 :                                 *cnt1 = cnt;
    3415             :                         }
    3416     5882915 :                         o = canditer_next(&ci);
    3417     5882976 :                         val = ((const unsigned char *) bvals)[o - b->hseqbase];
                                               /* bit (val & 0x1F) of word (val >> 5)
                                                * records that val has been seen */
    3418     5882976 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3419       96922 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3420             :                         }
    3421             :                 }
    3422             :                 cnt = 0;
    3423      346785 :                 for (int j = 0; j < 256 / 32; j++)
    3424      308088 :                         cnt += candmask_pop(seen[j]);
    3425       38697 :                 *cnt2 = cnt;
    3426      851598 :         } else if (ATOMbasetype(bi.type) == TYPE_sht) {
    3427       42905 :                 unsigned short val;
    3428       42905 :                 uint32_t *seen = NULL;
    3429             : 
    3430       42905 :                 algomsg = "short-sized atoms";
    3431       42905 :                 assert(bvars == NULL);
                                       /* same bitmap technique as for bytes, with one bit
                                        * per possible 16-bit value, allocated zeroed */
    3432       42905 :                 seen = GDKzalloc((65536 / 32) * sizeof(seen[0]));
    3433       42905 :                 if (seen == NULL) {
    3434           0 :                         bat_iterator_end(&bi);
    3435           0 :                         return GDK_FAIL;
    3436             :                 }
    3437     6645402 :                 for (i = 0; i < ci.ncand; i++) {
    3438     6602497 :                         if (i == half) {
    3439             :                                 cnt = 0;
    3440    87680921 :                                 for (int j = 0; j < 65536 / 32; j++)
    3441    87638016 :                                         cnt += candmask_pop(seen[j]);
    3442       42905 :                                 *cnt1 = cnt;
    3443             :                         }
    3444     6602497 :                         o = canditer_next(&ci);
    3445     6602497 :                         val = ((const unsigned short *) bvals)[o - b->hseqbase];
    3446     6602497 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3447      131499 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3448             :                         }
    3449             :                 }
    3450             :                 cnt = 0;
    3451    87711641 :                 for (int j = 0; j < 65536 / 32; j++)
    3452    87668736 :                         cnt += candmask_pop(seen[j]);
    3453       42905 :                 *cnt2 = cnt;
    3454       42905 :                 GDKfree(seen);
    3455       42905 :                 seen = NULL;
    3456             :         } else {
                                       /* general case: build a private hash table that is
                                        * freed again before returning */
    3457      808693 :                 BUN prb;
    3458      808693 :                 BUN mask;
    3459      808693 :                 Hash hs = {
    3460             :                         .heapbckt.parentid = b->batCacheid,
    3461      808693 :                         .heaplink.parentid = b->batCacheid,
    3462             :                 };
    3463             : 
    3464      808693 :                 GDKclrerr();    /* not interested in BAThash errors */
    3465      808696 :                 algomsg = "new partial hash";
    3466      808696 :                 nme = BBP_physical(b->batCacheid);
    3467      808696 :                 mask = HASHmask(ci.ncand);
                                       /* never use a mask smaller than 2^16 */
    3468      588899 :                 if (mask < ((BUN) 1 << 16))
    3469      808696 :                         mask = (BUN) 1 << 16;
                                       /* the heap file names are built from the physical
                                        * name of b plus the process id; a name that does
                                        * not fit is treated like an allocation failure */
    3470      808696 :                 if ((hs.heaplink.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
    3471      808702 :                     (hs.heapbckt.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
    3472      808712 :                     snprintf(hs.heaplink.filename, sizeof(hs.heaplink.filename), "%s.thshjnl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs.heaplink.filename) ||
    3473     1617447 :                     snprintf(hs.heapbckt.filename, sizeof(hs.heapbckt.filename), "%s.thshjnb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs.heapbckt.filename) ||
    3474      808737 :                     HASHnew(&hs, bi.type, ci.ncand, mask, BUN_NONE, false) != GDK_SUCCEED) {
    3475           0 :                         GDKerror("cannot allocate hash table\n");
    3476           0 :                         HEAPfree(&hs.heaplink, true);
    3477           0 :                         HEAPfree(&hs.heapbckt, true);
    3478           0 :                         bat_iterator_end(&bi);
    3479           0 :                         return GDK_FAIL;
    3480             :                 }
    3481   409437681 :                 for (i = 0; i < ci.ncand; i++) {
    3482   408628953 :                         if (i == half)
    3483      808729 :                                 *cnt1 = cnt;
    3484   408628953 :                         o = canditer_next(&ci);
    3485   408618622 :                         v = VALUE(b, o - b->hseqbase);
    3486   408618618 :                         prb = HASHprobe(&hs, v);
                                               /* the table stores candidate positions (the
                                                * loop index i), so each entry is mapped
                                                * back to a position in b before its value
                                                * is compared with v */
    3487   408638111 :                         for (hb = HASHget(&hs, prb);
    3488   408646610 :                              hb != BUN_NONE;
    3489        8499 :                              hb = HASHgetlink(&hs, hb)) {
    3490   247773227 :                                 BUN p = canditer_idx(&ci, hb) - b->hseqbase;
    3491   247805908 :                                 if (cmp(v, BUNtail(bi, p)) == 0)
    3492             :                                         break;
    3493             :                         }
                                               /* chain exhausted without a match: v is new */
    3494   408602883 :                         if (hb == BUN_NONE) {
    3495   160852581 :                                 cnt++;
    3496             :                                 /* enter into hash table */
                                                       /* i becomes the new head of bucket
                                                        * prb and links to the old head */
    3497   160852581 :                                 HASHputlink(&hs, i, HASHget(&hs, prb));
    3498   160876278 :                                 HASHput(&hs, prb, i);
    3499             :                         }
    3500             :                 }
    3501      808728 :                 *cnt2 = cnt;
    3502      808728 :                 HEAPfree(&hs.heaplink, true);
    3503      808719 :                 HEAPfree(&hs.heapbckt, true);
    3504             :         }
    3505      975002 :         bat_iterator_end(&bi);
    3506             : 
    3507      974777 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
    3508             :                   " -> " BUNFMT " " BUNFMT " (%s -- " LLFMT "usec)\n",
    3509             :                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    3510             :                   *cnt1, *cnt2, algomsg, GDKusec() - t0);
    3511             : 
    3512             :         return GDK_SUCCEED;
    3513             : }
    3514             : 
                       /* Estimate the number of distinct values of b among the candidates
                        * described by ci.  Returns -1 when a sample could not be created
                        * or counted.
                        *
                        * - If b is marked key, the answer is ci->ncand.
                        * - If all of b is considered (no candidate list, or a dense one
                        *   whose size equals the BAT count) a non-zero b->tunique_est is
                        *   returned as a cached answer.
                        * - Otherwise count_unique is run on a sample requested with
                        *   BATsample(..., 1000) and its two counts are extrapolated
                        *   linearly to ci->ncand values.  When all of b was considered and
                        *   b->tunique_est is still 0, the result is stored there. */
    3515             : static double
    3516     2027216 : guess_uniques(BAT *b, struct canditer *ci)
    3517             : {
    3518     2027216 :         BUN cnt1, cnt2;
    3519     2027216 :         BAT *s1;
    3520             : 
                               /* take a consistent snapshot of the properties under the
                                * heap lock */
    3521     2027216 :         MT_lock_set(&b->theaplock);
    3522     2028747 :         bool key = b->tkey;
    3523     2028747 :         double unique_est = b->tunique_est;
    3524     2028747 :         BUN batcount = BATcount(b);
    3525     2028747 :         MT_lock_unset(&b->theaplock);
    3526     2029105 :         if (key)
    3527      992637 :                 return (double) ci->ncand;
    3528             : 
    3529     1036468 :         if (ci->s == NULL ||
    3530           0 :             (ci->tpe == cand_dense && ci->ncand == batcount)) {
    3531     1036468 :                 if (unique_est != 0) {
    3532         204 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT " use cached value\n",
    3533             :                                   ALGOBATPAR(b));
    3534         204 :                         return unique_est;
    3535             :                 }
    3536     1036264 :                 s1 = BATsample(b, 1000);
    3537             :         } else {
                                       /* sample the candidate list itself and project the
                                        * sample through it; presumably this yields a
                                        * sample of candidate OIDs -- confirm against
                                        * BATproject */
    3538           0 :                 BAT *s2 = BATsample(ci->s, 1000);
    3539           0 :                 if (s2 == NULL)
    3540             :                         return -1;
    3541           0 :                 s1 = BATproject(s2, ci->s);
    3542           0 :                 BBPreclaim(s2);
    3543             :         }
    3544     1035429 :         if (s1 == NULL)
    3545             :                 return -1;
                               /* n1 and n2 mirror the points at which count_unique takes
                                * its two counts (half and all of the sample), assuming its
                                * candidate iterator covers all of s1 */
    3546     1035429 :         BUN n2 = BATcount(s1);
    3547     1035429 :         BUN n1 = n2 / 2;
    3548     1035429 :         if (count_unique(b, s1, &cnt1, &cnt2) != GDK_SUCCEED) {
    3549           0 :                 BBPreclaim(s1);
    3550           0 :                 return -1;
    3551             :         }
    3552     1035888 :         BBPreclaim(s1);
    3553             : 
                               /* straight line through (n1, cnt1) and (n2, cnt2): A is the
                                * slope, B the intercept.
                                * NOTE(review): n2 - n1 is 0 when the sample is empty, which
                                * makes this a division by zero; confirm that an empty BAT
                                * that is not marked key cannot get here */
    3554     1036123 :         double A = (double) (cnt2 - cnt1) / (n2 - n1);
    3555     1036123 :         double B = cnt1 - n1 * A;
    3556             : 
                               /* evaluate the line at ci->ncand values */
    3557     1036123 :         B += A * ci->ncand;
    3558     1036123 :         MT_lock_set(&b->theaplock);
                               /* only cache an estimate that covers all of b, and only if
                                * the count did not change since the snapshot above and no
                                * estimate has been stored in the meantime */
    3559     1036089 :         if (ci->s == NULL ||
    3560           0 :             (ci->tpe == cand_dense && ci->ncand == BATcount(b) && ci->ncand == batcount)) {
    3561     1036089 :                 if (b->tunique_est == 0)
    3562     1032552 :                         b->tunique_est = B;
    3563             :         }
    3564     1036089 :         MT_lock_unset(&b->theaplock);
    3565     1036065 :         return B;
    3566             : }
    3567             : 
                       /* Wrapper around guess_uniques that returns the estimate as a BUN.
                        * When ci is NULL, a local candidate iterator is initialized for b
                        * without a candidate list and used instead.
                        *
                        * NOTE(review): guess_uniques returns -1 on failure and that value
                        * is converted to BUN here without a check, so callers cannot tell
                        * a failure from an estimate; if BUN is an unsigned type, converting
                        * a negative double to it is also undefined in C.  Confirm, and
                        * consider handling a negative result explicitly. */
    3568             : BUN
    3569     1167402 : BATguess_uniques(BAT *b, struct canditer *ci)
    3570             : {
    3571     1167402 :         struct canditer lci;
    3572     1167402 :         if (ci == NULL) {
    3573     1167748 :                 canditer_init(&lci, b, NULL);
    3574     1167748 :                 ci = &lci;
    3575             :         }
    3576     1167795 :         return (BUN) guess_uniques(b, ci);
    3577             : }
    3578             : 
    3579             : /* estimate the cost of doing a hashjoin with a hash on r; return value
    3580             :  * is the estimated cost, the last three arguments receive some extra
    3581             :  * information:
                        * - *hash: a hash exists on r or on its view parent (always false
                        *   when r is dense);
                        * - *phash: that hash belongs to the view parent of r;
                        * - *cand (only written when cand is not NULL): building a new hash
                        *   restricted to the candidates in rci was estimated to be cheaper
                        *   than the alternative, and the returned cost is that of the new
                        *   hash.
                        * lcount scales the estimated cost of a single lookup.
                        * Returns -1 if the number of unique values could not be estimated;
                        * in that case none of the output arguments is written.
                        * NOTE(review): hash and phash are dereferenced unconditionally, only
                        * cand is checked for NULL. */
    3582             : double
    3583     1781226 : joincost(BAT *r, BUN lcount, struct canditer *rci,
    3584             :          bool *hash, bool *phash, bool *cand)
    3585             : {
    3586     1781226 :         bool rhash;
    3587     1781226 :         bool prhash = false;
    3588     1781226 :         bool rcand = false;
    3589     1781226 :         double rcost = 1;
    3590     1781226 :         bat parent;
    3591     1781226 :         BAT *b;
    3592     1781226 :         BUN nheads;
    3593     1781226 :         BUN cnt;
    3594             : 
    3595     1781226 :         (void) BATcheckhash(r);
                               /* snapshot the hash state and count under the hash lock */
    3596     1782524 :         MT_rwlock_rdlock(&r->thashlock);
    3597     1782806 :         rhash = r->thash != NULL;
    3598     1782806 :         nheads = r->thash ? r->thash->nheads : 0;
    3599     1782806 :         cnt = BATcount(r);
    3600     1782806 :         MT_rwlock_rdunlock(&r->thashlock);
    3601             : 
    3602     1781233 :         if ((rci->tpe == cand_materialized || rci->tpe == cand_except) &&
    3603      308604 :             rci->nvals > 0) {
    3604             :                 /* if we need to do binary search on candidate list,
    3605             :                  * take that into account; note checking the other
    3606             :                  * candidate types is essentially free */
    3607      308605 :                 rcost += log2((double) rci->nvals);
    3608             :         }
                               /* rcost was the cost per lookup so far */
    3609     1781233 :         rcost *= lcount;
    3610     1781233 :         if (BATtdense(r)) {
    3611             :                 /* no need for a hash, and lookup is free */
    3612             :                 rhash = false;  /* don't use it, even if it's there */
    3613             :         } else {
    3614     1780521 :                 if (rhash) {
    3615             :                         /* average chain length */
    3616       10160 :                         rcost *= (double) cnt / nheads;
    3617     1770361 :                 } else if ((parent = VIEWtparent(r)) != 0 &&
    3618     1608912 :                            (b = BATdescriptor(parent)) != NULL) {
                                               /* r has no hash of its own but is a view:
                                                * look for a hash on the parent BAT */
    3619     1609798 :                         if (BATcheckhash(b)) {
    3620       71554 :                                 MT_rwlock_rdlock(&b->thashlock);
    3621       71553 :                                 rhash = prhash = b->thash != NULL;
    3622       71553 :                                 if (rhash) {
    3623             :                                         /* average chain length */
    3624       71553 :                                         rcost *= (double) BATcount(b) / b->thash->nheads;
    3625             :                                 }
    3626       71553 :                                 MT_rwlock_rdunlock(&b->thashlock);
    3627             :                         }
                                               /* presumably releases the reference obtained
                                                * by BATdescriptor above -- confirm */
    3628     1609824 :                         BBPunfix(b->batCacheid);
    3629             :                 }
    3630     1781093 :                 if (!rhash) {
                                               /* no usable hash: estimate the chain length
                                                * from the number of unique values and add
                                                * the cost of building the hash */
    3631     1699463 :                         MT_lock_set(&r->theaplock);
    3632     1700600 :                         double unique_est = r->tunique_est;
    3633     1700600 :                         MT_lock_unset(&r->theaplock);
    3634     1700868 :                         if (unique_est == 0) {
                                                       /* the temporary iterator has no
                                                        * candidate list, so guess_uniques
                                                        * estimates over all of r */
    3635      860643 :                                 unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)});
    3636      860380 :                                 if (unique_est < 0)
    3637           0 :                                         return -1;
    3638             :                         }
    3639             :                         /* we have an estimate of the number of unique
    3640             :                          * values, assume some collisions */
    3641     1700605 :                         rcost *= 1.1 * ((double) cnt / unique_est);
    3642             : #ifdef PERSISTENTHASH
    3643             :                         /* only count the cost of creating the hash for
    3644             :                          * non-persistent bats */
    3645     1700605 :                         MT_lock_set(&r->theaplock);
    3646     1700645 :                         if (r->batRole != PERSISTENT /* || r->theap->dirty */ || GDKinmemory(r->theap->farmid))
    3647     1671468 :                                 rcost += cnt * 2.0;
    3648     1700645 :                         MT_lock_unset(&r->theaplock);
    3649             : #else
    3650             :                         rcost += cnt * 2.0;
    3651             : #endif
    3652             :                 }
    3653             :         }
                               /* the caller only gets the candidate-hash alternative
                                * evaluated when it passes a non-NULL cand */
    3654     1783184 :         if (cand) {
    3655       29115 :                 if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
    3656             :                         /* instead of using the hash on r (cost in
    3657             :                          * rcost), we can build a new hash on r taking
    3658             :                          * the candidate list into account; don't do
    3659             :                          * this for masked candidate since the searching
    3660             :                          * of the candidate list (canditer_idx) will
    3661             :                          * kill us */
    3662        1918 :                         double rccost;
    3663        1918 :                         if (rhash && !prhash) {
                                                       /* r has its own hash: reuse its
                                                        * average chain length */
    3664         705 :                                 rccost = (double) cnt / nheads;
    3665             :                         } else {
    3666        1213 :                                 MT_lock_set(&r->theaplock);
    3667        1213 :                                 double unique_est = r->tunique_est;
    3668        1213 :                                 MT_lock_unset(&r->theaplock);
    3669        1213 :                                 if (unique_est == 0) {
    3670          60 :                                         unique_est = guess_uniques(r, rci);
    3671          60 :                                         if (unique_est < 0)
    3672             :                                                 return -1;
    3673             :                                 }
    3674             :                                 /* we have an estimate of the number of unique
    3675             :                                  * values, assume some chains */
    3676        1213 :                                 rccost = 1.1 * ((double) cnt / unique_est);
    3677             :                         }
    3678        1918 :                         rccost *= lcount;
    3679        1918 :                         rccost += rci->ncand * 2.0; /* cost of building the hash */
                                               /* prefer the candidate hash only when it is
                                                * strictly cheaper */
    3680        1918 :                         if (rccost < rcost) {
    3681       29115 :                                 rcost = rccost;
    3682       29115 :                                 rcand = true;
    3683             :                         }
    3684             :                 }
    3685       29115 :                 *cand = rcand;
    3686             :         }
    3687     1783184 :         *hash = rhash;
    3688     1783184 :         *phash = prhash;
    3689     1783184 :         return rcost;
    3690             : }
    3691             : 
    3692             : #define MASK_EQ         1
    3693             : #define MASK_LT         2
    3694             : #define MASK_GT         4
    3695             : #define MASK_LE         (MASK_EQ | MASK_LT)
    3696             : #define MASK_GE         (MASK_EQ | MASK_GT)
    3697             : #define MASK_NE         (MASK_LT | MASK_GT)
    3698             : 
    3699             : static gdk_return
    3700       50635 : thetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int opcode,
    3701             :           BUN estimate, bool nil_matches, const char *reason, lng t0)
    3702             : {
    3703       50635 :         struct canditer lci, rci;
    3704       50635 :         const char *lvals, *rvals;
    3705       50635 :         const char *lvars, *rvars;
    3706       50635 :         const void *nil = ATOMnilptr(l->ttype);
    3707       50635 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    3708       50635 :         const void *vl, *vr;
    3709       50635 :         oid lastr = 0;          /* last value inserted into r2 */
    3710       50635 :         BUN nr;
    3711       50635 :         oid lo, ro;
    3712       50635 :         int c;
    3713       50635 :         bool lskipped = false;  /* whether we skipped values in l */
    3714       50635 :         lng loff = 0, roff = 0;
    3715       50635 :         oid lval = oid_nil, rval = oid_nil;
    3716             : 
    3717       50635 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    3718             : 
    3719      151900 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    3720       50637 :         assert((opcode & (MASK_EQ | MASK_LT | MASK_GT)) != 0);
    3721             : 
    3722       50637 :         BATiter li = bat_iterator(l);
    3723       50786 :         BATiter ri = bat_iterator(r);
    3724             : 
    3725       50796 :         canditer_init(&lci, l, sl);
    3726       50781 :         canditer_init(&rci, r, sr);
    3727             : 
    3728       50584 :         lvals = BATtvoid(l) ? NULL : (const char *) li.base;
    3729       50584 :         rvals = BATtvoid(r) ? NULL : (const char *) ri.base;
    3730       50584 :         if (li.vh && li.type) {
    3731          16 :                 assert(ri.vh && ri.type);
    3732          16 :                 lvars = li.vh->base;
    3733          16 :                 rvars = ri.vh->base;
    3734             :         } else {
    3735       50568 :                 assert(ri.vh == NULL);
    3736             :                 lvars = rvars = NULL;
    3737             :         }
    3738             : 
    3739       50584 :         if (BATtvoid(l)) {
    3740           0 :                 if (!BATtdensebi(&li)) {
    3741           0 :                         if (!nil_matches) {
    3742             :                                 /* trivial: nils don't match anything */
    3743           0 :                                 bat_iterator_end(&li);
    3744           0 :                                 bat_iterator_end(&ri);
    3745           0 :                                 return nomatch(r1p, r2p, NULL, l, r, &lci,
    3746             :                                                0, false, false, __func__, t0);
    3747             :                         }
    3748             :                 } else {
    3749           0 :                         loff = (lng) l->tseqbase - (lng) l->hseqbase;
    3750             :                 }
    3751             :         }
    3752       50584 :         if (BATtvoid(r)) {
    3753           1 :                 if (!BATtdensebi(&ri)) {
    3754           0 :                         if (!nil_matches) {
    3755             :                                 /* trivial: nils don't match anything */
    3756           0 :                                 bat_iterator_end(&li);
    3757           0 :                                 bat_iterator_end(&ri);
    3758           0 :                                 return nomatch(r1p, r2p, NULL, l, r, &lci,
    3759             :                                                0, false, false, __func__, t0);
    3760             :                         }
    3761             :                 } else {
    3762           1 :                         roff = (lng) r->tseqbase - (lng) r->hseqbase;
    3763             :                 }
    3764             :         }
    3765             : 
    3766       50584 :         BUN maxsize = joininitresults(r1p, r2p, NULL, lci.ncand, rci.ncand, false, false,
    3767             :                                       false, false, false, false, estimate);
    3768       49312 :         if (maxsize == BUN_NONE) {
    3769           0 :                 bat_iterator_end(&li);
    3770           0 :                 bat_iterator_end(&ri);
    3771           0 :                 return GDK_FAIL;
    3772             :         }
    3773       49312 :         BAT *r1 = *r1p;
    3774       49312 :         BAT *r2 = r2p ? *r2p : NULL;
    3775             : 
    3776       49312 :         r1->tkey = true;
    3777       49312 :         r1->tsorted = true;
    3778       49312 :         r1->trevsorted = true;
    3779       49312 :         if (r2) {
    3780        8118 :                 r2->tkey = true;
    3781        8118 :                 r2->tsorted = true;
    3782        8118 :                 r2->trevsorted = true;
    3783             :         }
    3784             : 
    3785             :         /* nested loop implementation for theta join */
    3786             :         vl = &lval;
    3787             :         vr = &rval;
    3788      527462 :         for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
    3789      477249 :                 lo = canditer_next(&lci);
    3790      465153 :                 if (lvals)
    3791      465153 :                         vl = VALUE(l, lo - l->hseqbase);
    3792           0 :                 else if (BATtdensebi(&li))
    3793           0 :                         lval = (oid) ((lng) lo + loff);
    3794      465153 :                 nr = 0;
    3795      465153 :                 if (nil_matches || cmp(vl, nil) != 0) {
    3796      457483 :                         canditer_reset(&rci);
    3797     4097546 :                         TIMEOUT_LOOP(rci.ncand, qry_ctx) {
    3798     3202609 :                                 ro = canditer_next(&rci);
    3799     3120249 :                                 if (rvals)
    3800     3120245 :                                         vr = VALUE(r, ro - r->hseqbase);
    3801           4 :                                 else if (BATtdensebi(&ri))
    3802           4 :                                         rval = (oid) ((lng) ro + roff);
    3803     3120249 :                                 if (!nil_matches && cmp(vr, nil) == 0)
    3804       59232 :                                         continue;
    3805     3054205 :                                 c = cmp(vl, vr);
    3806     3094545 :                                 if (!((opcode & MASK_LT && c < 0) ||
    3807     2896554 :                                       (opcode & MASK_GT && c > 0) ||
    3808     1646849 :                                       (opcode & MASK_EQ && c == 0)))
    3809     1646827 :                                         continue;
    3810     1447718 :                                 if (maybeextend(r1, r2, NULL, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    3811           0 :                                         goto bailout;
    3812     1478977 :                                 if (BATcount(r1) > 0) {
    3813     1460430 :                                         if (r2 && lastr + 1 != ro)
    3814       53636 :                                                 r2->tseqbase = oid_nil;
    3815     1460430 :                                         if (nr == 0) {
    3816      162662 :                                                 r1->trevsorted = false;
    3817      162662 :                                                 if (r2 == NULL) {
    3818             :                                                         /* nothing */
    3819       43838 :                                                 } else if (lastr > ro) {
    3820       40385 :                                                         r2->tsorted = false;
    3821       40385 :                                                         r2->tkey = false;
    3822        3453 :                                                 } else if (lastr < ro) {
    3823           0 :                                                         r2->trevsorted = false;
    3824             :                                                 } else {
    3825        3453 :                                                         r2->tkey = false;
    3826             :                                                 }
    3827             :                                         }
    3828             :                                 }
    3829     1478977 :                                 APPEND(r1, lo);
    3830     1478977 :                                 if (r2) {
    3831     1197550 :                                         APPEND(r2, ro);
    3832             :                                 }
    3833     1478977 :                                 lastr = ro;
    3834     1478977 :                                 nr++;
    3835             :                         }
    3836      458307 :                         TIMEOUT_CHECK(qry_ctx,
    3837             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    3838             :                 }
    3839      478150 :                 if (nr > 1) {
    3840      140926 :                         r1->tkey = false;
    3841      140926 :                         r1->tseqbase = oid_nil;
    3842      140926 :                         if (r2) {
    3843       46553 :                                 r2->trevsorted = false;
    3844             :                         }
    3845      337224 :                 } else if (nr == 0) {
    3846      291528 :                         lskipped = BATcount(r1) > 0;
    3847       45696 :                 } else if (lskipped) {
    3848       33298 :                         r1->tseqbase = oid_nil;
    3849             :                 }
    3850             :         }
    3851             :         /* also set other bits of heap to correct value to indicate size */
    3852       50213 :         BATsetcount(r1, BATcount(r1));
    3853       50046 :         if (r2) {
    3854        8243 :                 BATsetcount(r2, BATcount(r2));
    3855        8309 :                 assert(BATcount(r1) == BATcount(r2));
    3856             :         }
    3857       50112 :         if (BATcount(r1) > 0) {
    3858       25261 :                 if (BATtdense(r1))
    3859        1173 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    3860       25261 :                 if (r2 && BATtdense(r2))
    3861        1012 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    3862             :         } else {
    3863       24851 :                 r1->tseqbase = 0;
    3864       24851 :                 if (r2) {
    3865        1651 :                         r2->tseqbase = 0;
    3866             :                 }
    3867             :         }
    3868       50112 :         bat_iterator_end(&li);
    3869       50449 :         bat_iterator_end(&ri);
    3870       49392 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3871             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3872             :                   "opcode=%s%s%s; %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3873             :                   " (" LLFMT "usec)\n",
    3874             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3875             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3876             :                   opcode & MASK_LT ? "<" : "",
    3877             :                   opcode & MASK_GT ? ">" : "",
    3878             :                   opcode & MASK_EQ ? "=" : "",
    3879             :                   reason,
    3880             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3881             :                   GDKusec() - t0);
    3882             :         return GDK_SUCCEED;
    3883             : 
    3884           0 :   bailout:
    3885           0 :         bat_iterator_end(&li);
    3886           0 :         bat_iterator_end(&ri);
    3887           0 :         BBPreclaim(r1);
    3888           0 :         BBPreclaim(r2);
    3889             :         return GDK_FAIL;
    3890             : }
    3891             : 
    3892             : /* small ordered right, dense left, oid's only, do fetches */
    3893             : static gdk_return
    3894           0 : fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    3895             :           struct canditer *restrict lci, struct canditer *restrict rci,
    3896             :           const char *reason, lng t0)
    3897             : {
    3898           0 :         oid lo = lci->seq - l->hseqbase + l->tseqbase, hi = lo + lci->ncand;
    3899           0 :         BUN b, e, p;
    3900           0 :         BAT *r1, *r2 = NULL;
    3901             : 
    3902           0 :         MT_thread_setalgorithm(__func__);
    3903           0 :         if (r->tsorted) {
    3904           0 :                 b = SORTfndfirst(r, &lo);
    3905           0 :                 e = SORTfndfirst(r, &hi);
    3906             :         } else {
    3907           0 :                 assert(r->trevsorted);
    3908           0 :                 b = SORTfndlast(r, &hi);
    3909           0 :                 e = SORTfndlast(r, &lo);
    3910             :         }
    3911           0 :         if (b < rci->seq - r->hseqbase)
    3912             :                 b = rci->seq - r->hseqbase;
    3913           0 :         if (e > rci->seq + rci->ncand - r->hseqbase)
    3914             :                 e = rci->seq + rci->ncand - r->hseqbase;
    3915           0 :         if (e == b) {
    3916           0 :                 return nomatch(r1p, r2p, NULL, l, r, lci,
    3917             :                                0, false, false, __func__, t0);
    3918             :         }
    3919           0 :         r1 = COLnew(0, TYPE_oid, e - b, TRANSIENT);
    3920           0 :         if (r1 == NULL)
    3921             :                 return GDK_FAIL;
    3922           0 :         if (r2p) {
    3923           0 :                 if ((r2 = BATdense(0, r->hseqbase + b, e - b)) == NULL) {
    3924           0 :                         BBPreclaim(r1);
    3925           0 :                         return GDK_FAIL;
    3926             :                 }
    3927           0 :                 *r2p = r2;
    3928             :         }
    3929           0 :         *r1p = r1;
    3930           0 :         oid *op = (oid *) Tloc(r1, 0);
    3931           0 :         BATiter ri = bat_iterator(r);
    3932           0 :         const oid *rp = (const oid *) ri.base;
    3933           0 :         for (p = b; p < e; p++) {
    3934           0 :                 *op++ = rp[p] + l->hseqbase - l->tseqbase;
    3935             :         }
    3936           0 :         BATsetcount(r1, e - b);
    3937           0 :         r1->tkey = ri.key;
    3938           0 :         r1->tsorted = ri.sorted || e - b <= 1;
    3939           0 :         r1->trevsorted = ri.revsorted || e - b <= 1;
    3940           0 :         r1->tseqbase = e == b ? 0 : e - b == 1 ? *(const oid *)Tloc(r1, 0) : oid_nil;
    3941           0 :         bat_iterator_end(&ri);
    3942           0 :         TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    3943             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    3944             :                   "sr=" ALGOOPTBATFMT ") %s "
    3945             :                   "-> (" ALGOBATFMT "," ALGOOPTBATFMT ") " LLFMT "us\n",
    3946             :                   __func__,
    3947             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3948             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3949             :                   reason,
    3950             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3951             :                   GDKusec() - t0);
    3952             : 
    3953             :         return GDK_SUCCEED;
    3954             : }
    3955             : 
    3956             : static BAT *
    3957        5033 : bitmaskjoin(BAT *l, BAT *r,
    3958             :             struct canditer *restrict lci, struct canditer *restrict rci,
    3959             :             bool only_misses,
    3960             :             const char *reason, lng t0)
    3961             : {
    3962        5033 :         BAT *r1;
    3963        5033 :         size_t nmsk = (lci->ncand + 31) / 32;
    3964        5033 :         uint32_t *mask = GDKzalloc(nmsk * sizeof(uint32_t));
    3965        5039 :         BUN cnt = 0;
    3966             : 
    3967        5039 :         MT_thread_setalgorithm(__func__);
    3968        5035 :         if (mask == NULL)
    3969             :                 return NULL;
    3970             : 
    3971    21175243 :         for (BUN n = 0; n < rci->ncand; n++) {
    3972    21170205 :                 oid o = canditer_next(rci) - r->hseqbase;
    3973    21107378 :                 o = BUNtoid(r, o);
    3974    21170208 :                 if (is_oid_nil(o))
    3975           0 :                         continue;
    3976    21170208 :                 o += l->hseqbase;
    3977    21170208 :                 if (o < lci->seq + l->tseqbase)
    3978           2 :                         continue;
    3979    21170206 :                 o -= lci->seq + l->tseqbase;
    3980    21170206 :                 if (o >= lci->ncand)
    3981           0 :                         continue;
    3982    21170206 :                 if ((mask[o >> 5] & (1U << (o & 0x1F))) == 0) {
    3983    16050797 :                         cnt++;
    3984    16050797 :                         mask[o >> 5] |= 1U << (o & 0x1F);
    3985             :                 }
    3986             :         }
    3987        5038 :         if (only_misses)
    3988        3530 :                 cnt = lci->ncand - cnt;
    3989        5038 :         if (cnt == 0 || cnt == lci->ncand) {
    3990        1443 :                 GDKfree(mask);
    3991        1443 :                 if (cnt == 0)
    3992         292 :                         return BATdense(0, 0, 0);
    3993        1151 :                 return BATdense(0, lci->seq, lci->ncand);
    3994             :         }
    3995        3595 :         r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
    3996        3595 :         if (r1 != NULL) {
    3997        3595 :                 oid *r1p = Tloc(r1, 0);
    3998             : 
    3999        3595 :                 r1->tkey = true;
    4000        3595 :                 r1->tnil = false;
    4001        3595 :                 r1->tnonil = true;
    4002        3595 :                 r1->tsorted = true;
    4003        3595 :                 r1->trevsorted = cnt <= 1;
    4004        3595 :                 if (only_misses) {
    4005             :                         /* set the bits for unused values at the
    4006             :                          * end so that we don't need special
    4007             :                          * code in the loop */
    4008        3238 :                         if (lci->ncand & 0x1F)
    4009        3182 :                                 mask[nmsk - 1] |= ~0U << (lci->ncand & 0x1F);
    4010     1659106 :                         for (size_t i = 0; i < nmsk; i++)
    4011     1655868 :                                 if (mask[i] != ~0U)
    4012    53074065 :                                         for (uint32_t j = 0; j < 32; j++)
    4013    51465760 :                                                 if ((mask[i] & (1U << j)) == 0)
    4014    45057419 :                                                         *r1p++ = i * 32 + j + lci->seq;
    4015             :                 } else {
    4016      300518 :                         for (size_t i = 0; i < nmsk; i++)
    4017      300161 :                                 if (mask[i] != 0U)
    4018     7521393 :                                         for (uint32_t j = 0; j < 32; j++)
    4019     7293472 :                                                 if ((mask[i] & (1U << j)) != 0)
    4020     6531487 :                                                         *r1p++ = i * 32 + j + lci->seq;
    4021             :                 }
    4022        3595 :                 BATsetcount(r1, cnt);
    4023        3596 :                 assert((BUN) (r1p - (oid*) Tloc(r1, 0)) == BATcount(r1));
    4024             : 
    4025        3596 :                 TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    4026             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4027             :                           "sr=" ALGOOPTBATFMT ",only_misses=%s; %s "
    4028             :                           "-> " ALGOBATFMT " (" LLFMT "usec)\n",
    4029             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4030             :                           ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    4031             :                           only_misses ? "true" : "false",
    4032             :                           reason,
    4033             :                           ALGOBATPAR(r1),
    4034             :                           GDKusec() - t0);
    4035             :         }
    4036        3596 :         GDKfree(mask);
    4037        3596 :         return r1;
    4038             : }
    4039             : 
    4040             : /* Make the implementation choices for various left joins.
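    4041             :  * If r3p is set, this is a "mark join" and *r3p will be a third return value containing a bat with type msk with a bit set for each matching row (NOTE: the original sentence was cut off after "for each"; "matching row" is an assumption -- TODO confirm); r3p may only be set together with nil_on_miss;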
    4042             :  * nil_matches: nil is an ordinary value that can match;
    4043             :  * nil_on_miss: outer join: fill in a nil value in case of no match;
    4044             :  * semi: semi join: return one of potentially more than one matches;
    4045             :  * only_misses: difference: list rows without match on the right;
    4046             :  * not_in: for implementing NOT IN: if nil on right then there are no matches;
    4047             :  * max_one: error if there is more than one match;
    4048             :  * min_one: error if there are no matches. */
    4049             : static gdk_return
    4050      100959 : leftjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4051             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    4052             :          bool not_in, bool max_one, bool min_one, BUN estimate,
    4053             :          const char *func, lng t0)
    4054             : {
    4055      100959 :         struct canditer lci, rci;
    4056      100959 :         bool rhash, prhash, rcand;
    4057      100959 :         bat parent;
    4058      100959 :         double rcost = 0;
    4059      100959 :         gdk_return rc;
    4060      100959 :         BAT *lp = NULL;
    4061      100959 :         BAT *rp = NULL;
    4062             : 
    4063      100959 :         MT_thread_setalgorithm(__func__);
    4064             :         /* only_misses implies left output only */
    4065      100917 :         assert(!only_misses || r2p == NULL);
    4066             :         /* if nil_on_miss is set, we really need a right output */
    4067      100917 :         assert(!nil_on_miss || r2p != NULL || r3p != NULL);
    4068             :         /* if not_in is set, then so is only_misses */
    4069      100917 :         assert(!not_in || only_misses);
    4070             :         /* if r3p is set, then so is nil_on_miss */
    4071      100917 :         assert(r3p == NULL || nil_on_miss);
    4072      100917 :         *r1p = NULL;
    4073      100917 :         if (r2p)
    4074        1057 :                 *r2p = NULL;
    4075      100917 :         if (r3p)
    4076        3968 :                 *r3p = NULL;
    4077             : 
    4078      100917 :         canditer_init(&lci, l, sl);
    4079      101177 :         canditer_init(&rci, r, sr);
    4080             : 
    4081      101223 :         if ((parent = VIEWtparent(l)) != 0) {
    4082        3893 :                 lp = BATdescriptor(parent);
    4083        3898 :                 if (lp == NULL)
    4084             :                         return GDK_FAIL;
    4085        3898 :                 if (l->hseqbase == lp->hseqbase &&
    4086        4629 :                     BATcount(l) == BATcount(lp) &&
    4087        3018 :                     ATOMtype(l->ttype) == ATOMtype(lp->ttype)) {
    4088             :                         l = lp;
    4089             :                 } else {
    4090        2389 :                         BBPunfix(lp->batCacheid);
    4091        2389 :                         lp = NULL;
    4092             :                 }
    4093             :         }
    4094      101227 :         if ((parent = VIEWtparent(r)) != 0) {
    4095        3951 :                 rp = BATdescriptor(parent);
    4096        3953 :                 if (rp == NULL) {
    4097           0 :                         BBPreclaim(lp);
    4098           0 :                         return GDK_FAIL;
    4099             :                 }
    4100        3953 :                 if (r->hseqbase == rp->hseqbase &&
    4101        6798 :                     BATcount(r) == BATcount(rp) &&
    4102        5691 :                     ATOMtype(r->ttype) == ATOMtype(rp->ttype)) {
    4103             :                         r = rp;
    4104             :                 } else {
    4105        1110 :                         BBPunfix(rp->batCacheid);
    4106        1110 :                         rp = NULL;
    4107             :                 }
    4108             :         }
    4109             : 
    4110      101229 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    4111           3 :                 l = BATunmask(l);
    4112           3 :                 BBPreclaim(lp);
    4113           3 :                 if (l == NULL) {
    4114           0 :                         BBPreclaim(rp);
    4115           0 :                         return GDK_FAIL;
    4116             :                 }
    4117             :                 lp = l;
    4118             :         }
    4119      101229 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    4120         130 :                 r = BATunmask(r);
    4121         130 :                 BBPreclaim(rp);
    4122         130 :                 if (r == NULL) {
    4123           0 :                         BBPreclaim(lp);
    4124           0 :                         return GDK_FAIL;
    4125             :                 }
    4126             :                 rp = r;
    4127             :         }
    4128             : 
    4129      101229 :         if (joinparamcheck(l, r, NULL, sl, sr, func) != GDK_SUCCEED) {
    4130           0 :                 rc = GDK_FAIL;
    4131           0 :                 goto doreturn;
    4132             :         }
    4133             : 
    4134      101063 :         if (lci.ncand == 0 || rci.ncand == 0) {
    4135       65459 :                 TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    4136             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4137             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
    4138             :                           "nil_on_miss=%d,semi=%d,only_misses=%d,"
    4139             :                           "not_in=%d,max_one=%d,min_one=%d)\n",
    4140             :                           func,
    4141             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4142             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4143             :                           nil_matches, nil_on_miss, semi, only_misses,
    4144             :                           not_in, max_one, min_one);
    4145       65459 :                 rc = nomatch(r1p, r2p, r3p, l, r, &lci,
    4146             :                              0, nil_on_miss, only_misses, func, t0);
    4147       65421 :                 goto doreturn;
    4148             :         }
    4149             : 
    4150       35604 :         if (!only_misses && !not_in &&
    4151        3995 :             (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) ||
    4152        3941 :              (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) {
    4153             :                 /* single value to join, use select */
    4154        1625 :                 rc = selectjoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4155             :                                 nil_matches, nil_on_miss, semi, max_one, min_one,
    4156             :                                 t0, false, func);
    4157        1626 :                 goto doreturn;
    4158       33983 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    4159             :                 /* use special implementation for dense right-hand side */
    4160       22383 :                 rc = mergejoin_void(r1p, r2p, r3p, l, r, &lci, &rci,
    4161             :                                     nil_on_miss, only_misses, t0, false,
    4162             :                                     func);
    4163       22359 :                 goto doreturn;
    4164       11600 :         } else if (BATtdense(l)
    4165        5119 :                    && lci.tpe == cand_dense
    4166        5085 :                    && rci.tpe == cand_dense
    4167             :                    && !semi
    4168        5087 :                    && !max_one
    4169             :                    && !min_one
    4170        3545 :                    && !nil_matches
    4171             :                    && !only_misses
    4172        3545 :                    && !not_in
    4173             :                    /* && (rci.ncand * 1024) < lci.ncand */
    4174           0 :                    && (BATordered(r) || BATordered_rev(r))) {
    4175           0 :                 assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
    4176           0 :                 rc = fetchjoin(r1p, r2p, l, r, sl, sr, &lci, &rci, func, t0);
    4177           0 :                 goto doreturn;
    4178       11600 :         } else if (BATtdense(l)
    4179        5121 :                    && lci.tpe == cand_dense
    4180        5087 :                    && r2p == NULL
    4181        5053 :                    && (semi || only_misses)
    4182             :                    && !nil_on_miss
    4183        5053 :                    && !not_in
    4184             :                    && !max_one
    4185        5038 :                    && !min_one) {
    4186        5038 :                 *r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0);
    4187        5038 :                 rc = *r1p == NULL ? GDK_FAIL : GDK_SUCCEED;
    4188        5038 :                 goto doreturn;
    4189             :         } else {
    4190             :                 /* looking at r->tvheap, so we need a lock */
    4191        6562 :                 MT_lock_set(&r->theaplock);
    4192        6574 :                 BUN hsz = r->tvheap ? r->tvheap->size : 0;
    4193        6574 :                 MT_lock_unset(&r->theaplock);
    4194        6575 :                 if ((BATordered(r) || BATordered_rev(r))
    4195        5207 :                     && (BATordered(l)
    4196         468 :                         || BATordered_rev(l)
    4197         434 :                         || BATtdense(r)
    4198         434 :                         || lci.ncand < 1024
    4199         246 :                         || BATcount(r) * (r->twidth + hsz + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1))) {
    4200        5083 :                         rc = mergejoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4201             :                                        nil_matches, nil_on_miss, semi, only_misses,
    4202             :                                        not_in, max_one, min_one, estimate, t0, false, func);
    4203        5045 :                         goto doreturn;
    4204             :                 }
    4205             :         }
    4206        1492 :         rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
    4207        1485 :         if (rcost < 0) {
    4208           0 :                 rc = GDK_FAIL;
    4209           0 :                 goto doreturn;
    4210             :         }
    4211             : 
    4212        1485 :         if (!nil_on_miss && !only_misses && !not_in && !max_one && !min_one) {
    4213             :                 /* maybe do a hash join on the swapped operands; if we
    4214             :                  * do, we need to sort the output, so we take that into
    4215             :                  * account as well */
    4216         903 :                 bool lhash, plhash, lcand;
    4217         903 :                 double lcost;
    4218             : 
    4219         903 :                 lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
    4220         903 :                 if (lcost < 0) {
    4221           0 :                         rc = GDK_FAIL;
    4222         742 :                         goto doreturn;
    4223             :                 }
    4224         903 :                 if (semi)
    4225         800 :                         lcost += rci.ncand; /* cost of BATunique(r) */
    4226             :                 /* add cost of sorting; obviously we don't know the
    4227             :                  * size, so we guess that the size of the output is
    4228             :                  * the same as the right input */
    4229         903 :                 lcost += rci.ncand * log((double) rci.ncand); /* sort */
    4230         903 :                 if (lcost < rcost) {
    4231         742 :                         BAT *tmp = sr;
    4232         742 :                         BAT *r1, *r2;
    4233         742 :                         if (semi) {
    4234         737 :                                 sr = BATunique(r, sr);
    4235         737 :                                 if (sr == NULL) {
    4236           0 :                                         rc = GDK_FAIL;
    4237           0 :                                         goto doreturn;
    4238             :                                 }
    4239         737 :                                 canditer_init(&rci, r, sr);
    4240             :                         }
    4241         742 :                         rc = hashjoin(&r2, &r1, NULL, r, l, &rci, &lci, nil_matches,
    4242             :                                       false, false, false, false, false, false, estimate,
    4243             :                                       t0, true, lhash, plhash, lcand, func);
    4244         742 :                         if (semi)
    4245         737 :                                 BBPunfix(sr->batCacheid);
    4246         742 :                         if (rc != GDK_SUCCEED)
    4247           0 :                                 goto doreturn;
    4248         742 :                         if (r2p == NULL) {
    4249         737 :                                 BBPunfix(r2->batCacheid);
    4250         737 :                                 r2 = NULL;
    4251             :                         }
    4252         742 :                         if (semi)
    4253         737 :                                 r1->tkey = true;
    4254         742 :                         if (!VIEWtparent(r1) &&
    4255         742 :                             r1->ttype == TYPE_oid &&
    4256         742 :                             BBP_refs(r1->batCacheid) == 1 &&
    4257         742 :                             (r2 == NULL ||
    4258           5 :                              (!VIEWtparent(r2) &&
    4259           5 :                               BBP_refs(r2->batCacheid) == 1 &&
    4260           5 :                               r2->ttype == TYPE_oid))) {
    4261             :                                 /* in-place sort if we can */
    4262         742 :                                 if (r2) {
    4263           5 :                                         GDKqsort(r1->theap->base, r2->theap->base,
    4264           5 :                                                  NULL, r1->batCount, r1->twidth,
    4265           5 :                                                  r2->twidth, TYPE_oid, false,
    4266             :                                                  false);
    4267           5 :                                         r2->tsorted = false;
    4268           5 :                                         r2->trevsorted = false;
    4269           5 :                                         r2->tseqbase = oid_nil;
    4270           5 :                                         *r2p = r2;
    4271             :                                 } else {
    4272         737 :                                         GDKqsort(r1->theap->base, NULL, NULL,
    4273         737 :                                                  r1->batCount, r1->twidth, 0,
    4274             :                                                  TYPE_oid, false, false);
    4275             :                                 }
    4276         742 :                                 r1->tsorted = true;
    4277         742 :                                 r1->trevsorted = false;
    4278         742 :                                 *r1p = r1;
    4279             :                         } else {
    4280           0 :                                 BAT *ob;
    4281           0 :                                 rc = BATsort(&tmp, r2p ? &ob : NULL, NULL,
    4282             :                                              r1, NULL, NULL, false, false, false);
    4283           0 :                                 BBPunfix(r1->batCacheid);
    4284           0 :                                 if (rc != GDK_SUCCEED) {
    4285           0 :                                         BBPreclaim(r2);
    4286           0 :                                         goto doreturn;
    4287             :                                 }
    4288           0 :                                 *r1p = r1 = tmp;
    4289           0 :                                 if (r2p) {
    4290           0 :                                         tmp = BATproject(ob, r2);
    4291           0 :                                         BBPunfix(r2->batCacheid);
    4292           0 :                                         BBPunfix(ob->batCacheid);
    4293           0 :                                         if (tmp == NULL) {
    4294           0 :                                                 BBPunfix(r1->batCacheid);
    4295           0 :                                                 rc = GDK_FAIL;
    4296           0 :                                                 goto doreturn;
    4297             :                                         }
    4298           0 :                                         *r2p = tmp;
    4299             :                                 }
    4300             :                         }
    4301         742 :                         rc = GDK_SUCCEED;
    4302         742 :                         goto doreturn;
    4303             :                 }
    4304             :         }
    4305         743 :         rc = hashjoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4306             :                       nil_matches, nil_on_miss, semi, only_misses,
    4307             :                       not_in, max_one, min_one, estimate, t0, false, rhash, prhash,
    4308             :                       rcand, func);
    4309      100968 :   doreturn:
    4310      100968 :         BBPreclaim(lp);
    4311      100935 :         BBPreclaim(rp);
    4312      101079 :         if (rc == GDK_SUCCEED && (semi | only_misses))
    4313      100315 :                 *r1p = virtualize(*r1p);
    4314             :         return rc;
    4315             : }
    4316             : 
    4317             : /* Perform an equi-join over l and r (sl and sr are the optional candidate lists).  Returns two new, aligned, bats
    4318             :  * with the oids of matching tuples through r1p and r2p.  The result is in the same order
    4319             :  * as l (i.e. r1 is sorted).  Implemented as a single call to leftjoin() with NULL as third output and all six boolean options false. */
    4320             : gdk_return
    4321         645 : BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    4322             : {
    4323         645 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches,
    4324             :                         false, false, false, false, false, false, /* presumably nil_on_miss, semi, only_misses, not_in, max_one, min_one; leftjoin's parameter list is outside this excerpt -- confirm */
    4325             :                         estimate, __func__,
    4326         645 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() only if the M_DEBUG/ALGO tracer test succeeds, else 0 */
    4327             : }
    4328             : 
    4329             : /* Performs a left outer join over l and r.  Returns two new, aligned,
    4330             :  * bats with the oids of matching tuples, or the oid in the first
    4331             :  * output bat and nil in the second output bat if the value in l does
    4332             :  * not occur in r.  The result is in the same order as l (i.e. r1 is
    4333             :  * sorted).  Implemented as a single call to leftjoin() with NULL as third output; sl and sr are passed through unchanged. */
    4334             : gdk_return
    4335          73 : BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate)
    4336             : {
    4337          73 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches,
    4338             :                         true, false, false, false, match_one, match_one, /* first flag on (presumably nil_on_miss, i.e. the nil-for-no-match behaviour described above); match_one fills two adjacent flags (presumably max_one and min_one) -- confirm against leftjoin */
    4339             :                         estimate, __func__,
    4340          73 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() only if the M_DEBUG/ALGO tracer test succeeds, else 0 */
    4341             : }
    4342             : 
    4343             : /* Perform a semi-join over l and r.  Returns one or two new bats
    4344             :  * with the oids of matching tuples.  The result is in the same order
    4345             :  * as l (i.e. r1 is sorted).  If a single bat is returned, it is a
    4346             :  * candidate list.  Implemented as a single call to leftjoin() with NULL as third output. */
    4347             : gdk_return
    4348        1056 : BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4349             :             bool nil_matches, bool max_one, BUN estimate)
    4350             : {
    4351        1056 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches,
    4352             :                         false, true, false, false, max_one, false, /* second flag on (presumably semi); max_one is forwarded in the fifth flag position -- confirm against leftjoin */
    4353             :                         estimate, __func__,
    4354        1056 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() only if the M_DEBUG/ALGO tracer test succeeds, else 0 */
    4355             : }
    4356             : 
    4357             : /* Perform a mark-join over l and r.  Returns one or two new bats with
    4358             :  * the oids of matching tuples.  In addition, returns a bat with "marks"
    4359             :  * that indicate the type of match.  This is an outer join, so returns
    4360             :  * at least one value for each row on the left.  If the second output
    4361             :  * pointer (r2p) is NULL, this is also a semi-join, so returns exactly
    4362             :  * one row for each row on the left.  If there is a match, the mark
    4363             :  * column will be TRUE; if there is no match, the second output is NIL,
    4364             :  * and the mark output is FALSE if there are no NILs in the right input,
    4365             :  * and the left input is also not NIL, otherwise the mark output is
    4366             :  * NIL.  The mark bat is returned through r3p, which is handed to leftjoin() as its third output. */
    4367             : gdk_return
    4368        4022 : BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4369             :             BUN estimate)
    4370             : {
    4371        4022 :         return leftjoin(r1p, r2p, r3p, l, r, sl, sr, false, true, r2p == NULL, /* nil_matches is fixed to false; next flag on (presumably nil_on_miss); r2p == NULL selects the semi-join case described above */
    4372             :                         false, false, false, false, estimate, __func__,
    4373        4022 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() only if the M_DEBUG/ALGO tracer test succeeds, else 0 */
    4374             : }
    4375             : 
    4376             : /* Return a candidate list with the list of rows in l whose value also
    4377             :  * occurs in r.  This is just the left output of a semi-join.  Returns NULL when leftjoin() does not return GDK_SUCCEED. */
    4378             : BAT *
    4379        7765 : BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one,
    4380             :              BUN estimate)
    4381             : {
    4382        7765 :         BAT *bn;
    4383             :         /* only the first output is requested: both the second and third output pointers are NULL */
    4384        7765 :         if (leftjoin(&bn, NULL, NULL, l, r, sl, sr, nil_matches,
    4385             :                      false, true, false, false, max_one, false, /* same flag pattern as BATsemijoin: second flag on (presumably semi), max_one forwarded */
    4386             :                      estimate, __func__,
    4387        7765 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4388        7772 :                 return bn;
    4389             :         return NULL;
    4390             : }
    4391             : 
    4392             : /* Return the difference of l and r.  The result is a BAT with the
    4393             :  * oids of those values in l that do not occur in r.  This is what you
    4394             :  * might call an anti-semi-join.  The result is a candidate list.  Returns NULL when leftjoin() does not return GDK_SUCCEED. */
    4395             : BAT *
    4396       87485 : BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in,
    4397             :         BUN estimate)
    4398             : {
    4399       87485 :         BAT *bn;
    4400             :         /* only the first output is requested: both the second and third output pointers are NULL */
    4401       87485 :         if (leftjoin(&bn, NULL, NULL, l, r, sl, sr, nil_matches,
    4402             :                      false, false, true, not_in, false, false, /* third flag on (presumably only_misses), followed by the caller's not_in -- confirm against leftjoin */
    4403             :                      estimate, __func__,
    4404       87485 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4405       87478 :                 return bn;
    4406             :         return NULL;
    4407             : }
    4408             : /* Join l and r using comparison operator op: JOIN_EQ is delegated to BATjoin(); JOIN_NE/LT/LE/GT/GE are translated to a MASK_* opcode and handed to thetajoin(); any other op reports an error and returns GDK_FAIL. */
    4409             : gdk_return
    4410       50733 : BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate)
    4411             : {
    4412       50733 :         int opcode = 0;
    4413       50733 :         lng t0 = 0;
    4414             : 
    4415             :         /* encode operator as a bit mask into opcode */
    4416       50733 :         switch (op) {
    4417           0 :         case JOIN_EQ:
    4418           0 :                 return BATjoin(r1p, r2p, l, r, sl, sr, nil_matches, estimate); /* equality is handled entirely by BATjoin */
    4419             :         case JOIN_NE:
    4420             :                 opcode = MASK_NE;
    4421             :                 break;
    4422        8351 :         case JOIN_LT:
    4423        8351 :                 opcode = MASK_LT;
    4424        8351 :                 break;
    4425          11 :         case JOIN_LE:
    4426          11 :                 opcode = MASK_LE;
    4427          11 :                 break;
    4428       42272 :         case JOIN_GT:
    4429       42272 :                 opcode = MASK_GT;
    4430       42272 :                 break;
    4431          34 :         case JOIN_GE:
    4432          34 :                 opcode = MASK_GE;
    4433          34 :                 break;
    4434           0 :         default:
    4435           0 :                 GDKerror("unknown operator %d.\n", op);
    4436           0 :                 return GDK_FAIL; /* NOTE(review): this return is taken before *r1p / *r2p are reset below */
    4437             :         }
    4438             :         /* t0 is only assigned under TRC_DEBUG_IF(ALGO); otherwise it keeps its initial 0 */
    4439       50733 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4440       50733 :         *r1p = NULL; /* reset the outputs before the parameter check and the join itself */
    4441       50733 :         if (r2p) {
    4442        8429 :                 *r2p = NULL;
    4443             :         }
    4444       50733 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4445             :                 return GDK_FAIL;
    4446             :         /* hand over to thetajoin with the encoded opcode */
    4447       50711 :         return thetajoin(r1p, r2p, l, r, sl, sr, opcode, estimate, nil_matches,
    4448             :                          __func__, t0);
    4449             : }
    4450             : /* Join l and r on equality (sl and sr are the optional candidate lists).  The result is returned through r1p and, when r2p is not NULL, r2p.  Chooses between selectjoin, mergejoin_void, mergejoin and hashjoin, possibly with the operands swapped.  Returns the gdk_return of the chosen implementation, or GDK_FAIL when set-up fails. */
    4451             : gdk_return
    4452      337473 : BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    4453             : {
    4454      337473 :         struct canditer lci, rci;
    4455      337473 :         bool lhash = false, rhash = false, lcand = false;
    4456      337473 :         bool plhash = false, prhash = false, rcand = false;
    4457      337473 :         bool swap;
    4458      337473 :         bat parent;
    4459      337473 :         double rcost = 0;
    4460      337473 :         double lcost = 0;
    4461      337473 :         gdk_return rc;
    4462      337473 :         lng t0 = 0;
    4463      337473 :         BAT *r2 = NULL; /* receives the unwanted output of a swapped call when r2p is NULL */
    4464      337473 :         BAT *lp = NULL; /* extra reference that replaces l (parent or unmasked bat); released at doreturn */
    4465      337473 :         BAT *rp = NULL; /* same for r */
    4466             : 
    4467      337473 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4468             :         /* set up the candidate iterators for both sides */
    4469      337473 :         canditer_init(&lci, l, sl);
    4470      338161 :         canditer_init(&rci, r, sr);
    4471             :         /* if l (r) is a view with the same hseqbase, count and atom type as its parent, continue with the parent bat */
    4472      338022 :         if ((parent = VIEWtparent(l)) != 0) {
    4473       60212 :                 lp = BATdescriptor(parent);
    4474       60227 :                 if (lp == NULL)
    4475             :                         return GDK_FAIL;
    4476       60227 :                 if (l->hseqbase == lp->hseqbase &&
    4477       59895 :                     BATcount(l) == BATcount(lp) &&
    4478       22742 :                     ATOMtype(l->ttype) == ATOMtype(lp->ttype)) {
    4479             :                         l = lp;
    4480             :                 } else {
    4481       48856 :                         BBPunfix(lp->batCacheid);
    4482       48856 :                         lp = NULL;
    4483             :                 }
    4484             :         }
    4485      338038 :         if ((parent = VIEWtparent(r)) != 0) {
    4486      280450 :                 rp = BATdescriptor(parent);
    4487      280389 :                 if (rp == NULL) {
    4488           0 :                         BBPreclaim(lp);
    4489           0 :                         return GDK_FAIL;
    4490             :                 }
    4491      280389 :                 if (r->hseqbase == rp->hseqbase &&
    4492      498024 :                     BATcount(r) == BATcount(rp) &&
    4493      459899 :                     ATOMtype(r->ttype) == ATOMtype(rp->ttype)) {
    4494             :                         r = rp;
    4495             :                 } else {
    4496       50442 :                         BBPunfix(rp->batCacheid);
    4497       50442 :                         rp = NULL;
    4498             :                 }
    4499             :         }
    4500             :         /* TYPE_msk bats and mask candidates are replaced by the result of BATunmask, which takes over the lp/rp slot */
    4501      337999 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    4502           0 :                 l = BATunmask(l);
    4503           0 :                 BBPreclaim(lp);
    4504           0 :                 if (l == NULL) {
    4505           0 :                         BBPreclaim(rp);
    4506           0 :                         return GDK_FAIL;
    4507             :                 }
    4508             :                 lp = l;
    4509             :         }
    4510      337999 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    4511          24 :                 r = BATunmask(r);
    4512          24 :                 BBPreclaim(rp);
    4513          24 :                 if (r == NULL) {
    4514           0 :                         BBPreclaim(lp);
    4515           0 :                         return GDK_FAIL;
    4516             :                 }
    4517             :                 rp = r;
    4518             :         }
    4519             : 
    4520      337999 :         *r1p = NULL; /* NOTE(review): the GDK_FAIL returns above are taken before the outputs are reset here */
    4521      337999 :         if (r2p)
    4522      294996 :                 *r2p = NULL;
    4523             : 
    4524      337999 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED) {
    4525           0 :                 rc = GDK_FAIL;
    4526           0 :                 goto doreturn;
    4527             :         }
    4528             :         /* no candidates on at least one side: let nomatch produce the result */
    4529      337659 :         if (lci.ncand == 0 || rci.ncand == 0) {
    4530      266641 :                 TRC_DEBUG(ALGO, "BATjoin(l=" ALGOBATFMT ","
    4531             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4532             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d)\n",
    4533             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4534             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4535             :                           nil_matches);
    4536      266641 :                 rc = nomatch(r1p, r2p, NULL, l, r, &lci,
    4537             :                              0, false, false, __func__, t0);
    4538      265023 :                 goto doreturn;
    4539             :         }
    4540             : 
    4541       71018 :         swap = false;
    4542             :         /* special cases first; each branch of this chain ends with goto doreturn */
    4543       71018 :         if (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype == TYPE_void && is_oid_nil(l->tseqbase))) {
    4544             :                 /* single value to join, use select */
    4545       38546 :                 rc = selectjoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4546             :                                 nil_matches, false, false, false, false,
    4547             :                                 t0, false, __func__);
    4548       38458 :                 goto doreturn;
    4549       32481 :         } else if (rci.ncand == 1 || (BATordered(r) && BATordered_rev(r)) || (r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) {
    4550             :                 /* single value to join, use select (operands swapped, so the outputs are swapped as well) */
    4551       11896 :                 rc = selectjoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4552             :                                 nil_matches, false, false, false, false,
    4553             :                                 t0, true, __func__);
    4554        8788 :                 if (rc == GDK_SUCCEED && r2p == NULL) /* caller did not ask for the second output: drop the temporary */
    4555        5769 :                         BBPunfix(r2->batCacheid);
    4556        8811 :                 goto doreturn;
    4557       23631 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    4558             :                 /* use special implementation for dense right-hand side */
    4559        1632 :                 rc = mergejoin_void(r1p, r2p, NULL, l, r, &lci, &rci,
    4560             :                                     false, false, t0, false, __func__);
    4561        1608 :                 goto doreturn;
    4562       21999 :         } else if (BATtdense(l) && lci.tpe == cand_dense) {
    4563             :                 /* use special implementation for dense left-hand side (operands swapped) */
    4564          67 :                 rc = mergejoin_void(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4565             :                                     false, false, t0, true, __func__);
    4566          45 :                 if (rc == GDK_SUCCEED && r2p == NULL) /* caller did not ask for the second output: drop the temporary */
    4567          26 :                         BBPunfix(r2->batCacheid);
    4568          46 :                 goto doreturn;
    4569       34102 :         } else if ((BATordered(l) || BATordered_rev(l)) &&
    4570       15775 :                    (BATordered(r) || BATordered_rev(r))) {
    4571             :                 /* both sorted */
    4572        8590 :                 rc = mergejoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4573             :                                nil_matches, false, false, false, false, false, false,
    4574             :                                estimate, t0, false, __func__);
    4575        8450 :                 goto doreturn;
    4576             :         }
    4577             :         /* no special case applied: get a cost estimate for each side; a negative cost is treated as failure */
    4578       13369 :         lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
    4579       13351 :         rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
    4580       13349 :         if (lcost < 0 || rcost < 0) {
    4581           0 :                 rc = GDK_FAIL;
    4582           0 :                 goto doreturn;
    4583             :         }
    4584             : 
    4585             :         /* if the cost of doing searches on l is lower than the cost
    4586             :          * of doing searches on r, we swap */
    4587       13349 :         swap = (lcost < rcost);
    4588             :         /* choose between mergejoin and hashjoin, each in normal or swapped operand order */
    4589       26716 :         if ((r->ttype == TYPE_void && r->tvheap != NULL) ||
    4590       26852 :             ((BATordered(r) || BATordered_rev(r)) &&
    4591        3946 :              (lci.ncand * (log2((double) rci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4592             :                 /* r is sorted and it is cheaper to do multiple binary
    4593             :                  * searches than it is to use a hash */
    4594         125 :                 rc = mergejoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4595             :                                nil_matches, false, false, false, false, false, false,
    4596             :                                estimate, t0, false, __func__);
    4597       26488 :         } else if ((l->ttype == TYPE_void && l->tvheap != NULL) ||
    4598       26669 :             ((BATordered(l) || BATordered_rev(l)) &&
    4599        3558 :              (rci.ncand * (log2((double) lci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4600             :                 /* l is sorted and it is cheaper to do multiple binary
    4601             :                  * searches than it is to use a hash */
    4602        3835 :                 rc = mergejoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4603             :                                nil_matches, false, false, false, false, false, false,
    4604             :                                estimate, t0, true, __func__);
    4605        1860 :                 if (rc == GDK_SUCCEED && r2p == NULL) /* caller did not ask for the second output: drop the temporary */
    4606           3 :                         BBPunfix(r2->batCacheid);
    4607       11326 :         } else if (swap) {
    4608       10345 :                 rc = hashjoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4609             :                               nil_matches, false, false, false, false, false, false,
    4610             :                               estimate, t0, true, lhash, plhash, lcand,
    4611             :                               __func__);
    4612        5402 :                 if (rc == GDK_SUCCEED && r2p == NULL) /* caller did not ask for the second output: drop the temporary */
    4613         501 :                         BBPunfix(r2->batCacheid);
    4614             :         } else {
    4615        5902 :                 rc = hashjoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4616             :                               nil_matches, false, false, false, false, false, false,
    4617             :                               estimate, t0, false, rhash, prhash, rcand,
    4618             :                               __func__);
    4619             :         }
    4620      335638 :   doreturn:
    4621      335638 :         BBPreclaim(lp); /* release the parent / unmasked replacements, if any (lp and rp may still be NULL here) */
    4622      335314 :         BBPreclaim(rp);
    4623             :         return rc;
    4624             : }
    4625             : /*
                     :  * BATbandjoin -- band join of l and r.
                     :  *
                     :  * A pair (lo, ro) of candidate OIDs is produced when both values are
                     :  * non-nil and
                     :  *      vr - *c1 <[=] vl <[=] vr + *c2
                     :  * where vl and vr are the values of l and r at lo and ro; the lower
                     :  * comparison is inclusive iff linc is set and the upper comparison
                     :  * is inclusive iff hinc is set.
                     :  *
                     :  * r1p, r2p  - outputs: *r1p receives the OIDs from l, *r2p (r2p may
                     :  *             be NULL) the aligned OIDs from r
                     :  * l, r      - inputs; asserted to have the same ATOMtype
                     :  * sl, sr    - candidate lists for l and r (handed to canditer_init)
                     :  * c1, c2    - band offsets, read with the base type of l; only bte,
                     :  *             sht, int, lng, hge (if HAVE_HGE), flt and dbl are
                     :  *             accepted, any other base type yields GDK_FAIL
                     :  * linc/hinc - whether the lower/upper end of the band is inclusive
                     :  * estimate  - passed on to joininitresults
                     :  *
                     :  * Returns GDK_FAIL when the parameter check, the type check or the
                     :  * result allocation fails, or when the loop bails out (timeout or
                     :  * failure to extend the results).  NOTE(review): on the bailout
                     :  * path the result BATs are reclaimed but *r1p / *r2p are not reset,
                     :  * so callers should not touch them after a failure.
                     :  *
                     :  * The implementation is a plain nested loop over both candidate
                     :  * lists.
                     :  */
    4626             : gdk_return
    4627           0 : BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4628             :             const void *c1, const void *c2, bool linc, bool hinc, BUN estimate)
    4629             : {
    4630           0 :         lng t0 = 0;
    4631           0 :         struct canditer lci, rci;
    4632           0 :         const char *lvals, *rvals;
    4633           0 :         int t;
    4634           0 :         const void *nil = ATOMnilptr(l->ttype);
    4635           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    4636           0 :         const char *vl, *vr;
    4637           0 :         oid lastr = 0;          /* last value inserted into r2 */
    4638           0 :         BUN nr;
    4639           0 :         oid lo, ro;
    4640           0 :         bool lskipped = false;  /* whether we skipped values in l */
    4641             : 
    4642           0 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4643             : /* state used by the timeout check at the top of the outer loop */
    4644           0 :         size_t counter = 0;
    4645           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    4646             : 
    4647             : 
    4648           0 :         MT_thread_setalgorithm(__func__);
    4649           0 :         *r1p = NULL;
    4650           0 :         if (r2p) {
    4651           0 :                 *r2p = NULL;
    4652             :         }
    4653           0 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4654             :                 return GDK_FAIL;
    4655             : /* both inputs must be of the same type */
    4656           0 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    4657             : /* t: base type of l, used to decide how c1 and c2 are read */
    4658           0 :         t = ATOMtype(l->ttype);
    4659           0 :         t = ATOMbasetype(t);
    4660             : 
    4661           0 :         canditer_init(&lci, l, sl);
    4662           0 :         canditer_init(&rci, r, sr);
    4663             : /* nothing can match if either side has no candidates */
    4664           0 :         if (lci.ncand == 0 || rci.ncand == 0)
    4665           0 :                 return nomatch(r1p, r2p, NULL, l, r, &lci,
    4666             :                                0, false, false, __func__, t0);
    4667             : /* Validate the band per type.  Nothing can match when a bound is nil,
                     :  * when the band is empty (-c1 > c2), or when it consists of a
                     :  * single point (-c1 == c2) and at least one end is exclusive. */
    4668           0 :         switch (t) {
    4669           0 :         case TYPE_bte:
    4670           0 :                 if (is_bte_nil(*(const bte *)c1) ||
    4671           0 :                     is_bte_nil(*(const bte *)c2) ||
    4672           0 :                     -*(const bte *)c1 > *(const bte *)c2 ||
    4673           0 :                     ((!hinc || !linc) && -*(const bte *)c1 == *(const bte *)c2))
    4674           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4675             :                                        0, false, false, __func__, t0);
    4676             :                 break;
    4677           0 :         case TYPE_sht:
    4678           0 :                 if (is_sht_nil(*(const sht *)c1) ||
    4679           0 :                     is_sht_nil(*(const sht *)c2) ||
    4680           0 :                     -*(const sht *)c1 > *(const sht *)c2 ||
    4681           0 :                     ((!hinc || !linc) && -*(const sht *)c1 == *(const sht *)c2))
    4682           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4683             :                                        0, false, false, __func__, t0);
    4684             :                 break;
    4685           0 :         case TYPE_int:
    4686           0 :                 if (is_int_nil(*(const int *)c1) ||
    4687           0 :                     is_int_nil(*(const int *)c2) ||
    4688           0 :                     -*(const int *)c1 > *(const int *)c2 ||
    4689           0 :                     ((!hinc || !linc) && -*(const int *)c1 == *(const int *)c2))
    4690           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4691             :                                        0, false, false, __func__, t0);
    4692             :                 break;
    4693           0 :         case TYPE_lng:
    4694           0 :                 if (is_lng_nil(*(const lng *)c1) ||
    4695           0 :                     is_lng_nil(*(const lng *)c2) ||
    4696           0 :                     -*(const lng *)c1 > *(const lng *)c2 ||
    4697           0 :                     ((!hinc || !linc) && -*(const lng *)c1 == *(const lng *)c2))
    4698           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4699             :                                        0, false, false, __func__, t0);
    4700             :                 break;
    4701             : #ifdef HAVE_HGE
    4702           0 :         case TYPE_hge:
    4703           0 :                 if (is_hge_nil(*(const hge *)c1) ||
    4704           0 :                     is_hge_nil(*(const hge *)c2) ||
    4705           0 :                     -*(const hge *)c1 > *(const hge *)c2 ||
    4706           0 :                     ((!hinc || !linc) && -*(const hge *)c1 == *(const hge *)c2))
    4707           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4708             :                                        0, false, false, __func__, t0);
    4709             :                 break;
    4710             : #endif
    4711           0 :         case TYPE_flt:
    4712           0 :                 if (is_flt_nil(*(const flt *)c1) ||
    4713           0 :                     is_flt_nil(*(const flt *)c2) ||
    4714           0 :                     -*(const flt *)c1 > *(const flt *)c2 ||
    4715           0 :                     ((!hinc || !linc) && -*(const flt *)c1 == *(const flt *)c2))
    4716           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4717             :                                        0, false, false, __func__, t0);
    4718             :                 break;
    4719           0 :         case TYPE_dbl:
    4720           0 :                 if (is_dbl_nil(*(const dbl *)c1) ||
    4721           0 :                     is_dbl_nil(*(const dbl *)c2) ||
    4722           0 :                     -*(const dbl *)c1 > *(const dbl *)c2 ||
    4723           0 :                     ((!hinc || !linc) && -*(const dbl *)c1 == *(const dbl *)c2))
    4724           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4725             :                                        0, false, false, __func__, t0);
    4726             :                 break;
    4727           0 :         default:
    4728           0 :                 GDKerror("unsupported type\n");
    4729           0 :                 return GDK_FAIL;
    4730             :         }
    4731             : /* allocate the result BATs; BUN_NONE signals failure */
    4732           0 :         BUN maxsize = joininitresults(r1p, r2p, NULL, lci.ncand, rci.ncand, false, false,
    4733             :                                       false, false, false, false, estimate);
    4734           0 :         if (maxsize == BUN_NONE)
    4735             :                 return GDK_FAIL;
    4736           0 :         BAT *r1 = *r1p;
    4737           0 :         BAT *r2 = r2p ? *r2p : NULL;
    4738           0 :         BATiter li = bat_iterator(l);
    4739           0 :         BATiter ri = bat_iterator(r);
    4740             : /* raw value arrays; presumably what the FVALUE macro reads -- TODO confirm */
    4741           0 :         lvals = (const char *) li.base;
    4742           0 :         rvals = (const char *) ri.base;
    4743           0 :         assert(ri.vh == NULL);
    4744             : 
    4745           0 :         assert(lvals != NULL);
    4746           0 :         assert(rvals != NULL);
    4747             : /* start with the most optimistic properties; they are cleared below
                     :  * as soon as an appended pair violates them */
    4748           0 :         r1->tkey = true;
    4749           0 :         r1->tsorted = true;
    4750           0 :         r1->trevsorted = true;
    4751           0 :         if (r2) {
    4752           0 :                 r2->tkey = true;
    4753           0 :                 r2->tsorted = true;
    4754           0 :                 r2->trevsorted = true;
    4755             :         }
    4756             : 
    4757             :         /* nested loop implementation for band join */
    4758           0 :         for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
    4759           0 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    4760             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    4761           0 :                 lo = canditer_next(&lci);
    4762           0 :                 vl = FVALUE(l, lo - l->hseqbase);
    4763           0 :                 if (cmp(vl, nil) == 0)
    4764           0 :                         continue; /* nil in l never matches */
    4765           0 :                 nr = 0; /* number of matches found for this lo */
    4766           0 :                 canditer_reset(&rci); /* rescan r from the start */
    4767           0 :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {
    4768           0 :                         ro = canditer_next(&rci);
    4769           0 :                         vr = FVALUE(r, ro - r->hseqbase);
    4770           0 :                         switch (ATOMtype(li.type)) { /* NOTE(review): the
                     :                                  * parameter check above switches on the
                     :                                  * base type t, this one on
                     :                                  * ATOMtype(li.type); a type matching no
                     :                                  * case here skips the band test and the
                     :                                  * pair is appended unconditionally --
                     :                                  * confirm that this cannot happen */
    4771           0 :                         case TYPE_bte: {
    4772           0 :                                 if (is_bte_nil(*(const bte *) vr))
    4773           0 :                                         continue;
    4774           0 :                                 sht v1 = (sht) *(const bte *) vr, v2; /* wider type for the bounds */
    4775           0 :                                 v2 = v1;
    4776           0 :                                 v1 -= *(const bte *)c1;
    4777           0 :                                 if (*(const bte *)vl <= v1 &&
    4778           0 :                                     (!linc || *(const bte *)vl != v1))
    4779           0 :                                         continue; /* below the band */
    4780           0 :                                 v2 += *(const bte *)c2;
    4781           0 :                                 if (*(const bte *)vl >= v2 &&
    4782           0 :                                     (!hinc || *(const bte *)vl != v2))
    4783           0 :                                         continue; /* above the band */
    4784             :                                 break;
    4785             :                         }
    4786           0 :                         case TYPE_sht: {
    4787           0 :                                 if (is_sht_nil(*(const sht *) vr))
    4788           0 :                                         continue;
    4789           0 :                                 int v1 = (int) *(const sht *) vr, v2;
    4790           0 :                                 v2 = v1;
    4791           0 :                                 v1 -= *(const sht *)c1;
    4792           0 :                                 if (*(const sht *)vl <= v1 &&
    4793           0 :                                     (!linc || *(const sht *)vl != v1))
    4794           0 :                                         continue;
    4795           0 :                                 v2 += *(const sht *)c2;
    4796           0 :                                 if (*(const sht *)vl >= v2 &&
    4797           0 :                                     (!hinc || *(const sht *)vl != v2))
    4798           0 :                                         continue;
    4799             :                                 break;
    4800             :                         }
    4801           0 :                         case TYPE_int: {
    4802           0 :                                 if (is_int_nil(*(const int *) vr))
    4803           0 :                                         continue;
    4804           0 :                                 lng v1 = (lng) *(const int *) vr, v2;
    4805           0 :                                 v2 = v1;
    4806           0 :                                 v1 -= *(const int *)c1;
    4807           0 :                                 if (*(const int *)vl <= v1 &&
    4808           0 :                                     (!linc || *(const int *)vl != v1))
    4809           0 :                                         continue;
    4810           0 :                                 v2 += *(const int *)c2;
    4811           0 :                                 if (*(const int *)vl >= v2 &&
    4812           0 :                                     (!hinc || *(const int *)vl != v2))
    4813           0 :                                         continue;
    4814             :                                 break;
    4815             :                         } /* TYPE_lng: use a 128-bit intermediate type if one
                     :                            * is available, otherwise fall back to
                     :                            * overflow-checked lng arithmetic */
    4816             : #ifdef HAVE_HGE
    4817           0 :                         case TYPE_lng: {
    4818           0 :                                 if (is_lng_nil(*(const lng *) vr))
    4819           0 :                                         continue;
    4820           0 :                                 hge v1 = (hge) *(const lng *) vr, v2;
    4821           0 :                                 v2 = v1;
    4822           0 :                                 v1 -= *(const lng *)c1;
    4823           0 :                                 if (*(const lng *)vl <= v1 &&
    4824           0 :                                     (!linc || *(const lng *)vl != v1))
    4825           0 :                                         continue;
    4826           0 :                                 v2 += *(const lng *)c2;
    4827           0 :                                 if (*(const lng *)vl >= v2 &&
    4828           0 :                                     (!hinc || *(const lng *)vl != v2))
    4829           0 :                                         continue;
    4830             :                                 break;
    4831             :                         }
    4832             : #else
    4833             : #ifdef HAVE___INT128
    4834             :                         case TYPE_lng: {
    4835             :                                 if (is_lng_nil(*(const lng *) vr))
    4836             :                                         continue;
    4837             :                                 __int128 v1 = (__int128) *(const lng *) vr, v2;
    4838             :                                 v2 = v1;
    4839             :                                 v1 -= *(const lng *)c1;
    4840             :                                 if (*(const lng *)vl <= v1 &&
    4841             :                                     (!linc || *(const lng *)vl != v1))
    4842             :                                         continue;
    4843             :                                 v2 += *(const lng *)c2;
    4844             :                                 if (*(const lng *)vl >= v2 &&
    4845             :                                     (!hinc || *(const lng *)vl != v2))
    4846             :                                         continue;
    4847             :                                 break;
    4848             :                         }
    4849             : #else
    4850             : #ifdef HAVE___INT128_T
    4851             :                         case TYPE_lng: {
    4852             :                                 if (is_lng_nil(*(const lng *) vr))
    4853             :                                         continue;
    4854             :                                 __int128_t v1 = (__int128_t) *(const lng *) vr, v2;
    4855             :                                 v2 = v1;
    4856             :                                 v1 -= *(const lng *)c1;
    4857             :                                 if (*(const lng *)vl <= v1 &&
    4858             :                                     (!linc || *(const lng *)vl != v1))
    4859             :                                         continue;
    4860             :                                 v2 += *(const lng *)c2;
    4861             :                                 if (*(const lng *)vl >= v2 &&
    4862             :                                     (!hinc || *(const lng *)vl != v2))
    4863             :                                         continue;
    4864             :                                 break;
    4865             :                         }
    4866             : #else
    4867             :                         case TYPE_lng: {
    4868             :                                 if (is_lng_nil(*(const lng *) vr))
    4869             :                                         continue;
    4870             :                                 lng v1, v2; /* lower bound: the last macro argument
                     :                                              * is presumably run when vr - c1
                     :                                              * does not fit; with c1 < 0 the
                     :                                              * bound lies above every value (no
                     :                                              * match), otherwise below every
                     :                                              * value (always satisfied) */
    4871             :                                 SUB_WITH_CHECK(*(const lng *)vr,
    4872             :                                                *(const lng *)c1,
    4873             :                                                lng, v1,
    4874             :                                                GDK_lng_max,
    4875             :                                                do{if(*(const lng*)c1<0)goto nolmatch;else goto lmatch1;}while(false));
    4876             :                                 if (*(const lng *)vl <= v1 &&
    4877             :                                     (!linc || *(const lng *)vl != v1))
    4878             :                                         continue;
    4879             :                                   lmatch1: /* upper bound: if vr + c2 does not fit,
                     :                                             * c2 > 0 puts the bound above every
                     :                                             * value (always satisfied), otherwise
                     :                                             * below every value (no match) */
    4880             :                                 ADD_WITH_CHECK(*(const lng *)vr,
    4881             :                                                *(const lng *)c2,
    4882             :                                                lng, v2,
    4883             :                                                GDK_lng_max,
    4884             :                                                do{if(*(const lng*)c2>0)goto lmatch2;else goto nolmatch;}while(false));
    4885             :                                 if (*(const lng *)vl >= v2 &&
    4886             :                                     (!hinc || *(const lng *)vl != v2))
    4887             :                                         continue;
    4888             :                                   lmatch2:
    4889             :                                 break;
    4890             :                                   nolmatch:
    4891             :                                 continue;
    4892             :                         }
    4893             : #endif
    4894             : #endif
    4895             : #endif
    4896             : #ifdef HAVE_HGE
    4897           0 :                         case TYPE_hge: {
    4898           0 :                                 if (is_hge_nil(*(const hge *) vr))
    4899           0 :                                         continue;
    4900           0 :                                 hge v1, v2; /* no wider type: checked arithmetic,
                     :                                              * lower bound as for the lng
                     :                                              * fallback (c1 < 0 on overflow
                     :                                              * means no match) */
    4901           0 :                                 SUB_WITH_CHECK(*(const hge *)vr,
    4902             :                                                *(const hge *)c1,
    4903             :                                                hge, v1,
    4904             :                                                GDK_hge_max,
    4905             :                                                do{if(*(const hge*)c1<0)goto nohmatch;else goto hmatch1;}while(false));
    4906           0 :                                 if (*(const hge *)vl <= v1 &&
    4907           0 :                                     (!linc || *(const hge *)vl != v1))
    4908           0 :                                         continue;
    4909           0 :                                   hmatch1: /* upper bound: if vr + c2 does not fit,
                     :                                             * c2 > 0 puts the bound above every
                     :                                             * value (always satisfied), otherwise
                     :                                             * below every value (no match) */
    4910           0 :                                 ADD_WITH_CHECK(*(const hge *)vr,
    4911             :                                                *(const hge *)c2,
    4912             :                                                hge, v2,
    4913             :                                                GDK_hge_max,
    4914             :                                                do{if(*(const hge*)c2>0)goto hmatch2;else goto nohmatch;}while(false));
    4915           0 :                                 if (*(const hge *)vl >= v2 &&
    4916           0 :                                     (!hinc || *(const hge *)vl != v2))
    4917           0 :                                         continue;
    4918           0 :                                   hmatch2:
    4919             :                                 break;
    4920           0 :                                   nohmatch:
    4921           0 :                                 continue;
    4922             :                         }
    4923             : #endif
    4924           0 :                         case TYPE_flt: {
    4925           0 :                                 if (is_flt_nil(*(const flt *) vr))
    4926           0 :                                         continue;
    4927           0 :                                 dbl v1 = (dbl) *(const flt *) vr, v2;
    4928           0 :                                 v2 = v1;
    4929           0 :                                 v1 -= *(const flt *)c1;
    4930           0 :                                 if (*(const flt *)vl <= v1 &&
    4931           0 :                                     (!linc || *(const flt *)vl != v1))
    4932           0 :                                         continue;
    4933           0 :                                 v2 += *(const flt *)c2;
    4934           0 :                                 if (*(const flt *)vl >= v2 &&
    4935           0 :                                     (!hinc || *(const flt *)vl != v2))
    4936           0 :                                         continue;
    4937             :                                 break;
    4938             :                         }
    4939           0 :                         case TYPE_dbl: {
    4940           0 :                                 if (is_dbl_nil(*(const dbl *) vr))
    4941           0 :                                         continue;
    4942           0 :                                 dbl v1, v2; /* checked arithmetic, lower bound as
                     :                                              * for the lng fallback (c1 < 0 on
                     :                                              * overflow means no match) */
    4943           0 :                                 SUB_WITH_CHECK(*(const dbl *)vr,
    4944             :                                                *(const dbl *)c1,
    4945             :                                                dbl, v1,
    4946             :                                                GDK_dbl_max,
    4947             :                                                do{if(*(const dbl*)c1<0)goto nodmatch;else goto dmatch1;}while(false));
    4948           0 :                                 if (*(const dbl *)vl <= v1 &&
    4949           0 :                                     (!linc || *(const dbl *)vl != v1))
    4950           0 :                                         continue;
    4951           0 :                                   dmatch1: /* upper bound: if vr + c2 does not fit,
                     :                                             * c2 > 0 puts the bound above every
                     :                                             * value (always satisfied), otherwise
                     :                                             * below every value (no match) */
    4952           0 :                                 ADD_WITH_CHECK(*(const dbl *)vr,
    4953             :                                                *(const dbl *)c2,
    4954             :                                                dbl, v2,
    4955             :                                                GDK_dbl_max,
    4956             :                                                do{if(*(const dbl*)c2>0)goto dmatch2;else goto nodmatch;}while(false));
    4957           0 :                                 if (*(const dbl *)vl >= v2 &&
    4958           0 :                                     (!hinc || *(const dbl *)vl != v2))
    4959           0 :                                         continue;
    4960           0 :                                   dmatch2:
    4961             :                                 break;
    4962           0 :                                   nodmatch:
    4963           0 :                                 continue;
    4964             :                         }
    4965             :                         } /* reaching this point means (lo, ro) is a match */
    4966           0 :                         if (maybeextend(r1, r2, NULL, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    4967           0 :                                 goto bailout;
    4968           0 :                         if (BATcount(r1) > 0) { /* not the first result pair */
    4969           0 :                                 if (r2 && lastr + 1 != ro)
    4970           0 :                                         r2->tseqbase = oid_nil; /* r2 is not dense */
    4971           0 :                                 if (nr == 0) { /* first match for a new lo */
    4972           0 :                                         r1->trevsorted = false;
    4973           0 :                                         if (r2 == NULL) {
    4974             :                                                 /* nothing */
    4975           0 :                                         } else if (lastr > ro) {
    4976           0 :                                                 r2->tsorted = false;
    4977           0 :                                                 r2->tkey = false;
    4978           0 :                                         } else if (lastr < ro) {
    4979           0 :                                                 r2->trevsorted = false;
    4980             :                                         } else {
    4981           0 :                                                 r2->tkey = false; /* same ro twice in a row */
    4982             :                                         }
    4983             :                                 }
    4984             :                         }
    4985           0 :                         APPEND(r1, lo);
    4986           0 :                         if (r2) {
    4987           0 :                                 APPEND(r2, ro);
    4988             :                         }
    4989           0 :                         lastr = ro;
    4990           0 :                         nr++;
    4991             :                 }
    4992           0 :                 if (nr > 1) { /* lo was appended more than once */
    4993           0 :                         r1->tkey = false;
    4994           0 :                         r1->tseqbase = oid_nil;
    4995           0 :                         if (r2) {
    4996           0 :                                 r2->trevsorted = false;
    4997             :                         }
    4998           0 :                 } else if (nr == 0) { /* lo had no match at all */
    4999           0 :                         lskipped = BATcount(r1) > 0;
    5000           0 :                 } else if (lskipped) { /* single match after a skipped lo */
    5001           0 :                         r1->tseqbase = oid_nil;
    5002             :                 }
    5003             :         }
    5004             :         /* also set other bits of heap to correct value to indicate size */
    5005           0 :         BATsetcount(r1, BATcount(r1));
    5006           0 :         if (r2) {
    5007           0 :                 BATsetcount(r2, BATcount(r2));
    5008           0 :                 assert(BATcount(r1) == BATcount(r2));
    5009             :         }
    5010           0 :         if (BATcount(r1) > 0) {
    5011           0 :                 if (BATtdense(r1)) /* still dense: seqbase is the first value */
    5012           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    5013           0 :                 if (r2 && BATtdense(r2))
    5014           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    5015             :         } else {
    5016           0 :                 r1->tseqbase = 0;
    5017           0 :                 if (r2) {
    5018           0 :                         r2->tseqbase = 0;
    5019             :                 }
    5020             :         }
    5021           0 :         bat_iterator_end(&li);
    5022           0 :         bat_iterator_end(&ri);
    5023           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    5024             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    5025             :                   " -> " ALGOBATFMT "," ALGOOPTBATFMT
    5026             :                   " (" LLFMT "usec)\n",
    5027             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    5028             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    5029             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    5030             :                   GDKusec() - t0);
    5031             :         return GDK_SUCCEED;
    5032             : /* failure exit (timeout or maybeextend failure): release the
                     :  * iterators and the result BATs */
    5033           0 :   bailout:
    5034           0 :         bat_iterator_end(&li);
    5035           0 :         bat_iterator_end(&ri);
    5036           0 :         BBPreclaim(r1);
    5037           0 :         BBPreclaim(r2);
    5038             :         return GDK_FAIL;
    5039             : }
    5040             : /*
                     :  * BATrangejoin -- join l against the ranges described by rl and rh.
                     :  *
                     :  * This function only validates the arguments, handles the trivial
                     :  * cases and allocates the result BATs; the matching itself is
                     :  * delegated to rangejoin(), or to thetajoin() when exactly one of
                     :  * the bound columns is a void BAT with a nil seqbase and anti is
                     :  * set.
                     :  *
                     :  * r1p, r2p  - outputs; r2p may be NULL, in which case no second
                     :  *             result BAT is requested
                     :  * l         - left input
                     :  * rl, rh    - bound columns; presumably the lower and upper bound of
                     :  *             each range -- confirm against rangejoin()
                     :  * sl, sr    - candidate lists for l and for rl
                     :  * linc, hinc, anti, symmetric - passed unchanged to rangejoin()
                     :  * estimate  - passed on to joininitresults / thetajoin
                     :  *
                     :  * Returns GDK_FAIL when the parameter check or the result allocation
                     :  * fails, GDK_SUCCEED when joininitresults reports a maximum size of
                     :  * 0, and otherwise the result of nomatch, thetajoin or rangejoin.
                     :  */
    5041             : gdk_return
    5042         139 : BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh,
    5043             :              BAT *sl, BAT *sr, bool linc, bool hinc, bool anti, bool symmetric,
    5044             :              BUN estimate)
    5045             : {
    5046         139 :         struct canditer lci, rci;
    5047         139 :         BAT *r1 = NULL, *r2 = NULL;
    5048         139 :         BUN maxsize;
    5049         139 :         lng t0 = 0;
    5050             : 
    5051         139 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    5052         139 :         *r1p = NULL;
    5053         139 :         if (r2p) {
    5054         114 :                 *r2p = NULL;
    5055             :         }
    5056         139 :         if (joinparamcheck(l, rl, rh, sl, sr, __func__) != GDK_SUCCEED)
    5057             :                 return GDK_FAIL;
    5058         139 :         canditer_init(&lci, l, sl);
    5059         139 :         canditer_init(&rci, rl, sr);
    5060         138 :         if (lci.ncand == 0 ||
    5061         128 :             rci.ncand == 0 ||
    5062         117 :             (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)) ||
    5063         117 :             ((rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) &&
    5064           0 :              (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)))) {
    5065             :                 /* trivial: no candidates on one side, l is a void BAT
                     :                  * with nil seqbase, or both rl and rh are */
    5066          21 :                 return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5067             :                                __func__, t0);
    5068             :         }
    5069         117 :         if (rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) { /* only rl is void/nil */
    5070           0 :                 if (!anti)
    5071           0 :                         return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5072             :                                        __func__, t0);
    5073           0 :                 return thetajoin(r1p, r2p, l, rh, sl, sr, MASK_GT, estimate, false,
    5074             :                                  __func__, t0); /* anti: only rh is compared, with MASK_GT */
    5075             :         }
    5076         117 :         if (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)) { /* only rh is void/nil */
    5077           0 :                 if (!anti)
    5078           0 :                         return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5079             :                                        __func__, t0);
    5080           0 :                 return thetajoin(r1p, r2p, l, rl, sl, sr, MASK_LT, estimate, false,
    5081             :                                  __func__, t0); /* anti: only rl is compared, with MASK_LT */
    5082             :         }
    5083             : /* general case: allocate the results, sized from the candidate list
                     :  * counts (or the BAT counts when no candidate list was given); r2 is
                     :  * only requested when the caller supplied r2p */
    5084         136 :         if ((maxsize = joininitresults(&r1, r2p ? &r2 : NULL, NULL, sl ? BATcount(sl) : BATcount(l), sr ? BATcount(sr) : BATcount(rl), false, false, false, false, false, false, estimate)) == BUN_NONE)
    5085             :                 return GDK_FAIL;
    5086         115 :         *r1p = r1;
    5087         115 :         if (r2p) {
    5088          97 :                 *r2p = r2;
    5089             :         }
    5090         115 :         if (maxsize == 0) /* the freshly created results are returned as they are */
    5091             :                 return GDK_SUCCEED;
    5092             : 
    5093             :         /* note, the rangejoin implementation is in gdk_select.c since
    5094             :          * it uses the imprints code there */
    5095         116 :         return rangejoin(r1, r2, l, rl, rh, &lci, &rci, linc, hinc, anti, symmetric, maxsize);
    5096             : }

Generated by: LCOV version 1.14