LCOV - code coverage report
Current view: top level - gdk - gdk_join.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1840 2777 66.3 %
Date: 2024-04-25 20:03:45 Functions: 26 29 89.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "gdk.h"
      15             : #include "gdk_private.h"
      16             : #include "gdk_calc_private.h"
      17             : 
      18             : /*
      19             :  * All join variants produce some sort of join on two input BATs,
      20             :  * optionally subject to up to two candidate lists.  Only values in
      21             :  * the input BATs that are mentioned in the associated candidate list
      22             :  * (if provided) are eligible.  They all return two output BATs in the
      23             :  * first two arguments.  The join operations differ in the way in
      24             :  * which tuples from the two inputs are matched.
      25             :  *
      26             :  * The outputs consist of two aligned BATs (i.e. same length and same
      27             :  * hseqbase (0@0)) that contain the OIDs of the input BATs that match.
      28             :  * The candidate lists, if given, contain the OIDs of the associated
      29             :  * input BAT which must be considered for matching.  The input BATs
      30             :  * must have the same type.
      31             :  *
      32             :  * All functions also have a parameter nil_matches which indicates
      33             :  * whether NIL must be considered an ordinary value that can match, or
      34             :  * whether NIL must be considered to never match.
      35             :  *
      36             :  * The join functions that are provided here are:
      37             :  * BATjoin
      38             :  *      normal equi-join
      39             :  * BATleftjoin
      40             :  *      normal equi-join, but the left output is sorted
      41             :  * BATouterjoin
      42             :  *      equi-join, but the left output is sorted, and if there is no
      43             :  *      match for a value in the left input, there is still an output
      44             :  *      with NIL in the right output
      45             :  * BATsemijoin
      46             :  *      equi-join, but the left output is sorted, and if there are
      47             :  *      multiple matches, only one is returned (i.e., the left output
      48             :  *      is also key, making it a candidate list)
      49             :  * BATmarkjoin
      50             :  *      equi-join, but the left output is sorted, if there is no
      51             :  *      match for a value in the left input, there is still an output
      52             :  *      with NIL in the right output, and there is a third output column
      53             :  *      containing a flag that indicates the "certainty" of the match: 1
      54             :  *      there is a match, 0, there is no match and there are no NIL
      55             :  *      values, NIL, there is no match but there are NIL values
      56             :  * BATthetajoin
      57             :  *      theta-join: an extra operator must be provided encoded as an
      58             :  *      integer (macros JOIN_EQ, JOIN_NE, JOIN_LT, JOIN_LE, JOIN_GT,
      59             :  *      JOIN_GE); values match if the left input has the given
      60             :  *      relationship with the right input; order of the outputs is not
      61             :  *      guaranteed
      62             :  * BATbandjoin
      63             :  *      band-join: two extra input values (c1, c2) must be provided as
      64             :  *      well as Booleans (li, hi) that indicate whether the value
      65             :  *      ranges are inclusive or not; values in the left and right
      66             :  *      inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
      67             :  *      c2 is NIL, there are no matches
      68             :  * BATrangejoin
      69             :  *      range-join: the right input consists of two aligned BATs,
      70             :  *      values match if the left value is between two corresponding
      71             :  *      right values; two extra Boolean parameters, li and hi,
      72             :  *      indicate whether equal values match
      73             :  *
      74             :  * In addition to these functions, there are two more functions that
      75             :  * are closely related:
      76             :  * BATintersect
      77             :  *      intersection: return a candidate list with OIDs of tuples in
      78             :  *      the left input whose value occurs in the right input
      79             :  * BATdiff
      80             :  *      difference: return a candidate list with OIDs of tuples in the
      81             :  *      left input whose value does not occur in the right input
      82             :  */
      83             : 
      84             : /* Perform a bunch of sanity checks on the inputs to a join. */
      85             : static gdk_return
      86      275390 : joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
      87             : {
      88      706388 :         if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
      89         268 :             (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
      90           0 :                 GDKerror("%s: inputs not compatible.\n", func);
      91           0 :                 return GDK_FAIL;
      92             :         }
      93         134 :         if (r2 &&
      94         134 :             (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
      95           0 :                 GDKerror("%s: right inputs not aligned.\n", func);
      96           0 :                 return GDK_FAIL;
      97             :         }
      98      275390 :         if ((sl && !BATiscand(sl)) || (sr && !BATiscand(sr))) {
      99           0 :                 GDKerror("%s: argument not a candidate list.\n", func);
     100           0 :                 return GDK_FAIL;
     101             :         }
     102             :         return GDK_SUCCEED;
     103             : }
     104             : 
     105             : #define INCRSIZELOG     (8 + (SIZEOF_OID / 2))
     106             : #define INCRSIZE        (1 << INCRSIZELOG)
     107             : 
     108             : /* Create the result bats for a join, returns the absolute maximum
     109             :  * number of outputs that could possibly be generated. */
     110             : static BUN
     111       37180 : joininitresults(BAT **r1p, BAT **r2p, BAT **r3p, BUN lcnt, BUN rcnt,
     112             :                 bool lkey, bool rkey, bool semi, bool nil_on_miss,
     113             :                 bool only_misses, bool min_one, BUN estimate)
     114             : {
     115       37180 :         BAT *r1 = NULL, *r2 = NULL, *r3 = NULL;
     116       37180 :         BUN maxsize, size;
     117             : 
     118             :         /* if nil_on_miss is set, we really need a right output */
     119       37180 :         assert(!nil_on_miss || r2p != NULL || r3p != NULL);
     120             : 
     121       37180 :         lkey |= lcnt <= 1;
     122       37180 :         rkey |= rcnt <= 1;
     123             : 
     124       37180 :         *r1p = NULL;
     125       37180 :         if (r2p)
     126       17967 :                 *r2p = NULL;
     127       37180 :         if (r3p)
     128         117 :                 *r3p = NULL;
     129       37180 :         if (lcnt == 0) {
     130             :                 /* there is nothing to match */
     131             :                 maxsize = 0;
     132       35121 :         } else if (!only_misses && !nil_on_miss && rcnt == 0) {
     133             :                 /* if right is empty, we have no hits, so if we don't
     134             :                  * want misses, the result is empty */
     135             :                 maxsize = 0;
     136       35118 :         } else if (rkey | semi | only_misses) {
     137             :                 /* each entry left matches at most one on right, in
     138             :                  * case nil_on_miss is also set, each entry matches
     139             :                  * exactly one (see below) */
     140             :                 maxsize = lcnt;
     141       19558 :         } else if (lkey) {
     142             :                 /* each entry on right is matched at most once */
     143        3662 :                 if (nil_on_miss) {
     144             :                         /* one entry left could match all right, and
     145             :                          * all other entries left match nil */
     146          15 :                         maxsize = lcnt + rcnt - 1;
     147             :                 } else {
     148             :                         maxsize = rcnt;
     149             :                 }
     150       15896 :         } else if (rcnt == 0) {
     151             :                 /* nil_on_miss must be true due to previous checks, so
     152             :                  * all values on left miss */
     153             :                 maxsize = lcnt;
     154       15894 :         } else if (BUN_MAX / lcnt >= rcnt) {
     155             :                 /* in the worst case we have a full cross product */
     156       15895 :                 maxsize = lcnt * rcnt;
     157             :         } else {
     158             :                 /* a BAT cannot grow larger than BUN_MAX */
     159             :                 maxsize = BUN_MAX;
     160             :         }
     161       37180 :         size = estimate == BUN_NONE ? lcnt < rcnt ? lcnt : rcnt : estimate;
     162       37180 :         if (size < INCRSIZE)
     163             :                 size = INCRSIZE;
     164       37180 :         if (size > maxsize)
     165             :                 size = maxsize;
     166       37180 :         if ((rkey | semi | only_misses) & nil_on_miss) {
     167             :                 /* see comment above: each entry left matches exactly
     168             :                  * once */
     169         102 :                 size = maxsize;
     170             :         }
     171       37180 :         if (min_one && size < lcnt)
     172           0 :                 size = lcnt;
     173             : 
     174       37180 :         if (maxsize == 0) {
     175        2063 :                 r1 = BATdense(0, 0, 0);
     176        2064 :                 if (r1 == NULL) {
     177             :                         return BUN_NONE;
     178             :                 }
     179        2064 :                 if (r2p) {
     180         243 :                         r2 = BATdense(0, 0, 0);
     181         243 :                         if (r2 == NULL) {
     182           0 :                                 BBPreclaim(r1);
     183           0 :                                 return BUN_NONE;
     184             :                         }
     185         243 :                         *r2p = r2;
     186             :                 }
     187        2064 :                 if (r3p) {
     188           0 :                         r3 = COLnew(0, TYPE_bit, 0, TRANSIENT);
     189           0 :                         if (r3 == NULL) {
     190           0 :                                 BBPreclaim(r1);
     191           0 :                                 BBPreclaim(r2);
     192           0 :                                 if (r2p)
     193           0 :                                         *r2p = NULL;
     194           0 :                                 return BUN_NONE;
     195             :                         }
     196           0 :                         *r3p = r3;
     197             :                 }
     198        2064 :                 *r1p = r1;
     199        2064 :                 return 0;
     200             :         }
     201             : 
     202       35117 :         r1 = COLnew(0, TYPE_oid, size, TRANSIENT);
     203       35118 :         if (r1 == NULL) {
     204             :                 return BUN_NONE;
     205             :         }
     206       35118 :         r1->tnil = false;
     207       35118 :         r1->tnonil = true;
     208       35118 :         r1->tkey = true;
     209       35118 :         r1->tsorted = true;
     210       35118 :         r1->trevsorted = true;
     211       35118 :         r1->tseqbase = 0;
     212       35118 :         r1->theap->dirty = true;
     213       35118 :         *r1p = r1;
     214       35118 :         if (r2p) {
     215       17723 :                 r2 = COLnew(0, TYPE_oid, size, TRANSIENT);
     216       17722 :                 if (r2 == NULL) {
     217           0 :                         BBPreclaim(r1);
     218           0 :                         return BUN_NONE;
     219             :                 }
     220       17722 :                 r2->tnil = false;
     221       17722 :                 r2->tnonil = true;
     222       17722 :                 r2->tkey = true;
     223       17722 :                 r2->tsorted = true;
     224       17722 :                 r2->trevsorted = true;
     225       17722 :                 r2->tseqbase = 0;
     226       17722 :                 r2->theap->dirty = true;
     227       17722 :                 *r2p = r2;
     228             :         }
     229       35117 :         if (r3p) {
     230         115 :                 BAT *r3 = COLnew(0, TYPE_bit, size, TRANSIENT);
     231         115 :                 if (r3 == NULL) {
     232           0 :                         BBPreclaim(r1);
     233           0 :                         BBPreclaim(r2);
     234           0 :                         return BUN_NONE;
     235             :                 }
     236         115 :                 r3->tnil = false;
     237         115 :                 r3->tnonil = true;
     238         115 :                 r3->tkey = false;
     239         115 :                 r3->tsorted = false;
     240         115 :                 r3->trevsorted = false;
     241         115 :                 r3->tseqbase = oid_nil;
     242         115 :                 r3->theap->dirty = true;
     243         115 :                 *r3p = r3;
     244             :         }
     245             :         return maxsize;
     246             : }
     247             : 
     248             : #define VALUE(s, x)     (s##vars ?                                      \
     249             :                          s##vars + VarHeapVal(s##vals, (x), s##i.width) : \
     250             :                          s##vals ? (const char *) s##vals + ((x) * s##i.width) : \
     251             :                          (s##val = BUNtoid(s, (x)), (const char *) &s##val))
     252             : #define FVALUE(s, x)    ((const char *) s##vals + ((x) * s##i.width))
     253             : 
     254             : #define APPEND(b, o)            (((oid *) b->theap->base)[b->batCount++] = (o))
     255             : 
     256             : static inline gdk_return
     257   220299054 : maybeextend(BAT *restrict r1, BAT *restrict r2, BAT *restrict r3,
     258             :             BUN cnt, BUN lcur, BUN lcnt, BUN maxsize)
     259             : {
     260   220299054 :         if (BATcount(r1) + cnt > BATcapacity(r1)) {
     261             :                 /* make some extra space by extrapolating how much more
     262             :                  * we need (fraction of l we've seen so far is used to
     263             :                  * estimate a new size but with a shallow slope so that
     264             :                  * a skewed join doesn't overwhelm, whilst making sure
     265             :                  * there is somewhat significant progress) */
     266        1754 :                 BUN newcap = (BUN) (lcnt / (lcnt / 4.0 + lcur * .75) * (BATcount(r1) + cnt));
     267        1754 :                 newcap = (newcap + INCRSIZE - 1) & ~(((BUN) 1 << INCRSIZELOG) - 1);
     268        1754 :                 if (newcap < cnt + BATcount(r1))
     269           0 :                         newcap = cnt + BATcount(r1) + INCRSIZE;
     270             :                 /* if close to maxsize, then just use maxsize */
     271        1754 :                 if (newcap + INCRSIZE > maxsize)
     272         158 :                         newcap = maxsize;
     273             :                 /* make sure heap.free is set properly before
     274             :                  * extending */
     275        1754 :                 BATsetcount(r1, BATcount(r1));
     276        1754 :                 if (BATextend(r1, newcap) != GDK_SUCCEED)
     277             :                         return GDK_FAIL;
     278        1754 :                 if (r2) {
     279        1127 :                         BATsetcount(r2, BATcount(r2));
     280        1127 :                         if (BATextend(r2, newcap) != GDK_SUCCEED)
     281             :                                 return GDK_FAIL;
     282        1127 :                         assert(BATcapacity(r1) == BATcapacity(r2));
     283             :                 }
     284        1754 :                 if (r3) {
     285           0 :                         BATsetcount(r3, BATcount(r3));
     286           0 :                         if (BATextend(r3, newcap) != GDK_SUCCEED)
     287             :                                 return GDK_FAIL;
     288           0 :                         assert(BATcapacity(r1) == BATcapacity(r3));
     289             :                 }
     290             :         }
     291             :         return GDK_SUCCEED;
     292             : }
     293             : 
     294             : /* Return BATs through r1p, r2p, and r3p for the case that there is no
     295             :  * match between l and r, taking all flags into consideration.
     296             :  *
     297             :  * This means, if nil_on_miss is set or only_misses is set, *r1p is a
     298             :  * copy of the left candidate list or a dense list of all "head"
     299             :  * values of l, and *r2p (if r2p is not NULL) is all nil.  If neither
     300             :  * of those flags is set, the result is two empty BATs. */
     301             : static gdk_return
     302      182189 : nomatch(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     303             :         struct canditer *restrict lci, bit defmark,
     304             :         bool nil_on_miss, bool only_misses, const char *func, lng t0)
     305             : {
     306      182189 :         BAT *r1, *r2 = NULL, *r3 = NULL;
     307             : 
     308      182189 :         MT_thread_setalgorithm(__func__);
     309      182161 :         if (lci->ncand == 0 || !(nil_on_miss | only_misses)) {
     310             :                 /* return empty BATs */
     311      169985 :                 if ((r1 = BATdense(0, 0, 0)) == NULL)
     312             :                         return GDK_FAIL;
     313      170000 :                 if (r2p) {
     314      120886 :                         if ((r2 = BATdense(0, 0, 0)) == NULL) {
     315           0 :                                 BBPreclaim(r1);
     316           0 :                                 return GDK_FAIL;
     317             :                         }
     318      120876 :                         *r2p = r2;
     319             :                 }
     320      169990 :                 if (r3p) {
     321        2921 :                         if ((r3 = COLnew(0, TYPE_bit, 0, TRANSIENT)) == NULL) {
     322           0 :                                 BBPreclaim(r1);
     323           0 :                                 BBPreclaim(r2);
     324           0 :                                 return GDK_FAIL;
     325             :                         }
     326        2921 :                         *r3p = r3;
     327             :                 }
     328             :         } else {
     329       12176 :                 r1 = canditer_slice(lci, 0, lci->ncand);
     330       12176 :                 if (r2p) {
     331           5 :                         if ((r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand, TRANSIENT)) == NULL) {
     332           0 :                                 BBPreclaim(r1);
     333           0 :                                 return GDK_FAIL;
     334             :                         }
     335           5 :                         *r2p = r2;
     336             :                 }
     337       12176 :                 if (r3p) {
     338          74 :                         if ((r3 = BATconstant(0, TYPE_bit, &defmark, lci->ncand, TRANSIENT)) == NULL) {
     339           0 :                                 BBPreclaim(r1);
     340           0 :                                 BBPreclaim(r2);
     341           0 :                                 return GDK_FAIL;
     342             :                         }
     343          74 :                         *r3p = r3;
     344             :                 }
     345             :         }
     346      182166 :         *r1p = r1;
     347      182166 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ",r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT
     348             :                   ",nil_on_miss=%s,only_misses=%s"
     349             :                   " - > " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
     350             :                   " (%s -- " LLFMT "usec)\n",
     351             :                   ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(lci->s),
     352             :                   nil_on_miss ? "true" : "false",
     353             :                   only_misses ? "true" : "false",
     354             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
     355             :                   func, GDKusec() - t0);
     356             :         return GDK_SUCCEED;
     357             : }
     358             : 
     359             : /* Implementation of join where there is a single value (possibly
     360             :  * repeated multiple times) on the left.  This means we can use a
     361             :  * point select to find matches in the right column. */
     362             : static gdk_return
     363       40052 : selectjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     364             :            struct canditer *lci, struct canditer *rci,
     365             :            bool nil_matches, bool nil_on_miss, bool semi, bool max_one, bool min_one,
     366             :            lng t0, bool swapped, const char *reason)
     367             : {
     368       40052 :         BATiter li = bat_iterator(l);
     369       40052 :         const void *v;
     370       40052 :         BAT *bn = NULL;
     371       40052 :         BAT *r1 = NULL;
     372       40052 :         BAT *r2 = NULL;
     373       40052 :         BUN bncount;
     374             : 
     375       40052 :         assert(lci->ncand > 0);
     376       40052 :         assert(lci->ncand == 1 || (li.sorted && li.revsorted));
     377             : 
     378       40052 :         size_t counter = 0;
     379       40052 :         lng timeoffset = 0;
     380       40052 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     381       40052 :         if (qry_ctx != NULL) {
     382       39258 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     383             :         }
     384             : 
     385       40052 :         MT_thread_setalgorithm(__func__);
     386       40052 :         oid o = canditer_next(lci);
     387       40051 :         v = BUNtail(li, o - l->hseqbase);
     388             : 
     389       78684 :         if (!nil_matches &&
     390       38633 :             (*ATOMcompare(li.type))(v, ATOMnilptr(li.type)) == 0) {
     391             :                 /* NIL doesn't match anything */
     392         146 :                 bat_iterator_end(&li);
     393         146 :                 gdk_return rc = nomatch(r1p, r2p, r3p, l, r, lci, bit_nil, nil_on_miss,
     394             :                                         false, reason, t0);
     395         146 :                 return rc;
     396             :         }
     397             : 
     398       39905 :         bn = BATselect(r, rci->s, v, NULL, true, true, false);
     399       39906 :         bat_iterator_end(&li);
     400       39906 :         if (bn == NULL) {
     401             :                 return GDK_FAIL;
     402             :         }
     403       39906 :         bncount = BATcount(bn);
     404       39906 :         if (bncount == 0) {
     405        6345 :                 BBPreclaim(bn);
     406        6345 :                 if (min_one) {
     407           0 :                         GDKerror("not enough matches");
     408           0 :                         return GDK_FAIL;
     409             :                 }
     410        6345 :                 if (!nil_on_miss) {
     411        6216 :                         assert(r3p == NULL);
     412        6216 :                         return nomatch(r1p, r2p, r3p, l, r, lci, 0, nil_on_miss,
     413             :                                        false, reason, t0);
     414             :                 }
     415             :                 /* special case: return nil on RHS */
     416             :                 bncount = 1;
     417             :                 bn = NULL;
     418             :         }
     419       33561 :         if (bncount > 1) {
     420        1242 :                 if (semi)
     421         360 :                         bncount = 1;
     422        1242 :                 if (max_one) {
     423          15 :                         GDKerror("more than one match");
     424          15 :                         goto bailout;
     425             :                 }
     426             :         }
     427       33675 :         r1 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
     428       33675 :         if (r1 == NULL)
     429           0 :                 goto bailout;
     430       33675 :         r1->tsorted = true;
     431       33675 :         r1->trevsorted = lci->ncand == 1;
     432       33675 :         r1->tseqbase = bncount == 1 && lci->tpe == cand_dense ? o : oid_nil;
     433       33675 :         r1->tkey = bncount == 1;
     434       33675 :         r1->tnil = false;
     435       33675 :         r1->tnonil = true;
     436       33675 :         if (bn == NULL) {
     437             :                 /* left outer join, no match, we're returning nil in r2 */
     438         129 :                 oid *o1p = (oid *) Tloc(r1, 0);
     439         129 :                 BUN p, q = bncount;
     440             : 
     441         129 :                 if (r2p) {
     442           1 :                         r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand * bncount, TRANSIENT);
     443           1 :                         if (r2 == NULL)
     444           0 :                                 goto bailout;
     445           1 :                         *r2p = r2;
     446             :                 }
     447         212 :                 do {
     448         212 :                         GDK_CHECK_TIMEOUT(timeoffset, counter,
     449             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     450         424 :                         for (p = 0; p < q; p++) {
     451         212 :                                 *o1p++ = o;
     452             :                         }
     453         212 :                         o = canditer_next(lci);
     454         212 :                 } while (!is_oid_nil(o));
     455             :         } else {
     456       33546 :                 oid *o1p = (oid *) Tloc(r1, 0);
     457       33546 :                 oid *o2p;
     458       33546 :                 BUN p, q = bncount;
     459             : 
     460       33546 :                 if (r2p) {
     461       29985 :                         r2 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
     462       29985 :                         if (r2 == NULL)
     463           0 :                                 goto bailout;
     464       29985 :                         r2->tsorted = lci->ncand == 1 || bncount == 1;
     465       29985 :                         r2->trevsorted = bncount == 1;
     466       29985 :                         r2->tseqbase = lci->ncand == 1 && BATtdense(bn) ? bn->tseqbase : oid_nil;
     467       29985 :                         r2->tkey = lci->ncand == 1;
     468       29985 :                         r2->tnil = false;
     469       29985 :                         r2->tnonil = true;
     470       29985 :                         *r2p = r2;
     471       29985 :                         o2p = (oid *) Tloc(r2, 0);
     472             :                 } else {
     473             :                         o2p = NULL;
     474             :                 }
     475             : 
     476       33546 :                 if (BATtdense(bn)) {
     477             :                         oid bno = bn->tseqbase;
     478             : 
     479     1228968 :                         do {
     480     1228968 :                                 GDK_CHECK_TIMEOUT(timeoffset, counter,
     481             :                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     482     2640581 :                                 for (p = 0; p < q; p++) {
     483     1411613 :                                         *o1p++ = o;
     484             :                                 }
     485     1228968 :                                 if (o2p) {
     486      445769 :                                         for (p = 0; p < q; p++) {
     487      314155 :                                                 *o2p++ = bno + p;
     488             :                                         }
     489             :                                 }
     490     1228968 :                                 o = canditer_next(lci);
     491     1228968 :                         } while (!is_oid_nil(o));
     492             :                 } else {
     493         164 :                         const oid *bnp = (const oid *) Tloc(bn, 0);
     494             : 
     495         267 :                         do {
     496         267 :                                 GDK_CHECK_TIMEOUT(timeoffset, counter,
     497             :                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     498      172370 :                                 for (p = 0; p < q; p++) {
     499      172103 :                                         *o1p++ = o;
     500             :                                 }
     501         267 :                                 if (o2p) {
     502      169975 :                                         for (p = 0; p < q; p++) {
     503      169727 :                                                 *o2p++ = bnp[p];
     504             :                                         }
     505             :                                 }
     506         267 :                                 o = canditer_next(lci);
     507         267 :                         } while (!is_oid_nil(o));
     508             :                 }
     509       33546 :                 if (r2)
     510       29985 :                         BATsetcount(r2, lci->ncand * bncount);
     511             :         }
     512       33675 :         BATsetcount(r1, lci->ncand * bncount);
     513       33675 :         *r1p = r1;
     514       33675 :         BAT *r3 = NULL;
     515       33675 :         if (r3p) {
     516         196 :                 bit mark;
     517         196 :                 if (bn) {
     518             :                         /* there is a match */
     519          67 :                         mark = 1;
     520         129 :                 } else if (r->tnonil) {
     521             :                         /* no match, no NIL in r */
     522           2 :                         mark = 0;
     523             :                 } else {
     524             :                         /* no match, search for NIL in r */
     525         127 :                         BAT *n = BATselect(r, rci->s, ATOMnilptr(r->ttype), NULL, true, true, false);
     526         127 :                         if (n == NULL)
     527           0 :                                 goto bailout;
     528         127 :                         mark = BATcount(n) == 0 ? 0 : bit_nil;
     529         127 :                         BBPreclaim(n);
     530             :                 }
     531         196 :                 r3 = BATconstant(0, TYPE_bit, &mark, lci->ncand, TRANSIENT);
     532         196 :                 if (r3 == NULL)
     533           0 :                         goto bailout;
     534         196 :                 *r3p = r3;
     535             :         }
     536       33675 :         BBPreclaim(bn);
     537       33675 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
     538             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
     539             :                   "sr=" ALGOOPTBATFMT ",nil_matches=%s;%s %s "
     540             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
     541             :                   " (" LLFMT "usec)\n",
     542             :                   ALGOBATPAR(l), ALGOBATPAR(r),
     543             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
     544             :                   nil_matches ? "true" : "false",
     545             :                   swapped ? " swapped" : "", reason,
     546             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
     547             :                   GDKusec() - t0);
     548             : 
     549             :         return GDK_SUCCEED;
     550             : 
     551          15 :   bailout:
     552          15 :         BBPreclaim(bn);
     553          15 :         BBPreclaim(r1);
     554          15 :         BBPreclaim(r2);
     555          15 :         if (r2p)
     556          14 :                 *r2p = NULL;
     557             :         return GDK_FAIL;
     558             : }
     559             : 
     560             : #if SIZEOF_OID == SIZEOF_INT
     561             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_int(indir, offset, (const int *) vals, lo, hi, (int) (v), ordering, last)
     562             : #endif
     563             : #if SIZEOF_OID == SIZEOF_LNG
     564             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, last)
     565             : #endif
     566             : 
     567             : /* Implementation of join where the right-hand side is dense, and if
     568             :  * there is a right candidate list, it too is dense.  This means there
     569             :  * are no NIL values in r.  In case nil_on_miss is not set, we use a
     570             :  * range select (BATselect) to find the matching values in the left
     571             :  * column and then calculate the corresponding matches from the right.
     572             :  * If nil_on_miss is set, we need to do some more work. The latter is
     573             :  * also the only case in which r3p van be set. */
     574             : static gdk_return
     575       17455 : mergejoin_void(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     576             :                struct canditer *restrict lci, struct canditer *restrict rci,
     577             :                bool nil_on_miss, bool only_misses, lng t0, bool swapped,
     578             :                const char *reason)
     579             : {
     580       17455 :         oid lo, hi;
     581       17455 :         BUN i;
     582       17455 :         oid o, *o1p = NULL, *o2p = NULL;
     583       17455 :         bit *m3p = NULL;
     584       17455 :         BAT *r1 = NULL, *r2 = NULL, *r3 = NULL;
     585       17455 :         bool ltsorted = false, ltrevsorted = false, ltkey = false;
     586             : 
     587             :         /* r is dense, and if there is a candidate list, it too is
     588             :          * dense.  This means we don't have to do any searches, we
     589             :          * only need to compare ranges to know whether a value from l
     590             :          * has a match in r */
     591       25369 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
     592       17455 :         assert(r->tsorted || r->trevsorted);
     593       17455 :         assert(BATcount(l) > 0);
     594       17455 :         assert(rci->tpe == cand_dense);
     595       17455 :         assert(BATcount(r) > 0);
     596             : 
     597       17455 :         lng timeoffset = 0;
     598       17455 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     599       17454 :         if (qry_ctx != NULL) {
     600       17439 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
     601             :         }
     602             : 
     603       17454 :         MT_thread_setalgorithm(__func__);
     604             :         /* figure out range [lo..hi) of values in r that we need to match */
     605       17454 :         lo = r->tseqbase;
     606       17454 :         hi = lo + BATcount(r);
     607             :         /* restrict [lo..hi) range further using candidate list */
     608       17454 :         if (rci->seq > r->hseqbase)
     609           0 :                 lo += rci->seq - r->hseqbase;
     610       17454 :         if (rci->seq + rci->ncand < r->hseqbase + BATcount(r))
     611           0 :                 hi -= r->hseqbase + BATcount(r) - rci->seq - rci->ncand;
     612             : 
     613             :         /* at this point, the matchable values in r are [lo..hi) */
     614       17454 :         if (!nil_on_miss) {
     615       17454 :                 assert(r3p == NULL);
     616       17454 :                 r1 = BATselect(l, lci->s, &lo, &hi, true, false, only_misses);
     617       17455 :                 if (r1 == NULL)
     618             :                         return GDK_FAIL;
     619       17455 :                 if (only_misses && !l->tnonil) {
     620             :                         /* also look for NILs */
     621           0 :                         r2 = BATselect(l, lci->s, &oid_nil, NULL, true, false, false);
     622           0 :                         if (r2 == NULL) {
     623           0 :                                 BBPreclaim(r1);
     624           0 :                                 return GDK_FAIL;
     625             :                         }
     626           0 :                         if (BATcount(r2) > 0) {
     627           0 :                                 BAT *mg = BATmergecand(r1, r2);
     628           0 :                                 BBPunfix(r1->batCacheid);
     629           0 :                                 BBPunfix(r2->batCacheid);
     630           0 :                                 r1 = mg;
     631           0 :                                 if (r1 == NULL)
     632             :                                         return GDK_FAIL;
     633             :                         } else {
     634           0 :                                 BBPunfix(r2->batCacheid);
     635             :                         }
     636             :                         r2 = NULL;
     637             :                 }
     638       17455 :                 *r1p = r1;
     639       17455 :                 if (r2p == NULL)
     640       16736 :                         goto doreturn2;
     641         719 :                 if (BATcount(r1) == 0) {
     642          14 :                         r2 = BATdense(0, 0, 0);
     643          14 :                         if (r2 == NULL) {
     644           0 :                                 BBPreclaim(r1);
     645           0 :                                 return GDK_FAIL;
     646             :                         }
     647         705 :                 } else if (BATtdense(r1) && BATtdense(l)) {
     648          80 :                         r2 = BATdense(0, l->tseqbase + r1->tseqbase - l->hseqbase + r->hseqbase - r->tseqbase, BATcount(r1));
     649          80 :                         if (r2 == NULL) {
     650           0 :                                 BBPreclaim(r1);
     651           0 :                                 return GDK_FAIL;
     652             :                         }
     653             :                 } else {
     654         625 :                         r2 = COLnew(0, TYPE_oid, BATcount(r1), TRANSIENT);
     655         625 :                         if (r2 == NULL) {
     656           0 :                                 BBPreclaim(r1);
     657           0 :                                 return GDK_FAIL;
     658             :                         }
     659         625 :                         BATiter li = bat_iterator(l);
     660         623 :                         const oid *lp = (const oid *) li.base;
     661         623 :                         const oid *o1p = (const oid *) Tloc(r1, 0);
     662         623 :                         oid *o2p = (oid *) Tloc(r2, 0);
     663         623 :                         hi = BATcount(r1);
     664         623 :                         if (complex_cand(l)) {
     665             :                                 /* this is actually generic code */
     666           0 :                                 for (o = 0; o < hi; o++)
     667           0 :                                         o2p[o] = BUNtoid(l, BUNtoid(r1, o) - l->hseqbase) - r->tseqbase + r->hseqbase;
     668         623 :                         } else if (BATtdense(r1)) {
     669         264 :                                 lo = r1->tseqbase - l->hseqbase;
     670         264 :                                 if (r->tseqbase == r->hseqbase) {
     671         257 :                                         memcpy(o2p, lp + lo, hi * SIZEOF_OID);
     672             :                                 } else {
     673           7 :                                         hi += lo;
     674     5085011 :                                         for (o = 0; lo < hi; o++, lo++) {
     675     5085004 :                                                 o2p[o] = lp[lo] - r->tseqbase + r->hseqbase;
     676             :                                         }
     677             :                                 }
     678         359 :                         } else if (BATtdense(l)) {
     679           0 :                                 for (o = 0; o < hi; o++) {
     680           0 :                                         o2p[o] = o1p[o] - l->hseqbase + li.tseq - r->tseqbase + r->hseqbase;
     681             :                                 }
     682             :                         } else {
     683    43044150 :                                 for (o = 0; o < hi; o++) {
     684    43043791 :                                         o2p[o] = lp[o1p[o] - l->hseqbase] - r->tseqbase + r->hseqbase;
     685             :                                 }
     686             :                         }
     687         623 :                         r2->tkey = li.key;
     688         623 :                         r2->tsorted = li.sorted;
     689         623 :                         r2->trevsorted = li.revsorted;
     690         623 :                         bat_iterator_end(&li);
     691         624 :                         r2->tnil = false;
     692         624 :                         r2->tnonil = true;
     693         624 :                         BATsetcount(r2, BATcount(r1));
     694             :                 }
     695         719 :                 *r2p = r2;
     696         719 :                 goto doreturn2;
     697             :         }
     698             :         /* nil_on_miss is set, this means we must have a second or third
     699             :          * output */
     700           0 :         assert(r2p || r3p);
     701           0 :         if (BATtdense(l)) {
     702             :                 /* if l is dense, we can further restrict the [lo..hi)
     703             :                  * range to values in l that match with values in r */
     704           0 :                 o = lo;
     705           0 :                 i = lci->seq - l->hseqbase;
     706           0 :                 if (l->tseqbase + i > lo)
     707           0 :                         lo = l->tseqbase + i;
     708           0 :                 i = canditer_last(lci) + 1 - l->hseqbase;
     709           0 :                 if (l->tseqbase + i < hi)
     710           0 :                         hi = l->tseqbase + i;
     711           0 :                 if (lci->tpe == cand_dense) {
     712             :                         /* l is dense, and so is the left candidate
     713             :                          * list (if it exists); this means we don't
     714             :                          * have to actually look at any values in l:
     715             :                          * we can just do some arithmetic; it also
     716             :                          * means that r1 will be dense, and if
     717             :                          * nil_on_miss is not set, or if all values in
     718             :                          * l match, r2 will too */
     719           0 :                         if (hi <= lo) {
     720           0 :                                 return nomatch(r1p, r2p, r3p, l, r, lci, 0,
     721             :                                                nil_on_miss, only_misses,
     722             :                                                __func__, t0);
     723             :                         }
     724             : 
     725             :                         /* at this point, the matched values in l and
     726             :                          * r (taking candidate lists into account) are
     727             :                          * [lo..hi) which we can translate back to the
     728             :                          * respective OID values that we can store in
     729             :                          * r1 and r2; note that r1 will be dense since
     730             :                          * all values in l will match something (even
     731             :                          * if nil since nil_on_miss is set) */
     732           0 :                         *r1p = r1 = BATdense(0, lci->seq, lci->ncand);
     733           0 :                         if (r1 == NULL)
     734             :                                 return GDK_FAIL;
     735           0 :                         if (r2p) {
     736           0 :                                 if (hi - lo < lci->ncand) {
     737             :                                         /* we need to fill in nils in r2 for
     738             :                                          * missing values */
     739           0 :                                         *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     740           0 :                                         if (r2 == NULL) {
     741           0 :                                                 BBPreclaim(*r1p);
     742           0 :                                                 return GDK_FAIL;
     743             :                                         }
     744           0 :                                         o2p = (oid *) Tloc(r2, 0);
     745           0 :                                         i = l->tseqbase + lci->seq - l->hseqbase;
     746           0 :                                         lo -= i;
     747           0 :                                         hi -= i;
     748           0 :                                         i += r->hseqbase - r->tseqbase;
     749           0 :                                         for (o = 0; o < lo; o++)
     750           0 :                                                 *o2p++ = oid_nil;
     751           0 :                                         for (o = lo; o < hi; o++)
     752           0 :                                                 *o2p++ = o + i;
     753           0 :                                         for (o = hi; o < lci->ncand; o++)
     754           0 :                                                 *o2p++ = oid_nil;
     755           0 :                                         r2->tnonil = false;
     756           0 :                                         r2->tnil = true;
     757             :                                         /* sorted of no nils at end */
     758           0 :                                         r2->tsorted = hi == lci->ncand;
     759             :                                         /* reverse sorted if single non-nil at start */
     760           0 :                                         r2->trevsorted = lo == 0 && hi == 1;
     761           0 :                                         r2->tseqbase = oid_nil;
     762             :                                         /* (hi - lo) different OIDs in r2,
     763             :                                          * plus one for nil */
     764           0 :                                         r2->tkey = hi - lo + 1 == lci->ncand;
     765           0 :                                         BATsetcount(r2, lci->ncand);
     766             :                                 } else {
     767             :                                         /* no missing values */
     768           0 :                                         *r2p = r2 = BATdense(0, r->hseqbase + lo - r->tseqbase, lci->ncand);
     769           0 :                                         if (r2 == NULL) {
     770           0 :                                                 BBPreclaim(*r1p);
     771           0 :                                                 return GDK_FAIL;
     772             :                                         }
     773             :                                 }
     774             :                         }
     775           0 :                         if (r3p) {
     776           0 :                                 if (hi - lo < lci->ncand) {
     777           0 :                                         *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     778           0 :                                         if (r3 == NULL) {
     779           0 :                                                 BBPreclaim(*r1p);
     780           0 :                                                 BBPreclaim(r2);
     781           0 :                                                 return GDK_FAIL;
     782             :                                         }
     783           0 :                                         m3p = (bit *) Tloc(r3, 0);
     784           0 :                                         for (o = 0; o < lo; o++)
     785           0 :                                                 *m3p++ = 0;
     786           0 :                                         for (o = lo; o < hi; o++)
     787           0 :                                                 *m3p++ = 1;
     788           0 :                                         for (o = hi; o < lci->ncand; o++)
     789           0 :                                                 *m3p++ = 0;
     790           0 :                                         r3->tnonil = true;
     791           0 :                                         r3->tnil = false;
     792           0 :                                         r3->tsorted = hi == lci->ncand;
     793           0 :                                         r3->trevsorted = lo == 0;
     794           0 :                                         r3->tkey = false;
     795           0 :                                         BATsetcount(r3, lci->ncand);
     796             :                                 }
     797             :                         }
     798           0 :                         goto doreturn;
     799             :                 }
     800             :                 /* l is dense, but the candidate list exists and is
     801             :                  * not dense; we can, by manipulating the range
     802             :                  * [lo..hi), just look at the candidate list values */
     803             : 
     804             :                 /* translate lo and hi to l's OID values that now need
     805             :                  * to match */
     806           0 :                 lo = lo - l->tseqbase + l->hseqbase;
     807           0 :                 hi = hi - l->tseqbase + l->hseqbase;
     808             : 
     809           0 :                 *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     810           0 :                 if (r2p)
     811           0 :                         *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     812           0 :                 if (r3p)
     813           0 :                         *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     814           0 :                 if (r1 == NULL || (r2p != NULL && r2 == NULL) || (r3p != NULL && r3 == NULL)) {
     815           0 :                         BBPreclaim(r1);
     816           0 :                         BBPreclaim(r2);
     817           0 :                         BBPreclaim(r3);
     818           0 :                         return GDK_FAIL;
     819             :                 }
     820           0 :                 o1p = (oid *) Tloc(r1, 0);
     821           0 :                 if (r2) {
     822           0 :                         o2p = (oid *) Tloc(r2, 0);
     823           0 :                         r2->tnil = false;
     824           0 :                         r2->tnonil = true;
     825           0 :                         r2->tkey = true;
     826           0 :                         r2->tsorted = true;
     827             :                 }
     828           0 :                 if (r3) {
     829           0 :                         m3p = (bit *) Tloc(r3, 0);
     830           0 :                         r3->tnil = false;
     831           0 :                         r3->tnonil = true;
     832           0 :                         r3->tkey = false;
     833           0 :                         r3->tsorted = false;
     834             :                 }
     835           0 :                 o = canditer_next(lci);
     836           0 :                 for (i = 0; i < lci->ncand && o < lo; i++) {
     837           0 :                         *o1p++ = o;
     838           0 :                         if (r2)
     839           0 :                                 *o2p++ = oid_nil;
     840           0 :                         if (r3)
     841           0 :                                 *m3p++ = 0;
     842           0 :                         o = canditer_next(lci);
     843             :                 }
     844           0 :                 if (i > 0 && r2) {
     845           0 :                         r2->tnil = true;
     846           0 :                         r2->tnonil = false;
     847           0 :                         r2->tkey = i == 1;
     848             :                 }
     849           0 :                 for (; i < lci->ncand && o < hi; i++) {
     850           0 :                         *o1p++ = o;
     851           0 :                         if (r2)
     852           0 :                                 *o2p++ = o - l->hseqbase + l->tseqbase - r->tseqbase + r->hseqbase;
     853           0 :                         if (r3)
     854           0 :                                 *m3p++ = 1;
     855           0 :                         o = canditer_next(lci);
     856             :                 }
     857           0 :                 if (i < lci->ncand) {
     858           0 :                         if (r2) {
     859           0 :                                 r2->tkey = !r2->tnil && lci->ncand - i == 1;
     860           0 :                                 r2->tnil = true;
     861           0 :                                 r2->tnonil = false;
     862           0 :                                 r2->tsorted = false;
     863             :                         }
     864           0 :                         for (; i < lci->ncand; i++) {
     865           0 :                                 *o1p++ = o;
     866           0 :                                 if (r2)
     867           0 :                                         *o2p++ = oid_nil;
     868           0 :                                 if (r1)
     869           0 :                                         *m3p++ = 0;
     870           0 :                                 o = canditer_next(lci);
     871             :                         }
     872             :                 }
     873           0 :                 BATsetcount(r1, lci->ncand);
     874           0 :                 r1->tseqbase = BATcount(r1) == 1 ? *(oid*)Tloc(r1, 0) : oid_nil;
     875           0 :                 r1->tsorted = true;
     876           0 :                 r1->trevsorted = BATcount(r1) <= 1;
     877           0 :                 r1->tnil = false;
     878           0 :                 r1->tnonil = true;
     879           0 :                 r1->tkey = true;
     880           0 :                 if (r2) {
     881           0 :                         BATsetcount(r2, BATcount(r1));
     882           0 :                         r2->tseqbase = r2->tnil || BATcount(r2) > 1 ? oid_nil : BATcount(r2) == 1 ? *(oid*)Tloc(r2, 0) : 0;
     883           0 :                         r2->trevsorted = BATcount(r2) <= 1;
     884             :                 }
     885           0 :                 if (r3) {
     886           0 :                         BATsetcount(r3, BATcount(r1));
     887             :                 }
     888           0 :                 goto doreturn;
     889             :         }
     890             :         /* l is not dense, so we need to look at the values and check
     891             :          * whether they are in the range [lo..hi) */
     892             : 
     893             :         /* do indirection through the candidate list to look at the
     894             :          * value */
     895             : 
     896           0 :         *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     897           0 :         if (r2p)
     898           0 :                 *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     899           0 :         if (r3p)
     900           0 :                 *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     901           0 :         if (r1 == NULL || (r2p != NULL && r2 == NULL) || (r3p != NULL && r3 == NULL)) {
     902           0 :                 BBPreclaim(r1);
     903           0 :                 BBPreclaim(r2);
     904           0 :                 BBPreclaim(r3);
     905           0 :                 return GDK_FAIL;
     906             :         }
     907           0 :         o1p = (oid *) Tloc(r1, 0);
     908           0 :         if (r2) {
     909           0 :                 o2p = (oid *) Tloc(r2, 0);
     910           0 :                 r2->tnil = false;
     911           0 :                 r2->tnonil = true;
     912             :         }
     913           0 :         if (r3) {
     914           0 :                 m3p = (bit *) Tloc(r3, 0);
     915           0 :                 r3->tnil = false;
     916           0 :                 r3->tnonil = true;
     917             :         }
     918           0 :         if (complex_cand(l)) {
     919           0 :                 ltsorted = l->tsorted;
     920           0 :                 ltrevsorted = l->trevsorted;
     921           0 :                 ltkey = l->tkey;
     922           0 :                 TIMEOUT_LOOP(lci->ncand, timeoffset) {
     923           0 :                         oid c = canditer_next(lci);
     924             : 
     925           0 :                         o = BUNtoid(l, c - l->hseqbase);
     926           0 :                         *o1p++ = c;
     927           0 :                         if (r2) {
     928           0 :                                 if (o >= lo && o < hi) {
     929           0 :                                         *o2p++ = o - r->tseqbase + r->hseqbase;
     930             :                                 } else {
     931           0 :                                         *o2p++ = oid_nil;
     932           0 :                                         r2->tnil = true;
     933           0 :                                         r2->tnonil = false;
     934             :                                 }
     935             :                         }
     936           0 :                         if (r3) {
     937           0 :                                 if (is_oid_nil(o)) {
     938           0 :                                         *m3p++ = bit_nil;
     939           0 :                                         r3->tnil = true;
     940           0 :                                         r3->tnonil = false;
     941             :                                 } else {
     942           0 :                                         *m3p++ = (o >= lo && o < hi);
     943             :                                 }
     944             :                         }
     945             :                 }
     946           0 :                 TIMEOUT_CHECK(timeoffset,
     947             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     948             :         } else {
     949           0 :                 BATiter li = bat_iterator(l);
     950           0 :                 const oid *lvals = (const oid *) li.base;
     951           0 :                 ltsorted = li.sorted;
     952           0 :                 ltrevsorted = li.revsorted;
     953           0 :                 ltkey = li.key;
     954           0 :                 TIMEOUT_LOOP(lci->ncand, timeoffset) {
     955           0 :                         oid c = canditer_next(lci);
     956             : 
     957           0 :                         o = lvals[c - l->hseqbase];
     958           0 :                         *o1p++ = c;
     959           0 :                         if (r2) {
     960           0 :                                 if (o >= lo && o < hi) {
     961           0 :                                         *o2p++ = o - r->tseqbase + r->hseqbase;
     962             :                                 } else {
     963           0 :                                         *o2p++ = oid_nil;
     964           0 :                                         r2->tnil = true;
     965           0 :                                         r2->tnonil = false;
     966             :                                 }
     967             :                         }
     968           0 :                         if (r3) {
     969           0 :                                 if (is_oid_nil(o)) {
     970           0 :                                         *m3p++ = bit_nil;
     971           0 :                                         r3->tnil = true;
     972           0 :                                         r3->tnonil = false;
     973             :                                 } else {
     974           0 :                                         *m3p++ = (o >= lo && o < hi);
     975             :                                 }
     976             :                         }
     977             :                 }
     978           0 :                 bat_iterator_end(&li);
     979           0 :                 TIMEOUT_CHECK(timeoffset,
     980             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout));
     981             :         }
     982           0 :         r1->tsorted = true;
     983           0 :         r1->trevsorted = BATcount(r1) <= 1;
     984           0 :         r1->tkey = true;
     985           0 :         r1->tseqbase = oid_nil;
     986           0 :         r1->tnil = false;
     987           0 :         r1->tnonil = true;
     988           0 :         BATsetcount(r1, lci->ncand);
     989           0 :         if (r2) {
     990           0 :                 BATsetcount(r2, lci->ncand);
     991           0 :                 r2->tsorted = ltsorted || BATcount(r2) <= 1;
     992           0 :                 r2->trevsorted = ltrevsorted || BATcount(r2) <= 1;
     993           0 :                 r2->tkey = ltkey || BATcount(r2) <= 1;
     994           0 :                 r2->tseqbase = oid_nil;
     995             :         }
     996           0 :         if (r3) {
     997           0 :                 BATsetcount(r3, lci->ncand);
     998             :         }
     999             : 
    1000           0 :   doreturn:
    1001           0 :         if (r1->tkey)
    1002           0 :                 virtualize(r1);
    1003           0 :         if (r2 && r2->tkey && r2->tsorted)
    1004           0 :                 virtualize(r2);
    1005           0 :   doreturn2:
    1006       17455 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    1007             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    1008             :                   "sr=" ALGOOPTBATFMT ","
    1009             :                   "nil_on_miss=%s,only_misses=%s;%s %s "
    1010             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
    1011             :                   " (" LLFMT "usec)\n",
    1012             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1013             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    1014             :                   nil_on_miss ? "true" : "false",
    1015             :                   only_misses ? "true" : "false",
    1016             :                   swapped ? " swapped" : "", reason,
    1017             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
    1018             :                   GDKusec() - t0);
    1019             : 
    1020             :         return GDK_SUCCEED;
    1021             : 
    1022           0 :   bailout:
    1023           0 :         BBPreclaim(r1);
    1024           0 :         BBPreclaim(r2);
    1025             :         return GDK_FAIL;
    1026             : }
    1027             : 
    1028             : /* Implementation of mergejoin (see below) for the special case that
    1029             :  * the values are of type int, and some more conditions are met. */
    1030             : static gdk_return
    1031        4913 : mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1032             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
    1033             :               const char *reason)
    1034             : {
    1035        4913 :         BAT *r1, *r2;
    1036        4913 :         BUN lstart, lend, lcnt;
    1037        4913 :         BUN rstart, rend;
    1038        4913 :         BUN lscan, rscan;       /* opportunistic scan window */
    1039        4913 :         BUN maxsize;
    1040        4913 :         const int *lvals, *rvals;
    1041        4913 :         int v;
    1042        4913 :         BUN nl, nr;
    1043        4913 :         oid lv;
    1044        4913 :         BUN i;
    1045        4913 :         BATiter li = bat_iterator(l);
    1046        4913 :         BATiter ri = bat_iterator(r);
    1047             : 
    1048       14739 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1049        4913 :         assert(ri.sorted || ri.revsorted);
    1050             : 
    1051        4913 :         MT_thread_setalgorithm(__func__);
    1052        4913 :         lstart = rstart = 0;
    1053        4913 :         lend = BATcount(l);
    1054        4913 :         lcnt = lend - lstart;
    1055        4913 :         rend = BATcount(r);
    1056        4913 :         lvals = (const int *) li.base;
    1057        4913 :         rvals = (const int *) ri.base;
    1058        4913 :         size_t counter = 0;
    1059        4913 :         lng timeoffset = 0;
    1060        4913 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1061        4913 :         if (qry_ctx != NULL) {
    1062        4158 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    1063             :         }
    1064             : 
    1065             :         /* basic properties will be adjusted if necessary later on,
    1066             :          * they were initially set by joininitresults() */
    1067             : 
    1068        4913 :         if (lend == 0 || rend == 0) {
    1069             :                 /* there are no matches */
    1070           0 :                 bat_iterator_end(&li);
    1071           0 :                 bat_iterator_end(&ri);
    1072           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1073           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1074             :                                0, false, false, __func__, t0);
    1075             :         }
    1076             : 
    1077        4913 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1078        4913 :                                        li.key, ri.key, false, false,
    1079             :                                        false, false, estimate)) == BUN_NONE) {
    1080           0 :                 bat_iterator_end(&li);
    1081           0 :                 bat_iterator_end(&ri);
    1082           0 :                 return GDK_FAIL;
    1083             :         }
    1084        4913 :         r1 = *r1p;
    1085        4913 :         r2 = r2p ? *r2p : NULL;
    1086             : 
    1087             :         /* determine opportunistic scan window for l and r */
    1088       30137 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1089       25224 :                 nl >>= 1;
    1090       35938 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
    1091       31025 :                 nr >>= 1;
    1092             : 
    1093        4913 :         if (!nil_matches) {
    1094             :                 /* skip over nils at the start of the columns */
    1095        3115 :                 if (lscan < lend - lstart && is_int_nil(lvals[lstart + lscan])) {
    1096           0 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1097             :                                                lend - 1, int_nil, 1, 1);
    1098             :                 } else {
    1099        3117 :                         while (is_int_nil(lvals[lstart]))
    1100           2 :                                 lstart++;
    1101             :                 }
    1102        3115 :                 if (rscan < rend - rstart && is_int_nil(rvals[rstart + rscan])) {
    1103           0 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1104             :                                                rend - 1, int_nil, 1, 1);
    1105             :                 } else {
    1106        3115 :                         while (is_int_nil(rvals[rstart]))
    1107           0 :                                 rstart++;
    1108             :                 }
    1109             :         }
    1110             :         /* from here on we don't have to worry about nil values */
    1111             : 
    1112      413621 :         while (lstart < lend && rstart < rend) {
    1113      409684 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    1114             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    1115             : 
    1116      409684 :                 v = rvals[rstart];
    1117             : 
    1118      409684 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
    1119         903 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1120             :                                                lend - 1, v, 1, 0);
    1121             :                 } else {
    1122             :                         /* scan l for v */
    1123      413804 :                         while (lstart < lend && lvals[lstart] < v)
    1124        5023 :                                 lstart++;
    1125             :                 }
    1126      408820 :                 if (lstart >= lend) {
    1127             :                         /* nothing found */
    1128             :                         break;
    1129             :                 }
    1130             : 
    1131             :                 /* Here we determine the next value in l that we are
    1132             :                  * going to try to match in r.  We will also count the
    1133             :                  * number of occurrences in l of that value.
    1134             :                  * Afterwards, v points to the value and nl is the
    1135             :                  * number of times it occurs.  Also, lstart will
    1136             :                  * point to the next value to be considered (ready for
    1137             :                  * the next iteration).
    1138             :                  * If there are many equal values in l (more than
    1139             :                  * lscan), we will use binary search to find the end
    1140             :                  * of the sequence.  Obviously, we can do this only if
    1141             :                  * l is actually sorted (lscan > 0). */
    1142      408562 :                 nl = 1;         /* we'll match (at least) one in l */
    1143      408562 :                 nr = 0;         /* maybe we won't match anything in r */
    1144      408562 :                 v = lvals[lstart];
    1145      408562 :                 if (li.key) {
    1146             :                         /* if l is key, there is a single value */
    1147       61581 :                         lstart++;
    1148      346981 :                 } else if (lscan < lend - lstart &&
    1149      341929 :                            v == lvals[lstart + lscan]) {
    1150             :                         /* lots of equal values: use binary search to
    1151             :                          * find end */
    1152       25181 :                         nl = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1153             :                                            lend - 1, v, 1, 1);
    1154       25142 :                         nl -= lstart;
    1155       25142 :                         lstart += nl;
    1156             :                 } else {
    1157             :                         /* just scan */
    1158     1511208 :                         while (++lstart < lend && v == lvals[lstart])
    1159     1189408 :                                 nl++;
    1160             :                 }
    1161             :                 /* lstart points one beyond the value we're
    1162             :                  * going to match: ready for the next iteration. */
    1163             : 
    1164             :                 /* First we find the first value in r that is at
    1165             :                  * least as large as v, then we find the first
    1166             :                  * value in r that is larger than v.  The difference
    1167             :                  * is the number of values equal to v and is stored in
    1168             :                  * nr.
    1169             :                  * We will use binary search on r to find both ends of
    1170             :                  * the sequence of values that are equal to v in case
    1171             :                  * the position is "too far" (more than rscan
    1172             :                  * away). */
    1173             : 
    1174             :                 /* first find the location of the first value in r
    1175             :                  * that is >= v, then find the location of the first
    1176             :                  * value in r that is > v; the difference is the
    1177             :                  * number of values equal to v */
    1178             : 
    1179             :                 /* look ahead a little (rscan) in r to see whether
    1180             :                  * we're better off doing a binary search */
    1181      408523 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
    1182             :                         /* value too far away in r: use binary
    1183             :                          * search */
    1184       17871 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1185             :                                                rend - 1, v, 1, 0);
    1186             :                 } else {
    1187             :                         /* scan r for v */
    1188      413653 :                         while (rstart < rend && rvals[rstart] < v)
    1189       23001 :                                 rstart++;
    1190             :                 }
    1191      408857 :                 if (rstart == rend) {
    1192             :                         /* nothing found */
    1193             :                         break;
    1194             :                 }
    1195             : 
    1196             :                 /* now find the end of the sequence of equal values v */
    1197             : 
    1198             :                 /* if r is key, there is zero or one match, otherwise
    1199             :                  * look ahead a little (rscan) in r to see whether
    1200             :                  * we're better off doing a binary search */
    1201      408140 :                 if (ri.key) {
    1202      202549 :                         if (rstart < rend && v == rvals[rstart]) {
    1203      201907 :                                 nr = 1;
    1204      201907 :                                 rstart++;
    1205             :                         }
    1206      205591 :                 } else if (rscan < rend - rstart &&
    1207      204779 :                            v == rvals[rstart + rscan]) {
    1208             :                         /* range too large: use binary search */
    1209       69576 :                         nr = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1210             :                                            rend - 1, v, 1, 1);
    1211       70032 :                         nr -= rstart;
    1212       70032 :                         rstart += nr;
    1213             :                 } else {
    1214             :                         /* scan r for end of range */
    1215     1086727 :                         while (rstart < rend && v == rvals[rstart]) {
    1216      950712 :                                 nr++;
    1217      950712 :                                 rstart++;
    1218             :                         }
    1219             :                 }
    1220             :                 /* rstart points to first value > v or end of
    1221             :                  * r, and nr is the number of values in r that
    1222             :                  * are equal to v */
    1223      407954 :                 if (nr == 0) {
    1224             :                         /* no entries in r found */
    1225         881 :                         continue;
    1226             :                 }
    1227             :                 /* make space: nl values in l match nr values in r, so
    1228             :                  * we need to add nl * nr values in the results */
    1229      407715 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1230           0 :                         goto bailout;
    1231             : 
    1232             :                 /* maintain properties */
    1233      407827 :                 if (nl > 1) {
    1234             :                         /* value occurs multiple times in l, so entry
    1235             :                          * in r will be repeated multiple times: hence
    1236             :                          * r2 is not key and not dense */
    1237      279938 :                         if (r2) {
    1238      243035 :                                 r2->tkey = false;
    1239      243035 :                                 r2->tseqbase = oid_nil;
    1240             :                         }
    1241             :                         /* multiple different values will be inserted
    1242             :                          * in r1 (always in order), so not reverse
    1243             :                          * ordered anymore */
    1244      279938 :                         r1->trevsorted = false;
    1245             :                 }
    1246      407827 :                 if (nr > 1) {
    1247             :                         /* value occurs multiple times in r, so entry
    1248             :                          * in l will be repeated multiple times: hence
    1249             :                          * r1 is not key and not dense */
    1250      165610 :                         r1->tkey = false;
    1251      165610 :                         r1->tseqbase = oid_nil;
    1252             :                         /* multiple different values will be inserted
    1253             :                          * in r2 (in order), so not reverse ordered
    1254             :                          * anymore */
    1255      165610 :                         if (r2) {
    1256      114917 :                                 r2->trevsorted = false;
    1257      114917 :                                 if (nl > 1) {
    1258             :                                         /* multiple values in l match
    1259             :                                          * multiple values in r, so an
    1260             :                                          * ordered sequence will be
    1261             :                                          * inserted multiple times in
    1262             :                                          * r2, so r2 is not ordered
    1263             :                                          * anymore */
    1264       83661 :                                         r2->tsorted = false;
    1265             :                                 }
    1266             :                         }
    1267             :                 }
    1268      407827 :                 if (BATcount(r1) > 0) {
    1269             :                         /* a new, higher value will be inserted into
    1270             :                          * r1, so r1 is not reverse ordered anymore */
    1271      403986 :                         r1->trevsorted = false;
    1272             :                         /* a new higher value will be added to r2 */
    1273      403986 :                         if (r2) {
    1274      334684 :                                 r2->trevsorted = false;
    1275             :                         }
    1276      403986 :                         if (BATtdense(r1) &&
    1277      232243 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1278          72 :                                 r1->tseqbase = oid_nil;
    1279             :                         }
    1280             :                 }
    1281             : 
    1282      407827 :                 if (r2 &&
    1283      337800 :                     BATcount(r2) > 0 &&
    1284      334643 :                     BATtdense(r2) &&
    1285       80603 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1286         525 :                         r2->tseqbase = oid_nil;
    1287             :                 }
    1288             : 
    1289             :                 /* insert values */
    1290      407827 :                 lv = l->hseqbase + lstart - nl;
    1291    16136397 :                 for (i = 0; i < nl; i++) {
    1292             :                         BUN j;
    1293             : 
    1294   114942932 :                         for (j = 0; j < nr; j++) {
    1295    99214362 :                                 APPEND(r1, lv);
    1296             :                         }
    1297    15728570 :                         if (r2) {
    1298    15525146 :                                 oid rv = r->hseqbase + rstart - nr;
    1299             : 
    1300   106350325 :                                 for (j = 0; j < nr; j++) {
    1301    90825179 :                                         APPEND(r2, rv);
    1302    90825179 :                                         rv++;
    1303             :                                 }
    1304             :                         }
    1305    15728570 :                         lv++;
    1306             :                 }
    1307             :         }
    1308             :         /* also set other bits of heap to correct value to indicate size */
    1309        4912 :         BATsetcount(r1, BATcount(r1));
    1310        4909 :         if (r2) {
    1311        3712 :                 BATsetcount(r2, BATcount(r2));
    1312        3713 :                 assert(BATcount(r1) == BATcount(r2));
    1313             :         }
    1314        4910 :         if (BATcount(r1) > 0) {
    1315        4142 :                 if (BATtdense(r1))
    1316        3385 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1317        4142 :                 if (r2 && BATtdense(r2))
    1318        2231 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1319             :         } else {
    1320         768 :                 r1->tseqbase = 0;
    1321         768 :                 if (r2) {
    1322         325 :                         r2->tseqbase = 0;
    1323             :                 }
    1324             :         }
    1325        4910 :         bat_iterator_end(&li);
    1326        4913 :         bat_iterator_end(&ri);
    1327        4912 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1328             :                   "nil_matches=%s;%s %s "
    1329             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1330             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1331             :                   nil_matches ? "true" : "false",
    1332             :                   swapped ? " swapped" : "", reason,
    1333             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1334             :                   GDKusec() - t0);
    1335             : 
    1336             :         return GDK_SUCCEED;
    1337             : 
    1338           0 :   bailout:
    1339           0 :         bat_iterator_end(&li);
    1340           0 :         bat_iterator_end(&ri);
    1341           0 :         BBPreclaim(r1);
    1342           0 :         BBPreclaim(r2);
    1343             :         return GDK_FAIL;
    1344             : }
    1345             : 
    1346             : /* Implementation of mergejoin (see below) for the special case that
    1347             :  * the values are of type lng, and some more conditions are met. */
    1348             : static gdk_return
    1349         210 : mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1350             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
    1351             :               const char *reason)
    1352             : {
    1353         210 :         BAT *r1, *r2;
    1354         210 :         BUN lstart, lend, lcnt;
    1355         210 :         BUN rstart, rend;
    1356         210 :         BUN lscan, rscan;       /* opportunistic scan window */
    1357         210 :         BUN maxsize;
    1358         210 :         const lng *lvals, *rvals;
    1359         210 :         lng v;
    1360         210 :         BUN nl, nr;
    1361         210 :         oid lv;
    1362         210 :         BUN i;
    1363         210 :         BATiter li = bat_iterator(l);
    1364         210 :         BATiter ri = bat_iterator(r);
    1365             : 
    1366         630 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1367         210 :         assert(ri.sorted || ri.revsorted);
    1368             : 
    1369         210 :         MT_thread_setalgorithm(__func__);
    1370         210 :         lstart = rstart = 0;
    1371         210 :         lend = BATcount(l);
    1372         210 :         lcnt = lend - lstart;
    1373         210 :         rend = BATcount(r);
    1374         210 :         lvals = (const lng *) li.base;
    1375         210 :         rvals = (const lng *) ri.base;
    1376         210 :         size_t counter = 0;
    1377         210 :         lng timeoffset = 0;
    1378         210 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1379         210 :         if (qry_ctx != NULL) {
    1380         210 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    1381             :         }
    1382             : 
    1383             :         /* basic properties will be adjusted if necessary later on,
    1384             :          * they were initially set by joininitresults() */
    1385             : 
    1386         210 :         if (lend == 0 || rend == 0) {
    1387             :                 /* there are no matches */
    1388           0 :                 bat_iterator_end(&li);
    1389           0 :                 bat_iterator_end(&ri);
    1390           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1391           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1392             :                                0, false, false, __func__, t0);
    1393             :         }
    1394             : 
    1395         210 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1396         210 :                                        li.key, ri.key, false, false,
    1397             :                                        false, false, estimate)) == BUN_NONE) {
    1398           0 :                 bat_iterator_end(&li);
    1399           0 :                 bat_iterator_end(&ri);
    1400           0 :                 return GDK_FAIL;
    1401             :         }
    1402         210 :         r1 = *r1p;
    1403         210 :         r2 = r2p ? *r2p : NULL;
    1404             : 
    1405             :         /* determine opportunistic scan window for l and r */
    1406        1373 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1407        1163 :                 nl >>= 1;
    1408        1337 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
    1409        1127 :                 nr >>= 1;
    1410             : 
    1411         210 :         if (!nil_matches) {
    1412             :                 /* skip over nils at the start of the columns */
    1413         100 :                 if (lscan < lend - lstart && is_lng_nil(lvals[lstart + lscan])) {
    1414           0 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1415             :                                                lend - 1, lng_nil, 1, 1);
    1416             :                 } else {
    1417         100 :                         while (is_lng_nil(lvals[lstart]))
    1418           0 :                                 lstart++;
    1419             :                 }
    1420         100 :                 if (rscan < rend - rstart && is_lng_nil(rvals[rstart + rscan])) {
    1421           0 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1422             :                                                rend - 1, lng_nil, 1, 1);
    1423             :                 } else {
    1424         100 :                         while (is_lng_nil(rvals[rstart]))
    1425           0 :                                 rstart++;
    1426             :                 }
    1427             :         }
    1428             :         /* from here on we don't have to worry about nil values */
    1429             : 
    1430      400065 :         while (lstart < lend && rstart < rend) {
    1431      399939 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    1432             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    1433      399939 :                 v = rvals[rstart];
    1434             : 
    1435      399939 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
    1436        1615 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1437             :                                                lend - 1, v, 1, 0);
    1438             :                 } else {
    1439             :                         /* scan l for v */
    1440      498126 :                         while (lstart < lend && lvals[lstart] < v)
    1441       99802 :                                 lstart++;
    1442             :                 }
    1443      400043 :                 if (lstart >= lend) {
    1444             :                         /* nothing found */
    1445             :                         break;
    1446             :                 }
    1447             : 
    1448             :                 /* Here we determine the next value in l that we are
    1449             :                  * going to try to match in r.  We will also count the
    1450             :                  * number of occurrences in l of that value.
    1451             :                  * Afterwards, v points to the value and nl is the
    1452             :                  * number of times it occurs.  Also, lstart will
    1453             :                  * point to the next value to be considered (ready for
    1454             :                  * the next iteration).
    1455             :                  * If there are many equal values in l (more than
    1456             :                  * lscan), we will use binary search to find the end
    1457             :                  * of the sequence.  Obviously, we can do this only if
    1458             :                  * l is actually sorted (lscan > 0). */
    1459      399977 :                 nl = 1;         /* we'll match (at least) one in l */
    1460      399977 :                 nr = 0;         /* maybe we won't match anything in r */
    1461      399977 :                 v = lvals[lstart];
    1462      399977 :                 if (li.key) {
    1463             :                         /* if l is key, there is a single value */
    1464      350672 :                         lstart++;
    1465       49305 :                 } else if (lscan < lend - lstart &&
    1466       49217 :                            v == lvals[lstart + lscan]) {
    1467             :                         /* lots of equal values: use binary search to
    1468             :                          * find end */
    1469         395 :                         nl = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1470             :                                            lend - 1, v, 1, 1);
    1471         395 :                         nl -= lstart;
    1472         395 :                         lstart += nl;
    1473             :                 } else {
    1474             :                         /* just scan */
    1475       87693 :                         while (++lstart < lend && v == lvals[lstart])
    1476       38783 :                                 nl++;
    1477             :                 }
    1478             :                 /* lstart points one beyond the value we're
    1479             :                  * going to match: ready for the next iteration. */
    1480             : 
    1481             :                 /* First we find the first value in r that is at
    1482             :                  * least as large as v, then we find the first
    1483             :                  * value in r that is larger than v.  The difference
    1484             :                  * is the number of values equal to v and is stored in
    1485             :                  * nr.
    1486             :                  * We will use binary search on r to find both ends of
    1487             :                  * the sequence of values that are equal to v in case
    1488             :                  * the position is "too far" (more than rscan
    1489             :                  * away). */
    1490             : 
    1491             :                 /* first find the location of the first value in r
    1492             :                  * that is >= v, then find the location of the first
    1493             :                  * value in r that is > v; the difference is the
    1494             :                  * number of values equal to v */
    1495             : 
    1496             :                 /* look ahead a little (rscan) in r to see whether
    1497             :                  * we're better off doing a binary search */
    1498      399977 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
    1499             :                         /* value too far away in r: use binary
    1500             :                          * search */
    1501        1546 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1502             :                                                rend - 1, v, 1, 0);
    1503             :                 } else {
    1504             :                         /* scan r for v */
    1505     1463441 :                         while (rstart < rend && rvals[rstart] < v)
    1506     1065010 :                                 rstart++;
    1507             :                 }
    1508      399935 :                 if (rstart == rend) {
    1509             :                         /* nothing found */
    1510             :                         break;
    1511             :                 }
    1512             : 
    1513             :                 /* now find the end of the sequence of equal values v */
    1514             : 
    1515             :                 /* if r is key, there is zero or one match, otherwise
    1516             :                  * look ahead a little (rscan) in r to see whether
    1517             :                  * we're better off doing a binary search */
    1518      399917 :                 if (ri.key) {
    1519      365127 :                         if (rstart < rend && v == rvals[rstart]) {
    1520       69721 :                                 nr = 1;
    1521       69721 :                                 rstart++;
    1522             :                         }
    1523       34790 :                 } else if (rscan < rend - rstart &&
    1524       34755 :                            v == rvals[rstart + rscan]) {
    1525             :                         /* range too large: use binary search */
    1526           0 :                         nr = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1527             :                                            rend - 1, v, 1, 1);
    1528           0 :                         nr -= rstart;
    1529           0 :                         rstart += nr;
    1530             :                 } else {
    1531             :                         /* scan r for end of range */
    1532       76291 :                         while (rstart < rend && v == rvals[rstart]) {
    1533       41501 :                                 nr++;
    1534       41501 :                                 rstart++;
    1535             :                         }
    1536             :                 }
    1537             :                 /* rstart points to first value > v or end of
    1538             :                  * r, and nr is the number of values in r that
    1539             :                  * are equal to v */
    1540      104511 :                 if (nr == 0) {
    1541             :                         /* no entries in r found */
    1542      295370 :                         continue;
    1543             :                 }
    1544             :                 /* make space: nl values in l match nr values in r, so
    1545             :                  * we need to add nl * nr values in the results */
    1546      104547 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1547           0 :                         goto bailout;
    1548             : 
    1549             :                 /* maintain properties */
    1550      104485 :                 if (nl > 1) {
    1551             :                         /* value occurs multiple times in l, so entry
    1552             :                          * in r will be repeated multiple times: hence
    1553             :                          * r2 is not key and not dense */
    1554       10112 :                         if (r2) {
    1555        5109 :                                 r2->tkey = false;
    1556        5109 :                                 r2->tseqbase = oid_nil;
    1557             :                         }
    1558             :                         /* multiple different values will be inserted
    1559             :                          * in r1 (always in order), so not reverse
    1560             :                          * ordered anymore */
    1561       10112 :                         r1->trevsorted = false;
    1562             :                 }
    1563      104485 :                 if (nr > 1) {
    1564             :                         /* value occurs multiple times in r, so entry
    1565             :                          * in l will be repeated multiple times: hence
    1566             :                          * r1 is not key and not dense */
    1567        1946 :                         r1->tkey = false;
    1568        1946 :                         r1->tseqbase = oid_nil;
    1569             :                         /* multiple different values will be inserted
    1570             :                          * in r2 (in order), so not reverse ordered
    1571             :                          * anymore */
    1572        1946 :                         if (r2) {
    1573        1946 :                                 r2->trevsorted = false;
    1574        1946 :                                 if (nl > 1) {
    1575             :                                         /* multiple values in l match
    1576             :                                          * multiple values in r, so an
    1577             :                                          * ordered sequence will be
    1578             :                                          * inserted multiple times in
    1579             :                                          * r2, so r2 is not ordered
    1580             :                                          * anymore */
    1581          51 :                                         r2->tsorted = false;
    1582             :                                 }
    1583             :                         }
    1584             :                 }
    1585      104485 :                 if (BATcount(r1) > 0) {
    1586             :                         /* a new, higher value will be inserted into
    1587             :                          * r1, so r1 is not reverse ordered anymore */
    1588      104241 :                         r1->trevsorted = false;
    1589             :                         /* a new higher value will be added to r2 */
    1590      104241 :                         if (r2) {
    1591       97534 :                                 r2->trevsorted = false;
    1592             :                         }
    1593      104241 :                         if (BATtdense(r1) &&
    1594       36405 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1595          57 :                                 r1->tseqbase = oid_nil;
    1596             :                         }
    1597             :                 }
    1598             : 
    1599      104485 :                 if (r2 &&
    1600       97769 :                     BATcount(r2) > 0 &&
    1601       97535 :                     BATtdense(r2) &&
    1602       35259 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1603          22 :                         r2->tseqbase = oid_nil;
    1604             :                 }
    1605             : 
    1606             :                 /* insert values */
    1607      104485 :                 lv = l->hseqbase + lstart - nl;
    1608      248703 :                 for (i = 0; i < nl; i++) {
    1609             :                         BUN j;
    1610             : 
    1611      296489 :                         for (j = 0; j < nr; j++) {
    1612      152271 :                                 APPEND(r1, lv);
    1613             :                         }
    1614      144218 :                         if (r2) {
    1615      125878 :                                 oid rv = r->hseqbase + rstart - nr;
    1616             : 
    1617      259740 :                                 for (j = 0; j < nr; j++) {
    1618      133862 :                                         APPEND(r2, rv);
    1619      133862 :                                         rv++;
    1620             :                                 }
    1621             :                         }
    1622      144218 :                         lv++;
    1623             :                 }
    1624             :         }
    1625             :         /* also set other bits of heap to correct value to indicate size */
    1626         210 :         BATsetcount(r1, BATcount(r1));
    1627         209 :         if (r2) {
    1628         191 :                 BATsetcount(r2, BATcount(r2));
    1629         191 :                 assert(BATcount(r1) == BATcount(r2));
    1630             :         }
    1631         209 :         if (BATcount(r1) > 0) {
    1632         185 :                 if (BATtdense(r1))
    1633         116 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1634         185 :                 if (r2 && BATtdense(r2))
    1635         115 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1636             :         } else {
    1637          24 :                 r1->tseqbase = 0;
    1638          24 :                 if (r2) {
    1639          14 :                         r2->tseqbase = 0;
    1640             :                 }
    1641             :         }
    1642         209 :         bat_iterator_end(&li);
    1643         209 :         bat_iterator_end(&ri);
    1644         210 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1645             :                   "nil_matches=%s;%s %s "
    1646             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1647             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1648             :                   nil_matches ? "true" : "false",
    1649             :                   swapped ? " swapped" : "", reason,
    1650             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1651             :                   GDKusec() - t0);
    1652             : 
    1653             :         return GDK_SUCCEED;
    1654             : 
    1655           0 :   bailout:
    1656           0 :         bat_iterator_end(&li);
    1657           0 :         bat_iterator_end(&ri);
    1658           0 :         BBPreclaim(r1);
    1659           0 :         BBPreclaim(r2);
    1660             :         return GDK_FAIL;
    1661             : }
    1662             : 
    1663             : /* Implementation of mergejoin (see below) for the special case that
    1664             :  * the values are of type oid, and the right-hand side is a candidate
    1665             :  * list with exception, and some more conditions are met. */
    1666             : static gdk_return
    1667           0 : mergejoin_cand(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1668             :                bool nil_matches, BUN estimate, lng t0, bool swapped,
    1669             :                const char *reason)
    1670             : {
    1671             : /* the comments in this function have not been checked after making a
    1672             :  * copy of mergejoin below and adapting it to a mask right-hand side */
    1673           0 :         BAT *r1, *r2;
    1674           0 :         BUN lstart, lend, lcnt;
    1675           0 :         struct canditer lci, rci;
    1676           0 :         BUN lscan;              /* opportunistic scan window */
    1677           0 :         BUN maxsize;
    1678           0 :         const oid *lvals;
    1679           0 :         oid v;
    1680           0 :         BUN nl, nr;
    1681           0 :         oid lv;
    1682           0 :         BUN i;
    1683           0 :         BATiter li = bat_iterator(l);
    1684           0 :         BATiter ri = bat_iterator(r);
    1685             : 
    1686           0 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1687             : 
    1688           0 :         MT_thread_setalgorithm(__func__);
    1689           0 :         lstart = 0;
    1690           0 :         lend = BATcount(l);
    1691           0 :         lcnt = lend - lstart;
    1692           0 :         if (li.type == TYPE_void) {
    1693           0 :                 assert(!is_oid_nil(l->tseqbase));
    1694           0 :                 canditer_init(&lci, NULL, l);
    1695           0 :                 lcnt = lci.ncand;
    1696           0 :                 lvals = NULL;
    1697             :         } else {
    1698           0 :                 lci = (struct canditer) {.tpe = cand_dense}; /* not used */
    1699           0 :                 lvals = (const oid *) li.base;
    1700           0 :                 assert(lvals != NULL);
    1701             :         }
    1702             : 
    1703           0 :         assert(complex_cand(r));
    1704           0 :         canditer_init(&rci, NULL, r);
    1705           0 :         size_t counter = 0;
    1706           0 :         lng timeoffset = 0;
    1707           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1708           0 :         if (qry_ctx != NULL) {
    1709           0 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    1710             :         }
    1711             : 
    1712             :         /* basic properties will be adjusted if necessary later on,
    1713             :          * they were initially set by joininitresults() */
    1714             : 
    1715           0 :         if (lend == 0 || rci.ncand == 0) {
    1716             :                 /* there are no matches */
    1717           0 :                 bat_iterator_end(&li);
    1718           0 :                 bat_iterator_end(&ri);
    1719           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1720           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1721             :                                0, false, false, __func__, t0);
    1722             :         }
    1723             : 
    1724           0 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1725           0 :                                        li.key, ri.key, false, false,
    1726             :                                        false, false, estimate)) == BUN_NONE) {
    1727           0 :                 bat_iterator_end(&li);
    1728           0 :                 bat_iterator_end(&ri);
    1729           0 :                 return GDK_FAIL;
    1730             :         }
    1731           0 :         r1 = *r1p;
    1732           0 :         r2 = r2p ? *r2p : NULL;
    1733             : 
    1734             :         /* determine opportunistic scan window for l and r */
    1735           0 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1736           0 :                 nl >>= 1;
    1737             : 
    1738           0 :         if (!nil_matches) {
    1739             :                 /* skip over nils at the start of the columns */
    1740           0 :                 if (lscan < lend - lstart && lvals && is_oid_nil(lvals[lstart + lscan])) {
    1741           0 :                         lstart = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1742             :                                                lend - 1, oid_nil, 1, 1);
    1743           0 :                 } else if (lvals) {
    1744           0 :                         while (is_oid_nil(lvals[lstart]))
    1745           0 :                                 lstart++;
    1746             :                 } /* else l is candidate list: no nils */
    1747             :         }
    1748             :         /* from here on we don't have to worry about nil values */
    1749             : 
    1750           0 :         while (lstart < lend && rci.next < rci.ncand) {
    1751           0 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    1752             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    1753           0 :                 v = canditer_peek(&rci);
    1754             : 
    1755           0 :                 if (lvals) {
    1756           0 :                         if (lscan < lend - lstart &&
    1757           0 :                             lvals[lstart + lscan] < v) {
    1758           0 :                                 lstart = binsearch_oid(NULL, 0, lvals,
    1759             :                                                        lstart + lscan,
    1760             :                                                        lend - 1, v, 1, 0);
    1761             :                         } else {
    1762             :                                 /* scan l for v */
    1763           0 :                                 while (lstart < lend && lvals[lstart] < v)
    1764           0 :                                         lstart++;
    1765             :                         }
    1766             :                 } else {
    1767           0 :                         lstart = canditer_search(&lci, v, true);
    1768           0 :                         canditer_setidx(&lci, lstart);
    1769             :                 }
    1770           0 :                 if (lstart >= lend) {
    1771             :                         /* nothing found */
    1772             :                         break;
    1773             :                 }
    1774             : 
    1775             :                 /* Here we determine the next value in l that we are
    1776             :                  * going to try to match in r.  We will also count the
    1777             :                  * number of occurrences in l of that value.
    1778             :                  * Afterwards, v points to the value and nl is the
    1779             :                  * number of times it occurs.  Also, lstart will
    1780             :                  * point to the next value to be considered (ready for
    1781             :                  * the next iteration).
    1782             :                  * If there are many equal values in l (more than
    1783             :                  * lscan), we will use binary search to find the end
    1784             :                  * of the sequence.  Obviously, we can do this only if
    1785             :                  * l is actually sorted (lscan > 0). */
    1786           0 :                 nl = 1;         /* we'll match (at least) one in l */
    1787           0 :                 nr = 0;         /* maybe we won't match anything in r */
    1788           0 :                 v = lvals ? lvals[lstart] : canditer_next(&lci);
    1789           0 :                 if (li.key || lvals == NULL) {
    1790             :                         /* if l is key, there is a single value */
    1791           0 :                         lstart++;
    1792           0 :                 } else if (lscan < lend - lstart &&
    1793           0 :                            v == lvals[lstart + lscan]) {
    1794             :                         /* lots of equal values: use binary search to
    1795             :                          * find end */
    1796           0 :                         nl = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1797             :                                            lend - 1, v, 1, 1);
    1798           0 :                         nl -= lstart;
    1799           0 :                         lstart += nl;
    1800             :                 } else {
    1801             :                         /* just scan */
    1802           0 :                         while (++lstart < lend && v == lvals[lstart])
    1803           0 :                                 nl++;
    1804             :                 }
    1805             :                 /* lstart points one beyond the value we're
    1806             :                  * going to match: ready for the next iteration. */
    1807             : 
    1808             :                 /* First we find the first value in r that is at
    1809             :                  * least as large as v, then we find the first
    1810             :                  * value in r that is larger than v.  The difference
    1811             :                  * is the number of values equal to v and is stored in
    1812             :                  * nr.
    1813             :                  * We will use binary search on r to find both ends of
    1814             :                  * the sequence of values that are equal to v in case
    1815             :                  * the position is "too far" (more than rscan
    1816             :                  * away). */
    1817             : 
    1818             :                 /* first find the location of the first value in r
    1819             :                  * that is >= v, then find the location of the first
    1820             :                  * value in r that is > v; the difference is the
    1821             :                  * number of values equal to v */
    1822           0 :                 nr = canditer_search(&rci, v, true);
    1823           0 :                 canditer_setidx(&rci, nr);
    1824           0 :                 if (nr == rci.ncand) {
    1825             :                         /* nothing found */
    1826             :                         break;
    1827             :                 }
    1828             : 
    1829             :                 /* now find the end of the sequence of equal values v */
    1830             : 
    1831             :                 /* if r is key, there is zero or one match, otherwise
    1832             :                  * look ahead a little (rscan) in r to see whether
    1833             :                  * we're better off doing a binary search */
    1834           0 :                 if (canditer_peek(&rci) == v) {
    1835           0 :                         nr = 1;
    1836           0 :                         canditer_next(&rci);
    1837             :                 } else {
    1838             :                         /* rci points to first value > v or end of
    1839             :                          * r, and nr is the number of values in r that
    1840             :                          * are equal to v */
    1841             :                         /* no entries in r found */
    1842           0 :                         continue;
    1843             :                 }
    1844             :                 /* make space: nl values in l match nr values in r, so
    1845             :                  * we need to add nl * nr values in the results */
    1846           0 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1847           0 :                         goto bailout;
    1848             : 
    1849             :                 /* maintain properties */
    1850           0 :                 if (nl > 1) {
    1851             :                         /* value occurs multiple times in l, so entry
    1852             :                          * in r will be repeated multiple times: hence
    1853             :                          * r2 is not key and not dense */
    1854           0 :                         if (r2) {
    1855           0 :                                 r2->tkey = false;
    1856           0 :                                 r2->tseqbase = oid_nil;
    1857             :                         }
    1858             :                         /* multiple different values will be inserted
    1859             :                          * in r1 (always in order), so not reverse
    1860             :                          * ordered anymore */
    1861           0 :                         r1->trevsorted = false;
    1862             :                 }
    1863           0 :                 if (BATcount(r1) > 0) {
    1864             :                         /* a new, higher value will be inserted into
    1865             :                          * r1, so r1 is not reverse ordered anymore */
    1866           0 :                         r1->trevsorted = false;
    1867             :                         /* a new higher value will be added to r2 */
    1868           0 :                         if (r2) {
    1869           0 :                                 r2->trevsorted = false;
    1870             :                         }
    1871           0 :                         if (BATtdense(r1) &&
    1872           0 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1873           0 :                                 r1->tseqbase = oid_nil;
    1874             :                         }
    1875             :                 }
    1876             : 
    1877           0 :                 if (r2 &&
    1878           0 :                     BATcount(r2) > 0 &&
    1879           0 :                     BATtdense(r2) &&
    1880           0 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rci.next - nr) {
    1881           0 :                         r2->tseqbase = oid_nil;
    1882             :                 }
    1883             : 
    1884             :                 /* insert values */
    1885           0 :                 lv = l->hseqbase + lstart - nl;
    1886           0 :                 for (i = 0; i < nl; i++) {
    1887             :                         BUN j;
    1888             : 
    1889           0 :                         for (j = 0; j < nr; j++) {
    1890           0 :                                 APPEND(r1, lv);
    1891             :                         }
    1892           0 :                         if (r2) {
    1893           0 :                                 oid rv = r->hseqbase + rci.next - nr;
    1894             : 
    1895           0 :                                 for (j = 0; j < nr; j++) {
    1896           0 :                                         APPEND(r2, rv);
    1897           0 :                                         rv++;
    1898             :                                 }
    1899             :                         }
    1900           0 :                         lv++;
    1901             :                 }
    1902             :         }
    1903             :         /* also set other bits of heap to correct value to indicate size */
    1904           0 :         BATsetcount(r1, BATcount(r1));
    1905           0 :         if (r2) {
    1906           0 :                 BATsetcount(r2, BATcount(r2));
    1907           0 :                 assert(BATcount(r1) == BATcount(r2));
    1908             :         }
    1909           0 :         if (BATcount(r1) > 0) {
    1910           0 :                 if (BATtdense(r1))
    1911           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1912           0 :                 if (r2 && BATtdense(r2))
    1913           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1914             :         } else {
    1915           0 :                 r1->tseqbase = 0;
    1916           0 :                 if (r2) {
    1917           0 :                         r2->tseqbase = 0;
    1918             :                 }
    1919             :         }
    1920           0 :         bat_iterator_end(&li);
    1921           0 :         bat_iterator_end(&ri);
    1922           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1923             :                   "nil_matches=%s;%s %s "
    1924             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1925             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1926             :                   nil_matches ? "true" : "false",
    1927             :                   swapped ? " swapped" : "", reason,
    1928             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1929             :                   GDKusec() - t0);
    1930             : 
    1931             :         return GDK_SUCCEED;
    1932             : 
    1933           0 :   bailout:
    1934           0 :         bat_iterator_end(&li);
    1935           0 :         bat_iterator_end(&ri);
    1936           0 :         BBPreclaim(r1);
    1937           0 :         BBPreclaim(r2);
    1938             :         return GDK_FAIL;
    1939             : }
    1940             : 
    1941             : /* Perform a "merge" join on l and r (if both are sorted) with
    1942             :  * optional candidate lists, or join using binary search on r if l is
    1943             :  * not sorted.
    1944             :  *
    1945             :  * If nil_matches is set, nil values are treated as ordinary values
    1946             :  * that can match; otherwise nil values never match.
    1947             :  *
    1948             :  * If nil_on_miss is set, a nil value is returned in r2 if there is no
    1949             :  * match in r for a particular value in l (left outer join).
    1950             :  *
    1951             :  * If semi is set, only a single set of values in r1/r2 is returned if
    1952             :  * there is a match of l in r, no matter how many matches there are in
    1953             :  * r; otherwise all matches are returned.
    1954             :  *
    1955             :  * If max_one is set, only a single match is allowed.  This is like
    1956             :  * semi, but enforces the single match.
    1957             :  *
    1958             :  * t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
    1959             :  */
    1960             : static gdk_return
    1961       11409 : mergejoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
    1962             :           struct canditer *restrict lci, struct canditer *restrict rci,
    1963             :           bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    1964             :           bool not_in, bool max_one, bool min_one, BUN estimate,
    1965             :           lng t0, bool swapped,
    1966             :           const char *reason)
    1967             : {
    1968             :         /* [lr]scan determine how far we look ahead in l/r in order to
    1969             :          * decide whether we want to do a binary search or a scan */
    1970       11409 :         BUN lscan, rscan;
    1971       11409 :         const void *lvals, *rvals; /* the values of l/r (NULL if dense) */
    1972       11409 :         const char *lvars, *rvars; /* the indirect values (NULL if fixed size) */
    1973       11409 :         const void *nil = ATOMnilptr(l->ttype);
    1974       11409 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    1975       11409 :         const void *v;          /* points to value under consideration */
    1976       11409 :         const void *prev = NULL;
    1977       11409 :         BUN nl, nr;
    1978       11409 :         bool insert_nil;
    1979             :         /* equal_order is set if we can scan both BATs in the same
    1980             :          * order, so when both are sorted or both are reverse sorted
    1981             :          * -- important to know in order to skip over values; if l is
    1982             :          * not sorted, this must be set to true and we will always do a
    1983             :          * binary search on all of r */
    1984       11409 :         bool equal_order;
    1985             :         /* [lr]ordering is either 1 or -1 depending on the order of
    1986             :          * l/r: it determines the comparison function used */
    1987       11409 :         int lordering, rordering;
    1988       11409 :         oid lv;
    1989       11409 :         BUN i, j;               /* counters */
    1990       11409 :         oid lval = oid_nil, rval = oid_nil; /* temporary space to point v to */
    1991       11409 :         struct canditer llci, rrci;
    1992       11409 :         struct canditer *mlci, xlci;
    1993       11409 :         struct canditer *mrci, xrci;
    1994             : 
    1995       11409 :         if (lci->tpe == cand_dense && lci->ncand == BATcount(l) &&
    1996       11389 :             rci->tpe == cand_dense && rci->ncand == BATcount(r) &&
    1997       10704 :             !nil_on_miss && !semi && !max_one && !min_one && !only_misses &&
    1998        6214 :             !not_in &&
    1999        5236 :             l->tsorted && r->tsorted) {
    2000             :                 /* special cases with far fewer options */
    2001        5222 :                 if (complex_cand(r))
    2002           0 :                         return mergejoin_cand(r1p, r2p, l, r, nil_matches,
    2003             :                                               estimate, t0, swapped, __func__);
    2004       10401 :                 switch (ATOMbasetype(l->ttype)) {
    2005        4913 :                 case TYPE_int:
    2006        4913 :                         return mergejoin_int(r1p, r2p, l, r, nil_matches,
    2007             :                                              estimate, t0, swapped, __func__);
    2008         210 :                 case TYPE_lng:
    2009         210 :                         return mergejoin_lng(r1p, r2p, l, r, nil_matches,
    2010             :                                              estimate, t0, swapped, __func__);
    2011             :                 }
    2012             :         }
    2013             : 
    2014       18808 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2015        6286 :         assert(r->tsorted || r->trevsorted);
    2016             : 
    2017        6286 :         size_t counter = 0;
    2018        6286 :         lng timeoffset = 0;
    2019        6286 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2020        6285 :         if (qry_ctx != NULL) {
    2021        4676 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    2022             :         }
    2023             : 
    2024        6285 :         BATiter li = bat_iterator(l);
    2025        6286 :         BATiter ri = bat_iterator(r);
    2026        6286 :         MT_thread_setalgorithm(__func__);
    2027        6286 :         if (BATtvoid(l)) {
    2028             :                 /* l->ttype == TYPE_void && is_oid_nil(l->tseqbase) is
    2029             :                  * handled by selectjoin */
    2030          49 :                 assert(!is_oid_nil(l->tseqbase));
    2031          49 :                 canditer_init(&llci, NULL, l);
    2032          49 :                 lvals = NULL;
    2033             :         } else {
    2034        6237 :                 lvals = li.base;                              /* non NULL */
    2035        6237 :                 llci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2036             :         }
    2037        6286 :         rrci = (struct canditer) {.tpe = cand_dense};
    2038        6286 :         if (BATtvoid(r)) {
    2039           1 :                 if (!is_oid_nil(r->tseqbase))
    2040           1 :                         canditer_init(&rrci, NULL, r);
    2041             :                 rvals = NULL;
    2042             :         } else {
    2043        6285 :                 rvals = ri.base;
    2044             :         }
    2045        6286 :         if (li.vh && li.type) {
    2046         134 :                 assert(ri.vh && ri.type);
    2047         134 :                 lvars = li.vh->base;
    2048         134 :                 rvars = ri.vh->base;
    2049             :         } else {
    2050        6152 :                 assert(ri.vh == NULL || ri.type == TYPE_void);
    2051             :                 lvars = rvars = NULL;
    2052             :         }
    2053             :         /* if the var pointer is not NULL, then so is the val pointer */
    2054        6286 :         assert(lvars == NULL || lvals != NULL);
    2055        6286 :         assert(rvars == NULL || rvals != NULL);
    2056             : 
    2057        6286 :         const bool rhasnil = !ri.nonil &&
    2058        3201 :                 ((BATtvoid(r) && r->tseqbase == oid_nil) ||
    2059        3201 :                  (rvals && cmp(nil, VALUE(r, (ri.sorted ? rci->seq : canditer_last(rci)) - r->hseqbase)) == 0));
    2060          21 :         const bit defmark = rhasnil ? bit_nil : 0;
    2061             : 
    2062        6285 :         if (not_in && (rhasnil || (BATtvoid(l) && l->tseqbase == oid_nil))) {
    2063           0 :                 bat_iterator_end(&li);
    2064           0 :                 bat_iterator_end(&ri);
    2065           0 :                 return nomatch(r1p, r2p, r3p, l, r, lci, defmark, false, false,
    2066             :                                __func__, t0);
    2067             :         }
    2068             : 
    2069        6285 :         if ((!nil_matches &&
    2070        6131 :              ((li.type == TYPE_void && is_oid_nil(l->tseqbase)) ||
    2071        6131 :               (ri.type == TYPE_void && is_oid_nil(r->tseqbase)))) ||
    2072        6285 :             (li.type == TYPE_void && is_oid_nil(l->tseqbase) &&
    2073           0 :              (ri.nonil ||
    2074           0 :               (ri.type == TYPE_void && !is_oid_nil(r->tseqbase)))) ||
    2075        6285 :             (ri.type == TYPE_void && is_oid_nil(r->tseqbase) &&
    2076           0 :              (li.nonil ||
    2077           0 :               (li.type == TYPE_void && !is_oid_nil(l->tseqbase))))) {
    2078             :                 /* there are no matches */
    2079           0 :                 bat_iterator_end(&li);
    2080           0 :                 bat_iterator_end(&ri);
    2081           0 :                 return nomatch(r1p, r2p, r3p, l, r, lci, defmark,
    2082             :                                nil_on_miss, only_misses, __func__, t0);
    2083             :         }
    2084             : 
    2085       12571 :         BUN maxsize = joininitresults(r1p, r2p, r3p, lci->ncand, rci->ncand,
    2086        6285 :                                       li.key, ri.key, semi | max_one,
    2087             :                                       nil_on_miss, only_misses, min_one,
    2088             :                                       estimate);
    2089        6286 :         if (maxsize == BUN_NONE) {
    2090           0 :                 bat_iterator_end(&li);
    2091           0 :                 bat_iterator_end(&ri);
    2092           0 :                 return GDK_FAIL;
    2093             :         }
    2094        6286 :         BAT *r1 = *r1p;
    2095        6286 :         BAT *r2 = r2p ? *r2p : NULL;
    2096        6286 :         BAT *r3 = r3p ? *r3p : NULL;
    2097             : 
    2098        6286 :         if (lci->tpe == cand_mask) {
    2099           0 :                 mlci = lci;
    2100           0 :                 canditer_init(&xlci, l, NULL);
    2101           0 :                 lci = &xlci;
    2102             :         } else {
    2103        6286 :                 mlci = NULL;
    2104        6286 :                 xlci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2105             :         }
    2106        6286 :         if (rci->tpe == cand_mask) {
    2107           0 :                 mrci = rci;
    2108           0 :                 canditer_init(&xrci, r, NULL);
    2109           0 :                 rci = &xrci;
    2110             :         } else {
    2111        6286 :                 mrci = NULL;
    2112        6286 :                 xrci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2113             :         }
    2114             : 
    2115        6286 :         if (li.sorted || li.revsorted) {
    2116        5154 :                 equal_order = (li.sorted && ri.sorted) ||
    2117         194 :                         (li.revsorted && ri.revsorted &&
    2118          98 :                          !BATtvoid(l) && !BATtvoid(r));
    2119        5154 :                 lordering = li.sorted && (ri.sorted || !equal_order) ? 1 : -1;
    2120        5080 :                 rordering = equal_order ? lordering : -lordering;
    2121        5154 :                 if (!li.nonil && !nil_matches && !nil_on_miss && lvals != NULL) {
    2122             :                         /* find first non-nil */
    2123        2254 :                         nl = binsearch(NULL, 0, li.type, lvals, lvars, li.width, 0, BATcount(l), nil, li.sorted ? 1 : -1, li.sorted ? 1 : 0);
    2124        2179 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    2125        2179 :                         if (li.sorted) {
    2126        2104 :                                 canditer_setidx(lci, nl);
    2127          75 :                         } else if (li.revsorted) {
    2128          75 :                                 lci->ncand = nl;
    2129             :                         }
    2130             :                 }
    2131             :                 /* determine opportunistic scan window for l */
    2132       10308 :                 lscan = 4 + ilog2(lci->ncand);
    2133             :         } else {
    2134             :                 /* if l not sorted, we will always use binary search
    2135             :                  * on r */
    2136        1132 :                 assert(!BATtvoid(l)); /* void is always sorted */
    2137        1132 :                 lscan = 0;
    2138        1132 :                 equal_order = true;
    2139        1132 :                 lordering = 1;
    2140        1132 :                 rordering = ri.sorted ? 1 : -1;
    2141             :         }
    2142             :         /* determine opportunistic scan window for r; if l is not
    2143             :          * sorted this is only used to find range of equal values */
    2144        6286 :         rscan = 4 + ilog2(rci->ncand);
    2145             : 
    2146        6286 :         if (!equal_order) {
    2147             :                 /* we go through r backwards */
    2148          96 :                 canditer_setidx(rci, rci->ncand);
    2149             :         }
    2150             :         /* At this point the various variables that help us through
    2151             :          * the algorithm have been set.  The table explains them.  The
    2152             :          * first two columns are the inputs, the next three columns
    2153             :          * are the variables, the final two columns indicate how the
    2154             :          * variables can be used.
    2155             :          *
    2156             :          * l/r    sl/sr | vals  cand  off | result   value being matched
    2157             :          * -------------+-----------------+----------------------------------
    2158             :          * dense  NULL  | NULL  NULL  set | i        off==nil?nil:i+off
    2159             :          * dense  dense | NULL  NULL  set | i        off==nil?nil:i+off
    2160             :          * dense  set   | NULL  set   set | cand[i]  off==nil?nil:cand[i]+off
    2161             :          * set    NULL  | set   NULL  0   | i        vals[i]
    2162             :          * set    dense | set   NULL  0   | i        vals[i]
    2163             :          * set    set   | set   set   0   | cand[i]  vals[cand[i]]
    2164             :          *
    2165             :          * If {l,r}off is lng_nil, all values in the corresponding bat
    2166             :          * are oid_nil because the bat has type VOID and the tseqbase
    2167             :          * is nil.
    2168             :          */
    2169             : 
    2170             : 
    2171             :         /* Before we start adding values to r1 and r2, the properties
    2172             :          * are as follows:
    2173             :          * tseqbase - 0
    2174             :          * tkey - true
    2175             :          * tsorted - true
    2176             :          * trevsorted - true
    2177             :          * tnil - false
    2178             :          * tnonil - true
    2179             :          * We will modify these as we go along.
    2180             :          */
    2181      267790 :         while (lci->next < lci->ncand) {
    2182      264219 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    2183             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    2184      264219 :                 bit mark = defmark;
    2185      264219 :                 if (lscan == 0) {
    2186             :                         /* always search r completely */
    2187       75160 :                         assert(equal_order);
    2188       75160 :                         canditer_reset(rci);
    2189             :                 } else {
    2190             :                         /* If l is sorted (lscan > 0), we look at the
    2191             :                          * next value in r to see whether we can jump
    2192             :                          * over a large section of l using binary
    2193             :                          * search.  We do this by looking ahead in l
    2194             :                          * (lscan far, to be precise) and seeing if
    2195             :                          * the value there is still too "small"
    2196             :                          * (definition depends on sort order of l).
    2197             :                          * If it is, we use binary search on l,
    2198             :                          * otherwise we scan l for the next position
    2199             :                          * with a value greater than or equal to the
    2200             :                          * value in r.
    2201             :                          * The next value to match in r is the first
    2202             :                          * if equal_order is set, the last
    2203             :                          * otherwise.
    2204             :                          * When skipping over values in l, we count
    2205             :                          * how many we skip in nlx.  We need this in
    2206             :                          * case only_misses or nil_on_miss is set, and
    2207             :                          * to properly set the dense property in the
    2208             :                          * first output BAT. */
    2209      189059 :                         BUN nlx = 0; /* number of non-matching values in l */
    2210             : 
    2211      189059 :                         if (equal_order) {
    2212      188836 :                                 if (rci->next == rci->ncand)
    2213             :                                         v = NULL; /* no more values */
    2214      186637 :                                 else if (mrci) {
    2215           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peek(rci), true);
    2216           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    2217             :                                 } else
    2218      186637 :                                         v = VALUE(r, canditer_peek(rci) - r->hseqbase);
    2219             :                         } else {
    2220         223 :                                 if (rci->next == 0)
    2221             :                                         v = NULL; /* no more values */
    2222         211 :                                 else if (mrci) {
    2223           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peekprev(rci), false);
    2224           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    2225             :                                 } else
    2226         211 :                                         v = VALUE(r, canditer_peekprev(rci) - r->hseqbase);
    2227             :                         }
    2228             :                         /* here, v points to next value in r, or if
    2229             :                          * we're at the end of r, v is NULL */
    2230          17 :                         if (v == NULL) {
    2231        2211 :                                 nlx = lci->ncand - lci->next;
    2232             :                         } else {
    2233      186783 :                                 if (lscan < lci->ncand - lci->next) {
    2234      164566 :                                         lv = canditer_idx(lci, lci->next + lscan);
    2235      165007 :                                         lv -= l->hseqbase;
    2236      165007 :                                         if (lvals) {
    2237      160610 :                                                 if (lordering * cmp(VALUE(l, lv), v) < 0) {
    2238        2038 :                                                         nlx = binsearch(NULL, 0, li.type, lvals, lvars, li.width, lv, BATcount(l), v, lordering, 0);
    2239        2038 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    2240        2038 :                                                         nlx -= lci->next;
    2241             :                                                 }
    2242             :                                         } else {
    2243        4397 :                                                 assert(lordering == 1);
    2244        4397 :                                                 if (canditer_idx(&llci, lv) < *(const oid *)v) {
    2245           8 :                                                         nlx = canditer_search(&llci, *(const oid *)v, true);
    2246           8 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    2247           8 :                                                         nlx -= lci->next;
    2248             :                                                 }
    2249             :                                         }
    2250      165054 :                                         if (mlci) {
    2251           0 :                                                 lv = canditer_mask_next(mlci, lci->seq + lci->next + nlx, true);
    2252           0 :                                                 if (lv == oid_nil)
    2253           0 :                                                         nlx = lci->ncand - lci->next;
    2254             :                                                 else
    2255           0 :                                                         nlx = lv - lci->seq - lci->next;
    2256             :                                         }
    2257      165054 :                                         if (lci->next + nlx == lci->ncand)
    2258         463 :                                                 v = NULL;
    2259             :                                 }
    2260             :                         }
    2261      167265 :                         if (nlx > 0) {
    2262        4257 :                                 if (only_misses) {
    2263        2464 :                                         if (maybeextend(r1, r2, r3, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2264           0 :                                                 goto bailout;
    2265      267333 :                                         while (nlx > 0) {
    2266      264869 :                                                 lv = canditer_next(lci);
    2267      264869 :                                                 if (mlci == NULL || canditer_contains(mlci, lv))
    2268      264869 :                                                         APPEND(r1, lv);
    2269      264869 :                                                 nlx--;
    2270             :                                         }
    2271        2464 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2272         994 :                                                 r1->trevsorted = false;
    2273        1793 :                                 } else if (nil_on_miss) {
    2274          25 :                                         if (r2 && r2->tnonil) {
    2275           2 :                                                 r2->tnil = true;
    2276           2 :                                                 r2->tnonil = false;
    2277           2 :                                                 r2->tseqbase = oid_nil;
    2278           2 :                                                 r2->tsorted = false;
    2279           2 :                                                 r2->trevsorted = false;
    2280           2 :                                                 r2->tkey = false;
    2281             :                                         }
    2282          25 :                                         if (maybeextend(r1, r2, r3, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2283           0 :                                                 goto bailout;
    2284          25 :                                         if (r3)
    2285          24 :                                                 r3->tnil = false;
    2286        2175 :                                         while (nlx > 0) {
    2287        2150 :                                                 lv = canditer_next(lci);
    2288        2150 :                                                 if (mlci == NULL || canditer_contains(mlci, lv)) {
    2289        2150 :                                                         APPEND(r1, lv);
    2290        2150 :                                                         if (r2)
    2291           2 :                                                                 APPEND(r2, oid_nil);
    2292        2150 :                                                         if (r3) {
    2293        2149 :                                                                 if (rhasnil || cmp(VALUE(l, lv - l->hseqbase), nil) == 0) {
    2294           0 :                                                                         ((bit *) r3->theap->base)[r3->batCount++] = bit_nil;
    2295           0 :                                                                         r3->tnil = true;
    2296             :                                                                 } else {
    2297        2149 :                                                                         ((bit *) r3->theap->base)[r3->batCount++] = 0;
    2298             :                                                                 }
    2299             :                                                         }
    2300             :                                                 }
    2301        2150 :                                                 nlx--;
    2302             :                                         }
    2303          25 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2304           8 :                                                 r1->trevsorted = false;
    2305             :                                 } else {
    2306        1768 :                                         canditer_setidx(lci, lci->next + nlx);
    2307             :                                 }
    2308             :                         }
    2309      189482 :                         if (v == NULL) {
    2310             :                                 /* we have exhausted the inputs */
    2311             :                                 break;
    2312             :                         }
    2313             :                 }
    2314             : 
    2315             :                 /* Here we determine the next value in l that we are
    2316             :                  * going to try to match in r.  We will also count the
    2317             :                  * number of occurrences in l of that value.
    2318             :                  * Afterwards, v points to the value and nl is the
    2319             :                  * number of times it occurs.  Also, lci will point to
    2320             :                  * the next value to be considered (ready for the next
    2321             :                  * iteration).
    2322             :                  * If there are many equal values in l (more than
    2323             :                  * lscan), we will use binary search to find the end
    2324             :                  * of the sequence.  Obviously, we can do this only if
    2325             :                  * l is actually sorted (lscan > 0). */
    2326      258262 :                 nl = 1;         /* we'll match (at least) one in l */
    2327      258262 :                 nr = 0;         /* maybe we won't match anything in r */
    2328      258262 :                 lv = canditer_peek(lci);
    2329      257714 :                 if (mlci) {
    2330           0 :                         lv = canditer_mask_next(mlci, lv, true);
    2331           0 :                         if (lv == oid_nil)
    2332             :                                 break;
    2333           0 :                         canditer_setidx(lci, canditer_search(lci, lv, true));
    2334             :                 }
    2335      257730 :                 v = VALUE(l, lv - l->hseqbase);
    2336      258508 :                 if (li.key) {
    2337             :                         /* if l is key, there is a single value */
    2338      120506 :                 } else if (lscan > 0 &&
    2339      124293 :                            lscan < lci->ncand - lci->next &&
    2340       61565 :                            cmp(v, VALUE(l, canditer_idx(lci, lci->next + lscan) - l->hseqbase)) == 0) {
    2341             :                         /* lots of equal values: use binary search to
    2342             :                          * find end */
    2343         852 :                         assert(lvals != NULL);
    2344        1704 :                         nl = binsearch(NULL, 0,
    2345         852 :                                        li.type, lvals, lvars,
    2346         852 :                                        li.width, lci->next + lscan,
    2347             :                                        BATcount(l),
    2348             :                                        v, lordering, 1);
    2349         852 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    2350         852 :                         nl -= lci->next;
    2351             :                 } else {
    2352      119565 :                         struct canditer ci = *lci; /* work on copy */
    2353      119565 :                         nl = 0; /* it will be incremented again */
    2354      296333 :                         do {
    2355      296333 :                                 canditer_next(&ci);
    2356      294571 :                                 nl++;
    2357      585045 :                         } while (ci.next < ci.ncand &&
    2358      293504 :                                  cmp(v, VALUE(l, canditer_peek(&ci) - l->hseqbase)) == 0);
    2359             :                 }
    2360             :                 /* lci->next + nl is the position for the next iteration */
    2361             : 
    2362      253627 :                 if ((!nil_matches || not_in) && !li.nonil && cmp(v, nil) == 0) {
    2363         688 :                         if (not_in) {
    2364             :                                 /* just skip the whole thing: nils
    2365             :                                  * don't cause any output */
    2366           1 :                                 canditer_setidx(lci, lci->next + nl);
    2367           1 :                                 continue;
    2368             :                         }
    2369             :                         /* v is nil and nils don't match anything, set
    2370             :                          * to NULL to indicate nil */
    2371         687 :                         v = NULL;
    2372         687 :                         mark = bit_nil;
    2373         687 :                         if (r3)
    2374          57 :                                 r3->tnil = true;
    2375             :                 }
    2376             : 
    2377             :                 /* First we find the "first" value in r that is "at
    2378             :                  * least as large" as v, then we find the "first"
    2379             :                  * value in r that is "larger" than v.  The difference
    2380             :                  * is the number of values equal to v and is stored in
    2381             :                  * nr.  The definitions of "larger" and "first" depend
    2382             :                  * on the orderings of l and r.  If equal_order is
    2383             :                  * set, we go through r from low to high (this
    2384             :                  * includes the case that l is not sorted); otherwise
    2385             :                  * we go through r from high to low.
    2386             :                  * In either case, we will use binary search on r to
    2387             :                  * find both ends of the sequence of values that are
    2388             :                  * equal to v in case the position is "too far" (more
    2389             :                  * than rscan away). */
    2390          57 :                 if (v == NULL) {
    2391             :                         nr = 0; /* nils don't match anything */
    2392      252109 :                 } else if (ri.type == TYPE_void && is_oid_nil(r->tseqbase)) {
    2393           0 :                         if (is_oid_nil(*(const oid *) v)) {
    2394             :                                 /* all values in r match */
    2395           0 :                                 nr = rci->ncand;
    2396             :                         } else {
    2397             :                                 /* no value in r matches */
    2398             :                                 nr = 0;
    2399             :                         }
    2400             :                         /* in either case, we're done after this */
    2401           0 :                         canditer_setidx(rci, equal_order ? rci->ncand : 0);
    2402      252109 :                 } else if (equal_order) {
    2403             :                         /* first find the location of the first value
    2404             :                          * in r that is >= v, then find the location
    2405             :                          * of the first value in r that is > v; the
    2406             :                          * difference is the number of values equal
    2407             :                          * v; we change rci */
    2408             : 
    2409             :                         /* look ahead a little (rscan) in r to
    2410             :                          * see whether we're better off doing
    2411             :                          * a binary search */
    2412      251898 :                         if (rvals) {
    2413      251881 :                                 if (rscan < rci->ncand - rci->next &&
    2414      206251 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) > 0) {
    2415             :                                         /* value too far away in r:
    2416             :                                          * use binary search */
    2417       55053 :                                         lv = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 0);
    2418       67978 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2419       67406 :                                         canditer_setidx(rci, lv);
    2420             :                                 } else {
    2421             :                                         /* scan r for v */
    2422      220738 :                                         while (rci->next < rci->ncand) {
    2423      220489 :                                                 if (rordering * cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) <= 0)
    2424             :                                                         break;
    2425       25095 :                                                 canditer_next(rci);
    2426             :                                         }
    2427             :                                 }
    2428      515589 :                                 if (rci->next < rci->ncand &&
    2429      255813 :                                     cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0) {
    2430             :                                         /* if we found an equal value,
    2431             :                                          * look for the last equal
    2432             :                                          * value */
    2433      186467 :                                         if (ri.key) {
    2434             :                                                 /* r is key, there can
    2435             :                                                  * only be a single
    2436             :                                                  * equal value */
    2437      128781 :                                                 nr = 1;
    2438      128781 :                                                 canditer_next(rci);
    2439      113543 :                                         } else if (rscan < rci->ncand - rci->next &&
    2440       55865 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) == 0) {
    2441             :                                                 /* many equal values:
    2442             :                                                  * use binary search
    2443             :                                                  * to find the end */
    2444         629 :                                                 nr = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 1);
    2445         629 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2446         629 :                                                 nr -= rci->next;
    2447         629 :                                                 canditer_setidx(rci, rci->next + nr);
    2448             :                                         } else {
    2449             :                                                 /* scan r for end of
    2450             :                                                  * range */
    2451      156046 :                                                 do {
    2452      156046 :                                                         nr++;
    2453      156046 :                                                         canditer_next(rci);
    2454      311679 :                                                 } while (rci->next < rci->ncand &&
    2455      155645 :                                                          cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0);
    2456             :                                         }
    2457             :                                 }
    2458             :                         } else {
    2459          17 :                                 assert(rordering == 1);
    2460          17 :                                 rval = canditer_search(&rrci, *(const oid*)v, true) + r->hseqbase;
    2461          17 :                                 lv = canditer_search(rci, rval, true);
    2462          17 :                                 canditer_setidx(rci, lv);
    2463          17 :                                 nr = (canditer_idx(&rrci, canditer_peek(rci) - r->hseqbase) == *(oid*)v);
    2464          17 :                                 if (nr == 1)
    2465          17 :                                         canditer_next(rci);
    2466             :                         }
    2467             :                         /* rci points to first value > v or end of r,
    2468             :                          * and nr is the number of values in r that
    2469             :                          * are equal to v */
    2470             :                 } else {
    2471             :                         /* first find the location of the first value
    2472             :                          * in r that is > v, then find the location
    2473             :                          * of the first value in r that is >= v; the
    2474             :                          * difference is the number of values equal
    2475             :                          * v; we change rci */
    2476             : 
    2477             :                         /* look back from the end a little
    2478             :                          * (rscan) in r to see whether we're
    2479             :                          * better off doing a binary search */
    2480         211 :                         if (rvals) {
    2481         211 :                                 if (rci->next > rscan &&
    2482          64 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) < 0) {
    2483             :                                         /* value too far away
    2484             :                                          * in r: use binary
    2485             :                                          * search */
    2486          21 :                                         lv = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 1);
    2487          21 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2488          21 :                                         canditer_setidx(rci, lv);
    2489             :                                 } else {
    2490             :                                         /* scan r for v */
    2491         463 :                                         while (rci->next > 0 &&
    2492         456 :                                                rordering * cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) < 0)
    2493         273 :                                                 canditer_prev(rci);
    2494             :                                 }
    2495         415 :                                 if (rci->next > 0 &&
    2496         204 :                                     cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0) {
    2497             :                                         /* if we found an equal value,
    2498             :                                          * look for the last equal
    2499             :                                          * value */
    2500         110 :                                         if (ri.key) {
    2501             :                                                 /* r is key, there can only be a single equal value */
    2502          85 :                                                 nr = 1;
    2503          85 :                                                 canditer_prev(rci);
    2504          42 :                                         } else if (rci->next > rscan &&
    2505          17 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) == 0) {
    2506             :                                                 /* use binary search to find the start */
    2507           4 :                                                 nr = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 0);
    2508           4 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2509           4 :                                                 nr = rci->next - nr;
    2510           4 :                                                 canditer_setidx(rci, rci->next - nr);
    2511             :                                         } else {
    2512             :                                                 /* scan r for start of range */
    2513          21 :                                                 do {
    2514          21 :                                                         canditer_prev(rci);
    2515          21 :                                                         nr++;
    2516          41 :                                                 } while (rci->next > 0 &&
    2517          20 :                                                          cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0);
    2518             :                                         }
    2519             :                                 }
    2520             :                         } else {
    2521           0 :                                 lv = canditer_search(&rrci, *(const oid *)v, true);
    2522           0 :                                 lv = canditer_search(rci, lv + r->hseqbase, true);
    2523           0 :                                 nr = (canditer_idx(rci, lv) == *(const oid*)v);
    2524           0 :                                 canditer_setidx(rci, lv);
    2525             :                         }
    2526             :                         /* rci points to first value > v
    2527             :                          * or end of r, and nr is the number of values
    2528             :                          * in r that are equal to v */
    2529             :                 }
    2530             : 
    2531      188609 :                 if (nr == 0) {
    2532             :                         /* no entries in r found */
    2533       76123 :                         if (!(nil_on_miss | only_misses)) {
    2534       46939 :                                 if (min_one) {
    2535           0 :                                         GDKerror("not enough matches");
    2536           0 :                                         goto bailout;
    2537             :                                 }
    2538       51129 :                                 if (lscan > 0 &&
    2539        4190 :                                     (equal_order ? rci->next == rci->ncand : rci->next == 0)) {
    2540             :                                         /* nothing more left to match
    2541             :                                          * in r */
    2542             :                                         break;
    2543             :                                 }
    2544       46919 :                                 canditer_setidx(lci, lci->next + nl);
    2545       46054 :                                 continue;
    2546             :                         }
    2547             :                         /* insert a nil to indicate a non-match */
    2548       29184 :                         insert_nil = true;
    2549       29184 :                         nr = 1;
    2550       29184 :                         if (r2) {
    2551          26 :                                 r2->tnil = true;
    2552          26 :                                 r2->tnonil = false;
    2553          26 :                                 r2->tsorted = false;
    2554          26 :                                 r2->trevsorted = false;
    2555          26 :                                 r2->tseqbase = oid_nil;
    2556          26 :                                 r2->tkey = false;
    2557             :                         }
    2558      186542 :                 } else if (nr > 1 && max_one) {
    2559          21 :                         GDKerror("more than one match");
    2560          21 :                         goto bailout;
    2561      186521 :                 } else if (only_misses) {
    2562             :                         /* we had a match, so we're not interested */
    2563       99993 :                         canditer_setidx(lci, lci->next + nl);
    2564       99981 :                         continue;
    2565             :                 } else {
    2566       86528 :                         insert_nil = false;
    2567       86528 :                         if (semi) {
    2568             :                                 /* for semi-join, only insert single
    2569             :                                  * value */
    2570       20252 :                                 nr = 1;
    2571             :                         }
    2572             :                 }
    2573             :                 /* make space: nl values in l match nr values in r, so
    2574             :                  * we need to add nl * nr values in the results */
    2575      115712 :                 if (maybeextend(r1, r2, r3, nl * nr, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2576           0 :                         goto bailout;
    2577             : 
    2578             :                 /* maintain properties */
    2579      115761 :                 if (nl > 1) {
    2580       52684 :                         if (r2) {
    2581             :                                 /* value occurs multiple times in l,
    2582             :                                  * so entry in r will be repeated
    2583             :                                  * multiple times: hence r2 is not key
    2584             :                                  * and not dense */
    2585        8239 :                                 r2->tkey = false;
    2586        8239 :                                 r2->tseqbase = oid_nil;
    2587             :                         }
    2588             :                         /* multiple different values will be inserted
    2589             :                          * in r1 (always in order), so not reverse
    2590             :                          * ordered anymore */
    2591       52684 :                         r1->trevsorted = false;
    2592             :                 }
    2593      115761 :                 if (nr > 1) {
    2594             :                         /* value occurs multiple times in r, so entry
    2595             :                          * in l will be repeated multiple times: hence
    2596             :                          * r1 is not key and not dense */
    2597         488 :                         r1->tkey = false;
    2598         488 :                         if (r2) {
    2599             :                                 /* multiple different values will be
    2600             :                                  * inserted in r2 (in order), so not
    2601             :                                  * reverse ordered anymore */
    2602          53 :                                 r2->trevsorted = false;
    2603          53 :                                 if (nl > 1) {
    2604             :                                         /* multiple values in l match
    2605             :                                          * multiple values in r, so an
    2606             :                                          * ordered sequence will be
    2607             :                                          * inserted multiple times in
    2608             :                                          * r2, so r2 is not ordered
    2609             :                                          * anymore */
    2610          37 :                                         r2->tsorted = false;
    2611             :                                 }
    2612             :                         }
    2613             :                 }
    2614      115761 :                 if (lscan == 0) {
    2615             :                         /* deduce relative positions of r matches for
    2616             :                          * this and previous value in v */
    2617       24862 :                         if (prev && r2) {
    2618             :                                 /* keyness or r2 can only be assured
    2619             :                                  * as long as matched values are
    2620             :                                  * ordered */
    2621       14722 :                                 int ord = rordering * cmp(prev, v ? v : nil);
    2622       14944 :                                 if (ord < 0) {
    2623             :                                         /* previous value in l was
    2624             :                                          * less than current */
    2625        7857 :                                         r2->trevsorted = false;
    2626        7857 :                                         r2->tkey &= r2->tsorted;
    2627        7087 :                                 } else if (ord > 0) {
    2628             :                                         /* previous value was
    2629             :                                          * greater */
    2630        7055 :                                         r2->tsorted = false;
    2631        7055 :                                         r2->tkey &= r2->trevsorted;
    2632             :                                 } else {
    2633             :                                         /* value can be equal if
    2634             :                                          * intervening values in l
    2635             :                                          * didn't match anything; if
    2636             :                                          * multiple values match in r,
    2637             :                                          * r2 won't be sorted */
    2638          32 :                                         r2->tkey = false;
    2639          32 :                                         if (nr > 1) {
    2640           3 :                                                 r2->tsorted = false;
    2641           3 :                                                 r2->trevsorted = false;
    2642             :                                         }
    2643             :                                 }
    2644             :                         }
    2645       25087 :                         prev = v ? v : nil;
    2646             :                 }
    2647      115986 :                 if (BATcount(r1) > 0) {
    2648             :                         /* a new, higher value will be inserted into
    2649             :                          * r1, so r1 is not reverse ordered anymore */
    2650      111715 :                         r1->trevsorted = false;
    2651      111715 :                         if (r2) {
    2652             :                                 /* depending on whether l and r are
    2653             :                                  * ordered the same or not, a new
    2654             :                                  * higher or lower value will be added
    2655             :                                  * to r2 */
    2656       18137 :                                 if (equal_order)
    2657       18081 :                                         r2->trevsorted = false;
    2658             :                                 else {
    2659          56 :                                         r2->tsorted = false;
    2660          56 :                                         r2->tseqbase = oid_nil;
    2661             :                                 }
    2662             :                         }
    2663             :                 }
    2664             : 
    2665             :                 /* insert values: first the left output */
    2666             :                 BUN nladded = 0;
    2667      346190 :                 for (i = 0; i < nl; i++) {
    2668      230788 :                         lv = canditer_next(lci);
    2669      230204 :                         if (mlci == NULL || canditer_contains(mlci, lv)) {
    2670      230204 :                                 nladded++;
    2671      675060 :                                 for (j = 0; j < nr; j++)
    2672      444856 :                                         APPEND(r1, lv);
    2673             :                         }
    2674             :                 }
    2675      115402 :                 nl = nladded;
    2676             :                 /* then the right output, various different ways of
    2677             :                  * doing it */
    2678      115402 :                 if (r2) {
    2679       18476 :                         if (insert_nil) {
    2680          67 :                                 for (i = 0; i < nl; i++) {
    2681          82 :                                         for (j = 0; j < nr; j++) {
    2682          41 :                                                 APPEND(r2, oid_nil);
    2683             :                                         }
    2684             :                                 }
    2685       18450 :                         } else if (equal_order) {
    2686       18345 :                                 struct canditer ci = *rci; /* work on copy */
    2687       18345 :                                 if (r2->batCount > 0 &&
    2688       20814 :                                     BATtdense(r2) &&
    2689        2892 :                                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_idx(&ci, ci.next - nr))
    2690          68 :                                         r2->tseqbase = oid_nil;
    2691       57821 :                                 for (i = 0; i < nl; i++) {
    2692       39410 :                                         canditer_setidx(&ci, ci.next - nr);
    2693      313361 :                                         for (j = 0; j < nr; j++) {
    2694      234475 :                                                 APPEND(r2, canditer_next(&ci));
    2695             :                                         }
    2696             :                                 }
    2697             :                         } else {
    2698         105 :                                 if (r2->batCount > 0 &&
    2699          56 :                                     BATtdense(r2) &&
    2700           0 :                                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_peek(rci))
    2701           0 :                                         r2->tseqbase = oid_nil;
    2702        1185 :                                 for (i = 0; i < nl; i++) {
    2703        1080 :                                         struct canditer ci = *rci; /* work on copy */
    2704       21552 :                                         for (j = 0; j < nr; j++) {
    2705       20472 :                                                 APPEND(r2, canditer_next(&ci));
    2706             :                                         }
    2707             :                                 }
    2708             :                         }
    2709             :                 }
    2710             :                 /* finally the mark output */
    2711      115468 :                 if (r3) {
    2712        3032 :                         if (insert_nil) {
    2713         358 :                                 r3->tnil |= rhasnil;
    2714         889 :                                 for (i = 0; i < nl; i++) {
    2715        1062 :                                         for (j = 0; j < nr; j++) {
    2716         531 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark;
    2717             :                                         }
    2718             :                                 }
    2719             :                         } else {
    2720        9143 :                                 for (i = 0; i < nl; i++) {
    2721       12942 :                                         for (j = 0; j < nr; j++) {
    2722        6473 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = 1;
    2723             :                                         }
    2724             :                                 }
    2725             :                         }
    2726             :                 }
    2727             :         }
    2728             :         /* also set other bits of heap to correct value to indicate size */
    2729        6265 :         BATsetcount(r1, BATcount(r1));
    2730        6265 :         r1->tseqbase = oid_nil;
    2731        6265 :         if (r1->tkey)
    2732        6232 :                 r1 = virtualize(r1);
    2733        6265 :         if (r2) {
    2734        1106 :                 BATsetcount(r2, BATcount(r2));
    2735        1106 :                 assert(BATcount(r1) == BATcount(r2));
    2736        1106 :                 r2->tseqbase = oid_nil;
    2737        1106 :                 if (BATcount(r2) <= 1) {
    2738         572 :                         r2->tkey = true;
    2739         572 :                         r2 = virtualize(r2);
    2740             :                 }
    2741             :         }
    2742        6265 :         if (r3) {
    2743          65 :                 BATsetcount(r3, BATcount(r3));
    2744          65 :                 assert(BATcount(r1) == BATcount(r3));
    2745          65 :                 r3->tseqbase = oid_nil;
    2746          65 :                 r3->tnonil = !r3->tnil;
    2747          65 :                 if (BATcount(r3) <= 1) {
    2748           0 :                         r3->tkey = true;
    2749           0 :                         r3->tsorted = true;
    2750           0 :                         r3->trevsorted = true;
    2751             :                 }
    2752             :         }
    2753        6265 :         bat_iterator_end(&li);
    2754        6265 :         bat_iterator_end(&ri);
    2755        6265 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    2756             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    2757             :                   "sr=" ALGOOPTBATFMT ","
    2758             :                   "nil_on_miss=%s,semi=%s,only_misses=%s,not_in=%s;%s %s "
    2759             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    2760             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    2761             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    2762             :                   nil_on_miss ? "true" : "false",
    2763             :                   semi ? "true" : "false",
    2764             :                   only_misses ? "true" : "false",
    2765             :                   not_in ? "true" : "false",
    2766             :                   swapped ? " swapped" : "", reason,
    2767             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    2768             :                   GDKusec() - t0);
    2769             : 
    2770             :         return GDK_SUCCEED;
    2771             : 
    2772          21 :   bailout:
    2773          21 :         bat_iterator_end(&li);
    2774          21 :         bat_iterator_end(&ri);
    2775          21 :         BBPreclaim(r1);
    2776          21 :         BBPreclaim(r2);
    2777          21 :         BBPreclaim(r3);
    2778             :         return GDK_FAIL;
    2779             : }
    2780             : 
    2781             : #define HASHLOOPBODY()                                                  \
    2782             :         do {                                                            \
    2783             :                 if (nr >= 1 && max_one) {                            \
    2784             :                         GDKerror("more than one match");              \
    2785             :                         goto bailout;                                   \
    2786             :                 }                                                       \
    2787             :                 if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2788             :                         goto bailout;                                   \
    2789             :                 APPEND(r1, lo);                                         \
    2790             :                 if (r2)                                                 \
    2791             :                         APPEND(r2, ro);                                 \
    2792             :                 if (r3)                                                 \
    2793             :                         ((bit *) r3->theap->base)[r3->batCount++] = 1; \
    2794             :                 nr++;                                                   \
    2795             :         } while (false)
    2796             : 
    2797             : #define EQ_int(a, b)    ((a) == (b))
    2798             : #define EQ_lng(a, b)    ((a) == (b))
    2799             : #ifdef HAVE_HGE
    2800             : #define EQ_uuid(a, b)   ((a).h == (b).h)
    2801             : #else
    2802             : #define EQ_uuid(a, b)   (memcmp((a).u, (b).u, UUID_SIZE) == 0)
    2803             : #endif
    2804             : 
    2805             : #define HASHJOIN(TYPE)                                                  \
    2806             :         do {                                                            \
    2807             :                 TYPE *rvals = ri.base;                                  \
    2808             :                 TYPE *lvals = li.base;                                  \
    2809             :                 TYPE v;                                                 \
    2810             :                 while (lci->next < lci->ncand) {                       \
    2811             :                         GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
    2812             :                         lo = canditer_next(lci);                        \
    2813             :                         v = lvals[lo - l->hseqbase];                 \
    2814             :                         nr = 0;                                         \
    2815             :                         bit mark = defmark;                             \
    2816             :                         if ((!nil_matches || not_in) && is_##TYPE##_nil(v)) { \
    2817             :                                 /* no match */                          \
    2818             :                                 if (not_in) {                           \
    2819             :                                         lskipped = BATcount(r1) > 0; \
    2820             :                                         continue;                       \
    2821             :                                 }                                       \
    2822             :                                 mark = bit_nil;                         \
    2823             :                         } else if (hash_cand) {                         \
    2824             :                                 /* private hash: no locks */            \
    2825             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2826             :                                      rb != BUN_NONE;                    \
    2827             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2828             :                                         ro = canditer_idx(rci, rb);     \
    2829             :                                         if (!EQ_##TYPE(v, rvals[ro - r->hseqbase])) \
    2830             :                                                 continue;               \
    2831             :                                         if (only_misses) {              \
    2832             :                                                 nr++;                   \
    2833             :                                                 break;                  \
    2834             :                                         }                               \
    2835             :                                         HASHLOOPBODY();                 \
    2836             :                                         if (semi && !max_one)           \
    2837             :                                                 break;                  \
    2838             :                                 }                                       \
    2839             :                         } else if (rci->tpe != cand_dense) {         \
    2840             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2841             :                                      rb != BUN_NONE;                    \
    2842             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2843             :                                         if (rb >= rl && rb < rh &&        \
    2844             :                                             EQ_##TYPE(v, rvals[rb]) &&  \
    2845             :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) { \
    2846             :                                                 if (only_misses) {      \
    2847             :                                                         nr++;           \
    2848             :                                                         break;          \
    2849             :                                                 }                       \
    2850             :                                                 HASHLOOPBODY();         \
    2851             :                                                 if (semi && !max_one)   \
    2852             :                                                         break;          \
    2853             :                                         }                               \
    2854             :                                 }                                       \
    2855             :                         } else {                                        \
    2856             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2857             :                                      rb != BUN_NONE;                    \
    2858             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2859             :                                         if (rb >= rl && rb < rh &&        \
    2860             :                                             EQ_##TYPE(v, rvals[rb])) {  \
    2861             :                                                 if (only_misses) {      \
    2862             :                                                         nr++;           \
    2863             :                                                         break;          \
    2864             :                                                 }                       \
    2865             :                                                 ro = (oid) (rb - roff + rseq); \
    2866             :                                                 HASHLOOPBODY();         \
    2867             :                                                 if (semi && !max_one)   \
    2868             :                                                         break;          \
    2869             :                                         }                               \
    2870             :                                 }                                       \
    2871             :                         }                                               \
    2872             :                         if (nr == 0) {                                  \
    2873             :                                 if (only_misses) {                      \
    2874             :                                         nr = 1;                         \
    2875             :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2876             :                                                 goto bailout;           \
    2877             :                                         APPEND(r1, lo);                 \
    2878             :                                         if (lskipped)                   \
    2879             :                                                 r1->tseqbase = oid_nil;      \
    2880             :                                 } else if (nil_on_miss) {               \
    2881             :                                         nr = 1;                         \
    2882             :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2883             :                                                 goto bailout;           \
    2884             :                                         APPEND(r1, lo);                 \
    2885             :                                         if (r2) {                       \
    2886             :                                                 r2->tnil = true;     \
    2887             :                                                 r2->tnonil = false;  \
    2888             :                                                 r2->tkey = false;    \
    2889             :                                                 APPEND(r2, oid_nil);    \
    2890             :                                         }                               \
    2891             :                                         if (r3) {                       \
    2892             :                                                 r3->tnil |= mark == bit_nil; \
    2893             :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark; \
    2894             :                                         }                               \
    2895             :                                 } else if (min_one) {                   \
    2896             :                                         GDKerror("not enough matches");       \
    2897             :                                         goto bailout;                   \
    2898             :                                 } else {                                \
    2899             :                                         lskipped = BATcount(r1) > 0; \
    2900             :                                 }                                       \
    2901             :                         } else if (only_misses) {                       \
    2902             :                                 lskipped = BATcount(r1) > 0;         \
    2903             :                         } else {                                        \
    2904             :                                 if (lskipped) {                         \
    2905             :                                         /* note, we only get here in an \
    2906             :                                          * iteration *after* lskipped was \
    2907             :                                          * first set to true, i.e. we did \
    2908             :                                          * indeed skip values in l */   \
    2909             :                                         r1->tseqbase = oid_nil;              \
    2910             :                                 }                                       \
    2911             :                                 if (nr > 1) {                                \
    2912             :                                         r1->tkey = false;            \
    2913             :                                         r1->tseqbase = oid_nil;              \
    2914             :                                 }                                       \
    2915             :                         }                                               \
    2916             :                         if (nr > 0 && BATcount(r1) > nr)          \
    2917             :                                 r1->trevsorted = false;                      \
    2918             :                 }                                                       \
    2919             :         } while (0)
    2920             : 
    2921             : /* Implementation of join using a hash lookup of values in the right
    2922             :  * column. */
    2923             : static gdk_return
    2924        9944 : hashjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
    2925             :          struct canditer *restrict lci, struct canditer *restrict rci,
    2926             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    2927             :          bool not_in, bool max_one, bool min_one,
    2928             :          BUN estimate, lng t0, bool swapped,
    2929             :          bool hash, bool phash, bool hash_cand,
    2930             :          const char *reason)
    2931             : {
    2932        9944 :         oid lo, ro;
    2933        9944 :         BATiter li, ri;
    2934        9944 :         BUN rb, roff = 0;
    2935             :         /* rl, rh: lower and higher bounds for BUN values in hash table */
    2936        9944 :         BUN rl, rh;
    2937        9944 :         oid rseq;
    2938        9944 :         BUN nr;
    2939        9944 :         const char *lvals;
    2940        9944 :         const char *lvars;
    2941        9944 :         const void *nil = ATOMnilptr(l->ttype);
    2942        9944 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    2943        9944 :         oid lval = oid_nil;     /* hold value if l is dense */
    2944        9944 :         const char *v = (const char *) &lval;
    2945        9944 :         bool lskipped = false;  /* whether we skipped values in l */
    2946        9944 :         Hash *restrict hsh = NULL;
    2947        9944 :         bool locked = false;
    2948        9944 :         BUN maxsize;
    2949        9944 :         BAT *r1 = NULL;
    2950        9944 :         BAT *r2 = NULL;
    2951        9944 :         BAT *r3 = NULL;
    2952        9944 :         BAT *b = NULL;
    2953             : 
    2954       29823 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2955             : 
    2956        9944 :         size_t counter = 0;
    2957        9944 :         lng timeoffset = 0;
    2958        9944 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2959        9943 :         if (qry_ctx != NULL) {
    2960        9385 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    2961             :         }
    2962             : 
    2963        9943 :         li = bat_iterator(l);
    2964        9944 :         ri = bat_iterator(r);
    2965             : 
    2966        9944 :         int t = ATOMbasetype(ri.type);
    2967        9944 :         if (BATtvoid(r) || BATtvoid(l))
    2968           9 :                 t = TYPE_void;
    2969             : 
    2970        9944 :         lvals = (const char *) li.base;
    2971        9944 :         if (li.vh && li.type) {
    2972         722 :                 assert(ri.vh && ri.type);
    2973         722 :                 lvars = li.vh->base;
    2974             :         } else {
    2975        9222 :                 assert(ri.vh == NULL);
    2976             :                 lvars = NULL;
    2977             :         }
    2978             :         /* offset to convert BUN to OID for value in right column */
    2979        9944 :         rseq = r->hseqbase;
    2980             : 
    2981        9944 :         rl = rci->seq - r->hseqbase;
    2982        9944 :         rh = canditer_last(rci) + 1 - r->hseqbase;
    2983        9944 :         if (hash_cand) {
    2984             :                 /* we need to create a hash on r specific for the
    2985             :                  * candidate list */
    2986         132 :                 char ext[32];
    2987         132 :                 assert(rci->s);
    2988         161 :                 MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash");
    2989         132 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating "
    2990             :                           "hash for candidate list " ALGOBATFMT "%s%s\n",
    2991             :                           ALGOBATPAR(r), ALGOBATPAR(rci->s),
    2992             :                           r->thash ? " ignoring existing hash" : "",
    2993             :                           swapped ? " (swapped)" : "");
    2994         132 :                 if (snprintf(ext, sizeof(ext), "thshjn%x",
    2995         132 :                              (unsigned) MT_getpid()) >= (int) sizeof(ext))
    2996           0 :                         goto bailout;
    2997         132 :                 if ((hsh = BAThash_impl(r, rci, ext)) == NULL) {
    2998           0 :                         goto bailout;
    2999             :                 }
    3000        9812 :         } else if (phash) {
    3001             :                 /* there is a hash on the parent which we should use */
    3002         802 :                 MT_thread_setalgorithm(swapped ? "hashjoin using parent hash (swapped)" : "hashjoin using parent hash");
    3003         645 :                 b = BATdescriptor(VIEWtparent(r));
    3004         645 :                 if (b == NULL)
    3005           0 :                         goto bailout;
    3006         645 :                 TRC_DEBUG(ALGO, "%s(%s): using "
    3007             :                           "parent(" ALGOBATFMT ") for hash%s\n",
    3008             :                           __func__,
    3009             :                           BATgetId(r), ALGOBATPAR(b),
    3010             :                           swapped ? " (swapped)" : "");
    3011         645 :                 roff = r->tbaseoff - b->tbaseoff;
    3012         645 :                 rl += roff;
    3013         645 :                 rh += roff;
    3014         645 :                 r = b;
    3015         645 :                 bat_iterator_end(&ri);
    3016         645 :                 ri = bat_iterator(r);
    3017         645 :                 MT_rwlock_rdlock(&r->thashlock);
    3018         645 :                 hsh = r->thash;
    3019         645 :                 locked = true;
    3020        9167 :         } else if (hash) {
    3021             :                 /* there is a hash on r which we should use */
    3022        6612 :                 MT_thread_setalgorithm(swapped ? "hashjoin using existing hash (swapped)" : "hashjoin using existing hash");
    3023        4150 :                 MT_rwlock_rdlock(&r->thashlock);
    3024        4150 :                 hsh = r->thash;
    3025        4150 :                 locked = true;
    3026        4150 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": using "
    3027             :                           "existing hash%s\n",
    3028             :                           ALGOBATPAR(r),
    3029             :                           swapped ? " (swapped)" : "");
    3030        5017 :         } else if (BATtdensebi(&ri)) {
    3031             :                 /* no hash, just dense lookup */
    3032           1 :                 MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)" : "hashjoin on dense");
    3033             :         } else {
    3034             :                 /* we need to create a hash on r */
    3035        6788 :                 MT_thread_setalgorithm(swapped ? "hashjoin using new hash (swapped)" : "hashjoin using new hash");
    3036        5016 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating hash%s\n",
    3037             :                           ALGOBATPAR(r),
    3038             :                           swapped ? " (swapped)" : "");
    3039        5016 :                 if (BAThash(r) != GDK_SUCCEED)
    3040           0 :                         goto bailout;
    3041        5016 :                 MT_rwlock_rdlock(&r->thashlock);
    3042        5016 :                 hsh = r->thash;
    3043        5016 :                 locked = true;
    3044             :         }
    3045        9944 :         if (locked && hsh == NULL) {
    3046           0 :                 GDKerror("Hash disappeared for "ALGOBATFMT"\n", ALGOBATPAR(r));
    3047           0 :                 goto bailout;
    3048             :         }
    3049        9944 :         assert(hsh != NULL || BATtdensebi(&ri));
    3050             :         if (hsh) {
    3051        9943 :                 TRC_DEBUG(ALGO, "hash for " ALGOBATFMT ": nbucket " BUNFMT ", nunique " BUNFMT ", nheads " BUNFMT "\n", ALGOBATPAR(r), hsh->nbucket, hsh->nunique, hsh->nheads);
    3052             :         }
    3053             : 
    3054        9944 :         bit defmark = 0;
    3055        9944 :         if ((not_in || r3p) && !ri.nonil) {
    3056             :                 /* check whether there is a nil on the right, since if
    3057             :                  * so, we should return an empty result if not_in is
    3058             :                  * set, or use a NIL mark for non-matches if r3p is
    3059             :                  * set */
    3060         261 :                 if (hash_cand) {
    3061           0 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    3062           0 :                              rb != BUN_NONE;
    3063           0 :                              rb = HASHgetlink(hsh, rb)) {
    3064           0 :                                 ro = canditer_idx(rci, rb);
    3065           0 :                                 if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
    3066           0 :                                         assert(!locked);
    3067           0 :                                         if (r3p) {
    3068           0 :                                                 defmark = bit_nil;
    3069           0 :                                                 break;
    3070             :                                         }
    3071           0 :                                         HEAPfree(&hsh->heaplink, true);
    3072           0 :                                         HEAPfree(&hsh->heapbckt, true);
    3073           0 :                                         GDKfree(hsh);
    3074           0 :                                         bat_iterator_end(&li);
    3075           0 :                                         bat_iterator_end(&ri);
    3076           0 :                                         BBPreclaim(b);
    3077           0 :                                         return nomatch(r1p, r2p, r3p, l, r, lci,
    3078             :                                                        bit_nil, false, false,
    3079             :                                                        __func__, t0);
    3080             :                                 }
    3081             :                         }
    3082         261 :                 } else if (!BATtdensebi(&ri)) {
    3083         261 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    3084         309 :                              rb != BUN_NONE;
    3085          48 :                              rb = HASHgetlink(hsh, rb)) {
    3086          70 :                                 if (rb >= rl && rb < rh &&
    3087          70 :                                     (cmp == NULL ||
    3088          70 :                                      (*cmp)(nil, BUNtail(ri, rb)) == 0)) {
    3089          22 :                                         if (r3p) {
    3090          21 :                                                 defmark = bit_nil;
    3091          21 :                                                 break;
    3092             :                                         }
    3093           1 :                                         if (locked)
    3094           1 :                                                 MT_rwlock_rdunlock(&r->thashlock);
    3095           1 :                                         bat_iterator_end(&li);
    3096           1 :                                         bat_iterator_end(&ri);
    3097           1 :                                         BBPreclaim(b);
    3098           1 :                                         return nomatch(r1p, r2p, r3p, l, r, lci,
    3099             :                                                        bit_nil, false, false,
    3100             :                                                        __func__, t0);
    3101             :                                 }
    3102             :                         }
    3103             :                 }
    3104             :         }
    3105             : 
    3106       19886 :         maxsize = joininitresults(r1p, r2p, r3p, lci->ncand, rci->ncand,
    3107        9943 :                                   li.key, ri.key, semi | max_one,
    3108             :                                   nil_on_miss, only_misses, min_one,
    3109             :                                   estimate);
    3110        9943 :         if (maxsize == BUN_NONE) {
    3111           0 :                 goto bailout;
    3112             :         }
    3113             : 
    3114        9943 :         r1 = *r1p;
    3115        9943 :         r2 = r2p ? *r2p : NULL;
    3116        9943 :         r3 = r3p ? *r3p : NULL;
    3117             : 
    3118             :         /* basic properties will be adjusted if necessary later on,
    3119             :          * they were initially set by joininitresults() */
    3120             : 
    3121        9943 :         if (r2) {
    3122        8542 :                 r2->tkey = li.key;
    3123             :                 /* r2 is not likely to be sorted (although it is
    3124             :                  * certainly possible) */
    3125        8542 :                 r2->tsorted = false;
    3126        8542 :                 r2->trevsorted = false;
    3127        8542 :                 r2->tseqbase = oid_nil;
    3128             :         }
    3129             : 
    3130        9943 :         if (lci->tpe != cand_dense)
    3131         289 :                 r1->tseqbase = oid_nil;
    3132             : 
    3133             : 
    3134        9943 :         switch (t) {
    3135        7839 :         case TYPE_int:
    3136   400864618 :                 HASHJOIN(int);
    3137             :                 break;
    3138         993 :         case TYPE_lng:
    3139    96985755 :                 HASHJOIN(lng);
    3140             :                 break;
    3141           0 :         case TYPE_uuid:
    3142           0 :                 HASHJOIN(uuid);
    3143           0 :                 break;
    3144             :         default:
    3145     2642375 :                 while (lci->next < lci->ncand) {
    3146     2641271 :                         GDK_CHECK_TIMEOUT(timeoffset, counter,
    3147             :                                         GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    3148     2641271 :                         lo = canditer_next(lci);
    3149     2662168 :                         if (BATtdensebi(&li))
    3150         323 :                                 lval = lo - l->hseqbase + l->tseqbase;
    3151     2661845 :                         else if (li.type != TYPE_void)
    3152     2672114 :                                 v = VALUE(l, lo - l->hseqbase);
    3153     2690833 :                         nr = 0;
    3154     2690833 :                         bit mark = defmark;
    3155     2690833 :                         if ((!nil_matches || not_in) && cmp(v, nil) == 0) {
    3156             :                                 /* no match */
    3157        2975 :                                 if (not_in) {
    3158          10 :                                         lskipped = BATcount(r1) > 0;
    3159          10 :                                         continue;
    3160             :                                 }
    3161        2965 :                                 mark = bit_nil;
    3162     2687616 :                         } else if (hash_cand) {
    3163           0 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3164           0 :                                      rb != BUN_NONE;
    3165           0 :                                      rb = HASHgetlink(hsh, rb)) {
    3166           0 :                                         ro = canditer_idx(rci, rb);
    3167           0 :                                         if ((*cmp)(v, BUNtail(ri, ro - r->hseqbase)) != 0)
    3168           0 :                                                 continue;
    3169           0 :                                         if (only_misses) {
    3170           0 :                                                 nr++;
    3171           0 :                                                 break;
    3172             :                                         }
    3173           0 :                                         HASHLOOPBODY();
    3174           0 :                                         if (semi && !max_one)
    3175             :                                                 break;
    3176             :                                 }
    3177     2687616 :                         } else if (hsh == NULL) {
    3178           4 :                                 assert(BATtdensebi(&ri));
    3179           4 :                                 ro = *(const oid *) v;
    3180           4 :                                 if (ro >= r->tseqbase &&
    3181           4 :                                     ro < r->tseqbase + r->batCount) {
    3182           4 :                                         ro -= r->tseqbase;
    3183           4 :                                         ro += rseq;
    3184           4 :                                         if (canditer_contains(rci, ro)) {
    3185           4 :                                                 if (only_misses) {
    3186        9935 :                                                         nr++;
    3187             :                                                         break;
    3188             :                                                 }
    3189           4 :                                                 HASHLOOPBODY();
    3190           4 :                                                 if (semi && !max_one)
    3191             :                                                         break;
    3192             :                                         }
    3193             :                                 }
    3194     2687612 :                         } else if (rci->tpe != cand_dense) {
    3195       16131 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3196       24189 :                                      rb != BUN_NONE;
    3197        8058 :                                      rb = HASHgetlink(hsh, rb)) {
    3198       16075 :                                         if (rb >= rl && rb < rh &&
    3199        8489 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0 &&
    3200         472 :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) {
    3201         394 :                                                 if (only_misses) {
    3202           0 :                                                         nr++;
    3203           0 :                                                         break;
    3204             :                                                 }
    3205         394 :                                                 HASHLOOPBODY();
    3206         394 :                                                 if (semi && !max_one)
    3207             :                                                         break;
    3208             :                                         }
    3209             :                                 }
    3210             :                         } else {
    3211     2671481 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3212     5247085 :                                      rb != BUN_NONE;
    3213     2573534 :                                      rb = HASHgetlink(hsh, rb)) {
    3214     5314709 :                                         if (rb >= rl && rb < rh &&
    3215     2677413 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0) {
    3216     2228511 :                                                 if (only_misses) {
    3217       41712 :                                                         nr++;
    3218       41712 :                                                         break;
    3219             :                                                 }
    3220     2186799 :                                                 ro = (oid) (rb - roff + rseq);
    3221     2186799 :                                                 HASHLOOPBODY();
    3222     2165742 :                                                 if (semi && !max_one)
    3223             :                                                         break;
    3224             :                                         }
    3225             :                                 }
    3226             :                         }
    3227     2641185 :                         if (nr == 0) {
    3228      442494 :                                 if (only_misses) {
    3229         386 :                                         nr = 1;
    3230         386 :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    3231           0 :                                                 goto bailout;
    3232         386 :                                         APPEND(r1, lo);
    3233         386 :                                         if (lskipped)
    3234         350 :                                                 r1->tseqbase = oid_nil;
    3235      442108 :                                 } else if (nil_on_miss) {
    3236       11382 :                                         nr = 1;
    3237       11382 :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    3238           0 :                                                 goto bailout;
    3239       11447 :                                         APPEND(r1, lo);
    3240       11447 :                                         if (r2) {
    3241           0 :                                                 r2->tnil = true;
    3242           0 :                                                 r2->tnonil = false;
    3243           0 :                                                 r2->tkey = false;
    3244           0 :                                                 APPEND(r2, oid_nil);
    3245             :                                         }
    3246       11447 :                                         if (r3) {
    3247       11447 :                                                 r3->tnil |= mark == bit_nil;
    3248       11447 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark;
    3249             :                                         }
    3250      430726 :                                 } else if (min_one) {
    3251           0 :                                         GDKerror("not enough matches");
    3252           0 :                                         goto bailout;
    3253             :                                 } else {
    3254      430726 :                                         lskipped = BATcount(r1) > 0;
    3255             :                                 }
    3256     2198695 :                         } else if (only_misses) {
    3257       41780 :                                 lskipped = BATcount(r1) > 0;
    3258             :                         } else {
    3259     2156915 :                                 if (lskipped) {
    3260             :                                         /* note, we only get here in an
    3261             :                                          * iteration *after* lskipped was
    3262             :                                          * first set to true, i.e. we did
    3263             :                                          * indeed skip values in l */
    3264     2035841 :                                         r1->tseqbase = oid_nil;
    3265             :                                 }
    3266     2156915 :                                 if (nr > 1) {
    3267        2369 :                                         r1->tkey = false;
    3268        2369 :                                         r1->tseqbase = oid_nil;
    3269             :                                 }
    3270             :                         }
    3271     2641254 :                         if (nr > 0 && BATcount(r1) > nr)
    3272     2184233 :                                 r1->trevsorted = false;
    3273             :                 }
    3274             :                 break;
    3275             :         }
    3276        9935 :         if (locked) {
    3277        9802 :                 locked = false;
    3278        9802 :                 MT_rwlock_rdunlock(&r->thashlock);
    3279             :         }
    3280        9937 :         bat_iterator_end(&li);
    3281        9937 :         bat_iterator_end(&ri);
    3282             : 
    3283        9937 :         if (hash_cand) {
    3284         132 :                 HEAPfree(&hsh->heaplink, true);
    3285         132 :                 HEAPfree(&hsh->heapbckt, true);
    3286         132 :                 GDKfree(hsh);
    3287             :         }
    3288             :         /* also set other bits of heap to correct value to indicate size */
    3289        9937 :         BATsetcount(r1, BATcount(r1));
    3290        9937 :         if (BATcount(r1) <= 1) {
    3291        3629 :                 r1->tsorted = true;
    3292        3629 :                 r1->trevsorted = true;
    3293        3629 :                 r1->tkey = true;
    3294        3629 :                 r1->tseqbase = 0;
    3295             :         }
    3296        9937 :         if (r2) {
    3297        8536 :                 BATsetcount(r2, BATcount(r2));
    3298        8536 :                 assert(BATcount(r1) == BATcount(r2));
    3299        8536 :                 if (BATcount(r2) <= 1) {
    3300        3293 :                         r2->tsorted = true;
    3301        3293 :                         r2->trevsorted = true;
    3302        3293 :                         r2->tkey = true;
    3303        3293 :                         r2->tseqbase = 0;
    3304             :                 }
    3305             :         }
    3306        9937 :         if (r3) {
    3307          50 :                 r3->tnonil = !r3->tnil;
    3308          50 :                 BATsetcount(r3, BATcount(r3));
    3309          50 :                 assert(BATcount(r1) == BATcount(r3));
    3310             :         }
    3311        9937 :         if (BATcount(r1) > 0) {
    3312        7411 :                 if (BATtdense(r1))
    3313        4102 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    3314        7411 :                 if (r2 && BATtdense(r2))
    3315        1092 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    3316             :         } else {
    3317        2526 :                 r1->tseqbase = 0;
    3318        2526 :                 if (r2) {
    3319        2201 :                         r2->tseqbase = 0;
    3320             :                 }
    3321             :         }
    3322        9937 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3323             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3324             :                   "nil_matches=%s,nil_on_miss=%s,semi=%s,only_misses=%s,"
    3325             :                   "not_in=%s,max_one=%s,min_one=%s;%s %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3326             :                   " (" LLFMT "usec)\n",
    3327             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3328             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    3329             :                   nil_matches ? "true" : "false",
    3330             :                   nil_on_miss ? "true" : "false",
    3331             :                   semi ? "true" : "false",
    3332             :                   only_misses ? "true" : "false",
    3333             :                   not_in ? "true" : "false",
    3334             :                   max_one ? "true" : "false",
    3335             :                   min_one ? "true" : "false",
    3336             :                   swapped ? " swapped" : "", reason,
    3337             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3338             :                   GDKusec() - t0);
    3339             : 
    3340        9937 :         BBPreclaim(b);
    3341             :         return GDK_SUCCEED;
    3342             : 
    3343           6 :   bailout:
    3344           6 :         bat_iterator_end(&li);
    3345           6 :         bat_iterator_end(&ri);
    3346           6 :         if (locked)
    3347           6 :                 MT_rwlock_rdunlock(&r->thashlock);
    3348           6 :         if (hash_cand && hsh) {
    3349           0 :                 HEAPfree(&hsh->heaplink, true);
    3350           0 :                 HEAPfree(&hsh->heapbckt, true);
    3351           0 :                 GDKfree(hsh);
    3352             :         }
    3353           6 :         BBPreclaim(r1);
    3354           6 :         BBPreclaim(r2);
    3355           6 :         BBPreclaim(b);
    3356             :         return GDK_FAIL;
    3357             : }
    3358             : 
    3359             : /* Count the number of unique values for the first half and the complete
    3360             :  * set (the sample s of b) and return the two values in *cnt1 and
    3361             :  * *cnt2. In case of error, both values are 0. */
    3362             : static gdk_return
    3363     1060086 : count_unique(BAT *b, BAT *s, BUN *cnt1, BUN *cnt2)
    3364             : {
    3365     1060086 :         struct canditer ci;
    3366     1060086 :         BUN half;
    3367     1060086 :         BUN cnt = 0;
    3368     1060086 :         const void *v;
    3369     1060086 :         const char *bvals;
    3370     1060086 :         const char *bvars;
    3371     1060086 :         oid bval;
    3372     1060086 :         oid i, o;
    3373     1060086 :         const char *nme;
    3374     1060086 :         BUN hb;
    3375     1060086 :         BATiter bi;
    3376     1060086 :         int (*cmp)(const void *, const void *);
    3377     1060086 :         const char *algomsg = "";
    3378     1060086 :         lng t0 = 0;
    3379             : 
    3380     1060086 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    3381     1060086 :         canditer_init(&ci, b, s);
    3382     1060084 :         half = ci.ncand / 2;
    3383             : 
    3384     1060084 :         MT_lock_set(&b->theaplock);
    3385     1060085 :         if (b->tkey || ci.ncand <= 1 || BATtdense(b)) {
    3386             :                 /* trivial: already unique */
    3387        1121 :                 MT_lock_unset(&b->theaplock);
    3388        1121 :                 *cnt1 = half;
    3389        1121 :                 *cnt2 = ci.ncand;
    3390        1121 :                 return GDK_SUCCEED;
    3391             :         }
    3392     1058964 :         MT_lock_unset(&b->theaplock);
    3393             : 
    3394     1058957 :         (void) BATordered(b);
    3395     1058965 :         (void) BATordered_rev(b);
    3396     1058967 :         bi = bat_iterator(b);
    3397     1058967 :         if ((bi.sorted && bi.revsorted) ||
    3398     1034759 :             (bi.type == TYPE_void && is_oid_nil(bi.tseq))) {
    3399             :                 /* trivial: all values are the same */
    3400       24208 :                 *cnt1 = *cnt2 = 1;
    3401       24208 :                 bat_iterator_end(&bi);
    3402       24208 :                 return GDK_SUCCEED;
    3403             :         }
    3404             : 
    3405     1034759 :         assert(bi.type != TYPE_void);
    3406             : 
    3407     1034759 :         bvals = bi.base;
    3408     1034759 :         if (bi.vh && bi.type)
    3409       35759 :                 bvars = bi.vh->base;
    3410             :         else
    3411             :                 bvars = NULL;
    3412     1034759 :         cmp = ATOMcompare(bi.type);
    3413             : 
    3414     1034759 :         *cnt1 = *cnt2 = 0;
    3415             : 
    3416     1034759 :         if (bi.sorted || bi.revsorted) {
    3417             :                 const void *prev = NULL;
    3418     4526729 :                 algomsg = "sorted";
    3419     4526729 :                 for (i = 0; i < ci.ncand; i++) {
    3420     4488000 :                         if (i == half)
    3421       38729 :                                 *cnt1 = cnt;
    3422     4488000 :                         o = canditer_next(&ci);
    3423     4487992 :                         v = VALUE(b, o - b->hseqbase);
    3424     4487992 :                         if (prev == NULL || (*cmp)(v, prev) != 0) {
    3425     2082835 :                                 cnt++;
    3426             :                         }
    3427     4487997 :                         prev = v;
    3428             :                 }
    3429       38729 :                 *cnt2 = cnt;
    3430      996027 :         } else if (ATOMbasetype(bi.type) == TYPE_bte) {
    3431       39424 :                 unsigned char val;
    3432       39424 :                 uint32_t seen[256 / 32];
    3433             : 
    3434       39424 :                 algomsg = "byte-sized atoms";
    3435       39424 :                 assert(bvars == NULL);
    3436       39424 :                 memset(seen, 0, sizeof(seen));
    3437     2836755 :                 for (i = 0; i < ci.ncand; i++) {
    3438     2797333 :                         if (i == ci.ncand/ 2) {
    3439             :                                 cnt = 0;
    3440      354546 :                                 for (int j = 0; j < 256 / 32; j++)
    3441      315120 :                                         cnt += candmask_pop(seen[j]);
    3442       39426 :                                 *cnt1 = cnt;
    3443             :                         }
    3444     2797333 :                         o = canditer_next(&ci);
    3445     2797331 :                         val = ((const unsigned char *) bvals)[o - b->hseqbase];
    3446     2797331 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3447      103263 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3448             :                         }
    3449             :                 }
    3450             :                 cnt = 0;
    3451      354702 :                 for (int j = 0; j < 256 / 32; j++)
    3452      315280 :                         cnt += candmask_pop(seen[j]);
    3453       39422 :                 *cnt2 = cnt;
    3454      956603 :         } else if (ATOMbasetype(bi.type) == TYPE_sht) {
    3455       16299 :                 unsigned short val;
    3456       16299 :                 uint32_t *seen = NULL;
    3457             : 
    3458       16299 :                 algomsg = "short-sized atoms";
    3459       16299 :                 assert(bvars == NULL);
    3460       16299 :                 seen = GDKzalloc((65536 / 32) * sizeof(seen[0]));
    3461       16299 :                 if (seen == NULL) {
    3462           0 :                         bat_iterator_end(&bi);
    3463           0 :                         return GDK_FAIL;
    3464             :                 }
    3465     4369081 :                 for (i = 0; i < ci.ncand; i++) {
    3466     4352782 :                         if (i == half) {
    3467             :                                 cnt = 0;
    3468    33357739 :                                 for (int j = 0; j < 65536 / 32; j++)
    3469    33341440 :                                         cnt += candmask_pop(seen[j]);
    3470       16299 :                                 *cnt1 = cnt;
    3471             :                         }
    3472     4352782 :                         o = canditer_next(&ci);
    3473     4352782 :                         val = ((const unsigned short *) bvals)[o - b->hseqbase];
    3474     4352782 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3475       75767 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3476             :                         }
    3477             :                 }
    3478             :                 cnt = 0;
    3479    33355691 :                 for (int j = 0; j < 65536 / 32; j++)
    3480    33339392 :                         cnt += candmask_pop(seen[j]);
    3481       16299 :                 *cnt2 = cnt;
    3482       16299 :                 GDKfree(seen);
    3483       16299 :                 seen = NULL;
    3484             :         } else {
    3485      940304 :                 BUN prb;
    3486      940304 :                 BUN mask;
    3487      940304 :                 Hash hs = {
    3488             :                         .heapbckt.parentid = b->batCacheid,
    3489      940304 :                         .heaplink.parentid = b->batCacheid,
    3490             :                 };
    3491             : 
    3492      940304 :                 GDKclrerr();    /* not interested in BAThash errors */
    3493      940304 :                 algomsg = "new partial hash";
    3494      940304 :                 nme = BBP_physical(b->batCacheid);
    3495      940304 :                 mask = HASHmask(ci.ncand);
    3496      940304 :                 if (mask < ((BUN) 1 << 16))
    3497             :                         mask = (BUN) 1 << 16;
    3498      940304 :                 if ((hs.heaplink.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
    3499      940304 :                     (hs.heapbckt.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
    3500      940304 :                     snprintf(hs.heaplink.filename, sizeof(hs.heaplink.filename), "%s.thshjnl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs.heaplink.filename) ||
    3501     1880608 :                     snprintf(hs.heapbckt.filename, sizeof(hs.heapbckt.filename), "%s.thshjnb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs.heapbckt.filename) ||
    3502      940304 :                     HASHnew(&hs, bi.type, ci.ncand, mask, BUN_NONE, false) != GDK_SUCCEED) {
    3503           0 :                         GDKerror("cannot allocate hash table\n");
    3504           0 :                         HEAPfree(&hs.heaplink, true);
    3505           0 :                         HEAPfree(&hs.heapbckt, true);
    3506           0 :                         bat_iterator_end(&bi);
    3507           0 :                         return GDK_FAIL;
    3508             :                 }
    3509   595541149 :                 for (i = 0; i < ci.ncand; i++) {
    3510   594600846 :                         if (i == half)
    3511      940303 :                                 *cnt1 = cnt;
    3512   594600846 :                         o = canditer_next(&ci);
    3513   594602033 :                         v = VALUE(b, o - b->hseqbase);
    3514   594601392 :                         prb = HASHprobe(&hs, v);
    3515   594602456 :                         for (hb = HASHget(&hs, prb);
    3516   594609857 :                              hb != BUN_NONE;
    3517        7401 :                              hb = HASHgetlink(&hs, hb)) {
    3518   382607636 :                                 BUN p = canditer_idx(&ci, hb) - b->hseqbase;
    3519   382621103 :                                 if (cmp(v, BUNtail(bi, p)) == 0)
    3520             :                                         break;
    3521             :                         }
    3522   594601348 :                         if (hb == BUN_NONE) {
    3523   212001594 :                                 cnt++;
    3524             :                                 /* enter into hash table */
    3525   212001594 :                                 HASHputlink(&hs, i, HASHget(&hs, prb));
    3526   212001259 :                                 HASHput(&hs, prb, i);
    3527             :                         }
    3528             :                 }
    3529      940303 :                 *cnt2 = cnt;
    3530      940303 :                 HEAPfree(&hs.heaplink, true);
    3531      940304 :                 HEAPfree(&hs.heapbckt, true);
    3532             :         }
    3533     1034754 :         bat_iterator_end(&bi);
    3534             : 
    3535     1034758 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
    3536             :                   " -> " BUNFMT " " BUNFMT " (%s -- " LLFMT "usec)\n",
    3537             :                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    3538             :                   *cnt1, *cnt2, algomsg, GDKusec() - t0);
    3539             : 
    3540             :         return GDK_SUCCEED;
    3541             : }
    3542             : 
    3543             : static double
    3544     1233652 : guess_uniques(BAT *b, struct canditer *ci)
    3545             : {
    3546     1233652 :         BUN cnt1, cnt2;
    3547     1233652 :         BAT *s1;
    3548             : 
    3549     1233652 :         MT_lock_set(&b->theaplock);
    3550     1233634 :         bool key = b->tkey;
    3551     1233634 :         double unique_est = b->tunique_est;
    3552     1233634 :         BUN batcount = BATcount(b);
    3553     1233634 :         MT_lock_unset(&b->theaplock);
    3554     1233647 :         if (key)
    3555      173420 :                 return (double) ci->ncand;
    3556             : 
    3557     1060227 :         if (ci->s == NULL ||
    3558           0 :             (ci->tpe == cand_dense && ci->ncand == batcount)) {
    3559     1060227 :                 if (unique_est != 0) {
    3560         145 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT " use cached value\n",
    3561             :                                   ALGOBATPAR(b));
    3562         145 :                         return unique_est;
    3563             :                 }
    3564     1060082 :                 s1 = BATsample(b, 1000);
    3565             :         } else {
    3566           0 :                 BAT *s2 = BATsample(ci->s, 1000);
    3567           0 :                 if (s2 == NULL)
    3568             :                         return -1;
    3569           0 :                 s1 = BATproject(s2, ci->s);
    3570           0 :                 BBPreclaim(s2);
    3571             :         }
    3572     1060086 :         if (s1 == NULL)
    3573             :                 return -1;
    3574     1060086 :         BUN n2 = BATcount(s1);
    3575     1060086 :         BUN n1 = n2 / 2;
    3576     1060086 :         if (count_unique(b, s1, &cnt1, &cnt2) != GDK_SUCCEED) {
    3577           0 :                 BBPreclaim(s1);
    3578           0 :                 return -1;
    3579             :         }
    3580     1060088 :         BBPreclaim(s1);
    3581             : 
    3582     1060084 :         double A = (double) (cnt2 - cnt1) / (n2 - n1);
    3583     1060084 :         double B = cnt1 - n1 * A;
    3584             : 
    3585     1060084 :         B += A * ci->ncand;
    3586     1060084 :         MT_lock_set(&b->theaplock);
    3587     1060085 :         if (ci->s == NULL ||
    3588           0 :             (ci->tpe == cand_dense && ci->ncand == BATcount(b) && ci->ncand == batcount)) {
    3589     1060085 :                 if (b->tunique_est == 0)
    3590     1059140 :                         b->tunique_est = B;
    3591             :         }
    3592     1060085 :         MT_lock_unset(&b->theaplock);
    3593     1060085 :         return B;
    3594             : }
    3595             : 
    3596             : BUN
    3597      202359 : BATguess_uniques(BAT *b, struct canditer *ci)
    3598             : {
    3599      202359 :         struct canditer lci;
    3600      202359 :         if (ci == NULL) {
    3601      202359 :                 canditer_init(&lci, b, NULL);
    3602      202359 :                 ci = &lci;
    3603             :         }
    3604      202359 :         return (BUN) guess_uniques(b, ci);
    3605             : }
    3606             : 
    3607             : /* estimate the cost of doing a hashjoin with a hash on r; return value
    3608             :  * is the estimated cost, the last three arguments receive some extra
    3609             :  * information */
    3610             : double
    3611     1664194 : joincost(BAT *r, BUN lcount, struct canditer *rci,
    3612             :          bool *hash, bool *phash, bool *cand)
    3613             : {
    3614     1664194 :         bool rhash;
    3615     1664194 :         bool prhash = false;
    3616     1664194 :         bool rcand = false;
    3617     1664194 :         double rcost = 1;
    3618     1664194 :         bat parent;
    3619     1664194 :         BAT *b;
    3620     1664194 :         BUN nheads;
    3621     1664194 :         BUN cnt;
    3622             : 
    3623     1664194 :         (void) BATcheckhash(r);
    3624     1664182 :         MT_rwlock_rdlock(&r->thashlock);
    3625     1664166 :         rhash = r->thash != NULL;
    3626     1664166 :         nheads = r->thash ? r->thash->nheads : 0;
    3627     1664166 :         cnt = BATcount(r);
    3628     1664166 :         MT_rwlock_rdunlock(&r->thashlock);
    3629             : 
    3630     1664178 :         if ((rci->tpe == cand_materialized || rci->tpe == cand_except) &&
    3631      294859 :             rci->nvals > 0) {
    3632             :                 /* if we need to do binary search on candidate list,
    3633             :                  * take that into account; note checking the other
    3634             :                  * candidate types is essentially free */
    3635      294859 :                 rcost += log2((double) rci->nvals);
    3636             :         }
    3637     1664178 :         rcost *= lcount;
    3638     1664178 :         if (BATtdense(r)) {
    3639             :                 /* no need for a hash, and lookup is free */
    3640             :                 rhash = false;  /* don't use it, even if it's there */
    3641             :         } else {
    3642     1663644 :                 if (rhash) {
    3643             :                         /* average chain length */
    3644        6496 :                         rcost *= (double) cnt / nheads;
    3645     1657148 :                 } else if ((parent = VIEWtparent(r)) != 0 &&
    3646     1537270 :                            (b = BATdescriptor(parent)) != NULL) {
    3647     1537258 :                         if (BATcheckhash(b)) {
    3648       53383 :                                 MT_rwlock_rdlock(&b->thashlock);
    3649       53385 :                                 rhash = prhash = b->thash != NULL;
    3650       53385 :                                 if (rhash) {
    3651             :                                         /* average chain length */
    3652       53385 :                                         rcost *= (double) BATcount(b) / b->thash->nheads;
    3653             :                                 }
    3654       53385 :                                 MT_rwlock_rdunlock(&b->thashlock);
    3655             :                         }
    3656     1537261 :                         BBPunfix(b->batCacheid);
    3657             :                 }
    3658     1663622 :                 if (!rhash) {
    3659     1603745 :                         MT_lock_set(&r->theaplock);
    3660     1603705 :                         double unique_est = r->tunique_est;
    3661     1603705 :                         MT_lock_unset(&r->theaplock);
    3662     1603727 :                         if (unique_est == 0) {
    3663     1031221 :                                 unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)});
    3664     1031231 :                                 if (unique_est < 0)
    3665           0 :                                         return -1;
    3666             :                         }
    3667             :                         /* we have an estimate of the number of unique
    3668             :                          * values, assume some collisions */
    3669     1603737 :                         rcost *= 1.1 * ((double) cnt / unique_est);
    3670             : #ifdef PERSISTENTHASH
    3671             :                         /* only count the cost of creating the hash for
    3672             :                          * non-persistent bats */
    3673     1603737 :                         MT_lock_set(&r->theaplock);
    3674     1603749 :                         if (r->batRole != PERSISTENT /* || r->theap->dirty */ || GDKinmemory(r->theap->farmid))
    3675     1575423 :                                 rcost += cnt * 2.0;
    3676     1603749 :                         MT_lock_unset(&r->theaplock);
    3677             : #else
    3678             :                         rcost += cnt * 2.0;
    3679             : #endif
    3680             :                 }
    3681             :         }
    3682     1664153 :         if (cand) {
    3683       21254 :                 if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
    3684             :                         /* instead of using the hash on r (cost in
    3685             :                          * rcost), we can build a new hash on r taking
    3686             :                          * the candidate list into account; don't do
    3687             :                          * this for masked candidate since the searching
    3688             :                          * of the candidate list (canditer_idx) will
    3689             :                          * kill us */
    3690        1876 :                         double rccost;
    3691        1876 :                         if (rhash && !prhash) {
    3692         673 :                                 rccost = (double) cnt / nheads;
    3693             :                         } else {
    3694        1203 :                                 MT_lock_set(&r->theaplock);
    3695        1203 :                                 double unique_est = r->tunique_est;
    3696        1203 :                                 MT_lock_unset(&r->theaplock);
    3697        1203 :                                 if (unique_est == 0) {
    3698          62 :                                         unique_est = guess_uniques(r, rci);
    3699          62 :                                         if (unique_est < 0)
    3700             :                                                 return -1;
    3701             :                                 }
    3702             :                                 /* we have an estimate of the number of unique
    3703             :                                  * values, assume some chains */
    3704        1203 :                                 rccost = 1.1 * ((double) cnt / unique_est);
    3705             :                         }
    3706        1876 :                         rccost *= lcount;
    3707        1876 :                         rccost += rci->ncand * 2.0; /* cost of building the hash */
    3708        1876 :                         if (rccost < rcost) {
    3709       21254 :                                 rcost = rccost;
    3710       21254 :                                 rcand = true;
    3711             :                         }
    3712             :                 }
    3713       21254 :                 *cand = rcand;
    3714             :         }
    3715     1664153 :         *hash = rhash;
    3716     1664153 :         *phash = prhash;
    3717     1664153 :         return rcost;
    3718             : }
    3719             : 
    3720             : #define MASK_EQ         1
    3721             : #define MASK_LT         2
    3722             : #define MASK_GT         4
    3723             : #define MASK_LE         (MASK_EQ | MASK_LT)
    3724             : #define MASK_GE         (MASK_EQ | MASK_GT)
    3725             : #define MASK_NE         (MASK_LT | MASK_GT)
    3726             : 
    3727             : static gdk_return
    3728       15712 : thetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int opcode, BUN estimate, const char *reason, lng t0)
    3729             : {
    3730       15712 :         struct canditer lci, rci;
    3731       15712 :         const char *lvals, *rvals;
    3732       15712 :         const char *lvars, *rvars;
    3733       15712 :         const void *nil = ATOMnilptr(l->ttype);
    3734       15712 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    3735       15712 :         const void *vl, *vr;
    3736       15712 :         oid lastr = 0;          /* last value inserted into r2 */
    3737       15712 :         BUN nr;
    3738       15712 :         oid lo, ro;
    3739       15712 :         int c;
    3740       15712 :         bool lskipped = false;  /* whether we skipped values in l */
    3741       15712 :         lng loff = 0, roff = 0;
    3742       15712 :         oid lval = oid_nil, rval = oid_nil;
    3743             : 
    3744       15712 :         lng timeoffset = 0;
    3745       15712 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    3746       15711 :         if (qry_ctx != NULL) {
    3747       15711 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    3748             :         }
    3749             : 
    3750       47135 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    3751       15711 :         assert((opcode & (MASK_EQ | MASK_LT | MASK_GT)) != 0);
    3752             : 
    3753       15711 :         BATiter li = bat_iterator(l);
    3754       15713 :         BATiter ri = bat_iterator(r);
    3755             : 
    3756       15713 :         canditer_init(&lci, l, sl);
    3757       15709 :         canditer_init(&rci, r, sr);
    3758             : 
    3759       15708 :         lvals = BATtvoid(l) ? NULL : (const char *) li.base;
    3760       15708 :         rvals = BATtvoid(r) ? NULL : (const char *) ri.base;
    3761       15708 :         if (li.vh && li.type) {
    3762           8 :                 assert(ri.vh && ri.type);
    3763           8 :                 lvars = li.vh->base;
    3764           8 :                 rvars = ri.vh->base;
    3765             :         } else {
    3766       15700 :                 assert(ri.vh == NULL);
    3767             :                 lvars = rvars = NULL;
    3768             :         }
    3769             : 
    3770       15708 :         if (BATtvoid(l)) {
    3771           0 :                 if (!BATtdensebi(&li)) {
    3772             :                         /* trivial: nils don't match anything */
    3773           0 :                         bat_iterator_end(&li);
    3774           0 :                         bat_iterator_end(&ri);
    3775           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    3776             :                                        0, false, false, __func__, t0);
    3777             :                 }
    3778           0 :                 loff = (lng) l->tseqbase - (lng) l->hseqbase;
    3779             :         }
    3780       15708 :         if (BATtvoid(r)) {
    3781           1 :                 if (!BATtdensebi(&ri)) {
    3782             :                         /* trivial: nils don't match anything */
    3783           0 :                         bat_iterator_end(&li);
    3784           0 :                         bat_iterator_end(&ri);
    3785           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    3786             :                                        0, false, false, __func__, t0);
    3787             :                 }
    3788           1 :                 roff = (lng) r->tseqbase - (lng) r->hseqbase;
    3789             :         }
    3790             : 
    3791       15708 :         BUN maxsize = joininitresults(r1p, r2p, NULL, lci.ncand, rci.ncand, false, false,
    3792             :                                       false, false, false, false, estimate);
    3793       15712 :         if (maxsize == BUN_NONE) {
    3794           0 :                 bat_iterator_end(&li);
    3795           0 :                 bat_iterator_end(&ri);
    3796           0 :                 return GDK_FAIL;
    3797             :         }
    3798       15712 :         BAT *r1 = *r1p;
    3799       15712 :         BAT *r2 = r2p ? *r2p : NULL;
    3800             : 
    3801       15712 :         r1->tkey = true;
    3802       15712 :         r1->tsorted = true;
    3803       15712 :         r1->trevsorted = true;
    3804       15712 :         if (r2) {
    3805        4289 :                 r2->tkey = true;
    3806        4289 :                 r2->tsorted = true;
    3807        4289 :                 r2->trevsorted = true;
    3808             :         }
    3809             : 
    3810             :         /* nested loop implementation for theta join */
    3811             :         vl = &lval;
    3812             :         vr = &rval;
    3813      376415 :         for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
    3814      360726 :                 lo = canditer_next(&lci);
    3815      359297 :                 if (lvals)
    3816      359297 :                         vl = VALUE(l, lo - l->hseqbase);
    3817             :                 else
    3818           0 :                         lval = (oid) ((lng) lo + loff);
    3819      359297 :                 nr = 0;
    3820      359297 :                 if (cmp(vl, nil) != 0) {
    3821      355200 :                         canditer_reset(&rci);
    3822     3985192 :                         TIMEOUT_LOOP(rci.ncand, timeoffset) {
    3823     3276114 :                                 ro = canditer_next(&rci);
    3824     3239043 :                                 if (rvals)
    3825     3239039 :                                         vr = VALUE(r, ro - r->hseqbase);
    3826             :                                 else
    3827           4 :                                         rval = (oid) ((lng) ro + roff);
    3828     3239042 :                                 if (cmp(vr, nil) == 0)
    3829       60240 :                                         continue;
    3830     3183025 :                                 c = cmp(vl, vr);
    3831     3195733 :                                 if (!((opcode & MASK_LT && c < 0) ||
    3832     2987204 :                                       (opcode & MASK_GT && c > 0) ||
    3833     1696103 :                                       (opcode & MASK_EQ && c == 0)))
    3834     1696081 :                                         continue;
    3835     1499652 :                                 if (maybeextend(r1, r2, NULL, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    3836           0 :                                         goto bailout;
    3837     1518803 :                                 if (BATcount(r1) > 0) {
    3838     1509248 :                                         if (r2 && lastr + 1 != ro)
    3839       43283 :                                                 r2->tseqbase = oid_nil;
    3840     1509248 :                                         if (nr == 0) {
    3841      115374 :                                                 r1->trevsorted = false;
    3842      115374 :                                                 if (r2 == NULL) {
    3843             :                                                         /* nothing */
    3844       33085 :                                                 } else if (lastr > ro) {
    3845       31056 :                                                         r2->tsorted = false;
    3846       31056 :                                                         r2->tkey = false;
    3847        2029 :                                                 } else if (lastr < ro) {
    3848           0 :                                                         r2->trevsorted = false;
    3849             :                                                 } else {
    3850        2029 :                                                         r2->tkey = false;
    3851             :                                                 }
    3852             :                                         }
    3853             :                                 }
    3854     1518803 :                                 APPEND(r1, lo);
    3855     1518803 :                                 if (r2) {
    3856     1207990 :                                         APPEND(r2, ro);
    3857             :                                 }
    3858     1518803 :                                 lastr = ro;
    3859     1518803 :                                 nr++;
    3860             :                         }
    3861      354831 :                         TIMEOUT_CHECK(timeoffset,
    3862             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    3863             :                 }
    3864      360703 :                 if (nr > 1) {
    3865       85274 :                         r1->tkey = false;
    3866       85274 :                         r1->tseqbase = oid_nil;
    3867       85274 :                         if (r2) {
    3868       34615 :                                 r2->trevsorted = false;
    3869             :                         }
    3870      275429 :                 } else if (nr == 0) {
    3871      234769 :                         lskipped = BATcount(r1) > 0;
    3872       40660 :                 } else if (lskipped) {
    3873       33346 :                         r1->tseqbase = oid_nil;
    3874             :                 }
    3875             :         }
    3876             :         /* also set other bits of heap to correct value to indicate size */
    3877       15689 :         BATsetcount(r1, BATcount(r1));
    3878       15704 :         if (r2) {
    3879        4282 :                 BATsetcount(r2, BATcount(r2));
    3880        4282 :                 assert(BATcount(r1) == BATcount(r2));
    3881             :         }
    3882       15704 :         if (BATcount(r1) > 0) {
    3883       10841 :                 if (BATtdense(r1))
    3884         161 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    3885       10841 :                 if (r2 && BATtdense(r2))
    3886         381 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    3887             :         } else {
    3888        4863 :                 r1->tseqbase = 0;
    3889        4863 :                 if (r2) {
    3890         624 :                         r2->tseqbase = 0;
    3891             :                 }
    3892             :         }
    3893       15704 :         bat_iterator_end(&li);
    3894       15713 :         bat_iterator_end(&ri);
    3895       15712 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3896             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3897             :                   "opcode=%s%s%s; %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3898             :                   " (" LLFMT "usec)\n",
    3899             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3900             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3901             :                   opcode & MASK_LT ? "<" : "",
    3902             :                   opcode & MASK_GT ? ">" : "",
    3903             :                   opcode & MASK_EQ ? "=" : "",
    3904             :                   reason,
    3905             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3906             :                   GDKusec() - t0);
    3907             :         return GDK_SUCCEED;
    3908             : 
    3909           0 :   bailout:
    3910           0 :         bat_iterator_end(&li);
    3911           0 :         bat_iterator_end(&ri);
    3912           0 :         BBPreclaim(r1);
    3913           0 :         BBPreclaim(r2);
    3914             :         return GDK_FAIL;
    3915             : }
    3916             : 
    3917             : /* small ordered right, dense left, oid's only, do fetches */
    3918             : static gdk_return
    3919           0 : fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    3920             :           struct canditer *restrict lci, struct canditer *restrict rci,
    3921             :           const char *reason, lng t0)
    3922             : {
    3923           0 :         oid lo = lci->seq - l->hseqbase + l->tseqbase, hi = lo + lci->ncand;
    3924           0 :         BUN b, e, p;
    3925           0 :         BAT *r1, *r2 = NULL;
    3926             : 
    3927           0 :         MT_thread_setalgorithm(__func__);
    3928           0 :         if (r->tsorted) {
    3929           0 :                 b = SORTfndfirst(r, &lo);
    3930           0 :                 e = SORTfndfirst(r, &hi);
    3931             :         } else {
    3932           0 :                 assert(r->trevsorted);
    3933           0 :                 b = SORTfndlast(r, &hi);
    3934           0 :                 e = SORTfndlast(r, &lo);
    3935             :         }
    3936           0 :         if (b < rci->seq - r->hseqbase)
    3937             :                 b = rci->seq - r->hseqbase;
    3938           0 :         if (e > rci->seq + rci->ncand - r->hseqbase)
    3939             :                 e = rci->seq + rci->ncand - r->hseqbase;
    3940           0 :         if (e == b) {
    3941           0 :                 return nomatch(r1p, r2p, NULL, l, r, lci,
    3942             :                                0, false, false, __func__, t0);
    3943             :         }
    3944           0 :         r1 = COLnew(0, TYPE_oid, e - b, TRANSIENT);
    3945           0 :         if (r1 == NULL)
    3946             :                 return GDK_FAIL;
    3947           0 :         if (r2p) {
    3948           0 :                 if ((r2 = BATdense(0, r->hseqbase + b, e - b)) == NULL) {
    3949           0 :                         BBPreclaim(r1);
    3950           0 :                         return GDK_FAIL;
    3951             :                 }
    3952           0 :                 *r2p = r2;
    3953             :         }
    3954           0 :         *r1p = r1;
    3955           0 :         oid *op = (oid *) Tloc(r1, 0);
    3956           0 :         BATiter ri = bat_iterator(r);
    3957           0 :         const oid *rp = (const oid *) ri.base;
    3958           0 :         for (p = b; p < e; p++) {
    3959           0 :                 *op++ = rp[p] + l->hseqbase - l->tseqbase;
    3960             :         }
    3961           0 :         BATsetcount(r1, e - b);
    3962           0 :         r1->tkey = ri.key;
    3963           0 :         r1->tsorted = ri.sorted || e - b <= 1;
    3964           0 :         r1->trevsorted = ri.revsorted || e - b <= 1;
    3965           0 :         r1->tseqbase = e == b ? 0 : e - b == 1 ? *(const oid *)Tloc(r1, 0) : oid_nil;
    3966           0 :         bat_iterator_end(&ri);
    3967           0 :         TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    3968             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    3969             :                   "sr=" ALGOOPTBATFMT ") %s "
    3970             :                   "-> (" ALGOBATFMT "," ALGOOPTBATFMT ") " LLFMT "us\n",
    3971             :                   __func__,
    3972             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3973             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3974             :                   reason,
    3975             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3976             :                   GDKusec() - t0);
    3977             : 
    3978             :         return GDK_SUCCEED;
    3979             : }
    3980             : 
    3981             : static BAT *
    3982        4880 : bitmaskjoin(BAT *l, BAT *r,
    3983             :             struct canditer *restrict lci, struct canditer *restrict rci,
    3984             :             bool only_misses,
    3985             :             const char *reason, lng t0)
    3986             : {
    3987        4880 :         BAT *r1;
    3988        4880 :         size_t nmsk = (lci->ncand + 31) / 32;
    3989        4880 :         uint32_t *mask = GDKzalloc(nmsk * sizeof(uint32_t));
    3990        4880 :         BUN cnt = 0;
    3991             : 
    3992        4880 :         MT_thread_setalgorithm(__func__);
    3993        4880 :         if (mask == NULL)
    3994             :                 return NULL;
    3995             : 
    3996    22954153 :         for (BUN n = 0; n < rci->ncand; n++) {
    3997    22949273 :                 oid o = canditer_next(rci) - r->hseqbase;
    3998    22888812 :                 o = BUNtoid(r, o);
    3999    22949273 :                 if (is_oid_nil(o))
    4000           0 :                         continue;
    4001    22949273 :                 o += l->hseqbase;
    4002    22949273 :                 if (o < lci->seq + l->tseqbase)
    4003           2 :                         continue;
    4004    22949271 :                 o -= lci->seq + l->tseqbase;
    4005    22949271 :                 if (o >= lci->ncand)
    4006           5 :                         continue;
    4007    22949266 :                 if ((mask[o >> 5] & (1U << (o & 0x1F))) == 0) {
    4008    16384480 :                         cnt++;
    4009    16384480 :                         mask[o >> 5] |= 1U << (o & 0x1F);
    4010             :                 }
    4011             :         }
    4012        4880 :         if (only_misses)
    4013        3646 :                 cnt = lci->ncand - cnt;
    4014        4880 :         if (cnt == 0 || cnt == lci->ncand) {
    4015        1099 :                 GDKfree(mask);
    4016        1099 :                 if (cnt == 0)
    4017         243 :                         return BATdense(0, 0, 0);
    4018         856 :                 return BATdense(0, lci->seq, lci->ncand);
    4019             :         }
    4020        3781 :         r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
    4021        3781 :         if (r1 != NULL) {
    4022        3781 :                 oid *r1p = Tloc(r1, 0);
    4023             : 
    4024        3781 :                 r1->tkey = true;
    4025        3781 :                 r1->tnil = false;
    4026        3781 :                 r1->tnonil = true;
    4027        3781 :                 r1->tsorted = true;
    4028        3781 :                 r1->trevsorted = cnt <= 1;
    4029        3781 :                 if (only_misses) {
    4030             :                         /* set the bits for unused values at the
    4031             :                          * end so that we don't need special
    4032             :                          * code in the loop */
    4033        3404 :                         if (lci->ncand & 0x1F)
    4034        3367 :                                 mask[nmsk - 1] |= ~0U << (lci->ncand & 0x1F);
    4035     1877126 :                         for (size_t i = 0; i < nmsk; i++)
    4036     1873722 :                                 if (mask[i] != ~0U)
    4037    59298525 :                                         for (uint32_t j = 0; j < 32; j++)
    4038    57501600 :                                                 if ((mask[i] & (1U << j)) == 0)
    4039    50996287 :                                                         *r1p++ = i * 32 + j + lci->seq;
    4040             :                 } else {
    4041      306596 :                         for (size_t i = 0; i < nmsk; i++)
    4042      306219 :                                 if (mask[i] != 0U)
    4043     7720680 :                                         for (uint32_t j = 0; j < 32; j++)
    4044     7486720 :                                                 if ((mask[i] & (1U << j)) != 0)
    4045     6733065 :                                                         *r1p++ = i * 32 + j + lci->seq;
    4046             :                 }
    4047        3781 :                 BATsetcount(r1, cnt);
    4048        3781 :                 assert((BUN) (r1p - (oid*) Tloc(r1, 0)) == BATcount(r1));
    4049             : 
    4050        3781 :                 TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    4051             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4052             :                           "sr=" ALGOOPTBATFMT ",only_misses=%s; %s "
    4053             :                           "-> " ALGOBATFMT " (" LLFMT "usec)\n",
    4054             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4055             :                           ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    4056             :                           only_misses ? "true" : "false",
    4057             :                           reason,
    4058             :                           ALGOBATPAR(r1),
    4059             :                           GDKusec() - t0);
    4060             :         }
    4061        3781 :         GDKfree(mask);
    4062        3781 :         return r1;
    4063             : }
    4064             : 
    4065             : /* Make the implementation choices for various left joins.
    4066             :  * If r3p is set, this is a "mark join" and *r3p will be a third return value containing a bat with type msk with a bit set for each
    4067             :  * nil_matches: nil is an ordinary value that can match;
    4068             :  * nil_on_miss: outer join: fill in a nil value in case of no match;
    4069             :  * semi: semi join: return one of potentially more than one matches;
    4070             :  * only_misses: difference: list rows without match on the right;
    4071             :  * not_in: for implementing NOT IN: if nil on right then there are no matches;
    4072             :  * max_one: error if there is more than one match;
    4073             :  * min_one: error if there are no matches. */
    4074             : static gdk_return
    4075       76732 : leftjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4076             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    4077             :          bool not_in, bool max_one, bool min_one, BUN estimate,
    4078             :          const char *func, lng t0)
    4079             : {
    4080       76732 :         struct canditer lci, rci;
    4081       76732 :         bool rhash, prhash, rcand;
    4082       76732 :         bat parent;
    4083       76732 :         double rcost = 0;
    4084       76732 :         gdk_return rc;
    4085       76732 :         BAT *lp = NULL;
    4086       76732 :         BAT *rp = NULL;
    4087             : 
    4088       76732 :         MT_thread_setalgorithm(__func__);
    4089             :         /* only_misses implies left output only */
    4090       76726 :         assert(!only_misses || r2p == NULL);
    4091             :         /* if nil_on_miss is set, we really need a right output */
    4092       76726 :         assert(!nil_on_miss || r2p != NULL || r3p != NULL);
    4093             :         /* if not_in is set, then so is only_misses */
    4094       76726 :         assert(!not_in || only_misses);
    4095             :         /* if r3p is set, then so is nil_on_miss */
    4096       76726 :         assert(r3p == NULL || nil_on_miss);
    4097       76726 :         *r1p = NULL;
    4098       76726 :         if (r2p)
    4099         935 :                 *r2p = NULL;
    4100       76726 :         if (r3p)
    4101        3305 :                 *r3p = NULL;
    4102             : 
    4103       76726 :         canditer_init(&lci, l, sl);
    4104       76721 :         canditer_init(&rci, r, sr);
    4105             : 
    4106       76719 :         if ((parent = VIEWtparent(l)) != 0) {
    4107        3420 :                 lp = BATdescriptor(parent);
    4108        3420 :                 if (lp == NULL)
    4109             :                         return GDK_FAIL;
    4110        3420 :                 if (l->hseqbase == lp->hseqbase &&
    4111        4441 :                     BATcount(l) == BATcount(lp) &&
    4112        2900 :                     ATOMtype(l->ttype) == ATOMtype(lp->ttype)) {
    4113             :                         l = lp;
    4114             :                 } else {
    4115        1970 :                         BBPunfix(lp->batCacheid);
    4116        1970 :                         lp = NULL;
    4117             :                 }
    4118             :         }
    4119       76719 :         if ((parent = VIEWtparent(r)) != 0) {
    4120        3940 :                 rp = BATdescriptor(parent);
    4121        3940 :                 if (rp == NULL) {
    4122           0 :                         BBPreclaim(lp);
    4123           0 :                         return GDK_FAIL;
    4124             :                 }
    4125        3940 :                 if (r->hseqbase == rp->hseqbase &&
    4126        6338 :                     BATcount(r) == BATcount(rp) &&
    4127        4796 :                     ATOMtype(r->ttype) == ATOMtype(rp->ttype)) {
    4128             :                         r = rp;
    4129             :                 } else {
    4130        1544 :                         BBPunfix(rp->batCacheid);
    4131        1544 :                         rp = NULL;
    4132             :                 }
    4133             :         }
    4134             : 
    4135       76719 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    4136           5 :                 l = BATunmask(l);
    4137           5 :                 BBPreclaim(lp);
    4138           5 :                 if (l == NULL) {
    4139           0 :                         BBPreclaim(rp);
    4140           0 :                         return GDK_FAIL;
    4141             :                 }
    4142             :                 lp = l;
    4143             :         }
    4144       76719 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    4145           2 :                 r = BATunmask(r);
    4146           2 :                 BBPreclaim(rp);
    4147           2 :                 if (r == NULL) {
    4148           0 :                         BBPreclaim(lp);
    4149           0 :                         return GDK_FAIL;
    4150             :                 }
    4151             :                 rp = r;
    4152             :         }
    4153             : 
    4154       76719 :         if (joinparamcheck(l, r, NULL, sl, sr, func) != GDK_SUCCEED) {
    4155           0 :                 rc = GDK_FAIL;
    4156           0 :                 goto doreturn;
    4157             :         }
    4158             : 
    4159       76730 :         if (lci.ncand == 0 || rci.ncand == 0) {
    4160       48455 :                 TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    4161             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4162             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
    4163             :                           "nil_on_miss=%d,semi=%d,only_misses=%d,"
    4164             :                           "not_in=%d,max_one=%d,min_one=%d)\n",
    4165             :                           func,
    4166             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4167             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4168             :                           nil_matches, nil_on_miss, semi, only_misses,
    4169             :                           not_in, max_one, min_one);
    4170       48455 :                 rc = nomatch(r1p, r2p, r3p, l, r, &lci,
    4171             :                              0, nil_on_miss, only_misses, func, t0);
    4172       48454 :                 goto doreturn;
    4173             :         }
    4174             : 
    4175       28275 :         if (!only_misses && !not_in &&
    4176        3210 :             (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) ||
    4177        3162 :              (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) {
    4178             :                 /* single value to join, use select */
    4179        1105 :                 rc = selectjoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4180             :                                 nil_matches, nil_on_miss, semi, max_one, min_one,
    4181             :                                 t0, false, func);
    4182        1105 :                 goto doreturn;
    4183       27170 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    4184             :                 /* use special implementation for dense right-hand side */
    4185       16451 :                 rc = mergejoin_void(r1p, r2p, r3p, l, r, &lci, &rci,
    4186             :                                     nil_on_miss, only_misses, t0, false,
    4187             :                                     func);
    4188       16451 :                 goto doreturn;
    4189       10719 :         } else if (BATtdense(l)
    4190        4937 :                    && lci.tpe == cand_dense
    4191        4927 :                    && rci.tpe == cand_dense
    4192             :                    && !semi
    4193        4927 :                    && !max_one
    4194             :                    && !min_one
    4195        3657 :                    && !nil_matches
    4196             :                    && !only_misses
    4197        3657 :                    && !not_in
    4198             :                    /* && (rci.ncand * 1024) < lci.ncand */
    4199           0 :                    && (BATordered(r) || BATordered_rev(r))) {
    4200           0 :                 assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
    4201           0 :                 rc = fetchjoin(r1p, r2p, l, r, sl, sr, &lci, &rci, func, t0);
    4202           0 :                 goto doreturn;
    4203       10719 :         } else if (BATtdense(l)
    4204        4937 :                    && lci.tpe == cand_dense
    4205        4927 :                    && r2p == NULL
    4206        4892 :                    && (semi || only_misses)
    4207             :                    && !nil_on_miss
    4208        4892 :                    && !not_in
    4209             :                    && !max_one
    4210        4881 :                    && !min_one) {
    4211        4880 :                 *r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0);
    4212        4880 :                 rc = *r1p == NULL ? GDK_FAIL : GDK_SUCCEED;
    4213        4880 :                 goto doreturn;
    4214             :         } else {
    4215             :                 /* looking at r->tvheap, so we need a lock */
    4216        5839 :                 MT_lock_set(&r->theaplock);
    4217        5839 :                 BUN hsz = r->tvheap ? r->tvheap->size : 0;
    4218        5839 :                 MT_lock_unset(&r->theaplock);
    4219        5839 :                 if ((BATordered(r) || BATordered_rev(r))
    4220        4695 :                     && (BATordered(l)
    4221         403 :                         || BATordered_rev(l)
    4222         396 :                         || BATtdense(r)
    4223         396 :                         || lci.ncand < 1024
    4224         254 :                         || BATcount(r) * (r->twidth + hsz + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1))) {
    4225        4568 :                         rc = mergejoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4226             :                                        nil_matches, nil_on_miss, semi, only_misses,
    4227             :                                        not_in, max_one, min_one, estimate, t0, false, func);
    4228        4568 :                         goto doreturn;
    4229             :                 }
    4230             :         }
    4231        1271 :         rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
    4232        1271 :         if (rcost < 0) {
    4233           0 :                 rc = GDK_FAIL;
    4234           0 :                 goto doreturn;
    4235             :         }
    4236             : 
    4237        1271 :         if (!nil_on_miss && !only_misses && !not_in && !max_one && !min_one) {
    4238             :                 /* maybe do a hash join on the swapped operands; if we
    4239             :                  * do, we need to sort the output, so we take that into
    4240             :                  * account as well */
    4241         912 :                 bool lhash, plhash, lcand;
    4242         912 :                 double lcost;
    4243             : 
    4244         912 :                 lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
    4245         912 :                 if (lcost < 0) {
    4246           0 :                         rc = GDK_FAIL;
    4247         762 :                         goto doreturn;
    4248             :                 }
    4249         912 :                 if (semi)
    4250         809 :                         lcost += rci.ncand; /* cost of BATunique(r) */
    4251             :                 /* add cost of sorting; obviously we don't know the
    4252             :                  * size, so we guess that the size of the output is
    4253             :                  * the same as the right input */
    4254         912 :                 lcost += rci.ncand * log((double) rci.ncand); /* sort */
    4255         912 :                 if (lcost < rcost) {
    4256         762 :                         BAT *tmp = sr;
    4257         762 :                         BAT *r1, *r2;
    4258         762 :                         if (semi) {
    4259         757 :                                 sr = BATunique(r, sr);
    4260         757 :                                 if (sr == NULL) {
    4261           0 :                                         rc = GDK_FAIL;
    4262           0 :                                         goto doreturn;
    4263             :                                 }
    4264         757 :                                 canditer_init(&rci, r, sr);
    4265             :                         }
    4266         762 :                         rc = hashjoin(&r2, &r1, NULL, r, l, &rci, &lci, nil_matches,
    4267             :                                       false, false, false, false, false, false, estimate,
    4268             :                                       t0, true, lhash, plhash, lcand, func);
    4269         762 :                         if (semi)
    4270         757 :                                 BBPunfix(sr->batCacheid);
    4271         762 :                         if (rc != GDK_SUCCEED)
    4272           0 :                                 goto doreturn;
    4273         762 :                         if (r2p == NULL) {
    4274         757 :                                 BBPunfix(r2->batCacheid);
    4275         757 :                                 r2 = NULL;
    4276             :                         }
    4277         762 :                         if (semi)
    4278         757 :                                 r1->tkey = true;
    4279         762 :                         if (!VIEWtparent(r1) &&
    4280         762 :                             r1->ttype == TYPE_oid &&
    4281         762 :                             BBP_refs(r1->batCacheid) == 1 &&
    4282         762 :                             (r2 == NULL ||
    4283           5 :                              (!VIEWtparent(r2) &&
    4284           5 :                               BBP_refs(r2->batCacheid) == 1 &&
    4285           5 :                               r2->ttype == TYPE_oid))) {
    4286             :                                 /* in-place sort if we can */
    4287         762 :                                 if (r2) {
    4288           5 :                                         GDKqsort(r1->theap->base, r2->theap->base,
    4289           5 :                                                  NULL, r1->batCount, r1->twidth,
    4290           5 :                                                  r2->twidth, TYPE_oid, false,
    4291             :                                                  false);
    4292           5 :                                         r2->tsorted = false;
    4293           5 :                                         r2->trevsorted = false;
    4294           5 :                                         r2->tseqbase = oid_nil;
    4295           5 :                                         *r2p = r2;
    4296             :                                 } else {
    4297         757 :                                         GDKqsort(r1->theap->base, NULL, NULL,
    4298         757 :                                                  r1->batCount, r1->twidth, 0,
    4299             :                                                  TYPE_oid, false, false);
    4300             :                                 }
    4301         762 :                                 r1->tsorted = true;
    4302         762 :                                 r1->trevsorted = false;
    4303         762 :                                 *r1p = r1;
    4304             :                         } else {
    4305           0 :                                 BAT *ob;
    4306           0 :                                 rc = BATsort(&tmp, r2p ? &ob : NULL, NULL,
    4307             :                                              r1, NULL, NULL, false, false, false);
    4308           0 :                                 BBPunfix(r1->batCacheid);
    4309           0 :                                 if (rc != GDK_SUCCEED) {
    4310           0 :                                         BBPreclaim(r2);
    4311           0 :                                         goto doreturn;
    4312             :                                 }
    4313           0 :                                 *r1p = r1 = tmp;
    4314           0 :                                 if (r2p) {
    4315           0 :                                         tmp = BATproject(ob, r2);
    4316           0 :                                         BBPunfix(r2->batCacheid);
    4317           0 :                                         BBPunfix(ob->batCacheid);
    4318           0 :                                         if (tmp == NULL) {
    4319           0 :                                                 BBPunfix(r1->batCacheid);
    4320           0 :                                                 rc = GDK_FAIL;
    4321           0 :                                                 goto doreturn;
    4322             :                                         }
    4323           0 :                                         *r2p = tmp;
    4324             :                                 }
    4325             :                         }
    4326         762 :                         rc = GDK_SUCCEED;
    4327         762 :                         goto doreturn;
    4328             :                 }
    4329             :         }
    4330         509 :         rc = hashjoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4331             :                       nil_matches, nil_on_miss, semi, only_misses,
    4332             :                       not_in, max_one, min_one, estimate, t0, false, rhash, prhash,
    4333             :                       rcand, func);
    4334       76729 :   doreturn:
    4335       76729 :         BBPreclaim(lp);
    4336       76729 :         BBPreclaim(rp);
    4337       76729 :         if (rc == GDK_SUCCEED && (semi | only_misses))
    4338       75971 :                 *r1p = virtualize(*r1p);
    4339             :         return rc;
    4340             : }
    4341             : 
    4342             : /* Perform an equi-join over l and r.  Returns two new, aligned, bats
    4343             :  * with the oids of matching tuples.  The result is in the same order
    4344             :  * as l (i.e. r1 is sorted). */
    4345             : gdk_return
    4346         642 : BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    4347             : {
    4348         642 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches,
    4349             :                         false, false, false, false, false, false,
    4350             :                         estimate, __func__,
    4351         642 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0);
    4352             : }
    4353             : 
    4354             : /* Performs a left outer join over l and r.  Returns two new, aligned,
    4355             :  * bats with the oids of matching tuples, or the oid in the first
    4356             :  * output bat and nil in the second output bat if the value in l does
    4357             :  * not occur in r.  The result is in the same order as l (i.e. r1 is
    4358             :  * sorted). */
    4359             : gdk_return
    4360          47 : BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate)
    4361             : {
    4362          47 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches,
    4363             :                         true, false, false, false, match_one, match_one,
    4364             :                         estimate, __func__,
    4365          47 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0);
    4366             : }
    4367             : 
    4368             : /* Perform a semi-join over l and r.  Returns one or two new bats
    4369             :  * with the oids of matching tuples.  The result is in the same order
    4370             :  * as l (i.e. r1 is sorted).  If a single bat is returned, it is a
    4371             :  * candidate list. */
    4372             : gdk_return
    4373         963 : BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4374             :             bool nil_matches, bool max_one, BUN estimate)
    4375             : {
    4376         963 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches,
    4377             :                         false, true, false, false, max_one, false,
    4378             :                         estimate, __func__,
    4379         963 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0);
    4380             : }
    4381             : 
    4382             : /* Perform a mark-join over l and r.  Returns one or two new bats with
    4383             :  * the oids of matching tuples.  In addition, returns a bat with "marks"
    4384             :  * that indicate the type of match.  This is an outer join, so returns
    4385             :  * at least one value for each row on the left.  If the second output
    4386             :  * pointer (r2p) is NULL, this is also a semi-join, so returns exactly
    4387             :  * one row for each row on the left.  If there is a match, the mark
    4388             :  * column will be TRUE, of there is no match, the second output is NIL,
    4389             :  * and the mark output is FALSE if there are no NILs in the right input,
    4390             :  * and the left input is also not NIL, otherwise the mark output is
    4391             :  * NIL. */
    4392             : gdk_return
    4393        3306 : BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4394             :             BUN estimate)
    4395             : {
    4396        3306 :         return leftjoin(r1p, r2p, r3p, l, r, sl, sr, false, true, r2p == NULL,
    4397             :                         false, false, false, false, estimate, __func__,
    4398        3306 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0);
    4399             : }
    4400             : 
    4401             : /* Return a candidate list with the list of rows in l whose value also
    4402             :  * occurs in r.  This is just the left output of a semi-join. */
    4403             : BAT *
    4404        4653 : BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one,
    4405             :              BUN estimate)
    4406             : {
    4407        4653 :         BAT *bn;
    4408             : 
    4409        4653 :         if (leftjoin(&bn, NULL, NULL, l, r, sl, sr, nil_matches,
    4410             :                      false, true, false, false, max_one, false,
    4411             :                      estimate, __func__,
    4412        4653 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4413        4652 :                 return bn;
    4414             :         return NULL;
    4415             : }
    4416             : 
    4417             : /* Return the difference of l and r.  The result is a BAT with the
    4418             :  * oids of those values in l that do not occur in r.  This is what you
    4419             :  * might call an anti-semi-join.  The result is a candidate list. */
    4420             : BAT *
    4421       67121 : BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in,
    4422             :         BUN estimate)
    4423             : {
    4424       67121 :         BAT *bn;
    4425             : 
    4426       67121 :         if (leftjoin(&bn, NULL, NULL, l, r, sl, sr, nil_matches,
    4427             :                      false, false, true, not_in, false, false,
    4428             :                      estimate, __func__,
    4429       67121 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4430       67120 :                 return bn;
    4431             :         return NULL;
    4432             : }
    4433             : 
    4434             : gdk_return
    4435       15714 : BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate)
    4436             : {
    4437       15714 :         int opcode = 0;
    4438       15714 :         lng t0 = 0;
    4439             : 
    4440             :         /* encode operator as a bit mask into opcode */
    4441       15714 :         switch (op) {
    4442           0 :         case JOIN_EQ:
    4443           0 :                 return BATjoin(r1p, r2p, l, r, sl, sr, nil_matches, estimate);
    4444             :         case JOIN_NE:
    4445             :                 opcode = MASK_NE;
    4446             :                 break;
    4447        4219 :         case JOIN_LT:
    4448        4219 :                 opcode = MASK_LT;
    4449        4219 :                 break;
    4450           7 :         case JOIN_LE:
    4451           7 :                 opcode = MASK_LE;
    4452           7 :                 break;
    4453       11413 :         case JOIN_GT:
    4454       11413 :                 opcode = MASK_GT;
    4455       11413 :                 break;
    4456          18 :         case JOIN_GE:
    4457          18 :                 opcode = MASK_GE;
    4458          18 :                 break;
    4459           0 :         default:
    4460           0 :                 GDKerror("unknown operator %d.\n", op);
    4461           0 :                 return GDK_FAIL;
    4462             :         }
    4463             : 
    4464       15714 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4465       15714 :         *r1p = NULL;
    4466       15714 :         if (r2p) {
    4467        4290 :                 *r2p = NULL;
    4468             :         }
    4469       15714 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4470             :                 return GDK_FAIL;
    4471             : 
    4472       15714 :         return thetajoin(r1p, r2p, l, r, sl, sr, opcode, estimate,
    4473             :                          __func__, t0);
    4474             : }
    4475             : 
    4476             : gdk_return
    4477      182820 : BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    4478             : {
    4479      182820 :         struct canditer lci, rci;
    4480      182820 :         bool lhash = false, rhash = false, lcand = false;
    4481      182820 :         bool plhash = false, prhash = false, rcand = false;
    4482      182820 :         bool swap;
    4483      182820 :         bat parent;
    4484      182820 :         double rcost = 0;
    4485      182820 :         double lcost = 0;
    4486      182820 :         gdk_return rc;
    4487      182820 :         lng t0 = 0;
    4488      182820 :         BAT *r2 = NULL;
    4489      182820 :         BAT *lp = NULL;
    4490      182820 :         BAT *rp = NULL;
    4491             : 
    4492      182820 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4493             : 
    4494      182820 :         canditer_init(&lci, l, sl);
    4495      182799 :         canditer_init(&rci, r, sr);
    4496             : 
    4497      182772 :         if ((parent = VIEWtparent(l)) != 0) {
    4498       47422 :                 lp = BATdescriptor(parent);
    4499       47423 :                 if (lp == NULL)
    4500             :                         return GDK_FAIL;
    4501       47423 :                 if (l->hseqbase == lp->hseqbase &&
    4502       51854 :                     BATcount(l) == BATcount(lp) &&
    4503       18980 :                     ATOMtype(l->ttype) == ATOMtype(lp->ttype)) {
    4504             :                         l = lp;
    4505             :                 } else {
    4506       37933 :                         BBPunfix(lp->batCacheid);
    4507       37933 :                         lp = NULL;
    4508             :                 }
    4509             :         }
    4510      182773 :         if ((parent = VIEWtparent(r)) != 0) {
    4511      147454 :                 rp = BATdescriptor(parent);
    4512      147486 :                 if (rp == NULL) {
    4513           0 :                         BBPreclaim(lp);
    4514           0 :                         return GDK_FAIL;
    4515             :                 }
    4516      147486 :                 if (r->hseqbase == rp->hseqbase &&
    4517      260437 :                     BATcount(r) == BATcount(rp) &&
    4518      238307 :                     ATOMtype(r->ttype) == ATOMtype(rp->ttype)) {
    4519             :                         r = rp;
    4520             :                 } else {
    4521       28330 :                         BBPunfix(rp->batCacheid);
    4522       28330 :                         rp = NULL;
    4523             :                 }
    4524             :         }
    4525             : 
    4526      182812 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    4527           0 :                 l = BATunmask(l);
    4528           0 :                 BBPreclaim(lp);
    4529           0 :                 if (l == NULL) {
    4530           0 :                         BBPreclaim(rp);
    4531           0 :                         return GDK_FAIL;
    4532             :                 }
    4533             :                 lp = l;
    4534             :         }
    4535      182812 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    4536          24 :                 r = BATunmask(r);
    4537          24 :                 BBPreclaim(rp);
    4538          24 :                 if (r == NULL) {
    4539           0 :                         BBPreclaim(lp);
    4540           0 :                         return GDK_FAIL;
    4541             :                 }
    4542             :                 rp = r;
    4543             :         }
    4544             : 
    4545      182812 :         *r1p = NULL;
    4546      182812 :         if (r2p)
    4547      160619 :                 *r2p = NULL;
    4548             : 
    4549      182812 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED) {
    4550           0 :                 rc = GDK_FAIL;
    4551           0 :                 goto doreturn;
    4552             :         }
    4553             : 
    4554      182817 :         if (lci.ncand == 0 || rci.ncand == 0) {
    4555      127353 :                 TRC_DEBUG(ALGO, "BATjoin(l=" ALGOBATFMT ","
    4556             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4557             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d)\n",
    4558             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4559             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4560             :                           nil_matches);
    4561      127353 :                 rc = nomatch(r1p, r2p, NULL, l, r, &lci,
    4562             :                              0, false, false, __func__, t0);
    4563      127338 :                 goto doreturn;
    4564             :         }
    4565             : 
    4566       55464 :         swap = false;
    4567             : 
    4568       55464 :         if (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype == TYPE_void && is_oid_nil(l->tseqbase))) {
    4569             :                 /* single value to join, use select */
    4570       33110 :                 rc = selectjoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4571             :                                 nil_matches, false, false, false, false,
    4572             :                                 t0, false, __func__);
    4573       33110 :                 goto doreturn;
    4574       22355 :         } else if (rci.ncand == 1 || (BATordered(r) && BATordered_rev(r)) || (r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) {
    4575             :                 /* single value to join, use select */
    4576        9148 :                 rc = selectjoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4577             :                                 nil_matches, false, false, false, false,
    4578             :                                 t0, true, __func__);
    4579        5837 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4580        2526 :                         BBPunfix(r2->batCacheid);
    4581        5837 :                 goto doreturn;
    4582       16518 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    4583             :                 /* use special implementation for dense right-hand side */
    4584         972 :                 rc = mergejoin_void(r1p, r2p, NULL, l, r, &lci, &rci,
    4585             :                                     false, false, t0, false, __func__);
    4586         972 :                 goto doreturn;
    4587       15546 :         } else if (BATtdense(l) && lci.tpe == cand_dense) {
    4588             :                 /* use special implementation for dense right-hand side */
    4589          44 :                 rc = mergejoin_void(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4590             :                                     false, false, t0, true, __func__);
    4591          32 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4592          20 :                         BBPunfix(r2->batCacheid);
    4593          32 :                 goto doreturn;
    4594       23376 :         } else if ((BATordered(l) || BATordered_rev(l)) &&
    4595        9781 :                    (BATordered(r) || BATordered_rev(r))) {
    4596             :                 /* both sorted */
    4597        5978 :                 rc = mergejoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4598             :                                nil_matches, false, false, false, false, false, false,
    4599             :                                estimate, t0, false, __func__);
    4600        5977 :                 goto doreturn;
    4601             :         }
    4602             : 
    4603        9535 :         lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
    4604        9536 :         rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
    4605        9535 :         if (lcost < 0 || rcost < 0) {
    4606           0 :                 rc = GDK_FAIL;
    4607           0 :                 goto doreturn;
    4608             :         }
    4609             : 
    4610             :         /* if the cost of doing searches on l is lower than the cost
    4611             :          * of doing searches on r, we swap */
    4612        9535 :         swap = (lcost < rcost);
    4613             : 
    4614       19071 :         if ((r->ttype == TYPE_void && r->tvheap != NULL) ||
    4615       19192 :             ((BATordered(r) || BATordered_rev(r)) &&
    4616        3086 :              (lci.ncand * (log2((double) rci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4617             :                 /* r is sorted and it is cheaper to do multiple binary
    4618             :                  * searches than it is to use a hash */
    4619         145 :                 rc = mergejoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4620             :                                nil_matches, false, false, false, false, false, false,
    4621             :                                estimate, t0, false, __func__);
    4622       18781 :         } else if ((l->ttype == TYPE_void && l->tvheap != NULL) ||
    4623       18910 :             ((BATordered(l) || BATordered_rev(l)) &&
    4624        1884 :              (rci.ncand * (log2((double) lci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4625             :                 /* l is sorted and it is cheaper to do multiple binary
    4626             :                  * searches than it is to use a hash */
    4627        1433 :                 rc = mergejoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4628             :                                nil_matches, false, false, false, false, false, false,
    4629             :                                estimate, t0, true, __func__);
    4630         718 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4631           3 :                         BBPunfix(r2->batCacheid);
    4632        8673 :         } else if (swap) {
    4633        9207 :                 rc = hashjoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4634             :                               nil_matches, false, false, false, false, false, false,
    4635             :                               estimate, t0, true, lhash, plhash, lcand,
    4636             :                               __func__);
    4637        4762 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4638         317 :                         BBPunfix(r2->batCacheid);
    4639             :         } else {
    4640        3911 :                 rc = hashjoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4641             :                               nil_matches, false, false, false, false, false, false,
    4642             :                               estimate, t0, false, rhash, prhash, rcand,
    4643             :                               __func__);
    4644             :         }
    4645      182802 :   doreturn:
    4646      182802 :         BBPreclaim(lp);
    4647      182796 :         BBPreclaim(rp);
    4648             :         return rc;
    4649             : }
    4650             : 
    4651             : gdk_return
    4652           0 : BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4653             :             const void *c1, const void *c2, bool linc, bool hinc, BUN estimate)
    4654             : {
    4655           0 :         lng t0 = 0;
    4656           0 :         struct canditer lci, rci;
    4657           0 :         const char *lvals, *rvals;
    4658           0 :         int t;
    4659           0 :         const void *nil = ATOMnilptr(l->ttype);
    4660           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    4661           0 :         const char *vl, *vr;
    4662           0 :         oid lastr = 0;          /* last value inserted into r2 */
    4663           0 :         BUN nr;
    4664           0 :         oid lo, ro;
    4665           0 :         bool lskipped = false;  /* whether we skipped values in l */
    4666             : 
    4667           0 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4668             : 
    4669           0 :         size_t counter = 0;
    4670           0 :         lng timeoffset = 0;
    4671           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    4672           0 :         if (qry_ctx != NULL) {
    4673           0 :                 timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
    4674             :         }
    4675             : 
    4676             : 
    4677           0 :         MT_thread_setalgorithm(__func__);
    4678           0 :         *r1p = NULL;
    4679           0 :         if (r2p) {
    4680           0 :                 *r2p = NULL;
    4681             :         }
    4682           0 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4683             :                 return GDK_FAIL;
    4684             : 
    4685           0 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    4686             : 
    4687           0 :         t = ATOMtype(l->ttype);
    4688           0 :         t = ATOMbasetype(t);
    4689             : 
    4690           0 :         canditer_init(&lci, l, sl);
    4691           0 :         canditer_init(&rci, r, sr);
    4692             : 
    4693           0 :         if (lci.ncand == 0 || rci.ncand == 0)
    4694           0 :                 return nomatch(r1p, r2p, NULL, l, r, &lci,
    4695             :                                0, false, false, __func__, t0);
    4696             : 
    4697           0 :         switch (t) {
    4698           0 :         case TYPE_bte:
    4699           0 :                 if (is_bte_nil(*(const bte *)c1) ||
    4700           0 :                     is_bte_nil(*(const bte *)c2) ||
    4701           0 :                     -*(const bte *)c1 > *(const bte *)c2 ||
    4702           0 :                     ((!hinc || !linc) && -*(const bte *)c1 == *(const bte *)c2))
    4703           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4704             :                                        0, false, false, __func__, t0);
    4705             :                 break;
    4706           0 :         case TYPE_sht:
    4707           0 :                 if (is_sht_nil(*(const sht *)c1) ||
    4708           0 :                     is_sht_nil(*(const sht *)c2) ||
    4709           0 :                     -*(const sht *)c1 > *(const sht *)c2 ||
    4710           0 :                     ((!hinc || !linc) && -*(const sht *)c1 == *(const sht *)c2))
    4711           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4712             :                                        0, false, false, __func__, t0);
    4713             :                 break;
    4714           0 :         case TYPE_int:
    4715           0 :                 if (is_int_nil(*(const int *)c1) ||
    4716           0 :                     is_int_nil(*(const int *)c2) ||
    4717           0 :                     -*(const int *)c1 > *(const int *)c2 ||
    4718           0 :                     ((!hinc || !linc) && -*(const int *)c1 == *(const int *)c2))
    4719           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4720             :                                        0, false, false, __func__, t0);
    4721             :                 break;
    4722           0 :         case TYPE_lng:
    4723           0 :                 if (is_lng_nil(*(const lng *)c1) ||
    4724           0 :                     is_lng_nil(*(const lng *)c2) ||
    4725           0 :                     -*(const lng *)c1 > *(const lng *)c2 ||
    4726           0 :                     ((!hinc || !linc) && -*(const lng *)c1 == *(const lng *)c2))
    4727           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4728             :                                        0, false, false, __func__, t0);
    4729             :                 break;
    4730             : #ifdef HAVE_HGE
    4731           0 :         case TYPE_hge:
    4732           0 :                 if (is_hge_nil(*(const hge *)c1) ||
    4733           0 :                     is_hge_nil(*(const hge *)c2) ||
    4734           0 :                     -*(const hge *)c1 > *(const hge *)c2 ||
    4735           0 :                     ((!hinc || !linc) && -*(const hge *)c1 == *(const hge *)c2))
    4736           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4737             :                                        0, false, false, __func__, t0);
    4738             :                 break;
    4739             : #endif
    4740           0 :         case TYPE_flt:
    4741           0 :                 if (is_flt_nil(*(const flt *)c1) ||
    4742           0 :                     is_flt_nil(*(const flt *)c2) ||
    4743           0 :                     -*(const flt *)c1 > *(const flt *)c2 ||
    4744           0 :                     ((!hinc || !linc) && -*(const flt *)c1 == *(const flt *)c2))
    4745           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4746             :                                        0, false, false, __func__, t0);
    4747             :                 break;
    4748           0 :         case TYPE_dbl:
    4749           0 :                 if (is_dbl_nil(*(const dbl *)c1) ||
    4750           0 :                     is_dbl_nil(*(const dbl *)c2) ||
    4751           0 :                     -*(const dbl *)c1 > *(const dbl *)c2 ||
    4752           0 :                     ((!hinc || !linc) && -*(const dbl *)c1 == *(const dbl *)c2))
    4753           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4754             :                                        0, false, false, __func__, t0);
    4755             :                 break;
    4756           0 :         default:
    4757           0 :                 GDKerror("unsupported type\n");
    4758           0 :                 return GDK_FAIL;
    4759             :         }
    4760             : 
    4761           0 :         BUN maxsize = joininitresults(r1p, r2p, NULL, lci.ncand, rci.ncand, false, false,
    4762             :                                       false, false, false, false, estimate);
    4763           0 :         if (maxsize == BUN_NONE)
    4764             :                 return GDK_FAIL;
    4765           0 :         BAT *r1 = *r1p;
    4766           0 :         BAT *r2 = r2p ? *r2p : NULL;
    4767           0 :         BATiter li = bat_iterator(l);
    4768           0 :         BATiter ri = bat_iterator(r);
    4769             : 
    4770           0 :         lvals = (const char *) li.base;
    4771           0 :         rvals = (const char *) ri.base;
    4772           0 :         assert(ri.vh == NULL);
    4773             : 
    4774           0 :         assert(lvals != NULL);
    4775           0 :         assert(rvals != NULL);
    4776             : 
    4777           0 :         r1->tkey = true;
    4778           0 :         r1->tsorted = true;
    4779           0 :         r1->trevsorted = true;
    4780           0 :         if (r2) {
    4781           0 :                 r2->tkey = true;
    4782           0 :                 r2->tsorted = true;
    4783           0 :                 r2->trevsorted = true;
    4784             :         }
    4785             : 
    4786             :         /* nested loop implementation for band join */
    4787           0 :         for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
    4788           0 :                 GDK_CHECK_TIMEOUT(timeoffset, counter,
    4789             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout));
    4790           0 :                 lo = canditer_next(&lci);
    4791           0 :                 vl = FVALUE(l, lo - l->hseqbase);
    4792           0 :                 if (cmp(vl, nil) == 0)
    4793           0 :                         continue;
    4794           0 :                 nr = 0;
    4795           0 :                 canditer_reset(&rci);
    4796           0 :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {
    4797           0 :                         ro = canditer_next(&rci);
    4798           0 :                         vr = FVALUE(r, ro - r->hseqbase);
    4799           0 :                         switch (ATOMtype(li.type)) {
    4800           0 :                         case TYPE_bte: {
    4801           0 :                                 if (is_bte_nil(*(const bte *) vr))
    4802           0 :                                         continue;
    4803           0 :                                 sht v1 = (sht) *(const bte *) vr, v2;
    4804           0 :                                 v2 = v1;
    4805           0 :                                 v1 -= *(const bte *)c1;
    4806           0 :                                 if (*(const bte *)vl <= v1 &&
    4807           0 :                                     (!linc || *(const bte *)vl != v1))
    4808           0 :                                         continue;
    4809           0 :                                 v2 += *(const bte *)c2;
    4810           0 :                                 if (*(const bte *)vl >= v2 &&
    4811           0 :                                     (!hinc || *(const bte *)vl != v2))
    4812           0 :                                         continue;
    4813             :                                 break;
    4814             :                         }
    4815           0 :                         case TYPE_sht: {
    4816           0 :                                 if (is_sht_nil(*(const sht *) vr))
    4817           0 :                                         continue;
    4818           0 :                                 int v1 = (int) *(const sht *) vr, v2;
    4819           0 :                                 v2 = v1;
    4820           0 :                                 v1 -= *(const sht *)c1;
    4821           0 :                                 if (*(const sht *)vl <= v1 &&
    4822           0 :                                     (!linc || *(const sht *)vl != v1))
    4823           0 :                                         continue;
    4824           0 :                                 v2 += *(const sht *)c2;
    4825           0 :                                 if (*(const sht *)vl >= v2 &&
    4826           0 :                                     (!hinc || *(const sht *)vl != v2))
    4827           0 :                                         continue;
    4828             :                                 break;
    4829             :                         }
    4830           0 :                         case TYPE_int: {
    4831           0 :                                 if (is_int_nil(*(const int *) vr))
    4832           0 :                                         continue;
    4833           0 :                                 lng v1 = (lng) *(const int *) vr, v2;
    4834           0 :                                 v2 = v1;
    4835           0 :                                 v1 -= *(const int *)c1;
    4836           0 :                                 if (*(const int *)vl <= v1 &&
    4837           0 :                                     (!linc || *(const int *)vl != v1))
    4838           0 :                                         continue;
    4839           0 :                                 v2 += *(const int *)c2;
    4840           0 :                                 if (*(const int *)vl >= v2 &&
    4841           0 :                                     (!hinc || *(const int *)vl != v2))
    4842           0 :                                         continue;
    4843             :                                 break;
    4844             :                         }
    4845             : #ifdef HAVE_HGE
    4846           0 :                         case TYPE_lng: {
    4847           0 :                                 if (is_lng_nil(*(const lng *) vr))
    4848           0 :                                         continue;
    4849           0 :                                 hge v1 = (hge) *(const lng *) vr, v2;
    4850           0 :                                 v2 = v1;
    4851           0 :                                 v1 -= *(const lng *)c1;
    4852           0 :                                 if (*(const lng *)vl <= v1 &&
    4853           0 :                                     (!linc || *(const lng *)vl != v1))
    4854           0 :                                         continue;
    4855           0 :                                 v2 += *(const lng *)c2;
    4856           0 :                                 if (*(const lng *)vl >= v2 &&
    4857           0 :                                     (!hinc || *(const lng *)vl != v2))
    4858           0 :                                         continue;
    4859             :                                 break;
    4860             :                         }
    4861             : #else
    4862             : #ifdef HAVE___INT128
    4863             :                         case TYPE_lng: {
    4864             :                                 if (is_lng_nil(*(const lng *) vr))
    4865             :                                         continue;
    4866             :                                 __int128 v1 = (__int128) *(const lng *) vr, v2;
    4867             :                                 v2 = v1;
    4868             :                                 v1 -= *(const lng *)c1;
    4869             :                                 if (*(const lng *)vl <= v1 &&
    4870             :                                     (!linc || *(const lng *)vl != v1))
    4871             :                                         continue;
    4872             :                                 v2 += *(const lng *)c2;
    4873             :                                 if (*(const lng *)vl >= v2 &&
    4874             :                                     (!hinc || *(const lng *)vl != v2))
    4875             :                                         continue;
    4876             :                                 break;
    4877             :                         }
    4878             : #else
    4879             : #ifdef HAVE___INT128_T
    4880             :                         case TYPE_lng: {
    4881             :                                 if (is_lng_nil(*(const lng *) vr))
    4882             :                                         continue;
    4883             :                                 __int128_t v1 = (__int128_t) *(const lng *) vr, v2;
    4884             :                                 v2 = v1;
    4885             :                                 v1 -= *(const lng *)c1;
    4886             :                                 if (*(const lng *)vl <= v1 &&
    4887             :                                     (!linc || *(const lng *)vl != v1))
    4888             :                                         continue;
    4889             :                                 v2 += *(const lng *)c2;
    4890             :                                 if (*(const lng *)vl >= v2 &&
    4891             :                                     (!hinc || *(const lng *)vl != v2))
    4892             :                                         continue;
    4893             :                                 break;
    4894             :                         }
    4895             : #else
    4896             :                         case TYPE_lng: {
    4897             :                                 if (is_lng_nil(*(const lng *) vr))
    4898             :                                         continue;
    4899             :                                 lng v1, v2;
    4900             :                                 SUB_WITH_CHECK(*(const lng *)vr,
    4901             :                                                *(const lng *)c1,
    4902             :                                                lng, v1,
    4903             :                                                GDK_lng_max,
    4904             :                                                do{if(*(const lng*)c1<0)goto nolmatch;else goto lmatch1;}while(false));
    4905             :                                 if (*(const lng *)vl <= v1 &&
    4906             :                                     (!linc || *(const lng *)vl != v1))
    4907             :                                         continue;
    4908             :                                   lmatch1:
    4909             :                                 ADD_WITH_CHECK(*(const lng *)vr,
    4910             :                                                *(const lng *)c2,
    4911             :                                                lng, v2,
    4912             :                                                GDK_lng_max,
    4913             :                                                do{if(*(const lng*)c2>0)goto nolmatch;else goto lmatch2;}while(false));
    4914             :                                 if (*(const lng *)vl >= v2 &&
    4915             :                                     (!hinc || *(const lng *)vl != v2))
    4916             :                                         continue;
    4917             :                                   lmatch2:
    4918             :                                 break;
    4919             :                                   nolmatch:
    4920             :                                 continue;
    4921             :                         }
    4922             : #endif
    4923             : #endif
    4924             : #endif
    4925             : #ifdef HAVE_HGE
    4926           0 :                         case TYPE_hge: {
    4927           0 :                                 if (is_hge_nil(*(const hge *) vr))
    4928           0 :                                         continue;
    4929           0 :                                 hge v1, v2;
    4930           0 :                                 SUB_WITH_CHECK(*(const hge *)vr,
    4931             :                                                *(const hge *)c1,
    4932             :                                                hge, v1,
    4933             :                                                GDK_hge_max,
    4934             :                                                do{if(*(const hge*)c1<0)goto nohmatch;else goto hmatch1;}while(false));
    4935           0 :                                 if (*(const hge *)vl <= v1 &&
    4936           0 :                                     (!linc || *(const hge *)vl != v1))
    4937           0 :                                         continue;
    4938           0 :                                   hmatch1:
    4939           0 :                                 ADD_WITH_CHECK(*(const hge *)vr,
    4940             :                                                *(const hge *)c2,
    4941             :                                                hge, v2,
    4942             :                                                GDK_hge_max,
    4943             :                                                do{if(*(const hge*)c2>0)goto nohmatch;else goto hmatch2;}while(false));
    4944           0 :                                 if (*(const hge *)vl >= v2 &&
    4945           0 :                                     (!hinc || *(const hge *)vl != v2))
    4946           0 :                                         continue;
    4947           0 :                                   hmatch2:
    4948             :                                 break;
    4949           0 :                                   nohmatch:
    4950           0 :                                 continue;
    4951             :                         }
    4952             : #endif
    4953           0 :                         case TYPE_flt: {
    4954           0 :                                 if (is_flt_nil(*(const flt *) vr))
    4955           0 :                                         continue;
    4956           0 :                                 dbl v1 = (dbl) *(const flt *) vr, v2;
    4957           0 :                                 v2 = v1;
    4958           0 :                                 v1 -= *(const flt *)c1;
    4959           0 :                                 if (*(const flt *)vl <= v1 &&
    4960           0 :                                     (!linc || *(const flt *)vl != v1))
    4961           0 :                                         continue;
    4962           0 :                                 v2 += *(const flt *)c2;
    4963           0 :                                 if (*(const flt *)vl >= v2 &&
    4964           0 :                                     (!hinc || *(const flt *)vl != v2))
    4965           0 :                                         continue;
    4966             :                                 break;
    4967             :                         }
    4968           0 :                         case TYPE_dbl: {
    4969           0 :                                 if (is_dbl_nil(*(const dbl *) vr))
    4970           0 :                                         continue;
    4971           0 :                                 dbl v1, v2;
    4972           0 :                                 SUB_WITH_CHECK(*(const dbl *)vr,
    4973             :                                                *(const dbl *)c1,
    4974             :                                                dbl, v1,
    4975             :                                                GDK_dbl_max,
    4976             :                                                do{if(*(const dbl*)c1<0)goto nodmatch;else goto dmatch1;}while(false));
    4977           0 :                                 if (*(const dbl *)vl <= v1 &&
    4978           0 :                                     (!linc || *(const dbl *)vl != v1))
    4979           0 :                                         continue;
    4980           0 :                                   dmatch1:
    4981           0 :                                 ADD_WITH_CHECK(*(const dbl *)vr,
    4982             :                                                *(const dbl *)c2,
    4983             :                                                dbl, v2,
    4984             :                                                GDK_dbl_max,
    4985             :                                                do{if(*(const dbl*)c2>0)goto nodmatch;else goto dmatch2;}while(false));
    4986           0 :                                 if (*(const dbl *)vl >= v2 &&
    4987           0 :                                     (!hinc || *(const dbl *)vl != v2))
    4988           0 :                                         continue;
    4989           0 :                                   dmatch2:
    4990             :                                 break;
    4991           0 :                                   nodmatch:
    4992           0 :                                 continue;
    4993             :                         }
    4994             :                         }
    4995           0 :                         if (maybeextend(r1, r2, NULL, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    4996           0 :                                 goto bailout;
    4997           0 :                         if (BATcount(r1) > 0) {
    4998           0 :                                 if (r2 && lastr + 1 != ro)
    4999           0 :                                         r2->tseqbase = oid_nil;
    5000           0 :                                 if (nr == 0) {
    5001           0 :                                         r1->trevsorted = false;
    5002           0 :                                         if (r2 == NULL) {
    5003             :                                                 /* nothing */
    5004           0 :                                         } else if (lastr > ro) {
    5005           0 :                                                 r2->tsorted = false;
    5006           0 :                                                 r2->tkey = false;
    5007           0 :                                         } else if (lastr < ro) {
    5008           0 :                                                 r2->trevsorted = false;
    5009             :                                         } else {
    5010           0 :                                                 r2->tkey = false;
    5011             :                                         }
    5012             :                                 }
    5013             :                         }
    5014           0 :                         APPEND(r1, lo);
    5015           0 :                         if (r2) {
    5016           0 :                                 APPEND(r2, ro);
    5017             :                         }
    5018           0 :                         lastr = ro;
    5019           0 :                         nr++;
    5020             :                 }
    5021           0 :                 if (nr > 1) {
    5022           0 :                         r1->tkey = false;
    5023           0 :                         r1->tseqbase = oid_nil;
    5024           0 :                         if (r2) {
    5025           0 :                                 r2->trevsorted = false;
    5026             :                         }
    5027           0 :                 } else if (nr == 0) {
    5028           0 :                         lskipped = BATcount(r1) > 0;
    5029           0 :                 } else if (lskipped) {
    5030           0 :                         r1->tseqbase = oid_nil;
    5031             :                 }
    5032             :         }
    5033             :         /* also set other bits of heap to correct value to indicate size */
    5034           0 :         BATsetcount(r1, BATcount(r1));
    5035           0 :         if (r2) {
    5036           0 :                 BATsetcount(r2, BATcount(r2));
    5037           0 :                 assert(BATcount(r1) == BATcount(r2));
    5038             :         }
    5039           0 :         if (BATcount(r1) > 0) {
    5040           0 :                 if (BATtdense(r1))
    5041           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    5042           0 :                 if (r2 && BATtdense(r2))
    5043           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    5044             :         } else {
    5045           0 :                 r1->tseqbase = 0;
    5046           0 :                 if (r2) {
    5047           0 :                         r2->tseqbase = 0;
    5048             :                 }
    5049             :         }
    5050           0 :         bat_iterator_end(&li);
    5051           0 :         bat_iterator_end(&ri);
    5052           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    5053             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    5054             :                   " -> " ALGOBATFMT "," ALGOOPTBATFMT
    5055             :                   " (" LLFMT "usec)\n",
    5056             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    5057             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    5058             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    5059             :                   GDKusec() - t0);
    5060             :         return GDK_SUCCEED;
    5061             : 
    5062           0 :   bailout:
    5063           0 :         bat_iterator_end(&li);
    5064           0 :         bat_iterator_end(&ri);
    5065           0 :         BBPreclaim(r1);
    5066           0 :         BBPreclaim(r2);
    5067             :         return GDK_FAIL;
    5068             : }
    5069             : 
    5070             : gdk_return
    5071         134 : BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh,
    5072             :              BAT *sl, BAT *sr, bool linc, bool hinc, bool anti, bool symmetric,
    5073             :              BUN estimate)
    5074             : {
    5075         134 :         struct canditer lci, rci;
    5076         134 :         BAT *r1 = NULL, *r2 = NULL;
    5077         134 :         BUN maxsize;
    5078         134 :         lng t0 = 0;
    5079             : 
    5080         134 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    5081         134 :         *r1p = NULL;
    5082         134 :         if (r2p) {
    5083         111 :                 *r2p = NULL;
    5084             :         }
    5085         134 :         if (joinparamcheck(l, rl, rh, sl, sr, __func__) != GDK_SUCCEED)
    5086             :                 return GDK_FAIL;
    5087         134 :         canditer_init(&lci, l, sl);
    5088         134 :         canditer_init(&rci, rl, sr);
    5089         134 :         if (lci.ncand == 0 ||
    5090         127 :             rci.ncand == 0 ||
    5091         120 :             (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)) ||
    5092         120 :             ((rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) &&
    5093           0 :              (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)))) {
    5094             :                 /* trivial: empty input */
    5095          14 :                 return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5096             :                                __func__, t0);
    5097             :         }
    5098         120 :         if (rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) {
    5099           0 :                 if (!anti)
    5100           0 :                         return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5101             :                                        __func__, t0);
    5102           0 :                 return thetajoin(r1p, r2p, l, rh, sl, sr, MASK_GT, estimate,
    5103             :                                  __func__, t0);
    5104             :         }
    5105         120 :         if (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)) {
    5106           0 :                 if (!anti)
    5107           0 :                         return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5108             :                                        __func__, t0);
    5109           0 :                 return thetajoin(r1p, r2p, l, rl, sl, sr, MASK_LT, estimate,
    5110             :                                  __func__, t0);
    5111             :         }
    5112             : 
    5113         137 :         if ((maxsize = joininitresults(&r1, r2p ? &r2 : NULL, NULL, sl ? BATcount(sl) : BATcount(l), sr ? BATcount(sr) : BATcount(rl), false, false, false, false, false, false, estimate)) == BUN_NONE)
    5114             :                 return GDK_FAIL;
    5115         120 :         *r1p = r1;
    5116         120 :         if (r2p) {
    5117         103 :                 *r2p = r2;
    5118             :         }
    5119         120 :         if (maxsize == 0)
    5120             :                 return GDK_SUCCEED;
    5121             : 
    5122             :         /* note, the rangejoin implementation is in gdk_select.c since
    5123             :          * it uses the imprints code there */
    5124         120 :         return rangejoin(r1, r2, l, rl, rh, &lci, &rci, linc, hinc, anti, symmetric, maxsize);
    5125             : }

Generated by: LCOV version 1.14