LCOV - code coverage report
Current view: top level - gdk - gdk_join.c (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1801 2755 65.4 %
Date: 2024-11-14 20:04:02 Functions: 26 29 89.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #include "monetdb_config.h"
      14             : #include "gdk.h"
      15             : #include "gdk_private.h"
      16             : #include "gdk_calc_private.h"
      17             : 
      18             : /*
      19             :  * All join variants produce some sort of join on two input BATs,
      20             :  * optionally subject to up to two candidate lists.  Only values in
      21             :  * the input BATs that are mentioned in the associated candidate list
      22             :  * (if provided) are eligible.  They all return two output BATs in the
      23             :  * first two arguments.  The join operations differ in the way in
      24             :  * which tuples from the two inputs are matched.
      25             :  *
      26             :  * The outputs consist of two aligned BATs (i.e. same length and same
      27             :  * hseqbase (0@0)) that contain the OIDs of the input BATs that match.
      28             :  * The candidate lists, if given, contain the OIDs of the associated
      29             :  * input BAT which must be considered for matching.  The input BATs
      30             :  * must have the same type.
      31             :  *
      32             :  * All functions also have a parameter nil_matches which indicates
      33             :  * whether NIL must be considered an ordinary value that can match, or
      34             :  * whether NIL must be considered to never match.
      35             :  *
      36             :  * The join functions that are provided here are:
      37             :  * BATjoin
      38             :  *      normal equi-join
      39             :  * BATleftjoin
      40             :  *      normal equi-join, but the left output is sorted
      41             :  * BATouterjoin
      42             :  *      equi-join, but the left output is sorted, and if there is no
      43             :  *      match for a value in the left input, there is still an output
      44             :  *      with NIL in the right output
      45             :  * BATsemijoin
      46             :  *      equi-join, but the left output is sorted, and if there are
      47             :  *      multiple matches, only one is returned (i.e., the left output
      48             :  *      is also key, making it a candidate list)
      49             :  * BATmarkjoin
      50             :  *      equi-join, but the left output is sorted, if there is no
      51             :  *      match for a value in the left input, there is still an output
      52             :  *      with NIL in the right output, and there is a third output column
      53             :  *      containing a flag that indicates the "certainty" of the match: 1
      54             :  *      there is a match, 0, there is no match and there are no NIL
      55             :  *      values, NIL, there is no match but there are NIL values
      56             :  * BATthetajoin
      57             :  *      theta-join: an extra operator must be provided encoded as an
      58             :  *      integer (macros JOIN_EQ, JOIN_NE, JOIN_LT, JOIN_LE, JOIN_GT,
      59             :  *      JOIN_GE); values match if the left input has the given
      60             :  *      relationship with the right input; order of the outputs is not
      61             :  *      guaranteed
      62             :  * BATbandjoin
      63             :  *      band-join: two extra input values (c1, c2) must be provided as
      64             :  *      well as Booleans (li, hi) that indicate whether the value
      65             :  *      ranges are inclusive or not; values in the left and right
      66             :  *      inputs match if right - c1 <[=] left <[=] right + c2; if c1 or
      67             :  *      c2 is NIL, there are no matches
      68             :  * BATrangejoin
      69             :  *      range-join: the right input consists of two aligned BATs,
      70             :  *      values match if the left value is between two corresponding
      71             :  *      right values; two extra Boolean parameters, li and hi,
      72             :  *      indicate whether equal values match
      73             :  *
      74             :  * In addition to these functions, there are two more functions that
      75             :  * are closely related:
      76             :  * BATintersect
      77             :  *      intersection: return a candidate list with OIDs of tuples in
      78             :  *      the left input whose value occurs in the right input
      79             :  * BATdiff
      80             :  *      difference: return a candidate list with OIDs of tuples in the
      81             :  *      left input whose value does not occur in the right input
      82             :  */
      83             : 
      84             : /* Perform a bunch of sanity checks on the inputs to a join. */
      85             : static gdk_return
      86      345954 : joinparamcheck(BAT *l, BAT *r1, BAT *r2, BAT *sl, BAT *sr, const char *func)
      87             : {
      88      866030 :         if (ATOMtype(l->ttype) != ATOMtype(r1->ttype) ||
      89         258 :             (r2 && ATOMtype(l->ttype) != ATOMtype(r2->ttype))) {
      90           0 :                 GDKerror("%s: inputs not compatible.\n", func);
      91           0 :                 return GDK_FAIL;
      92             :         }
      93         129 :         if (r2 &&
      94         129 :             (BATcount(r1) != BATcount(r2) || r1->hseqbase != r2->hseqbase)) {
      95           0 :                 GDKerror("%s: right inputs not aligned.\n", func);
      96           0 :                 return GDK_FAIL;
      97             :         }
      98      345954 :         if ((sl && !BATiscand(sl)) || (sr && !BATiscand(sr))) {
      99           0 :                 GDKerror("%s: argument not a candidate list.\n", func);
     100           0 :                 return GDK_FAIL;
     101             :         }
     102             :         return GDK_SUCCEED;
     103             : }
     104             : 
     105             : #define INCRSIZELOG     (8 + (SIZEOF_OID / 2))
     106             : #define INCRSIZE        (1 << INCRSIZELOG)
     107             : 
     108             : /* Create the result bats for a join, returns the absolute maximum
     109             :  * number of outputs that could possibly be generated. */
     110             : static BUN
     111       42415 : joininitresults(BAT **r1p, BAT **r2p, BAT **r3p, BUN lcnt, BUN rcnt,
     112             :                 bool lkey, bool rkey, bool semi, bool nil_on_miss,
     113             :                 bool only_misses, bool min_one, BUN estimate)
     114             : {
     115       42415 :         BAT *r1 = NULL, *r2 = NULL, *r3 = NULL;
     116       42415 :         BUN maxsize, size;
     117             : 
     118             :         /* if nil_on_miss is set, we really need a right output */
     119       42415 :         assert(!nil_on_miss || r2p != NULL || r3p != NULL);
     120             : 
     121       42415 :         lkey |= lcnt <= 1;
     122       42415 :         rkey |= rcnt <= 1;
     123             : 
     124       42415 :         *r1p = NULL;
     125       42415 :         if (r2p)
     126       21072 :                 *r2p = NULL;
     127       42415 :         if (r3p)
     128          86 :                 *r3p = NULL;
     129       42415 :         if (lcnt == 0) {
     130             :                 /* there is nothing to match */
     131             :                 maxsize = 0;
     132       40060 :         } else if (!only_misses && !nil_on_miss && rcnt == 0) {
     133             :                 /* if right is empty, we have no hits, so if we don't
     134             :                  * want misses, the result is empty */
     135             :                 maxsize = 0;
     136       40058 :         } else if (rkey | semi | only_misses) {
     137             :                 /* each entry left matches at most one on right, in
     138             :                  * case nil_on_miss is also set, each entry matches
     139             :                  * exactly one (see below) */
     140             :                 maxsize = lcnt;
     141       22213 :         } else if (lkey) {
     142             :                 /* each entry on right is matched at most once */
     143        4138 :                 if (nil_on_miss) {
     144             :                         /* one entry left could match all right, and
     145             :                          * all other entries left match nil */
     146          15 :                         maxsize = lcnt + rcnt - 1;
     147             :                 } else {
     148             :                         maxsize = rcnt;
     149             :                 }
     150       18075 :         } else if (rcnt == 0) {
     151             :                 /* nil_on_miss must be true due to previous checks, so
     152             :                  * all values on left miss */
     153             :                 maxsize = lcnt;
     154       18075 :         } else if (BUN_MAX / lcnt >= rcnt) {
     155             :                 /* in the worst case we have a full cross product */
     156       18072 :                 maxsize = lcnt * rcnt;
     157             :         } else {
     158             :                 /* a BAT cannot grow larger than BUN_MAX */
     159             :                 maxsize = BUN_MAX;
     160             :         }
     161       42415 :         size = estimate == BUN_NONE ? lcnt < rcnt ? lcnt : rcnt : estimate;
     162       42415 :         if (size < INCRSIZE)
     163             :                 size = INCRSIZE;
     164       42415 :         if (size > maxsize)
     165             :                 size = maxsize;
     166       42415 :         if ((rkey | semi | only_misses) & nil_on_miss) {
     167             :                 /* see comment above: each entry left matches exactly
     168             :                  * once */
     169         101 :                 size = maxsize;
     170             :         }
     171       42415 :         if (min_one && size < lcnt)
     172           0 :                 size = lcnt;
     173             : 
     174       42415 :         if (maxsize == 0) {
     175        2367 :                 r1 = BATdense(0, 0, 0);
     176        2365 :                 if (r1 == NULL) {
     177             :                         return BUN_NONE;
     178             :                 }
     179        2365 :                 if (r2p) {
     180         290 :                         r2 = BATdense(0, 0, 0);
     181         290 :                         if (r2 == NULL) {
     182           0 :                                 BBPreclaim(r1);
     183           0 :                                 return BUN_NONE;
     184             :                         }
     185         290 :                         *r2p = r2;
     186             :                 }
     187        2365 :                 if (r3p) {
     188           0 :                         r3 = COLnew(0, TYPE_bit, 0, TRANSIENT);
     189           0 :                         if (r3 == NULL) {
     190           0 :                                 BBPreclaim(r1);
     191           0 :                                 BBPreclaim(r2);
     192           0 :                                 if (r2p)
     193           0 :                                         *r2p = NULL;
     194           0 :                                 return BUN_NONE;
     195             :                         }
     196           0 :                         *r3p = r3;
     197             :                 }
     198        2365 :                 *r1p = r1;
     199        2365 :                 return 0;
     200             :         }
     201             : 
     202       40048 :         r1 = COLnew(0, TYPE_oid, size, TRANSIENT);
     203       40060 :         if (r1 == NULL) {
     204             :                 return BUN_NONE;
     205             :         }
     206       40060 :         r1->tnil = false;
     207       40060 :         r1->tnonil = true;
     208       40060 :         r1->tkey = true;
     209       40060 :         r1->tsorted = true;
     210       40060 :         r1->trevsorted = true;
     211       40060 :         r1->tseqbase = 0;
     212       40060 :         r1->theap->dirty = true;
     213       40060 :         *r1p = r1;
     214       40060 :         if (r2p) {
     215       20792 :                 r2 = COLnew(0, TYPE_oid, size, TRANSIENT);
     216       20793 :                 if (r2 == NULL) {
     217           0 :                         BBPreclaim(r1);
     218           0 :                         return BUN_NONE;
     219             :                 }
     220       20793 :                 r2->tnil = false;
     221       20793 :                 r2->tnonil = true;
     222       20793 :                 r2->tkey = true;
     223       20793 :                 r2->tsorted = true;
     224       20793 :                 r2->trevsorted = true;
     225       20793 :                 r2->tseqbase = 0;
     226       20793 :                 r2->theap->dirty = true;
     227       20793 :                 *r2p = r2;
     228             :         }
     229       40061 :         if (r3p) {
     230          95 :                 BAT *r3 = COLnew(0, TYPE_bit, size, TRANSIENT);
     231          95 :                 if (r3 == NULL) {
     232           0 :                         BBPreclaim(r1);
     233           0 :                         BBPreclaim(r2);
     234           0 :                         return BUN_NONE;
     235             :                 }
     236          95 :                 r3->tnil = false;
     237          95 :                 r3->tnonil = true;
     238          95 :                 r3->tkey = false;
     239          95 :                 r3->tsorted = false;
     240          95 :                 r3->trevsorted = false;
     241          95 :                 r3->tseqbase = oid_nil;
     242          95 :                 r3->theap->dirty = true;
     243          95 :                 *r3p = r3;
     244             :         }
     245             :         return maxsize;
     246             : }
     247             : 
     248             : #define VALUE(s, x)     (s##vars ?                                      \
     249             :                          s##vars + VarHeapVal(s##vals, (x), s##i.width) : \
     250             :                          s##vals ? (const char *) s##vals + ((x) * s##i.width) : \
     251             :                          (s##val = BUNtoid(s, (x)), (const char *) &s##val))
     252             : #define FVALUE(s, x)    ((const char *) s##vals + ((x) * s##i.width))
     253             : 
     254             : #define APPEND(b, o)            (((oid *) b->theap->base)[b->batCount++] = (o))
     255             : 
     256             : static inline gdk_return
     257   187956492 : maybeextend(BAT *restrict r1, BAT *restrict r2, BAT *restrict r3,
     258             :             BUN cnt, BUN lcur, BUN lcnt, BUN maxsize)
     259             : {
     260   187956492 :         if (BATcount(r1) + cnt > BATcapacity(r1)) {
     261             :                 /* make some extra space by extrapolating how much more
     262             :                  * we need (fraction of l we've seen so far is used to
     263             :                  * estimate a new size but with a shallow slope so that
     264             :                  * a skewed join doesn't overwhelm, whilst making sure
     265             :                  * there is somewhat significant progress) */
     266        1782 :                 BUN newcap = (BUN) (lcnt / (lcnt / 4.0 + lcur * .75) * (BATcount(r1) + cnt));
     267        1782 :                 newcap = (newcap + INCRSIZE - 1) & ~(((BUN) 1 << INCRSIZELOG) - 1);
     268        1782 :                 if (newcap < cnt + BATcount(r1))
     269           0 :                         newcap = cnt + BATcount(r1) + INCRSIZE;
     270             :                 /* if close to maxsize, then just use maxsize */
     271        1782 :                 if (newcap + INCRSIZE > maxsize)
     272         154 :                         newcap = maxsize;
     273             :                 /* make sure heap.free is set properly before
     274             :                  * extending */
     275        1782 :                 BATsetcount(r1, BATcount(r1));
     276        1783 :                 if (BATextend(r1, newcap) != GDK_SUCCEED)
     277             :                         return GDK_FAIL;
     278        1782 :                 if (r2) {
     279        1188 :                         BATsetcount(r2, BATcount(r2));
     280        1188 :                         if (BATextend(r2, newcap) != GDK_SUCCEED)
     281             :                                 return GDK_FAIL;
     282        1188 :                         assert(BATcapacity(r1) == BATcapacity(r2));
     283             :                 }
     284        1782 :                 if (r3) {
     285           0 :                         BATsetcount(r3, BATcount(r3));
     286           0 :                         if (BATextend(r3, newcap) != GDK_SUCCEED)
     287             :                                 return GDK_FAIL;
     288           0 :                         assert(BATcapacity(r1) == BATcapacity(r3));
     289             :                 }
     290             :         }
     291             :         return GDK_SUCCEED;
     292             : }
     293             : 
     294             : /* Return BATs through r1p, r2p, and r3p for the case that there is no
     295             :  * match between l and r, taking all flags into consideration.
     296             :  *
     297             :  * This means, if nil_on_miss is set or only_misses is set, *r1p is a
     298             :  * copy of the left candidate list or a dense list of all "head"
     299             :  * values of l, and *r2p (if r2p is not NULL) is all nil.  If neither
     300             :  * of those flags is set, the result is two empty BATs. */
     301             : static gdk_return
     302      243662 : nomatch(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     303             :         struct canditer *restrict lci, bit defmark,
     304             :         bool nil_on_miss, bool only_misses, const char *func, lng t0)
     305             : {
     306      243662 :         BAT *r1, *r2 = NULL, *r3 = NULL;
     307             : 
     308      243662 :         MT_thread_setalgorithm(__func__);
     309      243633 :         if (lci->ncand == 0 || !(nil_on_miss | only_misses)) {
     310             :                 /* return empty BATs */
     311      230797 :                 if ((r1 = BATdense(0, 0, 0)) == NULL)
     312             :                         return GDK_FAIL;
     313      230799 :                 if (r2p) {
     314      148583 :                         if ((r2 = BATdense(0, 0, 0)) == NULL) {
     315           0 :                                 BBPreclaim(r1);
     316           0 :                                 return GDK_FAIL;
     317             :                         }
     318      148572 :                         *r2p = r2;
     319             :                 }
     320      230788 :                 if (r3p) {
     321        9303 :                         if ((r3 = COLnew(0, TYPE_bit, 0, TRANSIENT)) == NULL) {
     322           0 :                                 BBPreclaim(r1);
     323           0 :                                 BBPreclaim(r2);
     324           0 :                                 return GDK_FAIL;
     325             :                         }
     326        9304 :                         *r3p = r3;
     327             :                 }
     328             :         } else {
     329       12836 :                 r1 = canditer_slice(lci, 0, lci->ncand);
     330       12836 :                 if (r2p) {
     331          22 :                         if ((r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand, TRANSIENT)) == NULL) {
     332           0 :                                 BBPreclaim(r1);
     333           0 :                                 return GDK_FAIL;
     334             :                         }
     335          22 :                         *r2p = r2;
     336             :                 }
     337       12836 :                 if (r3p) {
     338          56 :                         if ((r3 = BATconstant(0, TYPE_bit, &defmark, lci->ncand, TRANSIENT)) == NULL) {
     339           0 :                                 BBPreclaim(r1);
     340           0 :                                 BBPreclaim(r2);
     341           0 :                                 return GDK_FAIL;
     342             :                         }
     343          56 :                         *r3p = r3;
     344             :                 }
     345             :         }
     346      243625 :         *r1p = r1;
     347      243625 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ",r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT
     348             :                   ",nil_on_miss=%s,only_misses=%s"
     349             :                   " - > " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
     350             :                   " (%s -- " LLFMT "usec)\n",
     351             :                   ALGOBATPAR(l), ALGOBATPAR(r), ALGOOPTBATPAR(lci->s),
     352             :                   nil_on_miss ? "true" : "false",
     353             :                   only_misses ? "true" : "false",
     354             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
     355             :                   func, GDKusec() - t0);
     356             :         return GDK_SUCCEED;
     357             : }
     358             : 
     359             : /* Implementation of join where there is a single value (possibly
     360             :  * repeated multiple times) on the left.  This means we can use a
     361             :  * point select to find matches in the right column. */
     362             : static gdk_return
     363       43860 : selectjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     364             :            struct canditer *lci, struct canditer *rci,
     365             :            bool nil_matches, bool nil_on_miss, bool semi, bool max_one, bool min_one,
     366             :            lng t0, bool swapped, const char *reason)
     367             : {
     368       43860 :         BATiter li = bat_iterator(l);
     369       43860 :         const void *v;
     370       43860 :         BAT *bn = NULL;
     371       43860 :         BAT *r1 = NULL;
     372       43860 :         BAT *r2 = NULL;
     373       43860 :         BUN bncount;
     374             : 
     375       43860 :         assert(lci->ncand > 0);
     376       43860 :         assert(lci->ncand == 1 || (li.sorted && li.revsorted));
     377             : 
     378       43860 :         size_t counter = 0;
     379       43860 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     380             : 
     381       43858 :         MT_thread_setalgorithm(__func__);
     382       43857 :         oid o = canditer_next(lci);
     383       43858 :         v = BUNtail(li, o - l->hseqbase);
     384             : 
     385       86003 :         if (!nil_matches &&
     386       42144 :             (*ATOMcompare(li.type))(v, ATOMnilptr(li.type)) == 0) {
     387             :                 /* NIL doesn't match anything */
     388         164 :                 bat_iterator_end(&li);
     389         164 :                 gdk_return rc = nomatch(r1p, r2p, r3p, l, r, lci, bit_nil, nil_on_miss,
     390             :                                         false, reason, t0);
     391         164 :                 return rc;
     392             :         }
     393             : 
     394       43695 :         bn = BATselect(r, rci->s, v, NULL, true, true, false);
     395       43696 :         bat_iterator_end(&li);
     396       43696 :         if (bn == NULL) {
     397             :                 return GDK_FAIL;
     398             :         }
     399       43696 :         bncount = BATcount(bn);
     400       43696 :         if (bncount == 0) {
     401        7998 :                 BBPreclaim(bn);
     402        7998 :                 if (min_one) {
     403           0 :                         GDKerror("not enough matches");
     404           0 :                         return GDK_FAIL;
     405             :                 }
     406        7998 :                 if (!nil_on_miss) {
     407        7850 :                         assert(r3p == NULL);
     408        7850 :                         return nomatch(r1p, r2p, r3p, l, r, lci, 0, nil_on_miss,
     409             :                                        false, reason, t0);
     410             :                 }
     411             :                 /* special case: return nil on RHS */
     412             :                 bncount = 1;
     413             :                 bn = NULL;
     414             :         }
     415       35698 :         if (bncount > 1) {
     416        1403 :                 if (semi)
     417         361 :                         bncount = 1;
     418        1403 :                 if (max_one) {
     419          16 :                         GDKerror("more than one match");
     420          16 :                         goto bailout;
     421             :                 }
     422             :         }
     423       35830 :         r1 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
     424       35830 :         if (r1 == NULL)
     425           0 :                 goto bailout;
     426       35830 :         r1->tsorted = true;
     427       35830 :         r1->trevsorted = lci->ncand == 1;
     428       35830 :         r1->tseqbase = bncount == 1 && lci->tpe == cand_dense ? o : oid_nil;
     429       35830 :         r1->tkey = bncount == 1;
     430       35830 :         r1->tnil = false;
     431       35830 :         r1->tnonil = true;
     432       35830 :         if (bn == NULL) {
     433             :                 /* left outer join, no match, we're returning nil in r2 */
     434         148 :                 oid *o1p = (oid *) Tloc(r1, 0);
     435         148 :                 BUN p, q = bncount;
     436             : 
     437         148 :                 if (r2p) {
     438           2 :                         r2 = BATconstant(0, TYPE_void, &oid_nil, lci->ncand * bncount, TRANSIENT);
     439           2 :                         if (r2 == NULL)
     440           0 :                                 goto bailout;
     441           2 :                         *r2p = r2;
     442             :                 }
     443         305 :                 do {
     444         305 :                         GDK_CHECK_TIMEOUT(qry_ctx, counter,
     445             :                                           GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     446         610 :                         for (p = 0; p < q; p++) {
     447         305 :                                 *o1p++ = o;
     448             :                         }
     449         305 :                         o = canditer_next(lci);
     450         305 :                 } while (!is_oid_nil(o));
     451             :         } else {
     452       35682 :                 oid *o1p = (oid *) Tloc(r1, 0);
     453       35682 :                 oid *o2p;
     454       35682 :                 BUN p, q = bncount;
     455             : 
     456       35682 :                 if (r2p) {
     457       31096 :                         r2 = COLnew(0, TYPE_oid, lci->ncand * bncount, TRANSIENT);
     458       31096 :                         if (r2 == NULL)
     459           0 :                                 goto bailout;
     460       31096 :                         r2->tsorted = lci->ncand == 1 || bncount == 1;
     461       31096 :                         r2->trevsorted = bncount == 1;
     462       31096 :                         r2->tseqbase = lci->ncand == 1 && BATtdense(bn) ? bn->tseqbase : oid_nil;
     463       31096 :                         r2->tkey = lci->ncand == 1;
     464       31096 :                         r2->tnil = false;
     465       31096 :                         r2->tnonil = true;
     466       31096 :                         *r2p = r2;
     467       31096 :                         o2p = (oid *) Tloc(r2, 0);
     468             :                 } else {
     469             :                         o2p = NULL;
     470             :                 }
     471             : 
     472       35682 :                 if (BATtdense(bn)) {
     473             :                         oid bno = bn->tseqbase;
     474             : 
     475     1307838 :                         do {
     476     1307838 :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
     477             :                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     478     2669328 :                                 for (p = 0; p < q; p++) {
     479     1361490 :                                         *o1p++ = o;
     480             :                                 }
     481     1307838 :                                 if (o2p) {
     482      391472 :                                         for (p = 0; p < q; p++) {
     483      222512 :                                                 *o2p++ = bno + p;
     484             :                                         }
     485             :                                 }
     486     1307838 :                                 o = canditer_next(lci);
     487     1307837 :                         } while (!is_oid_nil(o));
     488             :                 } else {
     489         175 :                         const oid *bnp = (const oid *) Tloc(bn, 0);
     490             : 
     491      148273 :                         do {
     492      148273 :                                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
     493             :                                                   GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     494     6007208 :                                 for (p = 0; p < q; p++) {
     495     5858935 :                                         *o1p++ = o;
     496             :                                 }
     497      148273 :                                 if (o2p) {
     498     5715276 :                                         for (p = 0; p < q; p++) {
     499     5558720 :                                                 *o2p++ = bnp[p];
     500             :                                         }
     501             :                                 }
     502      148273 :                                 o = canditer_next(lci);
     503      148273 :                         } while (!is_oid_nil(o));
     504             :                 }
     505       35681 :                 if (r2)
     506       31095 :                         BATsetcount(r2, lci->ncand * bncount);
     507             :         }
     508       35829 :         BATsetcount(r1, lci->ncand * bncount);
     509       35829 :         *r1p = r1;
     510       35829 :         BAT *r3 = NULL;
     511       35829 :         if (r3p) {
     512         187 :                 bit mark;
     513         187 :                 if (bn) {
     514             :                         /* there is a match */
     515          40 :                         mark = 1;
     516         147 :                 } else if (r->tnonil) {
     517             :                         /* no match, no NIL in r */
     518         142 :                         mark = 0;
     519             :                 } else {
     520             :                         /* no match, search for NIL in r */
     521           5 :                         BAT *n = BATselect(r, rci->s, ATOMnilptr(r->ttype), NULL, true, true, false);
     522           5 :                         if (n == NULL)
     523           0 :                                 goto bailout;
     524           5 :                         mark = BATcount(n) == 0 ? 0 : bit_nil;
     525           5 :                         BBPreclaim(n);
     526             :                 }
     527         187 :                 r3 = BATconstant(0, TYPE_bit, &mark, lci->ncand, TRANSIENT);
     528         187 :                 if (r3 == NULL)
     529           0 :                         goto bailout;
     530         187 :                 *r3p = r3;
     531             :         }
     532       35829 :         BBPreclaim(bn);
     533       35830 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
     534             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
     535             :                   "sr=" ALGOOPTBATFMT ",nil_matches=%s;%s %s "
     536             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
     537             :                   " (" LLFMT "usec)\n",
     538             :                   ALGOBATPAR(l), ALGOBATPAR(r),
     539             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
     540             :                   nil_matches ? "true" : "false",
     541             :                   swapped ? " swapped" : "", reason,
     542             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
     543             :                   GDKusec() - t0);
     544             : 
     545             :         return GDK_SUCCEED;
     546             : 
     547          16 :   bailout:
     548          16 :         BBPreclaim(bn);
     549          16 :         BBPreclaim(r1);
     550          16 :         BBPreclaim(r2);
     551          16 :         if (r2p)
     552          15 :                 *r2p = NULL;
     553             :         return GDK_FAIL;
     554             : }
     555             : 
     556             : #if SIZEOF_OID == SIZEOF_INT /* binsearch_oid forwards to binsearch_int, casting vals to const int * and v to int */
     557             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_int(indir, offset, (const int *) vals, lo, hi, (int) (v), ordering, last)
     558             : #endif /* SIZEOF_OID == SIZEOF_INT */
     559             : #if SIZEOF_OID == SIZEOF_LNG /* binsearch_oid forwards to binsearch_lng, casting vals to const lng * and v to lng */
     560             : #define binsearch_oid(indir, offset, vals, lo, hi, v, ordering, last) binsearch_lng(indir, offset, (const lng *) vals, lo, hi, (lng) (v), ordering, last)
     561             : #endif /* SIZEOF_OID == SIZEOF_LNG */
     562             : 
     563             : /* Implementation of join where the right-hand side is dense, and if
     564             :  * there is a right candidate list, it too is dense.  This means there
     565             :  * are no NIL values in r.  In case nil_on_miss is not set, we use a
     566             :  * range select (BATselect) to find the matching values in the left
     567             :  * column and then calculate the corresponding matches from the right.
     568             :  * If nil_on_miss is set, we need to do some more work. The latter is
     569             :  * also the only case in which r3p can be set. */
     570             : static gdk_return
     571       18817 : mergejoin_void(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
     572             :                struct canditer *restrict lci, struct canditer *restrict rci,
     573             :                bool nil_on_miss, bool only_misses, lng t0, bool swapped,
     574             :                const char *reason)
     575             : {
     576       18817 :         oid lo, hi;
     577       18817 :         BUN i;
     578       18817 :         oid o, *o1p = NULL, *o2p = NULL;
     579       18817 :         bit *m3p = NULL;
     580       18817 :         BAT *r1 = NULL, *r2 = NULL, *r3 = NULL;
     581       18817 :         bool ltsorted = false, ltrevsorted = false, ltkey = false;
     582             : 
     583             :         /* r is dense, and if there is a candidate list, it too is
     584             :          * dense.  This means we don't have to do any searches, we
     585             :          * only need to compare ranges to know whether a value from l
     586             :          * has a match in r */
     587       27668 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
     588       18817 :         assert(r->tsorted || r->trevsorted);
     589       18817 :         assert(BATcount(l) > 0);
     590       18817 :         assert(rci->tpe == cand_dense);
     591       18817 :         assert(BATcount(r) > 0);
     592             : 
     593       18817 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
     594             : 
     595       18814 :         MT_thread_setalgorithm(__func__);
     596             :         /* figure out range [lo..hi) of values in r that we need to match */
     597       18816 :         lo = r->tseqbase;
     598       18816 :         hi = lo + BATcount(r);
     599             :         /* restrict [lo..hi) range further using candidate list */
     600       18816 :         if (rci->seq > r->hseqbase)
     601           0 :                 lo += rci->seq - r->hseqbase;
     602       18816 :         if (rci->seq + rci->ncand < r->hseqbase + BATcount(r))
     603           0 :                 hi -= r->hseqbase + BATcount(r) - rci->seq - rci->ncand;
     604             : 
     605             :         /* at this point, the matchable values in r are [lo..hi) */
     606       18816 :         if (!nil_on_miss) {
     607       18816 :                 assert(r3p == NULL);
     608       18816 :                 r1 = BATselect(l, lci->s, &lo, &hi, true, false, only_misses);
     609       18816 :                 if (r1 == NULL)
     610             :                         return GDK_FAIL;
     611       18816 :                 if (only_misses && !l->tnonil) {
     612             :                         /* also look for NILs */
     613           0 :                         r2 = BATselect(l, lci->s, &oid_nil, NULL, true, false, false);
     614           0 :                         if (r2 == NULL) {
     615           0 :                                 BBPreclaim(r1);
     616           0 :                                 return GDK_FAIL;
     617             :                         }
     618           0 :                         if (BATcount(r2) > 0) {
     619           0 :                                 BAT *mg = BATmergecand(r1, r2);
     620           0 :                                 BBPunfix(r1->batCacheid);
     621           0 :                                 BBPunfix(r2->batCacheid);
     622           0 :                                 r1 = mg;
     623           0 :                                 if (r1 == NULL)
     624             :                                         return GDK_FAIL;
     625             :                         } else {
     626           0 :                                 BBPunfix(r2->batCacheid);
     627             :                         }
     628             :                         r2 = NULL;
     629             :                 }
     630       18816 :                 *r1p = r1;
     631       18816 :                 if (r2p == NULL)
     632       18112 :                         goto doreturn2;
     633         704 :                 if (BATcount(r1) == 0) {
     634          14 :                         r2 = BATdense(0, 0, 0);
     635          14 :                         if (r2 == NULL) {
     636           0 :                                 BBPreclaim(r1);
     637           0 :                                 return GDK_FAIL;
     638             :                         }
     639         690 :                 } else if (BATtdense(r1) && BATtdense(l)) {
     640          77 :                         r2 = BATdense(0, l->tseqbase + r1->tseqbase - l->hseqbase + r->hseqbase - r->tseqbase, BATcount(r1));
     641          77 :                         if (r2 == NULL) {
     642           0 :                                 BBPreclaim(r1);
     643           0 :                                 return GDK_FAIL;
     644             :                         }
     645             :                 } else {
     646         613 :                         r2 = COLnew(0, TYPE_oid, BATcount(r1), TRANSIENT);
     647         612 :                         if (r2 == NULL) {
     648           0 :                                 BBPreclaim(r1);
     649           0 :                                 return GDK_FAIL;
     650             :                         }
     651         612 :                         BATiter li = bat_iterator(l);
     652         613 :                         const oid *lp = (const oid *) li.base;
     653         613 :                         const oid *o1p = (const oid *) Tloc(r1, 0);
     654         613 :                         oid *o2p = (oid *) Tloc(r2, 0);
     655         613 :                         hi = BATcount(r1);
     656         613 :                         if (complex_cand(l)) {
     657             :                                 /* this is actually generic code */
     658           0 :                                 for (o = 0; o < hi; o++)
     659           0 :                                         o2p[o] = BUNtoid(l, BUNtoid(r1, o) - l->hseqbase) - r->tseqbase + r->hseqbase;
     660         613 :                         } else if (BATtdense(r1)) {
     661         283 :                                 lo = r1->tseqbase - l->hseqbase;
     662         283 :                                 if (r->tseqbase == r->hseqbase) {
     663         276 :                                         memcpy(o2p, lp + lo, hi * SIZEOF_OID);
     664             :                                 } else {
     665           7 :                                         hi += lo;
     666     5085011 :                                         for (o = 0; lo < hi; o++, lo++) {
     667     5085004 :                                                 o2p[o] = lp[lo] - r->tseqbase + r->hseqbase;
     668             :                                         }
     669             :                                 }
     670         330 :                         } else if (BATtdense(l)) {
     671           0 :                                 for (o = 0; o < hi; o++) {
     672           0 :                                         o2p[o] = o1p[o] - l->hseqbase + li.tseq - r->tseqbase + r->hseqbase;
     673             :                                 }
     674             :                         } else {
     675    42933205 :                                 for (o = 0; o < hi; o++) {
     676    42932875 :                                         o2p[o] = lp[o1p[o] - l->hseqbase] - r->tseqbase + r->hseqbase;
     677             :                                 }
     678             :                         }
     679         613 :                         r2->tkey = li.key;
     680         613 :                         r2->tsorted = li.sorted;
     681         613 :                         r2->trevsorted = li.revsorted;
     682         613 :                         bat_iterator_end(&li);
     683         613 :                         r2->tnil = false;
     684         613 :                         r2->tnonil = true;
     685         613 :                         BATsetcount(r2, BATcount(r1));
     686             :                 }
     687         704 :                 *r2p = r2;
     688         704 :                 goto doreturn2;
     689             :         }
     690             :         /* nil_on_miss is set, this means we must have a second or third
     691             :          * output */
     692           0 :         assert(r2p || r3p);
     693           0 :         if (BATtdense(l)) {
     694             :                 /* if l is dense, we can further restrict the [lo..hi)
     695             :                  * range to values in l that match with values in r */
     696           0 :                 o = lo;
     697           0 :                 i = lci->seq - l->hseqbase;
     698           0 :                 if (l->tseqbase + i > lo)
     699           0 :                         lo = l->tseqbase + i;
     700           0 :                 i = canditer_last(lci) + 1 - l->hseqbase;
     701           0 :                 if (l->tseqbase + i < hi)
     702           0 :                         hi = l->tseqbase + i;
     703           0 :                 if (lci->tpe == cand_dense) {
     704             :                         /* l is dense, and so is the left candidate
     705             :                          * list (if it exists); this means we don't
     706             :                          * have to actually look at any values in l:
     707             :                          * we can just do some arithmetic; it also
     708             :                          * means that r1 will be dense, and if
     709             :                          * nil_on_miss is not set, or if all values in
     710             :                          * l match, r2 will too */
     711           0 :                         if (hi <= lo) {
     712           0 :                                 return nomatch(r1p, r2p, r3p, l, r, lci, 0,
     713             :                                                nil_on_miss, only_misses,
     714             :                                                __func__, t0);
     715             :                         }
     716             : 
     717             :                         /* at this point, the matched values in l and
     718             :                          * r (taking candidate lists into account) are
     719             :                          * [lo..hi) which we can translate back to the
     720             :                          * respective OID values that we can store in
     721             :                          * r1 and r2; note that r1 will be dense since
     722             :                          * all values in l will match something (even
     723             :                          * if nil since nil_on_miss is set) */
     724           0 :                         *r1p = r1 = BATdense(0, lci->seq, lci->ncand);
     725           0 :                         if (r1 == NULL)
     726             :                                 return GDK_FAIL;
     727           0 :                         if (r2p) {
     728           0 :                                 if (hi - lo < lci->ncand) {
     729             :                                         /* we need to fill in nils in r2 for
     730             :                                          * missing values */
     731           0 :                                         *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     732           0 :                                         if (r2 == NULL) {
     733           0 :                                                 BBPreclaim(*r1p);
     734           0 :                                                 return GDK_FAIL;
     735             :                                         }
     736           0 :                                         o2p = (oid *) Tloc(r2, 0);
     737           0 :                                         i = l->tseqbase + lci->seq - l->hseqbase;
     738           0 :                                         lo -= i;
     739           0 :                                         hi -= i;
     740           0 :                                         i += r->hseqbase - r->tseqbase;
     741           0 :                                         for (o = 0; o < lo; o++)
     742           0 :                                                 *o2p++ = oid_nil;
     743           0 :                                         for (o = lo; o < hi; o++)
     744           0 :                                                 *o2p++ = o + i;
     745           0 :                                         for (o = hi; o < lci->ncand; o++)
     746           0 :                                                 *o2p++ = oid_nil;
     747           0 :                                         r2->tnonil = false;
     748           0 :                                         r2->tnil = true;
     749             :                                         /* sorted of no nils at end */
     750           0 :                                         r2->tsorted = hi == lci->ncand;
     751             :                                         /* reverse sorted if single non-nil at start */
     752           0 :                                         r2->trevsorted = lo == 0 && hi == 1;
     753           0 :                                         r2->tseqbase = oid_nil;
     754             :                                         /* (hi - lo) different OIDs in r2,
     755             :                                          * plus one for nil */
     756           0 :                                         r2->tkey = hi - lo + 1 == lci->ncand;
     757           0 :                                         BATsetcount(r2, lci->ncand);
     758             :                                 } else {
     759             :                                         /* no missing values */
     760           0 :                                         *r2p = r2 = BATdense(0, r->hseqbase + lo - r->tseqbase, lci->ncand);
     761           0 :                                         if (r2 == NULL) {
     762           0 :                                                 BBPreclaim(*r1p);
     763           0 :                                                 return GDK_FAIL;
     764             :                                         }
     765             :                                 }
     766             :                         }
     767           0 :                         if (r3p) {
     768           0 :                                 if (hi - lo < lci->ncand) {
     769           0 :                                         *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     770           0 :                                         if (r3 == NULL) {
     771           0 :                                                 BBPreclaim(*r1p);
     772           0 :                                                 BBPreclaim(r2);
     773           0 :                                                 return GDK_FAIL;
     774             :                                         }
     775           0 :                                         m3p = (bit *) Tloc(r3, 0);
     776           0 :                                         for (o = 0; o < lo; o++)
     777           0 :                                                 *m3p++ = 0;
     778           0 :                                         for (o = lo; o < hi; o++)
     779           0 :                                                 *m3p++ = 1;
     780           0 :                                         for (o = hi; o < lci->ncand; o++)
     781           0 :                                                 *m3p++ = 0;
     782           0 :                                         r3->tnonil = true;
     783           0 :                                         r3->tnil = false;
     784           0 :                                         r3->tsorted = hi == lci->ncand;
     785           0 :                                         r3->trevsorted = lo == 0;
     786           0 :                                         r3->tkey = false;
     787           0 :                                         BATsetcount(r3, lci->ncand);
     788             :                                 }
     789             :                         }
     790           0 :                         goto doreturn;
     791             :                 }
     792             :                 /* l is dense, but the candidate list exists and is
     793             :                  * not dense; we can, by manipulating the range
     794             :                  * [lo..hi), just look at the candidate list values */
     795             : 
     796             :                 /* translate lo and hi to l's OID values that now need
     797             :                  * to match */
     798           0 :                 lo = lo - l->tseqbase + l->hseqbase;
     799           0 :                 hi = hi - l->tseqbase + l->hseqbase;
     800             : 
     801           0 :                 *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     802           0 :                 if (r2p)
     803           0 :                         *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     804           0 :                 if (r3p)
     805           0 :                         *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     806           0 :                 if (r1 == NULL || (r2p != NULL && r2 == NULL) || (r3p != NULL && r3 == NULL)) {
     807           0 :                         BBPreclaim(r1);
     808           0 :                         BBPreclaim(r2);
     809           0 :                         BBPreclaim(r3);
     810           0 :                         return GDK_FAIL;
     811             :                 }
     812           0 :                 o1p = (oid *) Tloc(r1, 0);
     813           0 :                 if (r2) {
     814           0 :                         o2p = (oid *) Tloc(r2, 0);
     815           0 :                         r2->tnil = false;
     816           0 :                         r2->tnonil = true;
     817           0 :                         r2->tkey = true;
     818           0 :                         r2->tsorted = true;
     819             :                 }
     820           0 :                 if (r3) {
     821           0 :                         m3p = (bit *) Tloc(r3, 0);
     822           0 :                         r3->tnil = false;
     823           0 :                         r3->tnonil = true;
     824           0 :                         r3->tkey = false;
     825           0 :                         r3->tsorted = false;
     826             :                 }
     827           0 :                 o = canditer_next(lci);
     828           0 :                 for (i = 0; i < lci->ncand && o < lo; i++) {
     829           0 :                         *o1p++ = o;
     830           0 :                         if (r2)
     831           0 :                                 *o2p++ = oid_nil;
     832           0 :                         if (r3)
     833           0 :                                 *m3p++ = 0;
     834           0 :                         o = canditer_next(lci);
     835             :                 }
     836           0 :                 if (i > 0 && r2) {
     837           0 :                         r2->tnil = true;
     838           0 :                         r2->tnonil = false;
     839           0 :                         r2->tkey = i == 1;
     840             :                 }
     841           0 :                 for (; i < lci->ncand && o < hi; i++) {
     842           0 :                         *o1p++ = o;
     843           0 :                         if (r2)
     844           0 :                                 *o2p++ = o - l->hseqbase + l->tseqbase - r->tseqbase + r->hseqbase;
     845           0 :                         if (r3)
     846           0 :                                 *m3p++ = 1;
     847           0 :                         o = canditer_next(lci);
     848             :                 }
     849           0 :                 if (i < lci->ncand) {
     850           0 :                         if (r2) {
     851           0 :                                 r2->tkey = !r2->tnil && lci->ncand - i == 1;
     852           0 :                                 r2->tnil = true;
     853           0 :                                 r2->tnonil = false;
     854           0 :                                 r2->tsorted = false;
     855             :                         }
     856           0 :                         for (; i < lci->ncand; i++) {
     857           0 :                                 *o1p++ = o;
     858           0 :                                 if (r2)
     859           0 :                                         *o2p++ = oid_nil;
     860           0 :                                 if (r1)
     861           0 :                                         *m3p++ = 0;
     862           0 :                                 o = canditer_next(lci);
     863             :                         }
     864             :                 }
     865           0 :                 BATsetcount(r1, lci->ncand);
     866           0 :                 r1->tseqbase = BATcount(r1) == 1 ? *(oid*)Tloc(r1, 0) : oid_nil;
     867           0 :                 r1->tsorted = true;
     868           0 :                 r1->trevsorted = BATcount(r1) <= 1;
     869           0 :                 r1->tnil = false;
     870           0 :                 r1->tnonil = true;
     871           0 :                 r1->tkey = true;
     872           0 :                 if (r2) {
     873           0 :                         BATsetcount(r2, BATcount(r1));
     874           0 :                         r2->tseqbase = r2->tnil || BATcount(r2) > 1 ? oid_nil : BATcount(r2) == 1 ? *(oid*)Tloc(r2, 0) : 0;
     875           0 :                         r2->trevsorted = BATcount(r2) <= 1;
     876             :                 }
     877           0 :                 if (r3) {
     878           0 :                         BATsetcount(r3, BATcount(r1));
     879             :                 }
     880           0 :                 goto doreturn;
     881             :         }
     882             :         /* l is not dense, so we need to look at the values and check
     883             :          * whether they are in the range [lo..hi) */
     884             : 
     885             :         /* do indirection through the candidate list to look at the
     886             :          * value */
     887             : 
     888           0 :         *r1p = r1 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     889           0 :         if (r2p)
     890           0 :                 *r2p = r2 = COLnew(0, TYPE_oid, lci->ncand, TRANSIENT);
     891           0 :         if (r3p)
     892           0 :                 *r3p = r3 = COLnew(0, TYPE_bit, lci->ncand, TRANSIENT);
     893           0 :         if (r1 == NULL || (r2p != NULL && r2 == NULL) || (r3p != NULL && r3 == NULL)) {
     894           0 :                 BBPreclaim(r1);
     895           0 :                 BBPreclaim(r2);
     896           0 :                 BBPreclaim(r3);
     897           0 :                 return GDK_FAIL;
     898             :         }
     899           0 :         o1p = (oid *) Tloc(r1, 0);
     900           0 :         if (r2) {
     901           0 :                 o2p = (oid *) Tloc(r2, 0);
     902           0 :                 r2->tnil = false;
     903           0 :                 r2->tnonil = true;
     904             :         }
     905           0 :         if (r3) {
     906           0 :                 m3p = (bit *) Tloc(r3, 0);
     907           0 :                 r3->tnil = false;
     908           0 :                 r3->tnonil = true;
     909             :         }
     910           0 :         if (complex_cand(l)) {
     911           0 :                 ltsorted = l->tsorted;
     912           0 :                 ltrevsorted = l->trevsorted;
     913           0 :                 ltkey = l->tkey;
     914           0 :                 TIMEOUT_LOOP(lci->ncand, qry_ctx) {
     915           0 :                         oid c = canditer_next(lci);
     916             : 
     917           0 :                         o = BUNtoid(l, c - l->hseqbase);
     918           0 :                         *o1p++ = c;
     919           0 :                         if (r2) {
     920           0 :                                 if (o >= lo && o < hi) {
     921           0 :                                         *o2p++ = o - r->tseqbase + r->hseqbase;
     922             :                                 } else {
     923           0 :                                         *o2p++ = oid_nil;
     924           0 :                                         r2->tnil = true;
     925           0 :                                         r2->tnonil = false;
     926             :                                 }
     927             :                         }
     928           0 :                         if (r3) {
     929           0 :                                 if (is_oid_nil(o)) {
     930           0 :                                         *m3p++ = bit_nil;
     931           0 :                                         r3->tnil = true;
     932           0 :                                         r3->tnonil = false;
     933             :                                 } else {
     934           0 :                                         *m3p++ = (o >= lo && o < hi);
     935             :                                 }
     936             :                         }
     937             :                 }
     938           0 :                 TIMEOUT_CHECK(qry_ctx,
     939             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     940             :         } else {
     941           0 :                 BATiter li = bat_iterator(l);
     942           0 :                 const oid *lvals = (const oid *) li.base;
     943           0 :                 ltsorted = li.sorted;
     944           0 :                 ltrevsorted = li.revsorted;
     945           0 :                 ltkey = li.key;
     946           0 :                 TIMEOUT_LOOP(lci->ncand, qry_ctx) {
     947           0 :                         oid c = canditer_next(lci);
     948             : 
     949           0 :                         o = lvals[c - l->hseqbase];
     950           0 :                         *o1p++ = c;
     951           0 :                         if (r2) {
     952           0 :                                 if (o >= lo && o < hi) {
     953           0 :                                         *o2p++ = o - r->tseqbase + r->hseqbase;
     954             :                                 } else {
     955           0 :                                         *o2p++ = oid_nil;
     956           0 :                                         r2->tnil = true;
     957           0 :                                         r2->tnonil = false;
     958             :                                 }
     959             :                         }
     960           0 :                         if (r3) {
     961           0 :                                 if (is_oid_nil(o)) {
     962           0 :                                         *m3p++ = bit_nil;
     963           0 :                                         r3->tnil = true;
     964           0 :                                         r3->tnonil = false;
     965             :                                 } else {
     966           0 :                                         *m3p++ = (o >= lo && o < hi);
     967             :                                 }
     968             :                         }
     969             :                 }
     970           0 :                 bat_iterator_end(&li);
     971           0 :                 TIMEOUT_CHECK(qry_ctx,
     972             :                               GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
     973             :         }
     974           0 :         r1->tsorted = true;
     975           0 :         r1->trevsorted = BATcount(r1) <= 1;
     976           0 :         r1->tkey = true;
     977           0 :         r1->tseqbase = oid_nil;
     978           0 :         r1->tnil = false;
     979           0 :         r1->tnonil = true;
     980           0 :         BATsetcount(r1, lci->ncand);
     981           0 :         if (r2) {
     982           0 :                 BATsetcount(r2, lci->ncand);
     983           0 :                 r2->tsorted = ltsorted || BATcount(r2) <= 1;
     984           0 :                 r2->trevsorted = ltrevsorted || BATcount(r2) <= 1;
     985           0 :                 r2->tkey = ltkey || BATcount(r2) <= 1;
     986           0 :                 r2->tseqbase = oid_nil;
     987             :         }
     988           0 :         if (r3) {
     989           0 :                 BATsetcount(r3, lci->ncand);
     990             :         }
     991             : 
     992           0 :   doreturn:
     993           0 :         if (r1->tkey)
     994           0 :                 virtualize(r1);
     995           0 :         if (r2 && r2->tkey && r2->tsorted)
     996           0 :                 virtualize(r2);
     997           0 :   doreturn2:
     998       18816 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
     999             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    1000             :                   "sr=" ALGOOPTBATFMT ","
    1001             :                   "nil_on_miss=%s,only_misses=%s;%s %s "
    1002             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT "," ALGOOPTBATFMT
    1003             :                   " (" LLFMT "usec)\n",
    1004             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1005             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    1006             :                   nil_on_miss ? "true" : "false",
    1007             :                   only_misses ? "true" : "false",
    1008             :                   swapped ? " swapped" : "", reason,
    1009             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2), ALGOOPTBATPAR(r3),
    1010             :                   GDKusec() - t0);
    1011             : 
    1012             :         return GDK_SUCCEED;
    1013             : 
    1014           0 :   bailout:
    1015           0 :         BBPreclaim(r1);
    1016           0 :         BBPreclaim(r2);
    1017             :         return GDK_FAIL;
    1018             : }
    1019             : 
    1020             : /* Implementation of mergejoin (see below) for the special case that
    1021             :  * the values are of type int, and some more conditions are met.
                     :  *
                     :  * The tail values of l and r are read directly as int arrays and
                     :  * are walked front to back using `<' and `==' comparisons.  r is
                     :  * asserted to be sorted or reverse sorted; l is presumably required
                     :  * to be sorted by the caller (this is not checked here).
                     :  *
                     :  * r1p, r2p    - result BATs, set up by joininitresults(); r2p may be
                     :  *               NULL, in which case only oids of l are produced
                     :  * l, r        - the columns to join
                     :  * nil_matches - if false, nils at the start of both columns are
                     :  *               skipped so that they never match; if true they are
                     :  *               not skipped and are compared like any other value
                     :  * estimate    - passed on to joininitresults()
                     :  * t0, swapped, reason - only used for the debug trace message
                     :  *
                     :  * If either input is empty, the result of nomatch() is returned.
                     :  * Otherwise returns GDK_SUCCEED with the matching oid pairs appended
                     :  * to the result BATs, or GDK_FAIL when joininitresults() fails, when
                     :  * maybeextend() fails, or on timeout; in the latter two cases the
                     :  * result BATs are released with BBPreclaim(). */
    1022             : static gdk_return
    1023        5253 : mergejoin_int(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1024             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
    1025             :               const char *reason)
    1026             : {
    1027        5253 :         BAT *r1, *r2;
    1028        5253 :         BUN lstart, lend, lcnt;
    1029        5253 :         BUN rstart, rend;
    1030        5253 :         BUN lscan, rscan;       /* opportunistic scan window */
    1031        5253 :         BUN maxsize;
    1032        5253 :         const int *lvals, *rvals;
    1033        5253 :         int v;
    1034        5253 :         BUN nl, nr;
    1035        5253 :         oid lv;
    1036        5253 :         BUN i;
    1037        5253 :         BATiter li = bat_iterator(l);
    1038        5253 :         BATiter ri = bat_iterator(r);
    1039             : 
    1040       15759 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1041        5253 :         assert(ri.sorted || ri.revsorted);
    1042             : 
    1043        5253 :         MT_thread_setalgorithm(__func__);
    1044        5253 :         lstart = rstart = 0;
    1045        5253 :         lend = BATcount(l);
    1046        5253 :         lcnt = lend - lstart;
    1047        5253 :         rend = BATcount(r);
    1048        5253 :         lvals = (const int *) li.base;
    1049        5253 :         rvals = (const int *) ri.base;
    1050        5253 :         size_t counter = 0;
    1051        5253 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1052             : 
    1053             :         /* basic properties will be adjusted if necessary later on,
    1054             :          * they were initially set by joininitresults() */
    1055             : 
    1056        5253 :         if (lend == 0 || rend == 0) {
    1057             :                 /* there are no matches */
    1058           0 :                 bat_iterator_end(&li);
    1059           0 :                 bat_iterator_end(&ri);
    1060           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1061           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1062             :                                0, false, false, __func__, t0);
    1063             :         }
    1064             : 
    1065        5253 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1066        5253 :                                        li.key, ri.key, false, false,
    1067             :                                        false, false, estimate)) == BUN_NONE) {
    1068           0 :                 bat_iterator_end(&li);
    1069           0 :                 bat_iterator_end(&ri);
    1070           0 :                 return GDK_FAIL;
    1071             :         }
    1072        5253 :         r1 = *r1p;
    1073        5253 :         r2 = r2p ? *r2p : NULL;
    1074             : 
    1075             :         /* determine opportunistic scan window for l and r: 4 plus
                     :          * the number of significant bits in the column count */
    1076       33498 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1077       28245 :                 nl >>= 1;
    1078       39321 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
    1079       34068 :                 nr >>= 1;
    1080             : 
    1081        5253 :         if (!nil_matches) {
    1082             :                 /* skip over nils at the start of the columns; the
                     :                  * linear scans are bounded by lend/rend because a
                     :                  * column that is shorter than the scan window and
                     :                  * consists only of nils would otherwise be read
                     :                  * past its end */
    1083        3463 :                 if (lscan < lend - lstart && is_int_nil(lvals[lstart + lscan])) {
    1084           0 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1085             :                                                lend - 1, int_nil, 1, 1);
    1086             :                 } else {
    1087        3465 :                         while (lstart < lend && is_int_nil(lvals[lstart]))
    1088           2 :                                 lstart++;
    1089             :                 }
    1090        3463 :                 if (rscan < rend - rstart && is_int_nil(rvals[rstart + rscan])) {
    1091           0 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1092             :                                                rend - 1, int_nil, 1, 1);
    1093             :                 } else {
    1094        3463 :                         while (rstart < rend && is_int_nil(rvals[rstart]))
    1095           0 :                                 rstart++;
    1096             :                 }
    1097             :         }
    1098             :         /* from here on we don't have to worry about nil values */
    1099             : 
    1100      375292 :         while (lstart < lend && rstart < rend) {
    1101      371178 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    1102             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    1103             : 
    1104      371178 :                 v = rvals[rstart];
    1105             : 
    1106      371178 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
    1107        1142 :                         lstart = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1108             :                                                lend - 1, v, 1, 0);
    1109             :                 } else {
    1110             :                         /* scan l for v */
    1111      375174 :                         while (lstart < lend && lvals[lstart] < v)
    1112        5138 :                                 lstart++;
    1113             :                 }
    1114      369678 :                 if (lstart >= lend) {
    1115             :                         /* nothing found */
    1116             :                         break;
    1117             :                 }
    1118             : 
    1119             :                 /* Here we determine the next value in l that we are
    1120             :                  * going to try to match in r.  We will also count the
    1121             :                  * number of occurrences in l of that value.
    1122             :                  * Afterwards, v holds the value and nl is the
    1123             :                  * number of times it occurs.  Also, lstart will
    1124             :                  * point to the next value to be considered (ready for
    1125             :                  * the next iteration).
    1126             :                  * If there are many equal values in l (more than
    1127             :                  * lscan), we will use binary search to find the end
    1128             :                  * of the sequence.  This relies on l being sorted
    1129             :                  * (lscan is always at least 4 in this function). */
    1130      369303 :                 nl = 1;         /* we'll match (at least) one in l */
    1131      369303 :                 nr = 0;         /* maybe we won't match anything in r */
    1132      369303 :                 v = lvals[lstart];
    1133      369303 :                 if (li.key) {
    1134             :                         /* if l is key, there is a single value */
    1135       52279 :                         lstart++;
    1136      317024 :                 } else if (lscan < lend - lstart &&
    1137      311476 :                            v == lvals[lstart + lscan]) {
    1138             :                         /* lots of equal values: use binary search to
    1139             :                          * find end */
    1140       25406 :                         nl = binsearch_int(NULL, 0, lvals, lstart + lscan,
    1141             :                                            lend - 1, v, 1, 1);
    1142       25396 :                         nl -= lstart;
    1143       25396 :                         lstart += nl;
    1144             :                 } else {
    1145             :                         /* just scan */
    1146     1391884 :                         while (++lstart < lend && v == lvals[lstart])
    1147     1100266 :                                 nl++;
    1148             :                 }
    1149             :                 /* lstart points one beyond the value we're
    1150             :                  * going to match: ready for the next iteration. */
    1151             : 
    1152             :                 /* First we find the first value in r that is at
    1153             :                  * least as large as v, then we find the first
    1154             :                  * value in r that is larger than v.  The difference
    1155             :                  * is the number of values equal to v and is stored in
    1156             :                  * nr.
    1157             :                  * We will use binary search on r to find both ends of
    1158             :                  * the sequence of values that are equal to v in case
    1159             :                  * the position is "too far" (more than rscan
    1160             :                  * away). */
    1161             : 
    1162             :                 /* first find the location of the first value in r
    1163             :                  * that is >= v, then find the location of the first
    1164             :                  * value in r that is > v; the difference is the
    1165             :                  * number of values equal to v */
    1166             : 
    1167             :                 /* look ahead a little (rscan) in r to see whether
    1168             :                  * we're better off doing a binary search */
    1169      369293 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
    1170             :                         /* value too far away in r: use binary
    1171             :                          * search */
    1172       18299 :                         rstart = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1173             :                                                rend - 1, v, 1, 0);
    1174             :                 } else {
    1175             :                         /* scan r for v */
    1176      372927 :                         while (rstart < rend && rvals[rstart] < v)
    1177       21933 :                                 rstart++;
    1178             :                 }
    1179      370072 :                 if (rstart == rend) {
    1180             :                         /* nothing found */
    1181             :                         break;
    1182             :                 }
    1183             : 
    1184             :                 /* now find the end of the sequence of equal values v */
    1185             : 
    1186             :                 /* if r is key, there is zero or one match, otherwise
    1187             :                  * look ahead a little (rscan) in r to see whether
    1188             :                  * we're better off doing a binary search */
    1189      369308 :                 if (ri.key) {
    1190      174190 :                         if (rstart < rend && v == rvals[rstart]) {
    1191      174311 :                                 nr = 1;
    1192      174311 :                                 rstart++;
    1193             :                         }
    1194      195118 :                 } else if (rscan < rend - rstart &&
    1195      194049 :                            v == rvals[rstart + rscan]) {
    1196             :                         /* range too large: use binary search */
    1197       70171 :                         nr = binsearch_int(NULL, 0, rvals, rstart + rscan,
    1198             :                                            rend - 1, v, 1, 1);
    1199       70377 :                         nr -= rstart;
    1200       70377 :                         rstart += nr;
    1201             :                 } else {
    1202             :                         /* scan r for end of range */
    1203     1020945 :                         while (rstart < rend && v == rvals[rstart]) {
    1204      895998 :                                 nr++;
    1205      895998 :                                 rstart++;
    1206             :                         }
    1207             :                 }
    1208             :                 /* rstart points to first value > v or end of
    1209             :                  * r, and nr is the number of values in r that
    1210             :                  * are equal to v */
    1211      369635 :                 if (nr == 0) {
    1212             :                         /* no entries in r found */
    1213         488 :                         continue;
    1214             :                 }
    1215             :                 /* make space: nl values in l match nr values in r, so
    1216             :                  * we need to add nl * nr values in the results */
    1217      369026 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1218           0 :                         goto bailout;
    1219             : 
    1220             :                 /* maintain properties */
    1221      369551 :                 if (nl > 1) {
    1222             :                         /* value occurs multiple times in l, so entry
    1223             :                          * in r will be repeated multiple times: hence
    1224             :                          * r2 is not key and not dense */
    1225      251659 :                         if (r2) {
    1226      216415 :                                 r2->tkey = false;
    1227      216415 :                                 r2->tseqbase = oid_nil;
    1228             :                         }
    1229             :                         /* multiple different values will be inserted
    1230             :                          * in r1 (always in order), so not reverse
    1231             :                          * ordered anymore */
    1232      251659 :                         r1->trevsorted = false;
    1233             :                 }
    1234      369551 :                 if (nr > 1) {
    1235             :                         /* value occurs multiple times in r, so entry
    1236             :                          * in l will be repeated multiple times: hence
    1237             :                          * r1 is not key and not dense */
    1238      156001 :                         r1->tkey = false;
    1239      156001 :                         r1->tseqbase = oid_nil;
    1240             :                         /* multiple different values will be inserted
    1241             :                          * in r2 (in order), so not reverse ordered
    1242             :                          * anymore */
    1243      156001 :                         if (r2) {
    1244      105488 :                                 r2->trevsorted = false;
    1245      105488 :                                 if (nl > 1) {
    1246             :                                         /* multiple values in l match
    1247             :                                          * multiple values in r, so an
    1248             :                                          * ordered sequence will be
    1249             :                                          * inserted multiple times in
    1250             :                                          * r2, so r2 is not ordered
    1251             :                                          * anymore */
    1252       82343 :                                         r2->tsorted = false;
    1253             :                                 }
    1254             :                         }
    1255             :                 }
    1256      369551 :                 if (BATcount(r1) > 0) {
    1257             :                         /* a new, higher value will be inserted into
    1258             :                          * r1, so r1 is not reverse ordered anymore */
    1259      365213 :                         r1->trevsorted = false;
    1260             :                         /* a new higher value will be added to r2 */
    1261      365213 :                         if (r2) {
    1262      298026 :                                 r2->trevsorted = false;
    1263             :                         }
    1264      365213 :                         if (BATtdense(r1) &&
    1265      203951 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1266          68 :                                 r1->tseqbase = oid_nil;
    1267             :                         }
    1268             :                 }
    1269             : 
    1270      369551 :                 if (r2 &&
    1271      301649 :                     BATcount(r2) > 0 &&
    1272      297466 :                     BATtdense(r2) &&
    1273       71472 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1274         545 :                         r2->tseqbase = oid_nil;
    1275             :                 }
    1276             : 
    1277             :                 /* insert values: every one of the nl positions in l
                     :                  * is paired with each of the nr positions in r */
    1278      369551 :                 lv = l->hseqbase + lstart - nl;
    1279    15919568 :                 for (i = 0; i < nl; i++) {
    1280             :                         BUN j;
    1281             : 
    1282   113617243 :                         for (j = 0; j < nr; j++) {
    1283    98067226 :                                 APPEND(r1, lv);
    1284             :                         }
    1285    15550017 :                         if (r2) {
    1286    15367683 :                                 oid rv = r->hseqbase + rstart - nr;
    1287             : 
    1288   105307091 :                                 for (j = 0; j < nr; j++) {
    1289    89939408 :                                         APPEND(r2, rv);
    1290    89939408 :                                         rv++;
    1291             :                                 }
    1292             :                         }
    1293    15550017 :                         lv++;
    1294             :                 }
    1295             :         }
    1296             :         /* also set other bits of heap to correct value to indicate size */
    1297        5253 :         BATsetcount(r1, BATcount(r1));
    1298        5251 :         if (r2) {
    1299        4102 :                 BATsetcount(r2, BATcount(r2));
    1300        4103 :                 assert(BATcount(r1) == BATcount(r2));
    1301             :         }
    1302        5252 :         if (BATcount(r1) > 0) {
    1303        4442 :                 if (BATtdense(r1))
    1304        3522 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1305        4442 :                 if (r2 && BATtdense(r2))
    1306        2362 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1307             :         } else {
    1308         810 :                 r1->tseqbase = 0;
    1309         810 :                 if (r2) {
    1310         384 :                         r2->tseqbase = 0;
    1311             :                 }
    1312             :         }
    1313        5252 :         bat_iterator_end(&li);
    1314        5253 :         bat_iterator_end(&ri);
    1315        5252 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1316             :                   "nil_matches=%s;%s %s "
    1317             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1318             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1319             :                   nil_matches ? "true" : "false",
    1320             :                   swapped ? " swapped" : "", reason,
    1321             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1322             :                   GDKusec() - t0);
    1323             : 
    1324             :         return GDK_SUCCEED;
    1325             : 
    1326           0 :   bailout:
    1327           0 :         bat_iterator_end(&li);
    1328           0 :         bat_iterator_end(&ri);
    1329           0 :         BBPreclaim(r1);
    1330           0 :         BBPreclaim(r2);
    1331             :         return GDK_FAIL;
    1332             : }
    1333             : 
    1334             : /* Implementation of mergejoin (see below) for the special case that
    1335             :  * the values are of type lng, and some more conditions are met. */
    1336             : static gdk_return
    1337         216 : mergejoin_lng(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1338             :               bool nil_matches, BUN estimate, lng t0, bool swapped,
    1339             :               const char *reason)
    1340             : {
    1341         216 :         BAT *r1, *r2;
    1342         216 :         BUN lstart, lend, lcnt;
    1343         216 :         BUN rstart, rend;
    1344         216 :         BUN lscan, rscan;       /* opportunistic scan window */
    1345         216 :         BUN maxsize;
    1346         216 :         const lng *lvals, *rvals;
    1347         216 :         lng v;
    1348         216 :         BUN nl, nr;
    1349         216 :         oid lv;
    1350         216 :         BUN i;
    1351         216 :         BATiter li = bat_iterator(l);
    1352         216 :         BATiter ri = bat_iterator(r);
    1353             : 
    1354         648 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1355         216 :         assert(ri.sorted || ri.revsorted);
    1356             : 
    1357         216 :         MT_thread_setalgorithm(__func__);
    1358         216 :         lstart = rstart = 0;
    1359         216 :         lend = BATcount(l);
    1360         216 :         lcnt = lend - lstart;
    1361         216 :         rend = BATcount(r);
    1362         216 :         lvals = (const lng *) li.base;
    1363         216 :         rvals = (const lng *) ri.base;
    1364         216 :         size_t counter = 0;
    1365         216 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1366             : 
    1367             :         /* basic properties will be adjusted if necessary later on,
    1368             :          * they were initially set by joininitresults() */
    1369             : 
    1370         216 :         if (lend == 0 || rend == 0) {
    1371             :                 /* there are no matches */
    1372           0 :                 bat_iterator_end(&li);
    1373           0 :                 bat_iterator_end(&ri);
    1374           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1375           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1376             :                                0, false, false, __func__, t0);
    1377             :         }
    1378             : 
    1379         216 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1380         216 :                                        li.key, ri.key, false, false,
    1381             :                                        false, false, estimate)) == BUN_NONE) {
    1382           0 :                 bat_iterator_end(&li);
    1383           0 :                 bat_iterator_end(&ri);
    1384           0 :                 return GDK_FAIL;
    1385             :         }
    1386         216 :         r1 = *r1p;
    1387         216 :         r2 = r2p ? *r2p : NULL;
    1388             : 
    1389             :         /* determine opportunistic scan window for l and r */
    1390        1408 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1391        1192 :                 nl >>= 1;
    1392        1384 :         for (nr = rend - rstart, rscan = 4; nr > 0; rscan++)
    1393        1168 :                 nr >>= 1;
    1394             : 
    1395         216 :         if (!nil_matches) {
    1396             :                 /* skip over nils at the start of the columns */
    1397         100 :                 if (lscan < lend - lstart && is_lng_nil(lvals[lstart + lscan])) {
    1398           0 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1399             :                                                lend - 1, lng_nil, 1, 1);
    1400             :                 } else {
    1401         100 :                         while (is_lng_nil(lvals[lstart]))
    1402           0 :                                 lstart++;
    1403             :                 }
    1404         100 :                 if (rscan < rend - rstart && is_lng_nil(rvals[rstart + rscan])) {
    1405           0 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1406             :                                                rend - 1, lng_nil, 1, 1);
    1407             :                 } else {
    1408         100 :                         while (is_lng_nil(rvals[rstart]))
    1409           0 :                                 rstart++;
    1410             :                 }
    1411             :         }
    1412             :         /* from here on we don't have to worry about nil values */
    1413             : 
    1414      416609 :         while (lstart < lend && rstart < rend) {
    1415      416478 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    1416             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    1417      416477 :                 v = rvals[rstart];
    1418             : 
    1419      416477 :                 if (lscan < lend - lstart && lvals[lstart + lscan] < v) {
    1420         888 :                         lstart = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1421             :                                                lend - 1, v, 1, 0);
    1422             :                 } else {
    1423             :                         /* scan l for v */
    1424      498974 :                         while (lstart < lend && lvals[lstart] < v)
    1425       83385 :                                 lstart++;
    1426             :                 }
    1427      416355 :                 if (lstart >= lend) {
    1428             :                         /* nothing found */
    1429             :                         break;
    1430             :                 }
    1431             : 
    1432             :                 /* Here we determine the next value in l that we are
    1433             :                  * going to try to match in r.  We will also count the
    1434             :                  * number of occurrences in l of that value.
    1435             :                  * Afterwards, v points to the value and nl is the
    1436             :                  * number of times it occurs.  Also, lstart will
    1437             :                  * point to the next value to be considered (ready for
    1438             :                  * the next iteration).
    1439             :                  * If there are many equal values in l (more than
    1440             :                  * lscan), we will use binary search to find the end
    1441             :                  * of the sequence.  Obviously, we can do this only if
    1442             :                  * l is actually sorted (lscan > 0). */
    1443      416290 :                 nl = 1;         /* we'll match (at least) one in l */
    1444      416290 :                 nr = 0;         /* maybe we won't match anything in r */
    1445      416290 :                 v = lvals[lstart];
    1446      416290 :                 if (li.key) {
    1447             :                         /* if l is key, there is a single value */
    1448      370854 :                         lstart++;
    1449       45436 :                 } else if (lscan < lend - lstart &&
    1450       45359 :                            v == lvals[lstart + lscan]) {
    1451             :                         /* lots of equal values: use binary search to
    1452             :                          * find end */
    1453         395 :                         nl = binsearch_lng(NULL, 0, lvals, lstart + lscan,
    1454             :                                            lend - 1, v, 1, 1);
    1455         395 :                         nl -= lstart;
    1456         395 :                         lstart += nl;
    1457             :                 } else {
    1458             :                         /* just scan */
    1459       72232 :                         while (++lstart < lend && v == lvals[lstart])
    1460       27191 :                                 nl++;
    1461             :                 }
    1462             :                 /* lstart points one beyond the value we're
    1463             :                  * going to match: ready for the next iteration. */
    1464             : 
    1465             :                 /* First we find the first value in r that is at
    1466             :                  * least as large as v, then we find the first
    1467             :                  * value in r that is larger than v.  The difference
    1468             :                  * is the number of values equal to v and is stored in
    1469             :                  * nr.
    1470             :                  * We will use binary search on r to find both ends of
    1471             :                  * the sequence of values that are equal to v in case
    1472             :                  * the position is "too far" (more than rscan
    1473             :                  * away). */
    1474             : 
    1475             :                 /* first find the location of the first value in r
    1476             :                  * that is >= v, then find the location of the first
    1477             :                  * value in r that is > v; the difference is the
    1478             :                  * number of values equal to v */
    1479             : 
    1480             :                 /* look ahead a little (rscan) in r to see whether
    1481             :                  * we're better off doing a binary search */
    1482      416290 :                 if (rscan < rend - rstart && rvals[rstart + rscan] < v) {
    1483             :                         /* value too far away in r: use binary
    1484             :                          * search */
    1485        2231 :                         rstart = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1486             :                                                rend - 1, v, 1, 0);
    1487             :                 } else {
    1488             :                         /* scan r for v */
    1489     1496517 :                         while (rstart < rend && rvals[rstart] < v)
    1490     1082458 :                                 rstart++;
    1491             :                 }
    1492      416294 :                 if (rstart == rend) {
    1493             :                         /* nothing found */
    1494             :                         break;
    1495             :                 }
    1496             : 
    1497             :                 /* now find the end of the sequence of equal values v */
    1498             : 
    1499             :                 /* if r is key, there is zero or one match, otherwise
    1500             :                  * look ahead a little (rscan) in r to see whether
    1501             :                  * we're better off doing a binary search */
    1502      416275 :                 if (ri.key) {
    1503      377736 :                         if (rstart < rend && v == rvals[rstart]) {
    1504       82334 :                                 nr = 1;
    1505       82334 :                                 rstart++;
    1506             :                         }
    1507       38539 :                 } else if (rscan < rend - rstart &&
    1508       38497 :                            v == rvals[rstart + rscan]) {
    1509             :                         /* range too large: use binary search */
    1510           0 :                         nr = binsearch_lng(NULL, 0, rvals, rstart + rscan,
    1511             :                                            rend - 1, v, 1, 1);
    1512           0 :                         nr -= rstart;
    1513           0 :                         rstart += nr;
    1514             :                 } else {
    1515             :                         /* scan r for end of range */
    1516       94770 :                         while (rstart < rend && v == rvals[rstart]) {
    1517       56231 :                                 nr++;
    1518       56231 :                                 rstart++;
    1519             :                         }
    1520             :                 }
    1521             :                 /* rstart points to first value > v or end of
    1522             :                  * r, and nr is the number of values in r that
    1523             :                  * are equal to v */
    1524      120873 :                 if (nr == 0) {
    1525             :                         /* no entries in r found */
    1526      295312 :                         continue;
    1527             :                 }
    1528             :                 /* make space: nl values in l match nr values in r, so
    1529             :                  * we need to add nl * nr values in the results */
    1530      120963 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1531           0 :                         goto bailout;
    1532             : 
    1533             :                 /* maintain properties */
    1534      121081 :                 if (nl > 1) {
    1535             :                         /* value occurs multiple times in l, so entry
    1536             :                          * in r will be repeated multiple times: hence
    1537             :                          * r2 is not key and not dense */
    1538        6850 :                         if (r2) {
    1539        1847 :                                 r2->tkey = false;
    1540        1847 :                                 r2->tseqbase = oid_nil;
    1541             :                         }
    1542             :                         /* multiple different values will be inserted
    1543             :                          * in r1 (always in order), so not reverse
    1544             :                          * ordered anymore */
    1545        6850 :                         r1->trevsorted = false;
    1546             :                 }
    1547      121081 :                 if (nr > 1) {
    1548             :                         /* value occurs multiple times in r, so entry
    1549             :                          * in l will be repeated multiple times: hence
    1550             :                          * r1 is not key and not dense */
    1551        5209 :                         r1->tkey = false;
    1552        5209 :                         r1->tseqbase = oid_nil;
    1553             :                         /* multiple different values will be inserted
    1554             :                          * in r2 (in order), so not reverse ordered
    1555             :                          * anymore */
    1556        5209 :                         if (r2) {
    1557        5209 :                                 r2->trevsorted = false;
    1558        5209 :                                 if (nl > 1) {
    1559             :                                         /* multiple values in l match
    1560             :                                          * multiple values in r, so an
    1561             :                                          * ordered sequence will be
    1562             :                                          * inserted multiple times in
    1563             :                                          * r2, so r2 is not ordered
    1564             :                                          * anymore */
    1565          51 :                                         r2->tsorted = false;
    1566             :                                 }
    1567             :                         }
    1568             :                 }
    1569      121081 :                 if (BATcount(r1) > 0) {
    1570             :                         /* a new, higher value will be inserted into
    1571             :                          * r1, so r1 is not reverse ordered anymore */
    1572      120696 :                         r1->trevsorted = false;
    1573             :                         /* a new higher value will be added to r2 */
    1574      120696 :                         if (r2) {
    1575      113989 :                                 r2->trevsorted = false;
    1576             :                         }
    1577      120696 :                         if (BATtdense(r1) &&
    1578       52804 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1579          53 :                                 r1->tseqbase = oid_nil;
    1580             :                         }
    1581             :                 }
    1582             : 
    1583      121081 :                 if (r2 &&
    1584      114365 :                     BATcount(r2) > 0 &&
    1585      113972 :                     BATtdense(r2) &&
    1586       51667 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rstart - nr) {
    1587          26 :                         r2->tseqbase = oid_nil;
    1588             :                 }
    1589             : 
    1590             :                 /* insert values */
    1591      121081 :                 lv = l->hseqbase + lstart - nl;
    1592      272667 :                 for (i = 0; i < nl; i++) {
    1593             :                         BUN j;
    1594             : 
    1595      321359 :                         for (j = 0; j < nr; j++) {
    1596      169773 :                                 APPEND(r1, lv);
    1597             :                         }
    1598      151586 :                         if (r2) {
    1599      133219 :                                 oid rv = r->hseqbase + rstart - nr;
    1600             : 
    1601      284563 :                                 for (j = 0; j < nr; j++) {
    1602      151344 :                                         APPEND(r2, rv);
    1603      151344 :                                         rv++;
    1604             :                                 }
    1605             :                         }
    1606      151586 :                         lv++;
    1607             :                 }
    1608             :         }
    1609             :         /* also set other bits of heap to correct value to indicate size */
    1610         215 :         BATsetcount(r1, BATcount(r1));
    1611         215 :         if (r2) {
    1612         196 :                 BATsetcount(r2, BATcount(r2));
    1613         196 :                 assert(BATcount(r1) == BATcount(r2));
    1614             :         }
    1615         215 :         if (BATcount(r1) > 0) {
    1616         191 :                 if (BATtdense(r1))
    1617         122 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1618         191 :                 if (r2 && BATtdense(r2))
    1619         121 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1620             :         } else {
    1621          24 :                 r1->tseqbase = 0;
    1622          24 :                 if (r2) {
    1623          14 :                         r2->tseqbase = 0;
    1624             :                 }
    1625             :         }
    1626         215 :         bat_iterator_end(&li);
    1627         215 :         bat_iterator_end(&ri);
    1628         215 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1629             :                   "nil_matches=%s;%s %s "
    1630             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1631             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1632             :                   nil_matches ? "true" : "false",
    1633             :                   swapped ? " swapped" : "", reason,
    1634             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1635             :                   GDKusec() - t0);
    1636             : 
    1637             :         return GDK_SUCCEED;
    1638             : 
    1639           1 :   bailout:
    1640           1 :         bat_iterator_end(&li);
    1641           1 :         bat_iterator_end(&ri);
    1642           1 :         BBPreclaim(r1);
    1643           1 :         BBPreclaim(r2);
    1644             :         return GDK_FAIL;
    1645             : }
    1646             : 
    1647             : /* Implementation of mergejoin (see below) for the special case that the
    1648             :  * values are of type oid, the right-hand side is a candidate list with
    1649             :  * exception, and some more conditions are met; returns GDK_SUCCEED or GDK_FAIL. */
    1650             : static gdk_return
    1651           0 : mergejoin_cand(BAT **r1p, BAT **r2p, BAT *l, BAT *r,
    1652             :                bool nil_matches, BUN estimate, lng t0, bool swapped,
    1653             :                const char *reason)
    1654             : {
    1655             : /* the comments in this function have not been checked after making a
    1656             :  * copy of mergejoin below and adapting it to a mask right-hand side */
    1657           0 :         BAT *r1, *r2;
    1658           0 :         BUN lstart, lend, lcnt;
    1659           0 :         struct canditer lci, rci;
    1660           0 :         BUN lscan;              /* opportunistic scan window */
    1661           0 :         BUN maxsize;
    1662           0 :         const oid *lvals;
    1663           0 :         oid v;
    1664           0 :         BUN nl, nr;
    1665           0 :         oid lv;
    1666           0 :         BUN i;
    1667           0 :         BATiter li = bat_iterator(l);
    1668           0 :         BATiter ri = bat_iterator(r);
    1669             : 
    1670           0 :         assert(ATOMtype(li.type) == ATOMtype(ri.type));
    1671             : 
    1672           0 :         MT_thread_setalgorithm(__func__);
    1673           0 :         lstart = 0;
    1674           0 :         lend = BATcount(l);
    1675           0 :         lcnt = lend - lstart;
    1676           0 :         if (li.type == TYPE_void) {
    1677           0 :                 assert(!is_oid_nil(l->tseqbase));
    1678           0 :                 canditer_init(&lci, NULL, l);
    1679           0 :                 lcnt = lci.ncand;
    1680           0 :                 lvals = NULL;
    1681             :         } else {
    1682           0 :                 lci = (struct canditer) {.tpe = cand_dense}; /* not used */
    1683           0 :                 lvals = (const oid *) li.base;
    1684           0 :                 assert(lvals != NULL);
    1685             :         }
    1686             : 
    1687           0 :         assert(complex_cand(r));
    1688           0 :         canditer_init(&rci, NULL, r);
    1689           0 :         size_t counter = 0; /* passed to GDK_CHECK_TIMEOUT in the main loop */
    1690           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1691             : 
    1692             :         /* basic properties will be adjusted if necessary later on,
    1693             :          * they were initially set by joininitresults() */
    1694             : 
    1695           0 :         if (lend == 0 || rci.ncand == 0) {
    1696             :                 /* there are no matches */
    1697           0 :                 bat_iterator_end(&li);
    1698           0 :                 bat_iterator_end(&ri);
    1699           0 :                 return nomatch(r1p, r2p, NULL, l, r,
    1700           0 :                                &(struct canditer) {.tpe = cand_dense, .ncand = lcnt,},
    1701             :                                0, false, false, __func__, t0);
    1702             :         }
    1703             : 
    1704           0 :         if ((maxsize = joininitresults(r1p, r2p, NULL, BATcount(l), BATcount(r),
    1705           0 :                                        li.key, ri.key, false, false,
    1706             :                                        false, false, estimate)) == BUN_NONE) {
    1707           0 :                 bat_iterator_end(&li);
    1708           0 :                 bat_iterator_end(&ri);
    1709           0 :                 return GDK_FAIL;
    1710             :         }
    1711           0 :         r1 = *r1p;
    1712           0 :         r2 = r2p ? *r2p : NULL;
    1713             : 
    1714             :         /* opportunistic scan window for l: 4 plus the bit length of l's count */
    1715           0 :         for (nl = lend - lstart, lscan = 4; nl > 0; lscan++)
    1716           0 :                 nl >>= 1;
    1717             : 
    1718           0 :         if (!nil_matches) {
    1719             :                 /* skip over nils at the start of l */
    1720           0 :                 if (lscan < lend - lstart && lvals && is_oid_nil(lvals[lstart + lscan])) {
    1721           0 :                         lstart = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1722             :                                                lend - 1, oid_nil, 1, 1);
    1723           0 :                 } else if (lvals) {
    1724           0 :                         while (is_oid_nil(lvals[lstart]))
    1725           0 :                                 lstart++;
    1726             :                 } /* else l has no value array (TYPE_void): no nils */
    1727             :         }
    1728             :         /* from here on we don't have to worry about nil values */
    1729             : 
    1730           0 :         while (lstart < lend && rci.next < rci.ncand) {
    1731           0 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    1732             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    1733           0 :                 v = canditer_peek(&rci); /* candidate of r that l is searched for */
    1734             : 
    1735           0 :                 if (lvals) {
    1736           0 :                         if (lscan < lend - lstart &&
    1737           0 :                             lvals[lstart + lscan] < v) {
    1738           0 :                                 lstart = binsearch_oid(NULL, 0, lvals,
    1739             :                                                        lstart + lscan,
    1740             :                                                        lend - 1, v, 1, 0);
    1741             :                         } else {
    1742             :                                 /* scan l for v */
    1743           0 :                                 while (lstart < lend && lvals[lstart] < v)
    1744           0 :                                         lstart++;
    1745             :                         }
    1746             :                 } else { /* l has no value array: position lci instead */
    1747           0 :                         lstart = canditer_search(&lci, v, true);
    1748           0 :                         canditer_setidx(&lci, lstart);
    1749             :                 }
    1750           0 :                 if (lstart >= lend) {
    1751             :                         /* nothing found */
    1752             :                         break;
    1753             :                 }
    1754             : 
    1755             :                 /* Here we determine the next value in l that we are
    1756             :                  * going to try to match in r.  We will also count the
    1757             :                  * number of occurrences in l of that value.
    1758             :                  * Afterwards, v holds the value and nl is the
    1759             :                  * number of times it occurs.  Also, lstart will
    1760             :                  * point to the next value to be considered (ready for
    1761             :                  * the next iteration).
    1762             :                  * If there are many equal values in l (more than
    1763             :                  * lscan), we will use binary search to find the end
    1764             :                  * of the sequence.  When l has no value array
    1765             :                  * (lvals == NULL), the value is taken from lci instead. */
    1766           0 :                 nl = 1;         /* we'll match (at least) one in l */
    1767           0 :                 nr = 0;         /* maybe we won't match anything in r */
    1768           0 :                 v = lvals ? lvals[lstart] : canditer_next(&lci);
    1769           0 :                 if (li.key || lvals == NULL) {
    1770             :                         /* if l is key or has no value array, there is a single value */
    1771           0 :                         lstart++;
    1772           0 :                 } else if (lscan < lend - lstart &&
    1773           0 :                            v == lvals[lstart + lscan]) {
    1774             :                         /* lots of equal values: use binary search to
    1775             :                          * find end */
    1776           0 :                         nl = binsearch_oid(NULL, 0, lvals, lstart + lscan,
    1777             :                                            lend - 1, v, 1, 1);
    1778           0 :                         nl -= lstart;
    1779           0 :                         lstart += nl;
    1780             :                 } else {
    1781             :                         /* just scan */
    1782           0 :                         while (++lstart < lend && v == lvals[lstart])
    1783           0 :                                 nl++;
    1784             :                 }
    1785             :                 /* lstart points one beyond the value we're
    1786             :                  * going to match: ready for the next iteration. */
    1787             : 
    1788             :                 /* Next we position rci in r for the value v, and
    1789             :                  * then check whether the candidate at that
    1790             :                  * position is equal to v.  The number of matches
    1791             :                  * in r (zero or one in this function) is stored in
    1792             :                  * nr.
    1793             :                  * Unlike for l, there is no scan window for r and
    1794             :                  * no binary search on a value array: the position
    1795             :                  * in r is always obtained by calling
    1796             :                  * canditer_search. */
    1797             : 
    1798             :                 /* canditer_search is relied upon to give the location
    1799             :                  * of the first value in r that is >= v; nr holds that
    1800             :                  * location temporarily and rci is moved to it; a
    1801             :                  * location equal to rci.ncand ends the join */
    1802           0 :                 nr = canditer_search(&rci, v, true);
    1803           0 :                 canditer_setidx(&rci, nr);
    1804           0 :                 if (nr == rci.ncand) {
    1805             :                         /* nothing found */
    1806             :                         break;
    1807             :                 }
    1808             : 
    1809             :                 /* now check whether the value found in r is equal to v */
    1810             : 
    1811             :                 /* r is treated as having at most one entry equal to
    1812             :                  * v: a single peek decides between one match (nr = 1,
    1813             :                  * rci advanced past it) and no match at all */
    1814           0 :                 if (canditer_peek(&rci) == v) {
    1815           0 :                         nr = 1;
    1816           0 :                         canditer_next(&rci);
    1817             :                 } else {
    1818             :                         /* the value at rci differs from v; nr still
    1819             :                          * holds the search location here, but it is
    1820             :                          * reset to 0 before its next use */
    1821             :                         /* no entries in r found */
    1822           0 :                         continue;
    1823             :                 }
    1824             :                 /* make space: nl values in l match nr values in r, so
    1825             :                  * we need to add nl * nr values in the results */
    1826           0 :                 if (maybeextend(r1, r2, NULL, nl * nr, lstart, lend, maxsize) != GDK_SUCCEED)
    1827           0 :                         goto bailout;
    1828             : 
    1829             :                 /* maintain properties */
    1830           0 :                 if (nl > 1) {
    1831             :                         /* value occurs multiple times in l, so entry
    1832             :                          * in r will be repeated multiple times: hence
    1833             :                          * r2 is not key and not dense */
    1834           0 :                         if (r2) {
    1835           0 :                                 r2->tkey = false;
    1836           0 :                                 r2->tseqbase = oid_nil;
    1837             :                         }
    1838             :                         /* multiple different values will be inserted
    1839             :                          * in r1 (always in order), so not reverse
    1840             :                          * ordered anymore */
    1841           0 :                         r1->trevsorted = false;
    1842             :                 }
    1843           0 :                 if (BATcount(r1) > 0) {
    1844             :                         /* a new, higher value will be inserted into
    1845             :                          * r1, so r1 is not reverse ordered anymore */
    1846           0 :                         r1->trevsorted = false;
    1847             :                         /* a new higher value will be added to r2 */
    1848           0 :                         if (r2) {
    1849           0 :                                 r2->trevsorted = false;
    1850             :                         }
    1851           0 :                         if (BATtdense(r1) &&
    1852           0 :                             ((oid *) r1->theap->base)[r1->batCount - 1] + 1 != l->hseqbase + lstart - nl) {
    1853           0 :                                 r1->tseqbase = oid_nil;
    1854             :                         }
    1855             :                 }
    1856             : 
    1857           0 :                 if (r2 &&
    1858           0 :                     BATcount(r2) > 0 &&
    1859           0 :                     BATtdense(r2) &&
    1860           0 :                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != r->hseqbase + rci.next - nr) {
    1861           0 :                         r2->tseqbase = oid_nil;
    1862             :                 }
    1863             : 
    1864             :                 /* insert values */
    1865           0 :                 lv = l->hseqbase + lstart - nl;
    1866           0 :                 for (i = 0; i < nl; i++) {
    1867             :                         BUN j;
    1868             : 
    1869           0 :                         for (j = 0; j < nr; j++) {
    1870           0 :                                 APPEND(r1, lv);
    1871             :                         }
    1872           0 :                         if (r2) {
    1873           0 :                                 oid rv = r->hseqbase + rci.next - nr;
    1874             : 
    1875           0 :                                 for (j = 0; j < nr; j++) {
    1876           0 :                                         APPEND(r2, rv);
    1877           0 :                                         rv++;
    1878             :                                 }
    1879             :                         }
    1880           0 :                         lv++;
    1881             :                 }
    1882             :         }
    1883             :         /* also set other bits of heap to correct value to indicate size */
    1884           0 :         BATsetcount(r1, BATcount(r1));
    1885           0 :         if (r2) {
    1886           0 :                 BATsetcount(r2, BATcount(r2));
    1887           0 :                 assert(BATcount(r1) == BATcount(r2));
    1888             :         }
    1889           0 :         if (BATcount(r1) > 0) {
    1890           0 :                 if (BATtdense(r1))
    1891           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    1892           0 :                 if (r2 && BATtdense(r2))
    1893           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    1894             :         } else {
    1895           0 :                 r1->tseqbase = 0;
    1896           0 :                 if (r2) {
    1897           0 :                         r2->tseqbase = 0;
    1898             :                 }
    1899             :         }
    1900           0 :         bat_iterator_end(&li);
    1901           0 :         bat_iterator_end(&ri);
    1902           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT ","
    1903             :                   "nil_matches=%s;%s %s "
    1904             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    1905             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    1906             :                   nil_matches ? "true" : "false",
    1907             :                   swapped ? " swapped" : "", reason,
    1908             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    1909             :                   GDKusec() - t0);
    1910             : 
    1911             :         return GDK_SUCCEED;
    1912             : 
    1913           0 :   bailout:
    1914           0 :         bat_iterator_end(&li);
    1915           0 :         bat_iterator_end(&ri);
    1916           0 :         BBPreclaim(r1);
    1917           0 :         BBPreclaim(r2);
    1918             :         return GDK_FAIL;
    1919             : }
    1920             : 
    1921             : /* Perform a "merge" join on l and r (if both are sorted) with
    1922             :  * optional candidate lists, or join using binary search on r if l is
    1923             :  * not sorted.
    1924             :  *
    1925             :  * If nil_matches is set, nil values are treated as ordinary values
    1926             :  * that can match; otherwise nil values never match.
    1927             :  *
    1928             :  * If nil_on_miss is set, a nil value is returned in r2 if there is no
    1929             :  * match in r for a particular value in l (left outer join).
    1930             :  *
    1931             :  * If semi is set, only a single set of values in r1/r2 is returned if
    1932             :  * there is a match of l in r, no matter how many matches there are in
    1933             :  * r; otherwise all matches are returned.
    1934             :  *
    1935             :  * If max_one is set, only a single match is allowed.  This is like
    1936             :  * semi, but enforces the single match.
    1937             :  *
    1938             :  * t0 and swapped are only for debugging (ALGOMASK set in GDKdebug).
    1939             :  */
    1940             : static gdk_return
    1941       11574 : mergejoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
    1942             :           struct canditer *restrict lci, struct canditer *restrict rci,
    1943             :           bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    1944             :           bool not_in, bool max_one, bool min_one, BUN estimate,
    1945             :           lng t0, bool swapped,
    1946             :           const char *reason)
    1947             : {
    1948             :         /* [lr]scan determine how far we look ahead in l/r in order to
    1949             :          * decide whether we want to do a binary search or a scan */
    1950       11574 :         BUN lscan, rscan;
    1951       11574 :         const void *lvals, *rvals; /* the values of l/r (NULL if dense) */
    1952       11574 :         const char *lvars, *rvars; /* the indirect values (NULL if fixed size) */
    1953       11574 :         const void *nil = ATOMnilptr(l->ttype);
    1954       11574 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    1955       11574 :         const void *v;          /* points to value under consideration */
    1956       11574 :         const void *prev = NULL;
    1957       11574 :         BUN nl, nr;
    1958       11574 :         bool insert_nil;
    1959             :         /* equal_order is set if we can scan both BATs in the same
    1960             :          * order, so when both are sorted or both are reverse sorted
    1961             :          * -- important to know in order to skip over values; if l is
    1962             :          * not sorted, this must be set to true and we will always do a
    1963             :          * binary search on all of r */
    1964       11574 :         bool equal_order;
    1965             :         /* [lr]ordering is either 1 or -1 depending on the order of
    1966             :          * l/r: it determines the comparison function used */
    1967       11574 :         int lordering, rordering;
    1968       11574 :         oid lv;
    1969       11574 :         BUN i, j;               /* counters */
    1970       11574 :         oid lval = oid_nil, rval = oid_nil; /* temporary space to point v to */
    1971       11574 :         struct canditer llci, rrci;
    1972       11574 :         struct canditer *mlci, xlci;
    1973       11574 :         struct canditer *mrci, xrci;
    1974             : 
    1975       11574 :         if (lci->tpe == cand_dense && lci->ncand == BATcount(l) &&
    1976       11548 :             rci->tpe == cand_dense && rci->ncand == BATcount(r) &&
    1977       10973 :             !nil_on_miss && !semi && !max_one && !min_one && !only_misses &&
    1978        6482 :             !not_in &&
    1979        5585 :             l->tsorted && r->tsorted) {
    1980             :                 /* special cases with far fewer options */
    1981        5569 :                 if (complex_cand(r))
    1982           0 :                         return mergejoin_cand(r1p, r2p, l, r, nil_matches,
    1983             :                                               estimate, t0, swapped, __func__);
    1984       11095 :                 switch (ATOMbasetype(l->ttype)) {
    1985        5252 :                 case TYPE_int:
    1986        5252 :                         return mergejoin_int(r1p, r2p, l, r, nil_matches,
    1987             :                                              estimate, t0, swapped, __func__);
    1988         216 :                 case TYPE_lng:
    1989         216 :                         return mergejoin_lng(r1p, r2p, l, r, nil_matches,
    1990             :                                              estimate, t0, swapped, __func__);
    1991             :                 }
    1992             :         }
    1993             : 
    1994       18202 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    1995        6106 :         assert(r->tsorted || r->trevsorted);
    1996             : 
    1997        6106 :         size_t counter = 0;
    1998        6106 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    1999             : 
    2000        6106 :         BATiter li = bat_iterator(l);
    2001        6106 :         BATiter ri = bat_iterator(r);
    2002        6106 :         MT_thread_setalgorithm(__func__);
    2003        6106 :         if (BATtvoid(l)) {
    2004             :                 /* l->ttype == TYPE_void && is_oid_nil(l->tseqbase) is
    2005             :                  * handled by selectjoin */
    2006          63 :                 assert(!is_oid_nil(l->tseqbase));
    2007          63 :                 canditer_init(&llci, NULL, l);
    2008          63 :                 lvals = NULL;
    2009             :         } else {
    2010        6043 :                 lvals = li.base;                              /* non NULL */
    2011        6043 :                 llci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2012             :         }
    2013        6106 :         rrci = (struct canditer) {.tpe = cand_dense};
    2014        6106 :         if (BATtvoid(r)) {
    2015          53 :                 if (!is_oid_nil(r->tseqbase))
    2016          53 :                         canditer_init(&rrci, NULL, r);
    2017             :                 rvals = NULL;
    2018             :         } else {
    2019        6053 :                 rvals = ri.base;
    2020             :         }
    2021        6106 :         if (li.vh && li.type) {
    2022         164 :                 assert(ri.vh && ri.type);
    2023         164 :                 lvars = li.vh->base;
    2024         164 :                 rvars = ri.vh->base;
    2025             :         } else {
    2026        5942 :                 assert(ri.vh == NULL || ri.type == TYPE_void);
    2027             :                 lvars = rvars = NULL;
    2028             :         }
    2029             :         /* if the var pointer is not NULL, then the val pointer is not NULL either */
    2030        6106 :         assert(lvars == NULL || lvals != NULL);
    2031        6106 :         assert(rvars == NULL || rvals != NULL);
    2032             : 
    2033        6106 :         const bool rhasnil = !ri.nonil &&
    2034         518 :                 ((BATtvoid(r) && r->tseqbase == oid_nil) ||
    2035         518 :                  (rvals && cmp(nil, VALUE(r, (ri.sorted ? rci->seq : canditer_last(rci)) - r->hseqbase)) == 0));
    2036          19 :         const bit defmark = rhasnil ? bit_nil : 0;
    2037             : 
    2038        6106 :         if (not_in && (rhasnil || (BATtvoid(l) && l->tseqbase == oid_nil))) {
    2039           0 :                 bat_iterator_end(&li);
    2040           0 :                 bat_iterator_end(&ri);
    2041           0 :                 return nomatch(r1p, r2p, r3p, l, r, lci, defmark, false, false,
    2042             :                                __func__, t0);
    2043             :         }
    2044             : 
    2045        6106 :         if ((!nil_matches &&
    2046        5949 :              ((li.type == TYPE_void && is_oid_nil(l->tseqbase)) ||
    2047        5949 :               (ri.type == TYPE_void && is_oid_nil(r->tseqbase)))) ||
    2048        6106 :             (li.type == TYPE_void && is_oid_nil(l->tseqbase) &&
    2049           0 :              (ri.nonil ||
    2050           0 :               (ri.type == TYPE_void && !is_oid_nil(r->tseqbase)))) ||
    2051        6106 :             (ri.type == TYPE_void && is_oid_nil(r->tseqbase) &&
    2052           0 :              (li.nonil ||
    2053           0 :               (li.type == TYPE_void && !is_oid_nil(l->tseqbase))))) {
    2054             :                 /* there are no matches */
    2055           0 :                 bat_iterator_end(&li);
    2056           0 :                 bat_iterator_end(&ri);
    2057           0 :                 return nomatch(r1p, r2p, r3p, l, r, lci, defmark,
    2058             :                                nil_on_miss, only_misses, __func__, t0);
    2059             :         }
    2060             : 
    2061       12211 :         BUN maxsize = joininitresults(r1p, r2p, r3p, lci->ncand, rci->ncand,
    2062        6106 :                                       li.key, ri.key, semi | max_one,
    2063             :                                       nil_on_miss, only_misses, min_one,
    2064             :                                       estimate);
    2065        6105 :         if (maxsize == BUN_NONE) {
    2066           0 :                 bat_iterator_end(&li);
    2067           0 :                 bat_iterator_end(&ri);
    2068           0 :                 return GDK_FAIL;
    2069             :         }
    2070        6105 :         BAT *r1 = *r1p;
    2071        6105 :         BAT *r2 = r2p ? *r2p : NULL;
    2072        6105 :         BAT *r3 = r3p ? *r3p : NULL;
    2073             : 
    2074        6105 :         if (lci->tpe == cand_mask) {
    2075           0 :                 mlci = lci;
    2076           0 :                 canditer_init(&xlci, l, NULL);
    2077           0 :                 lci = &xlci;
    2078             :         } else {
    2079        6105 :                 mlci = NULL;
    2080        6105 :                 xlci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2081             :         }
    2082        6105 :         if (rci->tpe == cand_mask) {
    2083           0 :                 mrci = rci;
    2084           0 :                 canditer_init(&xrci, r, NULL);
    2085           0 :                 rci = &xrci;
    2086             :         } else {
    2087        6105 :                 mrci = NULL;
    2088        6105 :                 xrci = (struct canditer) {.tpe = cand_dense}; /* not used */
    2089             :         }
    2090             : 
    2091        6105 :         if (li.sorted || li.revsorted) {
    2092        4969 :                 equal_order = (li.sorted && ri.sorted) ||
    2093         223 :                         (li.revsorted && ri.revsorted &&
    2094         114 :                          !BATtvoid(l) && !BATtvoid(r));
    2095        4969 :                 lordering = li.sorted && (ri.sorted || !equal_order) ? 1 : -1;
    2096        4877 :                 rordering = equal_order ? lordering : -lordering;
    2097        4969 :                 if (!li.nonil && !nil_matches && !nil_on_miss && lvals != NULL) {
    2098             :                         /* find first non-nil */
    2099         303 :                         nl = binsearch(NULL, 0, li.type, lvals, lvars, li.width, 0, BATcount(l), nil, li.sorted ? 1 : -1, li.sorted ? 1 : 0);
    2100         283 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    2101         283 :                         if (li.sorted) {
    2102         263 :                                 canditer_setidx(lci, nl);
    2103          20 :                         } else if (li.revsorted) {
    2104          20 :                                 lci->ncand = nl;
    2105             :                         }
    2106             :                 }
    2107             :                 /* determine opportunistic scan window for l */
    2108        9938 :                 lscan = 4 + ilog2(lci->ncand);
    2109             :         } else {
    2110             :                 /* if l not sorted, we will always use binary search
    2111             :                  * on r */
    2112        1136 :                 assert(!BATtvoid(l)); /* void is always sorted */
    2113        1136 :                 lscan = 0;
    2114        1136 :                 equal_order = true;
    2115        1136 :                 lordering = 1;
    2116        1136 :                 rordering = ri.sorted ? 1 : -1;
    2117             :         }
    2118             :         /* determine opportunistic scan window for r; if l is not
    2119             :          * sorted this is only used to find range of equal values */
    2120        6105 :         rscan = 4 + ilog2(rci->ncand);
    2121             : 
    2122        6105 :         if (!equal_order) {
    2123             :                 /* we go through r backwards */
    2124         109 :                 canditer_setidx(rci, rci->ncand);
    2125             :         }
    2126             :         /* At this point the various variables that help us through
    2127             :          * the algorithm have been set.  The table explains them.  The
    2128             :          * first two columns are the inputs, the next three columns
    2129             :          * are the variables, the final two columns indicate how the
    2130             :          * variables can be used.
    2131             :          *
    2132             :          * l/r    sl/sr | vals  cand  off | result   value being matched
    2133             :          * -------------+-----------------+----------------------------------
    2134             :          * dense  NULL  | NULL  NULL  set | i        off==nil?nil:i+off
    2135             :          * dense  dense | NULL  NULL  set | i        off==nil?nil:i+off
    2136             :          * dense  set   | NULL  set   set | cand[i]  off==nil?nil:cand[i]+off
    2137             :          * set    NULL  | set   NULL  0   | i        vals[i]
    2138             :          * set    dense | set   NULL  0   | i        vals[i]
    2139             :          * set    set   | set   set   0   | cand[i]  vals[cand[i]]
    2140             :          *
    2141             :          * If {l,r}off is lng_nil, all values in the corresponding bat
    2142             :          * are oid_nil because the bat has type VOID and the tseqbase
    2143             :          * is nil.
    2144             :          */
    2145             : 
    2146             : 
    2147             :         /* Before we start adding values to r1 and r2, the properties
    2148             :          * are as follows:
    2149             :          * tseqbase - 0
    2150             :          * tkey - true
    2151             :          * tsorted - true
    2152             :          * trevsorted - true
    2153             :          * tnil - false
    2154             :          * tnonil - true
    2155             :          * We will modify these as we go along.
    2156             :          */
    2157      382445 :         while (lci->next < lci->ncand) {
    2158      378717 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    2159             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    2160      378717 :                 bit mark = defmark;
    2161      378717 :                 if (lscan == 0) {
    2162             :                         /* always search r completely */
    2163      152308 :                         assert(equal_order);
    2164      152308 :                         canditer_reset(rci);
    2165             :                 } else {
    2166             :                         /* If l is sorted (lscan > 0), we look at the
    2167             :                          * next value in r to see whether we can jump
    2168             :                          * over a large section of l using binary
    2169             :                          * search.  We do this by looking ahead in l
    2170             :                          * (lscan far, to be precise) and seeing if
    2171             :                          * the value there is still too "small"
    2172             :                          * (definition depends on sort order of l).
    2173             :                          * If it is, we use binary search on l,
    2174             :                          * otherwise we scan l for the next position
    2175             :                          * with a value greater than or equal to the
    2176             :                          * value in r.
    2177             :                          * The next value to match in r is the first
    2178             :                          * if equal_order is set, the last
    2179             :                          * otherwise.
    2180             :                          * When skipping over values in l, we count
    2181             :                          * how many we skip in nlx.  We need this in
    2182             :                          * case only_misses or nil_on_miss is set, and
    2183             :                          * to properly set the dense property in the
    2184             :                          * first output BAT. */
    2185      226409 :                         BUN nlx = 0; /* number of non-matching values in l */
    2186             : 
    2187      226409 :                         if (equal_order) {
    2188      225614 :                                 if (rci->next == rci->ncand)
    2189             :                                         v = NULL; /* no more values */
    2190      223301 :                                 else if (mrci) {
    2191           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peek(rci), true);
    2192           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    2193             :                                 } else
    2194      223301 :                                         v = VALUE(r, canditer_peek(rci) - r->hseqbase);
    2195             :                         } else {
    2196         795 :                                 if (rci->next == 0)
    2197             :                                         v = NULL; /* no more values */
    2198         785 :                                 else if (mrci) {
    2199           0 :                                         oid rv = canditer_mask_next(mrci, canditer_peekprev(rci), false);
    2200           0 :                                         v = rv == oid_nil ? NULL : VALUE(r, rv - r->hseqbase);
    2201             :                                 } else
    2202         785 :                                         v = VALUE(r, canditer_peekprev(rci) - r->hseqbase);
    2203             :                         }
    2204             :                         /* here, v points to next value in r, or if
    2205             :                          * we're at the end of r, v is NULL */
    2206       11206 :                         if (v == NULL) {
    2207        2323 :                                 nlx = lci->ncand - lci->next;
    2208             :                         } else {
    2209      224096 :                                 if (lscan < lci->ncand - lci->next) {
    2210      201665 :                                         lv = canditer_idx(lci, lci->next + lscan);
    2211      202587 :                                         lv -= l->hseqbase;
    2212      202587 :                                         if (lvals) {
    2213      197661 :                                                 if (lordering * cmp(VALUE(l, lv), v) < 0) {
    2214        2151 :                                                         nlx = binsearch(NULL, 0, li.type, lvals, lvars, li.width, lv, BATcount(l), v, lordering, 0);
    2215        2151 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    2216        2151 :                                                         nlx -= lci->next;
    2217             :                                                 }
    2218             :                                         } else {
    2219        4926 :                                                 assert(lordering == 1);
    2220        4926 :                                                 if (canditer_idx(&llci, lv) < *(const oid *)v) {
    2221           8 :                                                         nlx = canditer_search(&llci, *(const oid *)v, true);
    2222           8 :                                                         nlx = canditer_search(lci, nlx + l->hseqbase, true);
    2223           8 :                                                         nlx -= lci->next;
    2224             :                                                 }
    2225             :                                         }
    2226      202644 :                                         if (mlci) {
    2227           0 :                                                 lv = canditer_mask_next(mlci, lci->seq + lci->next + nlx, true);
    2228           0 :                                                 if (lv == oid_nil)
    2229           0 :                                                         nlx = lci->ncand - lci->next;
    2230             :                                                 else
    2231           0 :                                                         nlx = lv - lci->seq - lci->next;
    2232             :                                         }
    2233      202644 :                                         if (lci->next + nlx == lci->ncand)
    2234          11 :                                                 v = NULL;
    2235             :                                 }
    2236             :                         }
    2237      204967 :                         if (nlx > 0) {
    2238        4482 :                                 if (only_misses) {
    2239        2873 :                                         if (maybeextend(r1, r2, r3, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2240           0 :                                                 goto bailout;
    2241      230210 :                                         while (nlx > 0) {
    2242      227337 :                                                 lv = canditer_next(lci);
    2243      227337 :                                                 if (mlci == NULL || canditer_contains(mlci, lv))
    2244      227337 :                                                         APPEND(r1, lv);
    2245      227337 :                                                 nlx--;
    2246             :                                         }
    2247        2873 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2248         669 :                                                 r1->trevsorted = false;
    2249        1609 :                                 } else if (nil_on_miss) {
    2250          21 :                                         if (r2 && r2->tnonil) {
    2251           2 :                                                 r2->tnil = true;
    2252           2 :                                                 r2->tnonil = false;
    2253           2 :                                                 r2->tseqbase = oid_nil;
    2254           2 :                                                 r2->tsorted = false;
    2255           2 :                                                 r2->trevsorted = false;
    2256           2 :                                                 r2->tkey = false;
    2257             :                                         }
    2258          21 :                                         if (maybeextend(r1, r2, r3, nlx, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2259           0 :                                                 goto bailout;
    2260          21 :                                         if (r3)
    2261          20 :                                                 r3->tnil = false;
    2262        2071 :                                         while (nlx > 0) {
    2263        2050 :                                                 lv = canditer_next(lci);
    2264        2050 :                                                 if (mlci == NULL || canditer_contains(mlci, lv)) {
    2265        2050 :                                                         APPEND(r1, lv);
    2266        2050 :                                                         if (r2)
    2267           2 :                                                                 APPEND(r2, oid_nil);
    2268        2050 :                                                         if (r3) {
    2269        2049 :                                                                 if (rhasnil || cmp(VALUE(l, lv - l->hseqbase), nil) == 0) {
    2270           0 :                                                                         ((bit *) r3->theap->base)[r3->batCount++] = bit_nil;
    2271           0 :                                                                         r3->tnil = true;
    2272             :                                                                 } else {
    2273        2049 :                                                                         ((bit *) r3->theap->base)[r3->batCount++] = 0;
    2274             :                                                                 }
    2275             :                                                         }
    2276             :                                                 }
    2277        2050 :                                                 nlx--;
    2278             :                                         }
    2279          21 :                                         if (r1->trevsorted && BATcount(r1) > 1)
    2280           8 :                                                 r1->trevsorted = false;
    2281             :                                 } else {
    2282        1588 :                                         canditer_setidx(lci, lci->next + nlx);
    2283             :                                 }
    2284             :                         }
    2285      227398 :                         if (v == NULL) {
    2286             :                                 /* we have exhausted the inputs */
    2287             :                                 break;
    2288             :                         }
    2289             :                 }
    2290             : 
    2291             :                 /* Here we determine the next value in l that we are
    2292             :                  * going to try to match in r.  We will also count the
    2293             :                  * number of occurrences in l of that value.
    2294             :                  * Afterwards, v points to the value and nl is the
    2295             :                  * number of times it occurs.  Also, lci will point to
    2296             :                  * the next value to be considered (ready for the next
    2297             :                  * iteration).
    2298             :                  * If there are many equal values in l (more than
    2299             :                  * lscan), we will use binary search to find the end
    2300             :                  * of the sequence.  Obviously, we can do this only if
    2301             :                  * l is actually sorted (lscan > 0). */
    2302      373814 :                 nl = 1;         /* we'll match (at least) one in l */
    2303      373814 :                 nr = 0;         /* maybe we won't match anything in r */
    2304      373814 :                 lv = canditer_peek(lci);
    2305      373278 :                 if (mlci) {
    2306           0 :                         lv = canditer_mask_next(mlci, lv, true);
    2307           0 :                         if (lv == oid_nil)
    2308             :                                 break;
    2309           0 :                         canditer_setidx(lci, canditer_search(lci, lv, true));
    2310             :                 }
    2311      373312 :                 v = VALUE(l, lv - l->hseqbase);
    2312      374257 :                 if (li.key) {
    2313             :                         /* if l is key, there is a single value */
    2314      195280 :                 } else if (lscan > 0 &&
    2315      108073 :                            lscan < lci->ncand - lci->next &&
    2316       53459 :                            cmp(v, VALUE(l, canditer_idx(lci, lci->next + lscan) - l->hseqbase)) == 0) {
    2317             :                         /* lots of equal values: use binary search to
    2318             :                          * find end */
    2319         829 :                         assert(lvals != NULL);
    2320        1658 :                         nl = binsearch(NULL, 0,
    2321         829 :                                        li.type, lvals, lvars,
    2322         829 :                                        li.width, lci->next + lscan,
    2323             :                                        BATcount(l),
    2324             :                                        v, lordering, 1);
    2325         829 :                         nl = canditer_search(lci, nl + l->hseqbase, true);
    2326         829 :                         nl -= lci->next;
    2327             :                 } else {
    2328      194354 :                         struct canditer ci = *lci; /* work on copy */
    2329      194354 :                         nl = 0; /* it will be incremented again */
    2330      378772 :                         do {
    2331      378772 :                                 canditer_next(&ci);
    2332      377135 :                                 nl++;
    2333      749981 :                         } while (ci.next < ci.ncand &&
    2334      375840 :                                  cmp(v, VALUE(l, canditer_peek(&ci) - l->hseqbase)) == 0);
    2335             :                 }
    2336             :                 /* lci->next + nl is the position for the next iteration */
    2337             : 
    2338      369529 :                 if ((!nil_matches || not_in) && !li.nonil && cmp(v, nil) == 0) {
    2339         664 :                         if (not_in) {
    2340             :                                 /* just skip the whole thing: nils
    2341             :                                  * don't cause any output */
    2342           1 :                                 canditer_setidx(lci, lci->next + nl);
    2343           1 :                                 continue;
    2344             :                         }
    2345             :                         /* v is nil and nils don't match anything, set
    2346             :                          * to NULL to indicate nil */
    2347         663 :                         v = NULL;
    2348         663 :                         mark = bit_nil;
    2349         663 :                         if (r3)
    2350          54 :                                 r3->tnil = true;
    2351             :                 }
    2352             : 
    2353             :                 /* First we find the "first" value in r that is "at
    2354             :                  * least as large" as v, then we find the "first"
    2355             :                  * value in r that is "larger" than v.  The difference
    2356             :                  * is the number of values equal to v and is stored in
    2357             :                  * nr.  The definitions of "larger" and "first" depend
    2358             :                  * on the orderings of l and r.  If equal_order is
    2359             :                  * set, we go through r from low to high (this
    2360             :                  * includes the case that l is not sorted); otherwise
    2361             :                  * we go through r from high to low.
    2362             :                  * In either case, we will use binary search on r to
    2363             :                  * find both ends of the sequence of values that are
    2364             :                  * equal to v in case the position is "too far" (more
    2365             :                  * than rscan away). */
    2366          54 :                 if (v == NULL) {
    2367             :                         nr = 0; /* nils don't match anything */
    2368      368300 :                 } else if (ri.type == TYPE_void && is_oid_nil(r->tseqbase)) {
    2369           0 :                         if (is_oid_nil(*(const oid *) v)) {
    2370             :                                 /* all values in r match */
    2371           0 :                                 nr = rci->ncand;
    2372             :                         } else {
    2373             :                                 /* no value in r matches */
    2374             :                                 nr = 0;
    2375             :                         }
    2376             :                         /* in either case, we're done after this */
    2377           0 :                         canditer_setidx(rci, equal_order ? rci->ncand : 0);
    2378      368300 :                 } else if (equal_order) {
    2379             :                         /* first find the location of the first value
    2380             :                          * in r that is >= v, then find the location
    2381             :                          * of the first value in r that is > v; the
    2382             :                          * difference is the number of values equal
    2383             :                          * v; we change rci */
    2384             : 
    2385             :                         /* look ahead a little (rscan) in r to
    2386             :                          * see whether we're better off doing
    2387             :                          * a binary search */
    2388      367515 :                         if (rvals) {
    2389      356309 :                                 if (rscan < rci->ncand - rci->next &&
    2390      313669 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) > 0) {
    2391             :                                         /* value too far away in r:
    2392             :                                          * use binary search */
    2393       93274 :                                         lv = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 0);
    2394      105563 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2395      105082 :                                         canditer_setidx(rci, lv);
    2396             :                                 } else {
    2397             :                                         /* scan r for v */
    2398      281313 :                                         while (rci->next < rci->ncand) {
    2399      281224 :                                                 if (rordering * cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) <= 0)
    2400             :                                                         break;
    2401       19325 :                                                 canditer_next(rci);
    2402             :                                         }
    2403             :                                 }
    2404      705967 :                                 if (rci->next < rci->ncand &&
    2405      343650 :                                     cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0) {
    2406             :                                         /* if we found an equal value,
    2407             :                                          * look for the last equal
    2408             :                                          * value */
    2409      236487 :                                         if (ri.key) {
    2410             :                                                 /* r is key, there can
    2411             :                                                  * only be a single
    2412             :                                                  * equal value */
    2413      139511 :                                                 nr = 1;
    2414      139511 :                                                 canditer_next(rci);
    2415      192272 :                                         } else if (rscan < rci->ncand - rci->next &&
    2416       95301 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next + rscan) - r->hseqbase)) == 0) {
    2417             :                                                 /* many equal values:
    2418             :                                                  * use binary search
    2419             :                                                  * to find the end */
    2420       43204 :                                                 nr = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, rci->next + rscan, BATcount(r), v, rordering, 1);
    2421       43204 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2422       43204 :                                                 nr -= rci->next;
    2423       43204 :                                                 canditer_setidx(rci, rci->next + nr);
    2424             :                                         } else {
    2425             :                                                 /* scan r for end of
    2426             :                                                  * range */
    2427      146816 :                                                 do {
    2428      146816 :                                                         nr++;
    2429      146816 :                                                         canditer_next(rci);
    2430      293203 :                                                 } while (rci->next < rci->ncand &&
    2431      146412 :                                                          cmp(v, VALUE(r, canditer_peek(rci) - r->hseqbase)) == 0);
    2432             :                                         }
    2433             :                                 }
    2434             :                         } else {
    2435       11206 :                                 assert(rordering == 1);
    2436       11206 :                                 rval = canditer_search(&rrci, *(const oid*)v, true) + r->hseqbase;
    2437       11206 :                                 lv = canditer_search(rci, rval, true);
    2438       11206 :                                 canditer_setidx(rci, lv);
    2439       11206 :                                 nr = (canditer_idx(&rrci, canditer_peek(rci) - r->hseqbase) == *(oid*)v);
    2440       11206 :                                 if (nr == 1)
    2441       11206 :                                         canditer_next(rci);
    2442             :                         }
    2443             :                         /* rci points to first value > v or end of r,
    2444             :                          * and nr is the number of values in r that
    2445             :                          * are equal to v */
    2446             :                 } else {
    2447             :                         /* first find the location of the first value
    2448             :                          * in r that is > v, then find the location
    2449             :                          * of the first value in r that is >= v; the
    2450             :                          * difference is the number of values equal
    2451             :                          * v; we change rci */
    2452             : 
    2453             :                         /* look back from the end a little
    2454             :                          * (rscan) in r to see whether we're
    2455             :                          * better off doing a binary search */
    2456         785 :                         if (rvals) {
    2457         785 :                                 if (rci->next > rscan &&
    2458         501 :                                     rordering * cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) < 0) {
    2459             :                                         /* value too far away
    2460             :                                          * in r: use binary
    2461             :                                          * search */
    2462          15 :                                         lv = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 1);
    2463          15 :                                         lv = canditer_search(rci, lv + r->hseqbase, true);
    2464          15 :                                         canditer_setidx(rci, lv);
    2465             :                                 } else {
    2466             :                                         /* scan r for v */
    2467        1055 :                                         while (rci->next > 0 &&
    2468        1048 :                                                rordering * cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) < 0)
    2469         285 :                                                 canditer_prev(rci);
    2470             :                                 }
    2471        1563 :                                 if (rci->next > 0 &&
    2472         778 :                                     cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0) {
    2473             :                                         /* if we found an equal value,
    2474             :                                          * look for the last equal
    2475             :                                          * value */
    2476         588 :                                         if (ri.key) {
    2477             :                                                 /* r is key, there can only be a single equal value */
    2478         101 :                                                 nr = 1;
    2479         101 :                                                 canditer_prev(rci);
    2480         934 :                                         } else if (rci->next > rscan &&
    2481         447 :                                                    cmp(v, VALUE(r, canditer_idx(rci, rci->next - rscan) - r->hseqbase)) == 0) {
    2482             :                                                 /* use binary search to find the start */
    2483           0 :                                                 nr = binsearch(NULL, 0, ri.type, rvals, rvars, ri.width, 0, rci->next - rscan, v, rordering, 0);
    2484           0 :                                                 nr = canditer_search(rci, nr + r->hseqbase, true);
    2485           0 :                                                 nr = rci->next - nr;
    2486           0 :                                                 canditer_setidx(rci, rci->next - nr);
    2487             :                                         } else {
    2488             :                                                 /* scan r for start of range */
    2489         525 :                                                 do {
    2490         525 :                                                         canditer_prev(rci);
    2491         525 :                                                         nr++;
    2492        1045 :                                                 } while (rci->next > 0 &&
    2493         520 :                                                          cmp(v, VALUE(r, canditer_peekprev(rci) - r->hseqbase)) == 0);
    2494             :                                         }
    2495             :                                 }
    2496             :                         } else {
    2497           0 :                                 lv = canditer_search(&rrci, *(const oid *)v, true);
    2498           0 :                                 lv = canditer_search(rci, lv + r->hseqbase, true);
    2499           0 :                                 nr = (canditer_idx(rci, lv) == *(const oid*)v);
    2500           0 :                                 canditer_setidx(rci, lv);
    2501             :                         }
    2502             :                         /* rci points to first value > v
    2503             :                          * or end of r, and nr is the number of values
    2504             :                          * in r that are equal to v */
    2505             :                 }
    2506             : 
    2507      250621 :                 if (nr == 0) {
    2508             :                         /* no entries in r found */
    2509      129049 :                         if (!(nil_on_miss | only_misses)) {
    2510       88002 :                                 if (min_one) {
    2511           0 :                                         GDKerror("not enough matches");
    2512           0 :                                         goto bailout;
    2513             :                                 }
    2514       92289 :                                 if (lscan > 0 &&
    2515        4287 :                                     (equal_order ? rci->next == rci->ncand : rci->next == 0)) {
    2516             :                                         /* nothing more left to match
    2517             :                                          * in r */
    2518             :                                         break;
    2519             :                                 }
    2520       87982 :                                 canditer_setidx(lci, lci->next + nl);
    2521       87165 :                                 continue;
    2522             :                         }
    2523             :                         /* insert a nil to indicate a non-match */
    2524       41047 :                         insert_nil = true;
    2525       41047 :                         nr = 1;
    2526       41047 :                         if (r2) {
    2527           4 :                                 r2->tnil = true;
    2528           4 :                                 r2->tnonil = false;
    2529           4 :                                 r2->tsorted = false;
    2530           4 :                                 r2->trevsorted = false;
    2531           4 :                                 r2->tseqbase = oid_nil;
    2532           4 :                                 r2->tkey = false;
    2533             :                         }
    2534      248166 :                 } else if (nr > 1 && max_one) {
    2535          21 :                         GDKerror("more than one match");
    2536          21 :                         goto bailout;
    2537      248145 :                 } else if (only_misses) {
    2538             :                         /* we had a match, so we're not interested */
    2539      112195 :                         canditer_setidx(lci, lci->next + nl);
    2540      112215 :                         continue;
    2541             :                 } else {
    2542      135950 :                         insert_nil = false;
    2543      135950 :                         if (semi) {
    2544             :                                 /* for semi-join, only insert single
    2545             :                                  * value */
    2546       33811 :                                 nr = 1;
    2547             :                         }
    2548             :                 }
    2549             :                 /* make space: nl values in l match nr values in r, so
    2550             :                  * we need to add nl * nr values in the results */
    2551      176997 :                 if (maybeextend(r1, r2, r3, nl * nr, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    2552           0 :                         goto bailout;
    2553             : 
    2554             :                 /* maintain properties */
    2555      177101 :                 if (nl > 1) {
    2556       50933 :                         if (r2) {
    2557             :                                 /* value occurs multiple times in l,
    2558             :                                  * so entry in r will be repeated
    2559             :                                  * multiple times: hence r2 is not key
    2560             :                                  * and not dense */
    2561       11473 :                                 r2->tkey = false;
    2562       11473 :                                 r2->tseqbase = oid_nil;
    2563             :                         }
    2564             :                         /* multiple different values will be inserted
    2565             :                          * in r1 (always in order), so not reverse
    2566             :                          * ordered anymore */
    2567       50933 :                         r1->trevsorted = false;
    2568             :                 }
    2569      177101 :                 if (nr > 1) {
    2570             :                         /* value occurs multiple times in r, so entry
    2571             :                          * in l will be repeated multiple times: hence
    2572             :                          * r1 is not key and not dense */
    2573       43057 :                         r1->tkey = false;
    2574       43057 :                         if (r2) {
    2575             :                                 /* multiple different values will be
    2576             :                                  * inserted in r2 (in order), so not
    2577             :                                  * reverse ordered anymore */
    2578       42618 :                                 r2->trevsorted = false;
    2579       42618 :                                 if (nl > 1) {
    2580             :                                         /* multiple values in l match
    2581             :                                          * multiple values in r, so an
    2582             :                                          * ordered sequence will be
    2583             :                                          * inserted multiple times in
    2584             :                                          * r2, so r2 is not ordered
    2585             :                                          * anymore */
    2586        3917 :                                         r2->tsorted = false;
    2587             :                                 }
    2588             :                         }
    2589             :                 }
    2590      177101 :                 if (lscan == 0) {
    2591             :                         /* deduce relative positions of r matches for
    2592             :                          * this and previous value in v */
    2593       58061 :                         if (prev && r2) {
    2594             :                                 /* keyness of r2 can only be assured
    2595             :                                  * as long as matched values are
    2596             :                                  * ordered */
    2597       56952 :                                 int ord = rordering * cmp(prev, v ? v : nil);
    2598       57218 :                                 if (ord < 0) {
    2599             :                                         /* previous value in l was
    2600             :                                          * less than current */
    2601       22570 :                                         r2->trevsorted = false;
    2602       22570 :                                         r2->tkey &= r2->tsorted;
    2603       34648 :                                 } else if (ord > 0) {
    2604             :                                         /* previous value was
    2605             :                                          * greater */
    2606       21961 :                                         r2->tsorted = false;
    2607       21961 :                                         r2->tkey &= r2->trevsorted;
    2608             :                                 } else {
    2609             :                                         /* value can be equal if
    2610             :                                          * intervening values in l
    2611             :                                          * didn't match anything; if
    2612             :                                          * multiple values match in r,
    2613             :                                          * r2 won't be sorted */
    2614       12687 :                                         r2->tkey = false;
    2615       12687 :                                         if (nr > 1) {
    2616       12658 :                                                 r2->tsorted = false;
    2617       12658 :                                                 r2->trevsorted = false;
    2618             :                                         }
    2619             :                                 }
    2620             :                         }
    2621       58327 :                         prev = v ? v : nil;
    2622             :                 }
    2623      177367 :                 if (BATcount(r1) > 0) {
    2624             :                         /* a new, higher value will be inserted into
    2625             :                          * r1, so r1 is not reverse ordered anymore */
    2626      173231 :                         r1->trevsorted = false;
    2627      173231 :                         if (r2) {
    2628             :                                 /* depending on whether l and r are
    2629             :                                  * ordered the same or not, a new
    2630             :                                  * higher or lower value will be added
    2631             :                                  * to r2 */
    2632       59749 :                                 if (equal_order)
    2633       59687 :                                         r2->trevsorted = false;
    2634             :                                 else {
    2635          62 :                                         r2->tsorted = false;
    2636          62 :                                         r2->tseqbase = oid_nil;
    2637             :                                 }
    2638             :                         }
    2639             :                 }
    2640             : 
    2641             :                 /* insert values: first the left output */
    2642             :                 BUN nladded = 0;
    2643      463005 :                 for (i = 0; i < nl; i++) {
    2644      286023 :                         lv = canditer_next(lci);
    2645      285638 :                         if (mlci == NULL || canditer_contains(mlci, lv)) {
    2646      285638 :                                 nladded++;
    2647    39694529 :                                 for (j = 0; j < nr; j++)
    2648    39408891 :                                         APPEND(r1, lv);
    2649             :                         }
    2650             :                 }
    2651      176982 :                 nl = nladded;
    2652             :                 /* then the right output, various different ways of
    2653             :                  * doing it */
    2654      176982 :                 if (r2) {
    2655       60136 :                         if (insert_nil) {
    2656          11 :                                 for (i = 0; i < nl; i++) {
    2657          14 :                                         for (j = 0; j < nr; j++) {
    2658           7 :                                                 APPEND(r2, oid_nil);
    2659             :                                         }
    2660             :                                 }
    2661       60132 :                         } else if (equal_order) {
    2662       60023 :                                 struct canditer ci = *rci; /* work on copy */
    2663       60023 :                                 if (r2->batCount > 0 &&
    2664       62380 :                                     BATtdense(r2) &&
    2665        2759 :                                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_idx(&ci, ci.next - nr))
    2666          55 :                                         r2->tseqbase = oid_nil;
    2667      146233 :                                 for (i = 0; i < nl; i++) {
    2668       86233 :                                         canditer_setidx(&ci, ci.next - nr);
    2669    39379400 :                                         for (j = 0; j < nr; j++) {
    2670    39206957 :                                                 APPEND(r2, canditer_next(&ci));
    2671             :                                         }
    2672             :                                 }
    2673             :                         } else {
    2674         109 :                                 if (r2->batCount > 0 &&
    2675          62 :                                     BATtdense(r2) &&
    2676           0 :                                     ((oid *) r2->theap->base)[r2->batCount - 1] + 1 != canditer_peek(rci))
    2677           0 :                                         r2->tseqbase = oid_nil;
    2678         458 :                                 for (i = 0; i < nl; i++) {
    2679         349 :                                         struct canditer ci = *rci; /* work on copy */
    2680         698 :                                         for (j = 0; j < nr; j++) {
    2681         349 :                                                 APPEND(r2, canditer_next(&ci));
    2682             :                                         }
    2683             :                                 }
    2684             :                         }
    2685             :                 }
    2686             :                 /* finally the mark output */
    2687      176959 :                 if (r3) {
    2688        2856 :                         if (insert_nil) {
    2689         332 :                                 r3->tnil |= rhasnil;
    2690         826 :                                 for (i = 0; i < nl; i++) {
    2691         988 :                                         for (j = 0; j < nr; j++) {
    2692         494 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark;
    2693             :                                         }
    2694             :                                 }
    2695             :                         } else {
    2696        8732 :                                 for (i = 0; i < nl; i++) {
    2697       12417 :                                         for (j = 0; j < nr; j++) {
    2698        6209 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = 1;
    2699             :                                         }
    2700             :                                 }
    2701             :                         }
    2702             :                 }
    2703             :         }
    2704             :         /* also set other bits of heap to correct value to indicate size */
    2705        6082 :         BATsetcount(r1, BATcount(r1));
    2706        6082 :         r1->tseqbase = oid_nil;
    2707        6082 :         if (r1->tkey)
    2708        6027 :                 r1 = virtualize(r1);
    2709        6084 :         if (r2) {
    2710        1016 :                 BATsetcount(r2, BATcount(r2));
    2711        1016 :                 assert(BATcount(r1) == BATcount(r2));
    2712        1016 :                 r2->tseqbase = oid_nil;
    2713        1016 :                 if (BATcount(r2) <= 1) {
    2714         543 :                         r2->tkey = true;
    2715         543 :                         r2 = virtualize(r2);
    2716             :                 }
    2717             :         }
    2718        6084 :         if (r3) {
    2719          60 :                 BATsetcount(r3, BATcount(r3));
    2720          60 :                 assert(BATcount(r1) == BATcount(r3));
    2721          60 :                 r3->tseqbase = oid_nil;
    2722          60 :                 r3->tnonil = !r3->tnil;
    2723          60 :                 if (BATcount(r3) <= 1) {
    2724           0 :                         r3->tkey = true;
    2725           0 :                         r3->tsorted = true;
    2726           0 :                         r3->trevsorted = true;
    2727             :                 }
    2728             :         }
    2729        6084 :         bat_iterator_end(&li);
    2730        6085 :         bat_iterator_end(&ri);
    2731        6085 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    2732             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    2733             :                   "sr=" ALGOOPTBATFMT ","
    2734             :                   "nil_on_miss=%s,semi=%s,only_misses=%s,not_in=%s;%s %s "
    2735             :                   "-> " ALGOBATFMT "," ALGOOPTBATFMT " (" LLFMT "usec)\n",
    2736             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    2737             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    2738             :                   nil_on_miss ? "true" : "false",
    2739             :                   semi ? "true" : "false",
    2740             :                   only_misses ? "true" : "false",
    2741             :                   not_in ? "true" : "false",
    2742             :                   swapped ? " swapped" : "", reason,
    2743             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    2744             :                   GDKusec() - t0);
    2745             : 
    2746             :         return GDK_SUCCEED;
    2747             : 
    2748          21 :   bailout:
    2749          21 :         bat_iterator_end(&li);
    2750          21 :         bat_iterator_end(&ri);
    2751          21 :         BBPreclaim(r1);
    2752          21 :         BBPreclaim(r2);
    2753          21 :         BBPreclaim(r3);
    2754             :         return GDK_FAIL;
    2755             : }
    2756             : 
    2757             : #define HASHLOOPBODY() /* record one match (lo, ro) in the outputs */   \
    2758             :         do { /* uses nr, max_one, lo, ro, r1..r3, lci, maxsize, bailout */ \
    2759             :                 if (nr >= 1 && max_one) { /* would exceed one match */ \
    2760             :                         GDKerror("more than one match");              \
    2761             :                         goto bailout;                                   \
    2762             :                 }                                                       \
    2763             :                 if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2764             :                         goto bailout; /* no room for one more entry */  \
    2765             :                 APPEND(r1, lo); /* left side of the match */            \
    2766             :                 if (r2) /* right output is optional */                  \
    2767             :                         APPEND(r2, ro);                                 \
    2768             :                 if (r3) /* mark output is optional; 1 = matched */      \
    2769             :                         ((bit *) r3->theap->base)[r3->batCount++] = 1; \
    2770             :                 nr++; /* one more match recorded */                     \
    2771             :         } while (false)
    2772             : 
                     : /*
                     :  * Per-type equality tests; HASHJOIN(TYPE) selects one of them by
                     :  * token pasting (EQ_##TYPE).  int and lng use plain ==.  A uuid is
                     :  * compared through its `h' member when HAVE_HGE is defined, and
                     :  * bytewise with memcmp() over UUID_SIZE bytes of its `u' member
                     :  * otherwise.
                     :  * NOTE(review): `h' is presumably an integer view covering the whole
                     :  * uuid -- confirm against the uuid type definition.
                     :  */
    2773             : #define EQ_int(a, b)    ((a) == (b))
    2774             : #define EQ_lng(a, b)    ((a) == (b))
    2775             : #ifdef HAVE_HGE
    2776             : #define EQ_uuid(a, b)   ((a).h == (b).h)
    2777             : #else
    2778             : #define EQ_uuid(a, b)   (memcmp((a).u, (b).u, UUID_SIZE) == 0)
    2779             : #endif
    2780             : 
                     : /*
                     :  * HASHJOIN(TYPE): type-specialized probe loop of hashjoin(), which
                     :  * instantiates it for int, lng and uuid.  Values are read as plain
                     :  * TYPE arrays from li.base and ri.base and compared with
                     :  * EQ_##TYPE; hashing uses hash_##TYPE and nil detection uses
                     :  * is_##TYPE##_nil.
                     :  *
                     :  * The macro expands in place and uses these names from the
                     :  * enclosing scope: l, r, li, ri, lci, rci, hsh, rl, rh, roff, rseq,
                     :  * lo, ro, rb, nr, r1, r2, r3, defmark, lskipped, maxsize, qry_ctx,
                     :  * counter, the flags nil_matches, not_in, hash_cand, only_misses,
                     :  * semi, max_one, nil_on_miss and min_one, and the label `bailout'.
                     :  * Inside its do-block it declares rvals, lvals and v of its own;
                     :  * hashjoin() also declares lvals and v, so those are shadowed here.
                     :  *
                     :  * For every left candidate lo produced by canditer_next(lci):
                     :  *
                     :  * 1. GDK_CHECK_TIMEOUT is evaluated with a handler that targets the
                     :  *    bailout label (presumably taken when the query timed out).
                     :  * 2. v is loaded from lvals[lo - l->hseqbase], nr is reset to 0 and
                     :  *    mark starts out as defmark.
                     :  * 3. If v is nil and nils must not match (!nil_matches || not_in):
                     :  *    with not_in the row is dropped (lskipped is updated and the
                     :  *    loop continues); otherwise mark becomes bit_nil and no probe
                     :  *    is done, so nr stays 0.
                     :  * 4. Otherwise the hash chain for v is walked (HASHget followed by
                     :  *    HASHgetlink until BUN_NONE) in one of three variants:
                     :  *    - hash_cand: rb is turned into an oid with
                     :  *      canditer_idx(rci, rb) and v is compared with
                     :  *      rvals[ro - r->hseqbase];
                     :  *    - rci->tpe != cand_dense: rb must lie in [rl, rh), the value
                     :  *      rvals[rb] must equal v, and the oid rb - roff + rseq must be
                     :  *      contained in rci;
                     :  *    - otherwise: only the [rl, rh) range check and the value
                     :  *      comparison are done, and ro is computed as rb - roff + rseq.
                     :  *    In each variant a match with only_misses set merely counts
                     :  *    (nr++) and ends the walk; otherwise HASHLOOPBODY() records the
                     :  *    match, and the walk ends after the first match when semi is
                     :  *    set and max_one is not.
                     :  * 5. Bookkeeping after the probe:
                     :  *    - nr == 0 and only_misses: lo is appended to r1 (r1->tseqbase
                     :  *      becomes oid_nil if lskipped is set);
                     :  *    - nr == 0 and nil_on_miss: lo is appended to r1, oid_nil to r2
                     :  *      (with r2's tnil/tnonil/tkey adjusted) and mark to r3 (tnil
                     :  *      is set when mark is bit_nil);
                     :  *    - nr == 0 and min_one: "not enough matches" is reported and
                     :  *      control jumps to bailout;
                     :  *    - nr == 0 otherwise, or nr > 0 with only_misses: nothing is
                     :  *      output for this row and lskipped is set to
                     :  *      BATcount(r1) > 0;
                     :  *    - matches were recorded: r1->tseqbase becomes oid_nil if
                     :  *      lskipped is set, and for nr > 1 r1->tkey is cleared and
                     :  *      r1->tseqbase becomes oid_nil.
                     :  * 6. If this row produced output (nr > 0) and r1 already held
                     :  *    earlier entries (BATcount(r1) > nr), r1->trevsorted is
                     :  *    cleared.
                     :  */
    2781             : #define HASHJOIN(TYPE)                                                  \
    2782             :         do {                                                            \
    2783             :                 TYPE *rvals = ri.base;                                  \
    2784             :                 TYPE *lvals = li.base;                                  \
    2785             :                 TYPE v;                                                 \
    2786             :                 while (lci->next < lci->ncand) {                       \
    2787             :                         GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
    2788             :                         lo = canditer_next(lci);                        \
    2789             :                         v = lvals[lo - l->hseqbase];                 \
    2790             :                         nr = 0;                                         \
    2791             :                         bit mark = defmark;                             \
    2792             :                         if ((!nil_matches || not_in) && is_##TYPE##_nil(v)) { \
    2793             :                                 /* no match */                          \
    2794             :                                 if (not_in) {                           \
    2795             :                                         lskipped = BATcount(r1) > 0; \
    2796             :                                         continue;                       \
    2797             :                                 }                                       \
    2798             :                                 mark = bit_nil;                         \
    2799             :                         } else if (hash_cand) {                         \
    2800             :                                 /* private hash: no locks */            \
    2801             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2802             :                                      rb != BUN_NONE;                    \
    2803             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2804             :                                         ro = canditer_idx(rci, rb);     \
    2805             :                                         if (!EQ_##TYPE(v, rvals[ro - r->hseqbase])) \
    2806             :                                                 continue;               \
    2807             :                                         if (only_misses) {              \
    2808             :                                                 nr++;                   \
    2809             :                                                 break;                  \
    2810             :                                         }                               \
    2811             :                                         HASHLOOPBODY();                 \
    2812             :                                         if (semi && !max_one)           \
    2813             :                                                 break;                  \
    2814             :                                 }                                       \
    2815             :                         } else if (rci->tpe != cand_dense) {         \
    2816             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2817             :                                      rb != BUN_NONE;                    \
    2818             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2819             :                                         if (rb >= rl && rb < rh &&        \
    2820             :                                             EQ_##TYPE(v, rvals[rb]) &&  \
    2821             :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) { \
    2822             :                                                 if (only_misses) {      \
    2823             :                                                         nr++;           \
    2824             :                                                         break;          \
    2825             :                                                 }                       \
    2826             :                                                 HASHLOOPBODY();         \
    2827             :                                                 if (semi && !max_one)   \
    2828             :                                                         break;          \
    2829             :                                         }                               \
    2830             :                                 }                                       \
    2831             :                         } else {                                        \
    2832             :                                 for (rb = HASHget(hsh, hash_##TYPE(hsh, &v)); \
    2833             :                                      rb != BUN_NONE;                    \
    2834             :                                      rb = HASHgetlink(hsh, rb)) {       \
    2835             :                                         if (rb >= rl && rb < rh &&        \
    2836             :                                             EQ_##TYPE(v, rvals[rb])) {  \
    2837             :                                                 if (only_misses) {      \
    2838             :                                                         nr++;           \
    2839             :                                                         break;          \
    2840             :                                                 }                       \
    2841             :                                                 ro = (oid) (rb - roff + rseq); \
    2842             :                                                 HASHLOOPBODY();         \
    2843             :                                                 if (semi && !max_one)   \
    2844             :                                                         break;          \
    2845             :                                         }                               \
    2846             :                                 }                                       \
    2847             :                         }                                               \
    2848             :                         if (nr == 0) {                                  \
    2849             :                                 if (only_misses) {                      \
    2850             :                                         nr = 1;                         \
    2851             :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2852             :                                                 goto bailout;           \
    2853             :                                         APPEND(r1, lo);                 \
    2854             :                                         if (lskipped)                   \
    2855             :                                                 r1->tseqbase = oid_nil;      \
    2856             :                                 } else if (nil_on_miss) {               \
    2857             :                                         nr = 1;                         \
    2858             :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED) \
    2859             :                                                 goto bailout;           \
    2860             :                                         APPEND(r1, lo);                 \
    2861             :                                         if (r2) {                       \
    2862             :                                                 r2->tnil = true;     \
    2863             :                                                 r2->tnonil = false;  \
    2864             :                                                 r2->tkey = false;    \
    2865             :                                                 APPEND(r2, oid_nil);    \
    2866             :                                         }                               \
    2867             :                                         if (r3) {                       \
    2868             :                                                 r3->tnil |= mark == bit_nil; \
    2869             :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark; \
    2870             :                                         }                               \
    2871             :                                 } else if (min_one) {                   \
    2872             :                                         GDKerror("not enough matches");       \
    2873             :                                         goto bailout;                   \
    2874             :                                 } else {                                \
    2875             :                                         lskipped = BATcount(r1) > 0; \
    2876             :                                 }                                       \
    2877             :                         } else if (only_misses) {                       \
    2878             :                                 lskipped = BATcount(r1) > 0;         \
    2879             :                         } else {                                        \
    2880             :                                 if (lskipped) {                         \
    2881             :                                         /* note, we only get here in an \
    2882             :                                          * iteration *after* lskipped was \
    2883             :                                          * first set to true, i.e. we did \
    2884             :                                          * indeed skip values in l */   \
    2885             :                                         r1->tseqbase = oid_nil;              \
    2886             :                                 }                                       \
    2887             :                                 if (nr > 1) {                                \
    2888             :                                         r1->tkey = false;            \
    2889             :                                         r1->tseqbase = oid_nil;              \
    2890             :                                 }                                       \
    2891             :                         }                                               \
    2892             :                         if (nr > 0 && BATcount(r1) > nr)          \
    2893             :                                 r1->trevsorted = false;                      \
    2894             :                 }                                                       \
    2895             :         } while (0)
    2896             : 
    2897             : /* Implementation of join using a hash lookup of values in the right
    2898             :  * column. */
    2899             : static gdk_return
    2900       13905 : hashjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r,
    2901             :          struct canditer *restrict lci, struct canditer *restrict rci,
    2902             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    2903             :          bool not_in, bool max_one, bool min_one,
    2904             :          BUN estimate, lng t0, bool swapped,
    2905             :          bool hash, bool phash, bool hash_cand,
    2906             :          const char *reason)
    2907             : {
    2908       13905 :         oid lo, ro;
    2909       13905 :         BATiter li, ri;
    2910       13905 :         BUN rb, roff = 0;
    2911             :         /* rl, rh: lower and higher bounds for BUN values in hash table */
    2912       13905 :         BUN rl, rh;
    2913       13905 :         oid rseq;
    2914       13905 :         BUN nr;
    2915       13905 :         const char *lvals;
    2916       13905 :         const char *lvars;
    2917       13905 :         const void *nil = ATOMnilptr(l->ttype);
    2918       13905 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    2919       13905 :         oid lval = oid_nil;     /* hold value if l is dense */
    2920       13905 :         const char *v = (const char *) &lval;
    2921       13905 :         bool lskipped = false;  /* whether we skipped values in l */
    2922       13905 :         Hash *restrict hsh = NULL;
    2923       13905 :         bool locked = false;
    2924       13905 :         BUN maxsize;
    2925       13905 :         BAT *r1 = NULL;
    2926       13905 :         BAT *r2 = NULL;
    2927       13905 :         BAT *r3 = NULL;
    2928       13905 :         BAT *b = NULL;
    2929             : 
    2930       41707 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    2931             : 
    2932       13905 :         size_t counter = 0;
    2933       13905 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    2934             : 
    2935       13905 :         li = bat_iterator(l);
    2936       13905 :         ri = bat_iterator(r);
    2937             : 
    2938       13905 :         int t = ATOMbasetype(ri.type);
    2939       13905 :         if (BATtvoid(r) || BATtvoid(l))
    2940           8 :                 t = TYPE_void;
    2941             : 
    2942       13905 :         lvals = (const char *) li.base;
    2943       13905 :         if (li.vh && li.type) {
    2944         905 :                 assert(ri.vh && ri.type);
    2945         905 :                 lvars = li.vh->base;
    2946             :         } else {
    2947       13000 :                 assert(ri.vh == NULL);
    2948             :                 lvars = NULL;
    2949             :         }
    2950             :         /* offset to convert BUN to OID for value in right column */
    2951       13905 :         rseq = r->hseqbase;
    2952             : 
    2953       13905 :         rl = rci->seq - r->hseqbase;
    2954       13905 :         rh = canditer_last(rci) + 1 - r->hseqbase;
    2955       13905 :         if (hash_cand) {
    2956             :                 /* we need to create a hash on r specific for the
    2957             :                  * candidate list */
    2958         171 :                 char ext[32];
    2959         171 :                 assert(rci->s);
    2960         215 :                 MT_thread_setalgorithm(swapped ? "hashjoin using candidate hash (swapped)" : "hashjoin using candidate hash");
    2961         171 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating "
    2962             :                           "hash for candidate list " ALGOBATFMT "%s%s\n",
    2963             :                           ALGOBATPAR(r), ALGOBATPAR(rci->s),
    2964             :                           r->thash ? " ignoring existing hash" : "",
    2965             :                           swapped ? " (swapped)" : "");
    2966         171 :                 if (snprintf(ext, sizeof(ext), "thshjn%x",
    2967         171 :                              (unsigned) MT_getpid()) >= (int) sizeof(ext))
    2968           0 :                         goto bailout;
    2969         171 :                 if ((hsh = BAThash_impl(r, rci, ext)) == NULL) {
    2970           0 :                         goto bailout;
    2971             :                 }
    2972       13734 :         } else if (phash) {
    2973             :                 /* there is a hash on the parent which we should use */
    2974        1343 :                 MT_thread_setalgorithm(swapped ? "hashjoin using parent hash (swapped)" : "hashjoin using parent hash");
    2975        1195 :                 b = BATdescriptor(VIEWtparent(r));
    2976        1195 :                 if (b == NULL)
    2977           0 :                         goto bailout;
    2978        1195 :                 TRC_DEBUG(ALGO, "%s(%s): using "
    2979             :                           "parent(" ALGOBATFMT ") for hash%s\n",
    2980             :                           __func__,
    2981             :                           BATgetId(r), ALGOBATPAR(b),
    2982             :                           swapped ? " (swapped)" : "");
    2983        1195 :                 roff = r->tbaseoff - b->tbaseoff;
    2984        1195 :                 rl += roff;
    2985        1195 :                 rh += roff;
    2986        1195 :                 r = b;
    2987        1195 :                 bat_iterator_end(&ri);
    2988        1195 :                 ri = bat_iterator(r);
    2989        1195 :                 MT_rwlock_rdlock(&r->thashlock);
    2990        1195 :                 hsh = r->thash;
    2991        1195 :                 locked = true;
    2992       12539 :         } else if (hash) {
    2993             :                 /* there is a hash on r which we should use */
    2994        9482 :                 MT_thread_setalgorithm(swapped ? "hashjoin using existing hash (swapped)" : "hashjoin using existing hash");
    2995        5707 :                 MT_rwlock_rdlock(&r->thashlock);
    2996        5706 :                 hsh = r->thash;
    2997        5706 :                 locked = true;
    2998        5706 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": using "
    2999             :                           "existing hash%s\n",
    3000             :                           ALGOBATPAR(r),
    3001             :                           swapped ? " (swapped)" : "");
    3002        6832 :         } else if (BATtdensebi(&ri)) {
    3003             :                 /* no hash, just dense lookup */
    3004           0 :                 MT_thread_setalgorithm(swapped ? "hashjoin on dense (swapped)" : "hashjoin on dense");
    3005             :         } else {
    3006             :                 /* we need to create a hash on r */
    3007        9644 :                 MT_thread_setalgorithm(swapped ? "hashjoin using new hash (swapped)" : "hashjoin using new hash");
    3008        6831 :                 TRC_DEBUG(ALGO, ALGOBATFMT ": creating hash%s\n",
    3009             :                           ALGOBATPAR(r),
    3010             :                           swapped ? " (swapped)" : "");
    3011        6831 :                 if (BAThash(r) != GDK_SUCCEED)
    3012           0 :                         goto bailout;
    3013        6832 :                 MT_rwlock_rdlock(&r->thashlock);
    3014        6832 :                 hsh = r->thash;
    3015        6832 :                 locked = true;
    3016             :         }
    3017       13904 :         if (locked && hsh == NULL) {
    3018           0 :                 GDKerror("Hash disappeared for "ALGOBATFMT"\n", ALGOBATPAR(r));
    3019           0 :                 goto bailout;
    3020             :         }
    3021       13904 :         assert(hsh != NULL || BATtdensebi(&ri));
    3022             :         if (hsh) {
    3023       13904 :                 TRC_DEBUG(ALGO, "hash for " ALGOBATFMT ": nbucket " BUNFMT ", nunique " BUNFMT ", nheads " BUNFMT "\n", ALGOBATPAR(r), hsh->nbucket, hsh->nunique, hsh->nheads);
    3024             :         }
    3025             : 
    3026       13904 :         bit defmark = 0;
    3027       13904 :         if ((not_in || r3p) && !ri.nonil) {
    3028             :                 /* check whether there is a nil on the right, since if
    3029             :                  * so, we should return an empty result if not_in is
    3030             :                  * set, or use a NIL mark for non-matches if r3p is
    3031             :                  * set */
    3032         295 :                 if (hash_cand) {
    3033           0 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    3034           0 :                              rb != BUN_NONE;
    3035           0 :                              rb = HASHgetlink(hsh, rb)) {
    3036           0 :                                 ro = canditer_idx(rci, rb);
    3037           0 :                                 if ((*cmp)(nil, BUNtail(ri, ro - r->hseqbase)) == 0) {
    3038           0 :                                         assert(!locked);
    3039           0 :                                         if (r3p) {
    3040           0 :                                                 defmark = bit_nil;
    3041           0 :                                                 break;
    3042             :                                         }
    3043           0 :                                         HEAPfree(&hsh->heaplink, true);
    3044           0 :                                         HEAPfree(&hsh->heapbckt, true);
    3045           0 :                                         GDKfree(hsh);
    3046           0 :                                         bat_iterator_end(&li);
    3047           0 :                                         bat_iterator_end(&ri);
    3048           0 :                                         BBPreclaim(b);
    3049           0 :                                         return nomatch(r1p, r2p, r3p, l, r, lci,
    3050             :                                                        bit_nil, false, false,
    3051             :                                                        __func__, t0);
    3052             :                                 }
    3053             :                         }
    3054         295 :                 } else if (!BATtdensebi(&ri)) {
    3055         295 :                         for (rb = HASHget(hsh, HASHprobe(hsh, nil));
    3056         357 :                              rb != BUN_NONE;
    3057          62 :                              rb = HASHgetlink(hsh, rb)) {
    3058          78 :                                 if (rb >= rl && rb < rh &&
    3059          78 :                                     (cmp == NULL ||
    3060          78 :                                      (*cmp)(nil, BUNtail(ri, rb)) == 0)) {
    3061          16 :                                         if (r3p) {
    3062          15 :                                                 defmark = bit_nil;
    3063          15 :                                                 break;
    3064             :                                         }
    3065           1 :                                         if (locked)
    3066           1 :                                                 MT_rwlock_rdunlock(&r->thashlock);
    3067           1 :                                         bat_iterator_end(&li);
    3068           1 :                                         bat_iterator_end(&ri);
    3069           1 :                                         BBPreclaim(b);
    3070           1 :                                         return nomatch(r1p, r2p, r3p, l, r, lci,
    3071             :                                                        bit_nil, false, false,
    3072             :                                                        __func__, t0);
    3073             :                                 }
    3074             :                         }
    3075             :                 }
    3076             :         }
    3077             : 
    3078       27807 :         maxsize = joininitresults(r1p, r2p, r3p, lci->ncand, rci->ncand,
    3079       13903 :                                   li.key, ri.key, semi | max_one,
    3080             :                                   nil_on_miss, only_misses, min_one,
    3081             :                                   estimate);
    3082       13904 :         if (maxsize == BUN_NONE) {
    3083           0 :                 goto bailout;
    3084             :         }
    3085             : 
    3086       13904 :         r1 = *r1p;
    3087       13904 :         r2 = r2p ? *r2p : NULL;
    3088       13904 :         r3 = r3p ? *r3p : NULL;
    3089             : 
    3090             :         /* basic properties will be adjusted if necessary later on,
    3091             :          * they were initially set by joininitresults() */
    3092             : 
    3093       13904 :         if (r2) {
    3094       11351 :                 r2->tkey = li.key;
    3095             :                 /* r2 is not likely to be sorted (although it is
    3096             :                  * certainly possible) */
    3097       11351 :                 r2->tsorted = false;
    3098       11351 :                 r2->trevsorted = false;
    3099       11351 :                 r2->tseqbase = oid_nil;
    3100             :         }
    3101             : 
    3102       13904 :         if (lci->tpe != cand_dense)
    3103         391 :                 r1->tseqbase = oid_nil;
    3104             : 
    3105             : 
    3106       13904 :         switch (t) {
    3107       11384 :         case TYPE_int:
    3108   343048907 :                 HASHJOIN(int);
    3109             :                 break;
    3110        1056 :         case TYPE_lng:
    3111   122825252 :                 HASHJOIN(lng);
    3112             :                 break;
    3113           0 :         case TYPE_uuid:
    3114           0 :                 HASHJOIN(uuid);
    3115           0 :                 break;
    3116             :         default:
    3117     2599017 :                 while (lci->next < lci->ncand) {
    3118     2597560 :                         GDK_CHECK_TIMEOUT(qry_ctx, counter,
    3119             :                                         GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    3120     2597560 :                         lo = canditer_next(lci);
    3121     2617058 :                         if (BATtdensebi(&li))
    3122         323 :                                 lval = lo - l->hseqbase + l->tseqbase;
    3123     2616735 :                         else if (li.type != TYPE_void)
    3124     2624767 :                                 v = VALUE(l, lo - l->hseqbase);
    3125     2633456 :                         nr = 0;
    3126     2633456 :                         bit mark = defmark;
    3127     2633456 :                         if ((!nil_matches || not_in) && cmp(v, nil) == 0) {
    3128             :                                 /* no match */
    3129        2959 :                                 if (not_in) {
    3130          10 :                                         lskipped = BATcount(r1) > 0;
    3131          10 :                                         continue;
    3132             :                                 }
    3133        2949 :                                 mark = bit_nil;
    3134     2630749 :                         } else if (hash_cand) {
    3135           0 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3136           0 :                                      rb != BUN_NONE;
    3137           0 :                                      rb = HASHgetlink(hsh, rb)) {
    3138           0 :                                         ro = canditer_idx(rci, rb);
    3139           0 :                                         if ((*cmp)(v, BUNtail(ri, ro - r->hseqbase)) != 0)
    3140           0 :                                                 continue;
    3141           0 :                                         if (only_misses) {
    3142           0 :                                                 nr++;
    3143           0 :                                                 break;
    3144             :                                         }
    3145           0 :                                         HASHLOOPBODY();
    3146           0 :                                         if (semi && !max_one)
    3147             :                                                 break;
    3148             :                                 }
    3149     2630749 :                         } else if (hsh == NULL) {
    3150           0 :                                 assert(BATtdensebi(&ri));
    3151           0 :                                 ro = *(const oid *) v;
    3152           0 :                                 if (ro >= r->tseqbase &&
    3153           0 :                                     ro < r->tseqbase + r->batCount) {
    3154           0 :                                         ro -= r->tseqbase;
    3155           0 :                                         ro += rseq;
    3156           0 :                                         if (canditer_contains(rci, ro)) {
    3157           0 :                                                 if (only_misses) {
    3158       13894 :                                                         nr++;
    3159             :                                                         break;
    3160             :                                                 }
    3161           0 :                                                 HASHLOOPBODY();
    3162           0 :                                                 if (semi && !max_one)
    3163             :                                                         break;
    3164             :                                         }
    3165             :                                 }
    3166     2630749 :                         } else if (rci->tpe != cand_dense) {
    3167           0 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3168           0 :                                      rb != BUN_NONE;
    3169           0 :                                      rb = HASHgetlink(hsh, rb)) {
    3170           0 :                                         if (rb >= rl && rb < rh &&
    3171           0 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0 &&
    3172           0 :                                             canditer_contains(rci, ro = (oid) (rb - roff + rseq))) {
    3173           0 :                                                 if (only_misses) {
    3174           0 :                                                         nr++;
    3175           0 :                                                         break;
    3176             :                                                 }
    3177           0 :                                                 HASHLOOPBODY();
    3178           0 :                                                 if (semi && !max_one)
    3179             :                                                         break;
    3180             :                                         }
    3181             :                                 }
    3182             :                         } else {
    3183     2630749 :                                 for (rb = HASHget(hsh, HASHprobe(hsh, v));
    3184     5276230 :                                      rb != BUN_NONE;
    3185     2624940 :                                      rb = HASHgetlink(hsh, rb)) {
    3186     5405855 :                                         if (rb >= rl && rb < rh &&
    3187     2710694 :                                             (*(cmp))(v, BUNtail(ri, rb)) == 0) {
    3188     2273268 :                                                 if (only_misses) {
    3189       60142 :                                                         nr++;
    3190       60142 :                                                         break;
    3191             :                                                 }
    3192     2213126 :                                                 ro = (oid) (rb - roff + rseq);
    3193     2213126 :                                                 HASHLOOPBODY();
    3194     2203929 :                                                 if (semi && !max_one)
    3195             :                                                         break;
    3196             :                                         }
    3197             :                                 }
    3198             :                         }
    3199     2597827 :                         if (nr == 0) {
    3200      391680 :                                 if (only_misses) {
    3201         260 :                                         nr = 1;
    3202         260 :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    3203           0 :                                                 goto bailout;
    3204         260 :                                         APPEND(r1, lo);
    3205         260 :                                         if (lskipped)
    3206         231 :                                                 r1->tseqbase = oid_nil;
    3207      391420 :                                 } else if (nil_on_miss) {
    3208       11215 :                                         nr = 1;
    3209       11215 :                                         if (maybeextend(r1, r2, r3, 1, lci->next, lci->ncand, maxsize) != GDK_SUCCEED)
    3210           0 :                                                 goto bailout;
    3211       10931 :                                         APPEND(r1, lo);
    3212       10931 :                                         if (r2) {
    3213           0 :                                                 r2->tnil = true;
    3214           0 :                                                 r2->tnonil = false;
    3215           0 :                                                 r2->tkey = false;
    3216           0 :                                                 APPEND(r2, oid_nil);
    3217             :                                         }
    3218       10931 :                                         if (r3) {
    3219       10931 :                                                 r3->tnil |= mark == bit_nil;
    3220       10931 :                                                 ((bit *) r3->theap->base)[r3->batCount++] = mark;
    3221             :                                         }
    3222      380205 :                                 } else if (min_one) {
    3223           0 :                                         GDKerror("not enough matches");
    3224           0 :                                         goto bailout;
    3225             :                                 } else {
    3226      380205 :                                         lskipped = BATcount(r1) > 0;
    3227             :                                 }
    3228     2206147 :                         } else if (only_misses) {
    3229       60263 :                                 lskipped = BATcount(r1) > 0;
    3230             :                         } else {
    3231     2145884 :                                 if (lskipped) {
    3232             :                                         /* note, we only get here in an
    3233             :                                          * iteration *after* lskipped was
    3234             :                                          * first set to true, i.e. we did
    3235             :                                          * indeed skip values in l */
    3236     1987068 :                                         r1->tseqbase = oid_nil;
    3237             :                                 }
    3238     2145884 :                                 if (nr > 1) {
    3239        2362 :                                         r1->tkey = false;
    3240        2362 :                                         r1->tseqbase = oid_nil;
    3241             :                                 }
    3242             :                         }
    3243     2597543 :                         if (nr > 0 && BATcount(r1) > nr)
    3244     2166791 :                                 r1->trevsorted = false;
    3245             :                 }
    3246             :                 break;
    3247             :         }
    3248       13894 :         if (locked) {
    3249       13725 :                 locked = false;
    3250       13725 :                 MT_rwlock_rdunlock(&r->thashlock);
    3251             :         }
    3252       13896 :         bat_iterator_end(&li);
    3253       13897 :         bat_iterator_end(&ri);
    3254             : 
    3255       13898 :         if (hash_cand) {
    3256         171 :                 HEAPfree(&hsh->heaplink, true);
    3257         171 :                 HEAPfree(&hsh->heapbckt, true);
    3258         171 :                 GDKfree(hsh);
    3259             :         }
    3260             :         /* also set other bits of heap to correct value to indicate size */
    3261       13898 :         BATsetcount(r1, BATcount(r1));
    3262       13897 :         if (BATcount(r1) <= 1) {
    3263        4622 :                 r1->tsorted = true;
    3264        4622 :                 r1->trevsorted = true;
    3265        4622 :                 r1->tkey = true;
    3266        4622 :                 r1->tseqbase = 0;
    3267             :         }
    3268       13897 :         if (r2) {
    3269       11345 :                 BATsetcount(r2, BATcount(r2));
    3270       11345 :                 assert(BATcount(r1) == BATcount(r2));
    3271       11345 :                 if (BATcount(r2) <= 1) {
    3272        3879 :                         r2->tsorted = true;
    3273        3879 :                         r2->trevsorted = true;
    3274        3879 :                         r2->tkey = true;
    3275        3879 :                         r2->tseqbase = 0;
    3276             :                 }
    3277             :         }
    3278       13897 :         if (r3) {
    3279          35 :                 r3->tnonil = !r3->tnil;
    3280          35 :                 BATsetcount(r3, BATcount(r3));
    3281          35 :                 assert(BATcount(r1) == BATcount(r3));
    3282             :         }
    3283       13897 :         if (BATcount(r1) > 0) {
    3284       10556 :                 if (BATtdense(r1))
    3285        6153 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    3286       10556 :                 if (r2 && BATtdense(r2))
    3287        1242 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    3288             :         } else {
    3289        3341 :                 r1->tseqbase = 0;
    3290        3341 :                 if (r2) {
    3291        2636 :                         r2->tseqbase = 0;
    3292             :                 }
    3293             :         }
    3294       13897 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3295             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3296             :                   "nil_matches=%s,nil_on_miss=%s,semi=%s,only_misses=%s,"
    3297             :                   "not_in=%s,max_one=%s,min_one=%s;%s %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3298             :                   " (" LLFMT "usec)\n",
    3299             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3300             :                   ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    3301             :                   nil_matches ? "true" : "false",
    3302             :                   nil_on_miss ? "true" : "false",
    3303             :                   semi ? "true" : "false",
    3304             :                   only_misses ? "true" : "false",
    3305             :                   not_in ? "true" : "false",
    3306             :                   max_one ? "true" : "false",
    3307             :                   min_one ? "true" : "false",
    3308             :                   swapped ? " swapped" : "", reason,
    3309             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3310             :                   GDKusec() - t0);
    3311             : 
    3312       13897 :         BBPreclaim(b);
    3313             :         return GDK_SUCCEED;
    3314             : 
    3315           6 :   bailout:
    3316           6 :         bat_iterator_end(&li);
    3317           6 :         bat_iterator_end(&ri);
    3318           6 :         if (locked)
    3319           6 :                 MT_rwlock_rdunlock(&r->thashlock);
    3320           6 :         if (hash_cand && hsh) {
    3321           0 :                 HEAPfree(&hsh->heaplink, true);
    3322           0 :                 HEAPfree(&hsh->heapbckt, true);
    3323           0 :                 GDKfree(hsh);
    3324             :         }
    3325           6 :         BBPreclaim(r1);
    3326           6 :         BBPreclaim(r2);
    3327           6 :         BBPreclaim(b);
    3328             :         return GDK_FAIL;
    3329             : }
    3330             : 
    3331             : /* Count the number of unique values for the first half and the complete
    3332             :  * set (the sample s of b) and return the two values in *cnt1 and
    3333             :  * *cnt2. In case of error, both values are 0. */
                       /* Returns GDK_SUCCEED, or GDK_FAIL when the bitmap for short-sized
                        * atoms or the temporary hash table cannot be allocated.
                        *
                        * The counting method is picked from cheapest to most expensive:
                        *  - b is key, dense, or there is at most one candidate: every
                        *    candidate is unique, so *cnt1 = ncand / 2 and *cnt2 = ncand;
                        *  - b is both sorted and reverse sorted (or void with a nil
                        *    sequence base): all values are equal, both counts are 1;
                        *  - b is sorted either way: count the positions where the value
                        *    differs from the previous one;
                        *  - byte- or short-sized base type: mark seen values in a bitmap
                        *    and count the set bits;
                        *  - anything else: insert values into a temporary hash table and
                        *    count the insertions. */
    3334             : static gdk_return
    3335     1014947 : count_unique(BAT *b, BAT *s, BUN *cnt1, BUN *cnt2)
    3336             : {
    3337     1014947 :         struct canditer ci;
    3338     1014947 :         BUN half;
    3339     1014947 :         BUN cnt = 0;
    3340     1014947 :         const void *v;
                               /* NOTE(review): bvals, bvars and bval are presumably picked up
                                * by name inside the VALUE() macro (defined outside this
                                * block) -- confirm before renaming any of them */
    3341     1014947 :         const char *bvals;
    3342     1014947 :         const char *bvars;
    3343     1014947 :         oid bval;
    3344     1014947 :         oid i, o;
    3345     1014947 :         const char *nme;
    3346     1014947 :         BUN hb;
    3347     1014947 :         BATiter bi;
    3348     1014947 :         int (*cmp)(const void *, const void *);
    3349     1014947 :         const char *algomsg = "";
    3350     1014947 :         lng t0 = 0;
    3351             : 
    3352     1014947 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    3353     1014947 :         canditer_init(&ci, b, s);
                               /* iteration index at which the running count is stored in *cnt1 */
    3354     1014946 :         half = ci.ncand / 2;
    3355             : 
    3356     1014946 :         MT_lock_set(&b->theaplock);
    3357     1014943 :         if (b->tkey || ci.ncand <= 1 || BATtdense(b)) {
    3358             :                 /* trivial: already unique */
    3359        1345 :                 MT_lock_unset(&b->theaplock);
    3360        1345 :                 *cnt1 = half;
    3361        1345 :                 *cnt2 = ci.ncand;
    3362        1345 :                 return GDK_SUCCEED;
    3363             :         }
    3364     1013598 :         MT_lock_unset(&b->theaplock);
    3365             : 
                               /* results are ignored; NOTE(review): presumably called so that
                                * the sortedness properties read through the iterator below
                                * are up to date -- confirm */
    3366     1013598 :         (void) BATordered(b);
    3367     1013601 :         (void) BATordered_rev(b);
    3368     1013600 :         bi = bat_iterator(b);
    3369     1013596 :         if ((bi.sorted && bi.revsorted) ||
    3370      974642 :             (bi.type == TYPE_void && is_oid_nil(bi.tseq))) {
    3371             :                 /* trivial: all values are the same */
    3372       38954 :                 *cnt1 = *cnt2 = 1;
    3373       38954 :                 bat_iterator_end(&bi);
    3374       38954 :                 return GDK_SUCCEED;
    3375             :         }
    3376             : 
    3377      974642 :         assert(bi.type != TYPE_void);
    3378             : 
    3379      974642 :         bvals = bi.base;
    3380      974642 :         if (bi.vh && bi.type)
    3381       69888 :                 bvars = bi.vh->base;
    3382             :         else
    3383             :                 bvars = NULL;
    3384      974642 :         cmp = ATOMcompare(bi.type);
    3385             : 
                               /* from here on every failure path leaves both counts at 0 */
    3386      974642 :         *cnt1 = *cnt2 = 0;
    3387             : 
    3388      974642 :         if (bi.sorted || bi.revsorted) {
                                       /* equal values are adjacent, so a new unique value is
                                        * seen exactly when the value differs from its
                                        * predecessor */
    3389             :                 const void *prev = NULL;
    3390     9299791 :                 algomsg = "sorted";
    3391     9299791 :                 for (i = 0; i < ci.ncand; i++) {
    3392     9233389 :                         if (i == half)
    3393       66402 :                                 *cnt1 = cnt;
    3394     9233389 :                         o = canditer_next(&ci);
    3395     9233386 :                         v = VALUE(b, o - b->hseqbase);
    3396     9233386 :                         if (prev == NULL || (*cmp)(v, prev) != 0) {
    3397     3887963 :                                 cnt++;
    3398             :                         }
    3399     9233390 :                         prev = v;
    3400             :                 }
    3401       66402 :                 *cnt2 = cnt;
    3402      908241 :         } else if (ATOMbasetype(bi.type) == TYPE_bte) {
                                       /* one bit per possible byte value: 256 bits on the stack */
    3403       45548 :                 unsigned char val;
    3404       45548 :                 uint32_t seen[256 / 32];
    3405             : 
    3406       45548 :                 algomsg = "byte-sized atoms";
    3407       45548 :                 assert(bvars == NULL);
    3408       45548 :                 memset(seen, 0, sizeof(seen));
    3409     5992975 :                 for (i = 0; i < ci.ncand; i++) {
                                               /* ci.ncand / 2 is the same value as `half' */
    3410     5947429 :                         if (i == ci.ncand/ 2) {
    3411             :                                 cnt = 0;
    3412      409614 :                                 for (int j = 0; j < 256 / 32; j++)
    3413      364064 :                                         cnt += candmask_pop(seen[j]);
    3414       45550 :                                 *cnt1 = cnt;
    3415             :                         }
    3416     5947429 :                         o = canditer_next(&ci);
    3417     5947427 :                         val = ((const unsigned char *) bvals)[o - b->hseqbase];
    3418     5947427 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3419      115600 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3420             :                         }
    3421             :                 }
                                       /* total = sum over all bitmap words; NOTE(review):
                                        * candmask_pop is presumably a population count --
                                        * confirm */
    3422             :                 cnt = 0;
    3423      409778 :                 for (int j = 0; j < 256 / 32; j++)
    3424      364232 :                         cnt += candmask_pop(seen[j]);
    3425       45546 :                 *cnt2 = cnt;
    3426      862693 :         } else if (ATOMbasetype(bi.type) == TYPE_sht) {
                                       /* same bitmap approach with 65536 bits, allocated
                                        * (zeroed) with GDKzalloc and freed before leaving the
                                        * branch */
    3427       44159 :                 unsigned short val;
    3428       44159 :                 uint32_t *seen = NULL;
    3429             : 
    3430       44159 :                 algomsg = "short-sized atoms";
    3431       44159 :                 assert(bvars == NULL);
    3432       44159 :                 seen = GDKzalloc((65536 / 32) * sizeof(seen[0]));
    3433       44159 :                 if (seen == NULL) {
    3434           0 :                         bat_iterator_end(&bi);
    3435           0 :                         return GDK_FAIL;
    3436             :                 }
    3437     6898491 :                 for (i = 0; i < ci.ncand; i++) {
    3438     6854332 :                         if (i == half) {
    3439             :                                 cnt = 0;
    3440    90367103 :                                 for (int j = 0; j < 65536 / 32; j++)
    3441    90322944 :                                         cnt += candmask_pop(seen[j]);
    3442       44159 :                                 *cnt1 = cnt;
    3443             :                         }
    3444     6854332 :                         o = canditer_next(&ci);
    3445     6854332 :                         val = ((const unsigned short *) bvals)[o - b->hseqbase];
    3446     6854332 :                         if (!(seen[val >> 5] & (1U << (val & 0x1F)))) {
    3447      134919 :                                 seen[val >> 5] |= 1U << (val & 0x1F);
    3448             :                         }
    3449             :                 }
    3450             :                 cnt = 0;
    3451    90367103 :                 for (int j = 0; j < 65536 / 32; j++)
    3452    90322944 :                         cnt += candmask_pop(seen[j]);
    3453       44159 :                 *cnt2 = cnt;
    3454       44159 :                 GDKfree(seen);
    3455       44159 :                 seen = NULL;
    3456             :         } else {
                                       /* general case: a private hash table, local to this
                                        * call, whose entries are candidate positions (the
                                        * loop index i), not BUN positions in b */
    3457      818534 :                 BUN prb;
    3458      818534 :                 BUN mask;
    3459      818534 :                 Hash hs = {
    3460             :                         .heapbckt.parentid = b->batCacheid,
    3461      818534 :                         .heaplink.parentid = b->batCacheid,
    3462             :                 };
    3463             : 
    3464      818534 :                 GDKclrerr();    /* not interested in BAThash errors */
    3465      818534 :                 algomsg = "new partial hash";
    3466      818534 :                 nme = BBP_physical(b->batCacheid);
    3467      818534 :                 mask = HASHmask(ci.ncand);
                                       /* never use a mask below 1 << 16 */
    3468      597420 :                 if (mask < ((BUN) 1 << 16))
    3469      818534 :                         mask = (BUN) 1 << 16;
                                       /* any failing step (farm selection, file name that
                                        * does not fit, HASHnew) takes the error exit */
    3470      818534 :                 if ((hs.heaplink.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
    3471      818534 :                     (hs.heapbckt.farmid = BBPselectfarm(TRANSIENT, bi.type, hashheap)) < 0 ||
    3472      818533 :                     snprintf(hs.heaplink.filename, sizeof(hs.heaplink.filename), "%s.thshjnl%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs.heaplink.filename) ||
    3473     1637069 :                     snprintf(hs.heapbckt.filename, sizeof(hs.heapbckt.filename), "%s.thshjnb%x", nme, (unsigned) MT_getpid()) >= (int) sizeof(hs.heapbckt.filename) ||
    3474      818535 :                     HASHnew(&hs, bi.type, ci.ncand, mask, BUN_NONE, false) != GDK_SUCCEED) {
    3475           0 :                         GDKerror("cannot allocate hash table\n");
    3476           0 :                         HEAPfree(&hs.heaplink, true);
    3477           0 :                         HEAPfree(&hs.heapbckt, true);
    3478           0 :                         bat_iterator_end(&bi);
    3479           0 :                         return GDK_FAIL;
    3480             :                 }
    3481   410666968 :                 for (i = 0; i < ci.ncand; i++) {
    3482   409848436 :                         if (i == half)
    3483      818530 :                                 *cnt1 = cnt;
    3484   409848436 :                         o = canditer_next(&ci);
    3485   409849111 :                         v = VALUE(b, o - b->hseqbase);
    3486   409848906 :                         prb = HASHprobe(&hs, v);
                                               /* walk the chain; hb is a candidate position, so
                                                * map it back to a position in b before comparing */
    3487   409851230 :                         for (hb = HASHget(&hs, prb);
    3488   409858944 :                              hb != BUN_NONE;
    3489        7714 :                              hb = HASHgetlink(&hs, hb)) {
    3490   243095788 :                                 BUN p = canditer_idx(&ci, hb) - b->hseqbase;
    3491   243128594 :                                 if (cmp(v, BUNtail(bi, p)) == 0)
    3492             :                                         break;
    3493             :                         }
                                               /* chain exhausted without a match: first occurrence */
    3494   409848961 :                         if (hb == BUN_NONE) {
    3495   166761769 :                                 cnt++;
    3496             :                                 /* enter into hash table */
    3497   166761769 :                                 HASHputlink(&hs, i, HASHget(&hs, prb));
    3498   166761483 :                                 HASHput(&hs, prb, i);
    3499             :                         }
    3500             :                 }
    3501      818532 :                 *cnt2 = cnt;
    3502      818532 :                 HEAPfree(&hs.heaplink, true);
    3503      818535 :                 HEAPfree(&hs.heapbckt, true);
    3504             :         }
    3505      974642 :         bat_iterator_end(&bi);
    3506             : 
    3507      974644 :         TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT
    3508             :                   " -> " BUNFMT " " BUNFMT " (%s -- " LLFMT "usec)\n",
    3509             :                   ALGOBATPAR(b), ALGOOPTBATPAR(s),
    3510             :                   *cnt1, *cnt2, algomsg, GDKusec() - t0);
    3511             : 
    3512             :         return GDK_SUCCEED;
    3513             : }
    3514             : 
    3515             : static double
    3516     1993802 : guess_uniques(BAT *b, struct canditer *ci)
    3517             : {
    3518     1993802 :         BUN cnt1, cnt2;
    3519     1993802 :         BAT *s1;
    3520             : 
    3521     1993802 :         MT_lock_set(&b->theaplock);
    3522     1993778 :         bool key = b->tkey;
    3523     1993778 :         double unique_est = b->tunique_est;
    3524     1993778 :         BUN batcount = BATcount(b);
    3525     1993778 :         MT_lock_unset(&b->theaplock);
    3526     1993786 :         if (key)
    3527      978660 :                 return (double) ci->ncand;
    3528             : 
    3529     1015126 :         if (ci->s == NULL ||
    3530           0 :             (ci->tpe == cand_dense && ci->ncand == batcount)) {
    3531     1015126 :                 if (unique_est != 0) {
    3532         184 :                         TRC_DEBUG(ALGO, "b=" ALGOBATFMT " use cached value\n",
    3533             :                                   ALGOBATPAR(b));
    3534         184 :                         return unique_est;
    3535             :                 }
    3536     1014942 :                 s1 = BATsample(b, 1000);
    3537             :         } else {
    3538           0 :                 BAT *s2 = BATsample(ci->s, 1000);
    3539           0 :                 if (s2 == NULL)
    3540             :                         return -1;
    3541           0 :                 s1 = BATproject(s2, ci->s);
    3542           0 :                 BBPreclaim(s2);
    3543             :         }
    3544     1014946 :         if (s1 == NULL)
    3545             :                 return -1;
    3546     1014946 :         BUN n2 = BATcount(s1);
    3547     1014946 :         BUN n1 = n2 / 2;
    3548     1014946 :         if (count_unique(b, s1, &cnt1, &cnt2) != GDK_SUCCEED) {
    3549           0 :                 BBPreclaim(s1);
    3550           0 :                 return -1;
    3551             :         }
    3552     1014942 :         BBPreclaim(s1);
    3553             : 
    3554     1014946 :         double A = (double) (cnt2 - cnt1) / (n2 - n1);
    3555     1014946 :         double B = cnt1 - n1 * A;
    3556             : 
    3557     1014946 :         B += A * ci->ncand;
    3558     1014946 :         MT_lock_set(&b->theaplock);
    3559     1014943 :         if (ci->s == NULL ||
    3560           0 :             (ci->tpe == cand_dense && ci->ncand == BATcount(b) && ci->ncand == batcount)) {
    3561     1014943 :                 if (b->tunique_est == 0)
    3562     1012100 :                         b->tunique_est = B;
    3563             :         }
    3564     1014943 :         MT_lock_unset(&b->theaplock);
    3565     1014941 :         return B;
    3566             : }
    3567             : 
    3568             : BUN
    3569     1228510 : BATguess_uniques(BAT *b, struct canditer *ci)
    3570             : {
    3571     1228510 :         struct canditer lci;
    3572     1228510 :         if (ci == NULL) {
    3573     1228512 :                 canditer_init(&lci, b, NULL);
    3574     1228512 :                 ci = &lci;
    3575             :         }
    3576     1228502 :         return (BUN) guess_uniques(b, ci);
    3577             : }
    3578             : 
    3579             : /* estimate the cost of doing a hashjoin with a hash on r; return value
    3580             :  * is the estimated cost, the last three arguments receive some extra
    3581             :  * information */
    3582             : double
    3583     1379340 : joincost(BAT *r, BUN lcount, struct canditer *rci,
    3584             :          bool *hash, bool *phash, bool *cand)
    3585             : {
    3586     1379340 :         bool rhash;
    3587     1379340 :         bool prhash = false;
    3588     1379340 :         bool rcand = false;
    3589     1379340 :         double rcost = 1;
    3590     1379340 :         bat parent;
    3591     1379340 :         BAT *b;
    3592     1379340 :         BUN nheads;
    3593     1379340 :         BUN cnt;
    3594             : 
    3595     1379340 :         (void) BATcheckhash(r);
    3596     1379332 :         MT_rwlock_rdlock(&r->thashlock);
    3597     1379339 :         rhash = r->thash != NULL;
    3598     1379339 :         nheads = r->thash ? r->thash->nheads : 0;
    3599     1379339 :         cnt = BATcount(r);
    3600     1379339 :         MT_rwlock_rdunlock(&r->thashlock);
    3601             : 
    3602     1379330 :         if ((rci->tpe == cand_materialized || rci->tpe == cand_except) &&
    3603      317600 :             rci->nvals > 0) {
    3604             :                 /* if we need to do binary search on candidate list,
    3605             :                  * take that into account; note checking the other
    3606             :                  * candidate types is essentially free */
    3607      317600 :                 rcost += log2((double) rci->nvals);
    3608             :         }
    3609     1379330 :         rcost *= lcount;
    3610     1379330 :         if (BATtdense(r)) {
    3611             :                 /* no need for a hash, and lookup is free */
    3612             :                 rhash = false;  /* don't use it, even if it's there */
    3613             :         } else {
    3614     1379329 :                 if (rhash) {
    3615             :                         /* average chain length */
    3616        7280 :                         rcost *= (double) cnt / nheads;
    3617     1372049 :                 } else if ((parent = VIEWtparent(r)) != 0 &&
    3618     1272150 :                            (b = BATdescriptor(parent)) != NULL) {
    3619     1272143 :                         if (BATcheckhash(b)) {
    3620       55167 :                                 MT_rwlock_rdlock(&b->thashlock);
    3621       55167 :                                 rhash = prhash = b->thash != NULL;
    3622       55167 :                                 if (rhash) {
    3623             :                                         /* average chain length */
    3624       55167 :                                         rcost *= (double) BATcount(b) / b->thash->nheads;
    3625             :                                 }
    3626       55167 :                                 MT_rwlock_rdunlock(&b->thashlock);
    3627             :                         }
    3628     1272110 :                         BBPunfix(b->batCacheid);
    3629             :                 }
    3630     1379310 :                 if (!rhash) {
    3631     1316863 :                         MT_lock_set(&r->theaplock);
    3632     1316840 :                         double unique_est = r->tunique_est;
    3633     1316840 :                         MT_lock_unset(&r->theaplock);
    3634     1316858 :                         if (unique_est == 0) {
    3635      765184 :                                 unique_est = guess_uniques(r, &(struct canditer){.tpe=cand_dense, .ncand=BATcount(r)});
    3636      765187 :                                 if (unique_est < 0)
    3637           0 :                                         return -1;
    3638             :                         }
    3639             :                         /* we have an estimate of the number of unique
    3640             :                          * values, assume some collisions */
    3641     1316861 :                         rcost *= 1.1 * ((double) cnt / unique_est);
    3642             : #ifdef PERSISTENTHASH
    3643             :                         /* only count the cost of creating the hash for
    3644             :                          * non-persistent bats */
    3645     1316861 :                         MT_lock_set(&r->theaplock);
    3646     1316856 :                         if (r->batRole != PERSISTENT /* || r->theap->dirty */ || GDKinmemory(r->theap->farmid))
    3647     1289512 :                                 rcost += cnt * 2.0;
    3648     1316856 :                         MT_lock_unset(&r->theaplock);
    3649             : #else
    3650             :                         rcost += cnt * 2.0;
    3651             : #endif
    3652             :                 }
    3653             :         }
    3654     1379300 :         if (cand) {
    3655       28645 :                 if (rci->ncand != BATcount(r) && rci->tpe != cand_mask) {
    3656             :                         /* instead of using the hash on r (cost in
    3657             :                          * rcost), we can build a new hash on r taking
    3658             :                          * the candidate list into account; don't do
    3659             :                          * this for masked candidate since the searching
    3660             :                          * of the candidate list (canditer_idx) will
    3661             :                          * kill us */
    3662        2168 :                         double rccost;
    3663        2168 :                         if (rhash && !prhash) {
    3664         841 :                                 rccost = (double) cnt / nheads;
    3665             :                         } else {
    3666        1327 :                                 MT_lock_set(&r->theaplock);
    3667        1327 :                                 double unique_est = r->tunique_est;
    3668        1327 :                                 MT_lock_unset(&r->theaplock);
    3669        1327 :                                 if (unique_est == 0) {
    3670         102 :                                         unique_est = guess_uniques(r, rci);
    3671         102 :                                         if (unique_est < 0)
    3672             :                                                 return -1;
    3673             :                                 }
    3674             :                                 /* we have an estimate of the number of unique
    3675             :                                  * values, assume some chains */
    3676        1327 :                                 rccost = 1.1 * ((double) cnt / unique_est);
    3677             :                         }
    3678        2168 :                         rccost *= lcount;
    3679        2168 :                         rccost += rci->ncand * 2.0; /* cost of building the hash */
    3680        2168 :                         if (rccost < rcost) {
    3681       28645 :                                 rcost = rccost;
    3682       28645 :                                 rcand = true;
    3683             :                         }
    3684             :                 }
    3685       28645 :                 *cand = rcand;
    3686             :         }
    3687     1379300 :         *hash = rhash;
    3688     1379300 :         *phash = prhash;
    3689     1379300 :         return rcost;
    3690             : }
    3691             : /* Comparison-operator bit flags that make up the opcode of thetajoin.
                     :  * MASK_EQ, MASK_LT and MASK_GT are three distinct bits; thetajoin
                     :  * accepts a pair of values when the sign of their three-way
                     :  * comparison corresponds to a bit that is set in the opcode.
                     :  * MASK_LE, MASK_GE and MASK_NE are the two-bit combinations. */
    3692             : #define MASK_EQ         1
    3693             : #define MASK_LT         2
    3694             : #define MASK_GT         4
    3695             : #define MASK_LE         (MASK_EQ | MASK_LT)
    3696             : #define MASK_GE         (MASK_EQ | MASK_GT)
    3697             : #define MASK_NE         (MASK_LT | MASK_GT)
    3698             : /* thetajoin: nested-loop theta join of l and r.
                     :  *
                     :  * For every candidate of l (optionally restricted by sl) the
                     :  * candidates of r (optionally restricted by sr) are visited, and the
                     :  * pair is appended to the results when the three-way comparison of
                     :  * the two values satisfies opcode (an OR of MASK_EQ, MASK_LT and
                     :  * MASK_GT; at least one bit must be set).  Unless nil_matches is
                     :  * set, nil values on either side never match.
                     :  *
                     :  * r1p, r2p: receive the result BATs with the matching oids of l and
                     :  *      r; r2p may be NULL when the right-hand oids are not wanted.
                     :  * estimate: result size hint, handed to joininitresults.
                     :  * reason, t0: only used for the debug trace; t0 is the start time
                     :  *      from which the elapsed microseconds are computed.
                     :  *
                     :  * Returns GDK_SUCCEED (or whatever nomatch returns for the trivial
                     :  * void/nil cases), or GDK_FAIL when joininitresults reports
                     :  * BUN_NONE, when maybeextend fails, or when the timeout check jumps
                     :  * to bailout; on the bailout path both result BATs are released with
                     :  * BBPreclaim.
                     :  * NOTE(review): *r1p and *r2p are not reset after that release --
                     :  * confirm that callers do not look at them on GDK_FAIL. */
    3699             : static gdk_return
    3700       16836 : thetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int opcode,
    3701             :           BUN estimate, bool nil_matches, const char *reason, lng t0)
    3702             : {
    3703       16836 :         struct canditer lci, rci;
    3704       16836 :         const char *lvals, *rvals;
    3705       16836 :         const char *lvars, *rvars; /* presumably consumed by the VALUE macro -- confirm */
    3706       16836 :         const void *nil = ATOMnilptr(l->ttype);
    3707       16836 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    3708       16836 :         const void *vl, *vr;
    3709       16836 :         oid lastr = 0;          /* last value inserted into r2 */
    3710       16836 :         BUN nr;                 /* number of matches for the current l value */
    3711       16836 :         oid lo, ro;
    3712       16836 :         int c;                  /* result of cmp(vl, vr) */
    3713       16836 :         bool lskipped = false;  /* whether we skipped values in l */
    3714       16836 :         lng loff = 0, roff = 0; /* head oid -> tail value offsets for dense void inputs */
    3715       16836 :         oid lval = oid_nil, rval = oid_nil;
    3716             : 
    3717       16836 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    3718             : 
    3719       50502 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    3720       16836 :         assert((opcode & (MASK_EQ | MASK_LT | MASK_GT)) != 0);
    3721             :         /* both iterators are released with bat_iterator_end on every
                     :          * exit path below */
    3722       16836 :         BATiter li = bat_iterator(l);
    3723       16835 :         BATiter ri = bat_iterator(r);
    3724             : 
    3725       16837 :         canditer_init(&lci, l, sl);
    3726       16830 :         canditer_init(&rci, r, sr);
    3727             :         /* lvals/rvals are NULL for void columns; lvars/rvars are set to
                     :          * the base of the var heap only when the iterator has one and
                     :          * its type is non-zero */
    3728       16837 :         lvals = BATtvoid(l) ? NULL : (const char *) li.base;
    3729       16837 :         rvals = BATtvoid(r) ? NULL : (const char *) ri.base;
    3730       16837 :         if (li.vh && li.type) {
    3731           8 :                 assert(ri.vh && ri.type);
    3732           8 :                 lvars = li.vh->base;
    3733           8 :                 rvars = ri.vh->base;
    3734             :         } else {
    3735       16829 :                 assert(ri.vh == NULL);
    3736             :                 lvars = rvars = NULL;
    3737             :         }
    3738             :         /* void inputs: a void column that is not dense holds only nils,
                     :          * so without nil_matches the result is empty; a dense one gets
                     :          * the offset that turns a head oid into its tail value */
    3739       16837 :         if (BATtvoid(l)) {
    3740           0 :                 if (!BATtdensebi(&li)) {
    3741           0 :                         if (!nil_matches) {
    3742             :                                 /* trivial: nils don't match anything */
    3743           0 :                                 bat_iterator_end(&li);
    3744           0 :                                 bat_iterator_end(&ri);
    3745           0 :                                 return nomatch(r1p, r2p, NULL, l, r, &lci,
    3746             :                                                0, false, false, __func__, t0);
    3747             :                         }
    3748             :                 } else {
    3749           0 :                         loff = (lng) l->tseqbase - (lng) l->hseqbase;
    3750             :                 }
    3751             :         }
    3752       16837 :         if (BATtvoid(r)) {
    3753           1 :                 if (!BATtdensebi(&ri)) {
    3754           0 :                         if (!nil_matches) {
    3755             :                                 /* trivial: nils don't match anything */
    3756           0 :                                 bat_iterator_end(&li);
    3757           0 :                                 bat_iterator_end(&ri);
    3758           0 :                                 return nomatch(r1p, r2p, NULL, l, r, &lci,
    3759             :                                                0, false, false, __func__, t0);
    3760             :                         }
    3761             :                 } else {
    3762           1 :                         roff = (lng) r->tseqbase - (lng) r->hseqbase;
    3763             :                 }
    3764             :         }
    3765             :         /* set up *r1p (and *r2p if wanted); BUN_NONE signals failure */
    3766       16837 :         BUN maxsize = joininitresults(r1p, r2p, NULL, lci.ncand, rci.ncand, false, false,
    3767             :                                       false, false, false, false, estimate);
    3768       16836 :         if (maxsize == BUN_NONE) {
    3769           0 :                 bat_iterator_end(&li);
    3770           0 :                 bat_iterator_end(&ri);
    3771           0 :                 return GDK_FAIL;
    3772             :         }
    3773       16836 :         BAT *r1 = *r1p;
    3774       16836 :         BAT *r2 = r2p ? *r2p : NULL;
    3775             :         /* start with the most optimistic properties; the loop below
                     :          * clears each one as soon as an appended pair disproves it */
    3776       16836 :         r1->tkey = true;
    3777       16836 :         r1->tsorted = true;
    3778       16836 :         r1->trevsorted = true;
    3779       16836 :         if (r2) {
    3780        4299 :                 r2->tkey = true;
    3781        4299 :                 r2->tsorted = true;
    3782        4299 :                 r2->trevsorted = true;
    3783             :         }
    3784             : 
    3785             :         /* nested loop implementation for theta join; vl and vr start out
                     :          * pointing at lval and rval, the buffers that receive the
                     :          * values synthesized for dense void columns */
    3786             :         vl = &lval;
    3787             :         vr = &rval;
    3788      188414 :         for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
    3789      171596 :                 lo = canditer_next(&lci);
    3790      170701 :                 if (lvals)
    3791      170701 :                         vl = VALUE(l, lo - l->hseqbase);
    3792           0 :                 else if (BATtdensebi(&li))
    3793           0 :                         lval = (oid) ((lng) lo + loff);
    3794      170701 :                 nr = 0;
    3795      170701 :                 if (nil_matches || cmp(vl, nil) != 0) { /* a nil on the left only joins when nil_matches */
    3796      166823 :                         canditer_reset(&rci);
    3797     3426929 :                         TIMEOUT_LOOP(rci.ncand, qry_ctx) {
    3798     3095175 :                                 ro = canditer_next(&rci);
    3799     3065261 :                                 if (rvals)
    3800     3065257 :                                         vr = VALUE(r, ro - r->hseqbase);
    3801           4 :                                 else if (BATtdensebi(&ri))
    3802           4 :                                         rval = (oid) ((lng) ro + roff);
    3803     3065261 :                                 if (!nil_matches && cmp(vr, nil) == 0)
    3804       60167 :                                         continue;
    3805     3004052 :                                 c = cmp(vl, vr);
    3806     3019910 :                                 if (!((opcode & MASK_LT && c < 0) ||
    3807     2810804 :                                       (opcode & MASK_GT && c > 0) ||
    3808     1527655 :                                       (opcode & MASK_EQ && c == 0)))
    3809     1527631 :                                         continue; /* comparison outcome not selected by opcode */
    3810     1492279 :                                 if (maybeextend(r1, r2, NULL, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    3811           0 :                                         goto bailout;
    3812     1506205 :                                 if (BATcount(r1) > 0) { /* r1 already holds earlier results */
    3813     1496018 :                                         if (r2 && lastr + 1 != ro)
    3814       41280 :                                                 r2->tseqbase = oid_nil; /* r2 is not consecutive, so not dense */
    3815     1496018 :                                         if (nr == 0) { /* first match for this l value */
    3816      100275 :                                                 r1->trevsorted = false;
    3817      100275 :                                                 if (r2 == NULL) {
    3818             :                                                         /* nothing */
    3819       31078 :                                                 } else if (lastr > ro) {
    3820       29253 :                                                         r2->tsorted = false;
    3821       29253 :                                                         r2->tkey = false;
    3822        1825 :                                                 } else if (lastr < ro) {
    3823           0 :                                                         r2->trevsorted = false;
    3824             :                                                 } else {
    3825        1825 :                                                         r2->tkey = false; /* same right-hand oid as the previous pair */
    3826             :                                                 }
    3827             :                                         }
    3828             :                                 }
    3829     1506205 :                                 APPEND(r1, lo);
    3830     1506205 :                                 if (r2) {
    3831     1208499 :                                         APPEND(r2, ro);
    3832             :                                 }
    3833     1506205 :                                 lastr = ro;
    3834     1506205 :                                 nr++;
    3835             :                         }
    3836      166017 :                         TIMEOUT_CHECK(qry_ctx,
    3837             :                                       GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    3838             :                 }
    3839      171578 :                 if (nr > 1) { /* lo was appended more than once */
    3840       83619 :                         r1->tkey = false;
    3841       83619 :                         r1->tseqbase = oid_nil;
    3842       83619 :                         if (r2) {
    3843       32822 :                                 r2->trevsorted = false;
    3844             :                         }
    3845       87959 :                 } else if (nr == 0) {
    3846       60141 :                         lskipped = BATcount(r1) > 0; /* an l value without match after earlier results */
    3847       27818 :                 } else if (lskipped) {
    3848       20929 :                         r1->tseqbase = oid_nil; /* single match after a gap: r1 is not dense */
    3849             :                 }
    3850             :         }
    3851             :         /* also set other bits of heap to correct value to indicate size */
    3852       16818 :         BATsetcount(r1, BATcount(r1));
    3853       16827 :         if (r2) {
    3854        4297 :                 BATsetcount(r2, BATcount(r2));
    3855        4297 :                 assert(BATcount(r1) == BATcount(r2));
    3856             :         }
    3857       16827 :         if (BATcount(r1) > 0) { /* results that BATtdense still accepts get their first stored oid as seqbase */
    3858       11254 :                 if (BATtdense(r1))
    3859         171 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    3860       11254 :                 if (r2 && BATtdense(r2))
    3861         386 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    3862             :         } else {
    3863        5573 :                 r1->tseqbase = 0;
    3864        5573 :                 if (r2) {
    3865         650 :                         r2->tseqbase = 0;
    3866             :                 }
    3867             :         }
    3868       16827 :         bat_iterator_end(&li);
    3869       16836 :         bat_iterator_end(&ri);
    3870       16837 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    3871             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    3872             :                   "opcode=%s%s%s; %s -> " ALGOBATFMT "," ALGOOPTBATFMT
    3873             :                   " (" LLFMT "usec)\n",
    3874             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3875             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3876             :                   opcode & MASK_LT ? "<" : "",
    3877             :                   opcode & MASK_GT ? ">" : "",
    3878             :                   opcode & MASK_EQ ? "=" : "",
    3879             :                   reason,
    3880             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3881             :                   GDKusec() - t0);
    3882             :         return GDK_SUCCEED;
    3883             :         /* failure path: release the iterators and both result BATs */
    3884           0 :   bailout:
    3885           0 :         bat_iterator_end(&li);
    3886           0 :         bat_iterator_end(&ri);
    3887           0 :         BBPreclaim(r1);
    3888           0 :         BBPreclaim(r2);
    3889             :         return GDK_FAIL;
    3890             : }
    3891             : 
    3892             : /* fetchjoin: small ordered right, dense left, oid's only, do fetches.
                     :  *
                     :  * l is dense, so its candidates (lci) cover the tail values
                     :  * [lo, hi).  The positions [b, e) of r that presumably hold values
                     :  * in that range are looked up with SORTfndfirst (r sorted) or
                     :  * SORTfndlast (r reverse sorted, which is asserted), then clamped to
                     :  * the window of r described by rci.  Every value found is converted
                     :  * back to the oid of l that carries it and stored in *r1p; if r2p
                     :  * is set, *r2p becomes a dense BAT starting at r->hseqbase + b.
                     :  *
                     :  * sl, sr and reason are only used in the debug trace; t0 is the
                     :  * start time for the elapsed-time report.
                     :  *
                     :  * Returns the result of nomatch when no position qualifies,
                     :  * GDK_FAIL when a result BAT cannot be created, else GDK_SUCCEED. */
    3893             : static gdk_return
    3894           0 : fetchjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    3895             :           struct canditer *restrict lci, struct canditer *restrict rci,
    3896             :           const char *reason, lng t0)
    3897             : {
    3898           0 :         oid lo = lci->seq - l->hseqbase + l->tseqbase, hi = lo + lci->ncand;
    3899           0 :         BUN b, e, p;
    3900           0 :         BAT *r1, *r2 = NULL;
    3901             : 
    3902           0 :         MT_thread_setalgorithm(__func__);
    3903           0 :         if (r->tsorted) {
    3904           0 :                 b = SORTfndfirst(r, &lo);
    3905           0 :                 e = SORTfndfirst(r, &hi);
    3906             :         } else {
    3907           0 :                 assert(r->trevsorted);
    3908           0 :                 b = SORTfndlast(r, &hi);
    3909           0 :                 e = SORTfndlast(r, &lo);
    3910             :         }
    3911           0 :         if (b < rci->seq - r->hseqbase) /* clamp [b, e) to the candidate window of r */
    3912             :                 b = rci->seq - r->hseqbase;
    3913           0 :         if (e > rci->seq + rci->ncand - r->hseqbase)
    3914             :                 e = rci->seq + rci->ncand - r->hseqbase;
    3915           0 :         if (e <= b) {   /* the clamping can leave the range empty or even
                     :                          * inverted (all hits lie outside the candidate
                     :                          * window of r): nothing matches, and e - b must
                     :                          * not be used as a size in that case */
    3916           0 :                 return nomatch(r1p, r2p, NULL, l, r, lci,
    3917             :                                0, false, false, __func__, t0);
    3918             :         }
    3919           0 :         r1 = COLnew(0, TYPE_oid, e - b, TRANSIENT);
    3920           0 :         if (r1 == NULL)
    3921             :                 return GDK_FAIL;
    3922           0 :         if (r2p) {
    3923           0 :                 if ((r2 = BATdense(0, r->hseqbase + b, e - b)) == NULL) {
    3924           0 :                         BBPreclaim(r1);
    3925           0 :                         return GDK_FAIL;
    3926             :                 }
    3927           0 :                 *r2p = r2;
    3928             :         }
    3929           0 :         *r1p = r1;
    3930           0 :         oid *op = (oid *) Tloc(r1, 0);
    3931           0 :         BATiter ri = bat_iterator(r);
    3932           0 :         const oid *rp = (const oid *) ri.base;
    3933           0 :         for (p = b; p < e; p++) {
    3934           0 :                 *op++ = rp[p] + l->hseqbase - l->tseqbase; /* tail value of r -> oid of l */
    3935             :         }
    3936           0 :         BATsetcount(r1, e - b);
    3937           0 :         r1->tkey = ri.key; /* r1 takes over the key/order properties of r */
    3938           0 :         r1->tsorted = ri.sorted || e - b <= 1;
    3939           0 :         r1->trevsorted = ri.revsorted || e - b <= 1;
    3940           0 :         r1->tseqbase = e == b ? 0 : e - b == 1 ? *(const oid *)Tloc(r1, 0) : oid_nil;
    3941           0 :         bat_iterator_end(&ri);
    3942           0 :         TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    3943             :                   "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    3944             :                   "sr=" ALGOOPTBATFMT ") %s "
    3945             :                   "-> (" ALGOBATFMT "," ALGOOPTBATFMT ") " LLFMT "us\n",
    3946             :                   __func__,
    3947             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    3948             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    3949             :                   reason,
    3950             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    3951             :                   GDKusec() - t0);
    3952             : 
    3953             :         return GDK_SUCCEED;
    3954             : }
    3955             : /* bitmaskjoin: semi join or difference of a dense l against the oid
                     :  * values of r, using one bit per candidate of l.
                     :  *
                     :  * Every candidate of r is fetched with BUNtoid and translated to the
                     :  * index of the candidate of l it refers to, and the bit at that
                     :  * index is set; nil values and values that fall outside the
                     :  * candidate range of l are skipped.  cnt counts the distinct bits
                     :  * that were set.  With only_misses the result lists the candidates
                     :  * whose bit is still clear, otherwise those whose bit is set.
                     :  *
                     :  * Returns a new BAT with the selected oids of l in ascending order
                     :  * (the result of BATdense when none or all candidates are selected),
                     :  * or NULL when an allocation fails.  reason and t0 are only used in
                     :  * the debug trace. */
    3956             : static BAT *
    3957        5210 : bitmaskjoin(BAT *l, BAT *r,
    3958             :             struct canditer *restrict lci, struct canditer *restrict rci,
    3959             :             bool only_misses,
    3960             :             const char *reason, lng t0)
    3961             : {
    3962        5210 :         BAT *r1;
    3963        5210 :         size_t nmsk = (lci->ncand + 31) / 32; /* 32 candidates per mask word, rounded up */
    3964        5210 :         uint32_t *mask = GDKzalloc(nmsk * sizeof(uint32_t));
    3965        5210 :         BUN cnt = 0; /* number of distinct bits set */
    3966             : 
    3967        5210 :         MT_thread_setalgorithm(__func__);
    3968        5210 :         if (mask == NULL)
    3969             :                 return NULL;
    3970             :         /* mark, for every candidate of r, the candidate of l it refers to */
    3971    22464841 :         for (BUN n = 0; n < rci->ncand; n++) {
    3972    22459631 :                 oid o = canditer_next(rci) - r->hseqbase;
    3973    22223366 :                 o = BUNtoid(r, o);
    3974    22459631 :                 if (is_oid_nil(o))
    3975           0 :                         continue;
    3976    22459631 :                 o += l->hseqbase;
    3977    22459631 :                 if (o < lci->seq + l->tseqbase) /* before the first candidate of l */
    3978           2 :                         continue;
    3979    22459629 :                 o -= lci->seq + l->tseqbase; /* o is now an index into the candidates of l */
    3980    22459629 :                 if (o >= lci->ncand) /* past the last candidate of l */
    3981           0 :                         continue;
    3982    22459629 :                 if ((mask[o >> 5] & (1U << (o & 0x1F))) == 0) { /* first hit for this candidate */
    3983    16408029 :                         cnt++;
    3984    16408029 :                         mask[o >> 5] |= 1U << (o & 0x1F);
    3985             :                 }
    3986             :         }
    3987        5210 :         if (only_misses)
    3988        3975 :                 cnt = lci->ncand - cnt; /* from here on cnt is the number of clear bits */
    3989        5210 :         if (cnt == 0 || cnt == lci->ncand) { /* none or all selected: no need to materialize */
    3990        1172 :                 GDKfree(mask);
    3991        1172 :                 if (cnt == 0)
    3992         317 :                         return BATdense(0, 0, 0);
    3993         855 :                 return BATdense(0, lci->seq, lci->ncand);
    3994             :         }
    3995        4038 :         r1 = COLnew(0, TYPE_oid, cnt, TRANSIENT);
    3996        4038 :         if (r1 != NULL) {
    3997        4038 :                 oid *r1p = Tloc(r1, 0);
    3998             : 
    3999        4038 :                 r1->tkey = true;
    4000        4038 :                 r1->tnil = false;
    4001        4038 :                 r1->tnonil = true;
    4002        4038 :                 r1->tsorted = true;
    4003        4038 :                 r1->trevsorted = cnt <= 1;
    4004        4038 :                 if (only_misses) {
    4005             :                         /* set the bits for unused values at the
    4006             :                          * end so that we don't need special
    4007             :                          * code in the loop */
    4008        3658 :                         if (lci->ncand & 0x1F)
    4009        3615 :                                 mask[nmsk - 1] |= ~0U << (lci->ncand & 0x1F);
    4010     1985204 :                         for (size_t i = 0; i < nmsk; i++)
    4011     1981546 :                                 if (mask[i] != ~0U) /* skip words without a clear bit */
    4012    62503749 :                                         for (uint32_t j = 0; j < 32; j++)
    4013    60609696 :                                                 if ((mask[i] & (1U << j)) == 0)
    4014    53997794 :                                                         *r1p++ = i * 32 + j + lci->seq;
    4015             :                 } else {
    4016      311035 :                         for (size_t i = 0; i < nmsk; i++)
    4017      310655 :                                 if (mask[i] != 0U) /* skip words without a set bit */
    4018     7870929 :                                         for (uint32_t j = 0; j < 32; j++)
    4019     7632416 :                                                 if ((mask[i] & (1U << j)) != 0)
    4020     6869314 :                                                         *r1p++ = i * 32 + j + lci->seq;
    4021             :                 }
    4022        4038 :                 BATsetcount(r1, cnt);
    4023        4038 :                 assert((BUN) (r1p - (oid*) Tloc(r1, 0)) == BATcount(r1));
    4024             : 
    4025        4038 :                 TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
    4026             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4027             :                           "sr=" ALGOOPTBATFMT ",only_misses=%s; %s "
    4028             :                           "-> " ALGOBATFMT " (" LLFMT "usec)\n",
    4029             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4030             :                           ALGOOPTBATPAR(lci->s), ALGOOPTBATPAR(rci->s),
    4031             :                           only_misses ? "true" : "false",
    4032             :                           reason,
    4033             :                           ALGOBATPAR(r1),
    4034             :                           GDKusec() - t0);
    4035             :         }
    4036        4038 :         GDKfree(mask); /* also reached when COLnew failed; r1 is NULL then */
    4037        4038 :         return r1;
    4038             : }
    4039             : 
    4040             : /* Make the implementation choices for various left joins.
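    4041             :  * If r3p is set, this is a "mark join" and *r3p will be a third
                     :  * return value containing a bat with type msk with a bit set for each
                     :  * match (NOTE(review): the original sentence broke off after "for
                     :  * each"; "match" is a guess -- confirm).  The flags are: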
    4042             :  * nil_matches: nil is an ordinary value that can match;
    4043             :  * nil_on_miss: outer join: fill in a nil value in case of no match;
    4044             :  * semi: semi join: return one of potentially more than one matches;
    4045             :  * only_misses: difference: list rows without match on the right;
    4046             :  * not_in: for implementing NOT IN: if nil on right then there are no matches;
    4047             :  * max_one: error if there is more than one match;
    4048             :  * min_one: error if there are no matches. */
    4049             : static gdk_return
    4050      110547 : leftjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4051             :          bool nil_matches, bool nil_on_miss, bool semi, bool only_misses,
    4052             :          bool not_in, bool max_one, bool min_one, BUN estimate,
    4053             :          const char *func, lng t0)
    4054             : {
    4055      110547 :         struct canditer lci, rci;
    4056      110547 :         bool rhash, prhash, rcand;
    4057      110547 :         bat parent;
    4058      110547 :         double rcost = 0;
    4059      110547 :         gdk_return rc;
    4060      110547 :         BAT *lp = NULL;
    4061      110547 :         BAT *rp = NULL;
    4062             : 
    4063      110547 :         MT_thread_setalgorithm(__func__);
    4064             :         /* only_misses implies left output only */
    4065      110529 :         assert(!only_misses || r2p == NULL);
    4066             :         /* if nil_on_miss is set, we really need a right output */
    4067      110529 :         assert(!nil_on_miss || r2p != NULL || r3p != NULL);
    4068             :         /* if not_in is set, then so is only_misses */
    4069      110529 :         assert(!not_in || only_misses);
    4070             :         /* if r3p is set, then so is nil_on_miss */
    4071      110529 :         assert(r3p == NULL || nil_on_miss);
    4072      110529 :         *r1p = NULL;
    4073      110529 :         if (r2p)
    4074         997 :                 *r2p = NULL;
    4075      110529 :         if (r3p)
    4076        9636 :                 *r3p = NULL;
    4077             : 
    4078      110529 :         canditer_init(&lci, l, sl);
    4079      110528 :         canditer_init(&rci, r, sr);
    4080             : 
    4081      110532 :         if ((parent = VIEWtparent(l)) != 0) {
    4082        3330 :                 lp = BATdescriptor(parent);
    4083        3330 :                 if (lp == NULL)
    4084             :                         return GDK_FAIL;
    4085        3330 :                 if (l->hseqbase == lp->hseqbase &&
    4086        4385 :                     BATcount(l) == BATcount(lp) &&
    4087        3256 :                     ATOMtype(l->ttype) == ATOMtype(lp->ttype)) {
    4088             :                         l = lp;
    4089             :                 } else {
    4090        1702 :                         BBPunfix(lp->batCacheid);
    4091        1702 :                         lp = NULL;
    4092             :                 }
    4093             :         }
    4094      110532 :         if ((parent = VIEWtparent(r)) != 0) {
    4095        3857 :                 rp = BATdescriptor(parent);
    4096        3857 :                 if (rp == NULL) {
    4097           0 :                         BBPreclaim(lp);
    4098           0 :                         return GDK_FAIL;
    4099             :                 }
    4100        3857 :                 if (r->hseqbase == rp->hseqbase &&
    4101        6015 :                     BATcount(r) == BATcount(rp) &&
    4102        4316 :                     ATOMtype(r->ttype) == ATOMtype(rp->ttype)) {
    4103             :                         r = rp;
    4104             :                 } else {
    4105        1701 :                         BBPunfix(rp->batCacheid);
    4106        1701 :                         rp = NULL;
    4107             :                 }
    4108             :         }
    4109             : 
    4110      110532 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    4111           5 :                 l = BATunmask(l);
    4112           5 :                 BBPreclaim(lp);
    4113           5 :                 if (l == NULL) {
    4114           0 :                         BBPreclaim(rp);
    4115           0 :                         return GDK_FAIL;
    4116             :                 }
    4117             :                 lp = l;
    4118             :         }
    4119      110532 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    4120          66 :                 r = BATunmask(r);
    4121          66 :                 BBPreclaim(rp);
    4122          66 :                 if (r == NULL) {
    4123           0 :                         BBPreclaim(lp);
    4124           0 :                         return GDK_FAIL;
    4125             :                 }
    4126             :                 rp = r;
    4127             :         }
    4128             : 
    4129      110532 :         if (joinparamcheck(l, r, NULL, sl, sr, func) != GDK_SUCCEED) {
    4130           0 :                 rc = GDK_FAIL;
    4131           0 :                 goto doreturn;
    4132             :         }
    4133             : 
    4134      110545 :         if (lci.ncand == 0 || rci.ncand == 0) {
    4135       80069 :                 TRC_DEBUG(ALGO, "%s(l=" ALGOBATFMT ","
    4136             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4137             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d,"
    4138             :                           "nil_on_miss=%d,semi=%d,only_misses=%d,"
    4139             :                           "not_in=%d,max_one=%d,min_one=%d)\n",
    4140             :                           func,
    4141             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4142             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4143             :                           nil_matches, nil_on_miss, semi, only_misses,
    4144             :                           not_in, max_one, min_one);
    4145       80069 :                 rc = nomatch(r1p, r2p, r3p, l, r, &lci,
    4146             :                              0, nil_on_miss, only_misses, func, t0);
    4147       80057 :                 goto doreturn;
    4148             :         }
    4149             : 
    4150       30476 :         if (!only_misses && !not_in &&
    4151        3759 :             (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) ||
    4152        3701 :              (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)))) {
    4153             :                 /* single value to join, use select */
    4154        1183 :                 rc = selectjoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4155             :                                 nil_matches, nil_on_miss, semi, max_one, min_one,
    4156             :                                 t0, false, func);
    4157        1183 :                 goto doreturn;
    4158       29293 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    4159             :                 /* use special implementation for dense right-hand side */
    4160       17828 :                 rc = mergejoin_void(r1p, r2p, r3p, l, r, &lci, &rci,
    4161             :                                     nil_on_miss, only_misses, t0, false,
    4162             :                                     func);
    4163       17828 :                 goto doreturn;
    4164       11465 :         } else if (BATtdense(l)
    4165        5280 :                    && lci.tpe == cand_dense
    4166        5261 :                    && rci.tpe == cand_dense
    4167             :                    && !semi
    4168        5261 :                    && !max_one
    4169             :                    && !min_one
    4170        3986 :                    && !nil_matches
    4171             :                    && !only_misses
    4172        3986 :                    && !not_in
    4173             :                    /* && (rci.ncand * 1024) < lci.ncand */
    4174           0 :                    && (BATordered(r) || BATordered_rev(r))) {
    4175           0 :                 assert(ATOMtype(l->ttype) == TYPE_oid); /* tdense */
    4176           0 :                 rc = fetchjoin(r1p, r2p, l, r, sl, sr, &lci, &rci, func, t0);
    4177           0 :                 goto doreturn;
    4178       11465 :         } else if (BATtdense(l)
    4179        5280 :                    && lci.tpe == cand_dense
    4180        5261 :                    && r2p == NULL
    4181        5222 :                    && (semi || only_misses)
    4182             :                    && !nil_on_miss
    4183        5222 :                    && !not_in
    4184             :                    && !max_one
    4185        5211 :                    && !min_one) {
    4186        5210 :                 *r1p = bitmaskjoin(l, r, &lci, &rci, only_misses, func, t0);
    4187        5210 :                 rc = *r1p == NULL ? GDK_FAIL : GDK_SUCCEED;
    4188        5210 :                 goto doreturn;
    4189             :         } else {
    4190             :                 /* looking at r->tvheap, so we need a lock */
    4191        6255 :                 MT_lock_set(&r->theaplock);
    4192        6255 :                 BUN hsz = r->tvheap ? r->tvheap->size : 0;
    4193        6255 :                 MT_lock_unset(&r->theaplock);
    4194        6255 :                 if ((BATordered(r) || BATordered_rev(r))
    4195        4704 :                     && (BATordered(l)
    4196         491 :                         || BATordered_rev(l)
    4197         484 :                         || BATtdense(r)
    4198         484 :                         || lci.ncand < 1024
    4199         244 :                         || BATcount(r) * (r->twidth + hsz + 2 * sizeof(BUN)) > GDK_mem_maxsize / (GDKnr_threads ? GDKnr_threads : 1))) {
    4200        4582 :                         rc = mergejoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4201             :                                        nil_matches, nil_on_miss, semi, only_misses,
    4202             :                                        not_in, max_one, min_one, estimate, t0, false, func);
    4203        4582 :                         goto doreturn;
    4204             :                 }
    4205             :         }
    4206        1673 :         rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
    4207        1673 :         if (rcost < 0) {
    4208           0 :                 rc = GDK_FAIL;
    4209           0 :                 goto doreturn;
    4210             :         }
    4211             : 
    4212        1673 :         if (!nil_on_miss && !only_misses && !not_in && !max_one && !min_one) {
    4213             :                 /* maybe do a hash join on the swapped operands; if we
    4214             :                  * do, we need to sort the output, so we take that into
    4215             :                  * account as well */
    4216         960 :                 bool lhash, plhash, lcand, rkey = r->tkey;
    4217         960 :                 double lcost;
    4218             : 
    4219         960 :                 lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
    4220         960 :                 if (lcost < 0) {
    4221           0 :                         rc = GDK_FAIL;
    4222         820 :                         goto doreturn;
    4223             :                 }
    4224         960 :                 if (semi && !rkey)
    4225         844 :                         lcost += rci.ncand; /* cost of BATunique(r) */
    4226             :                 /* add cost of sorting; obviously we don't know the
    4227             :                  * size, so we guess that the size of the output is
    4228             :                  * the same as the right input */
    4229         960 :                 lcost += rci.ncand * log((double) rci.ncand); /* sort */
    4230         960 :                 if (lcost < rcost) {
    4231         820 :                         BAT *tmp = sr;
    4232         820 :                         BAT *r1, *r2;
    4233         820 :                         if (semi && !rkey) {
    4234         813 :                                 sr = BATunique(r, sr);
    4235         813 :                                 if (sr == NULL) {
    4236           0 :                                         rc = GDK_FAIL;
    4237           0 :                                         goto doreturn;
    4238             :                                 }
    4239         813 :                                 canditer_init(&rci, r, sr);
    4240             :                         }
    4241         820 :                         rc = hashjoin(&r2, &r1, NULL, r, l, &rci, &lci, nil_matches,
    4242             :                                       false, false, false, false, false, false, estimate,
    4243             :                                       t0, true, lhash, plhash, lcand, func);
    4244         820 :                         if (semi && !rkey)
    4245         813 :                                 BBPunfix(sr->batCacheid);
    4246         820 :                         if (rc != GDK_SUCCEED)
    4247           0 :                                 goto doreturn;
    4248         820 :                         if (r2p == NULL) {
    4249         815 :                                 BBPunfix(r2->batCacheid);
    4250         815 :                                 r2 = NULL;
    4251             :                         }
    4252         820 :                         if (semi)
    4253         815 :                                 r1->tkey = true;
    4254         820 :                         if (!VIEWtparent(r1) &&
    4255         820 :                             r1->ttype == TYPE_oid &&
    4256         820 :                             BBP_refs(r1->batCacheid) == 1 &&
    4257         820 :                             (r2 == NULL ||
    4258           5 :                              (!VIEWtparent(r2) &&
    4259           5 :                               BBP_refs(r2->batCacheid) == 1 &&
    4260           5 :                               r2->ttype == TYPE_oid))) {
    4261             :                                 /* in-place sort if we can */
    4262         820 :                                 if (r2) {
    4263           5 :                                         GDKqsort(r1->theap->base, r2->theap->base,
    4264           5 :                                                  NULL, r1->batCount, r1->twidth,
    4265           5 :                                                  r2->twidth, TYPE_oid, false,
    4266             :                                                  false);
    4267           5 :                                         r2->tsorted = false;
    4268           5 :                                         r2->trevsorted = false;
    4269           5 :                                         r2->tseqbase = oid_nil;
    4270           5 :                                         *r2p = r2;
    4271             :                                 } else {
    4272         815 :                                         GDKqsort(r1->theap->base, NULL, NULL,
    4273         815 :                                                  r1->batCount, r1->twidth, 0,
    4274             :                                                  TYPE_oid, false, false);
    4275             :                                 }
    4276         820 :                                 r1->tsorted = true;
    4277         820 :                                 r1->trevsorted = false;
    4278         820 :                                 *r1p = r1;
    4279             :                         } else {
    4280           0 :                                 BAT *ob;
    4281           0 :                                 rc = BATsort(&tmp, r2p ? &ob : NULL, NULL,
    4282             :                                              r1, NULL, NULL, false, false, false);
    4283           0 :                                 BBPunfix(r1->batCacheid);
    4284           0 :                                 if (rc != GDK_SUCCEED) {
    4285           0 :                                         BBPreclaim(r2);
    4286           0 :                                         goto doreturn;
    4287             :                                 }
    4288           0 :                                 *r1p = r1 = tmp;
    4289           0 :                                 if (r2p) {
    4290           0 :                                         tmp = BATproject(ob, r2);
    4291           0 :                                         BBPunfix(r2->batCacheid);
    4292           0 :                                         BBPunfix(ob->batCacheid);
    4293           0 :                                         if (tmp == NULL) {
    4294           0 :                                                 BBPunfix(r1->batCacheid);
    4295           0 :                                                 rc = GDK_FAIL;
    4296           0 :                                                 goto doreturn;
    4297             :                                         }
    4298           0 :                                         *r2p = tmp;
    4299             :                                 }
    4300             :                         }
    4301         820 :                         rc = GDK_SUCCEED;
    4302         820 :                         goto doreturn;
    4303             :                 }
    4304             :         }
    4305         853 :         rc = hashjoin(r1p, r2p, r3p, l, r, &lci, &rci,
    4306             :                       nil_matches, nil_on_miss, semi, only_misses,
    4307             :                       not_in, max_one, min_one, estimate, t0, false, rhash, prhash,
    4308             :                       rcand, func);
    4309      110533 :   doreturn:
    4310      110533 :         BBPreclaim(lp);
    4311      110535 :         BBPreclaim(rp);
    4312      110534 :         if (rc == GDK_SUCCEED && (semi | only_misses))
    4313      109717 :                 *r1p = virtualize(*r1p);
    4314             :         return rc;
    4315             : }
    4316             : 
    4317             : /* Perform an equi-join over l and r, optionally restricted by the candidate lists sl and sr.  Returns two new, aligned, bats
    4318             :  * (through r1p and r2p) with the oids of matching tuples.  The result is in the same order
    4319             :  * as l (i.e. r1 is sorted).  This is a thin wrapper: all work is delegated to leftjoin(), whose return value is passed on. */
    4320             : gdk_return
    4321         645 : BATleftjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    4322             : {
    4323         645 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches, /* NULL: no third (r3p) output */
    4324             :                         false, false, false, false, false, false, /* presumably nil_on_miss, semi, only_misses, not_in, max_one, min_one -- confirm against leftjoin */
    4325             :                         estimate, __func__,
    4326         645 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() is only called when the ALGO debug tracer test succeeds; otherwise 0 is passed */
    4327             : }
    4328             : 
    4329             : /* Performs a left outer join over l and r.  Returns two new, aligned,
    4330             :  * bats with the oids of matching tuples, or the oid in the first
    4331             :  * output bat and nil in the second output bat if the value in l does
    4332             :  * not occur in r.  The result is in the same order as l (i.e. r1 is
    4333             :  * sorted).  match_one is handed to leftjoin() twice, as its last two boolean flags (presumably max_one and min_one -- confirm against leftjoin). */
    4334             : gdk_return
    4335         123 : BATouterjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool match_one, BUN estimate)
    4336             : {
    4337         123 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches, /* NULL: no third (r3p) output */
    4338             :                         true, false, false, false, match_one, match_one, /* the leading true is presumably nil_on_miss, i.e. the outer-join behaviour described above */
    4339             :                         estimate, __func__,
    4340         123 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() is only called when the ALGO debug tracer test succeeds; otherwise 0 is passed */
    4341             : }
    4342             : 
    4343             : /* Perform a semi-join over l and r, optionally restricted by the candidate lists sl and sr.  Returns one or two new bats
    4344             :  * with the oids of matching tuples.  The result is in the same order
    4345             :  * as l (i.e. r1 is sorted).  If a single bat is returned, it is a
    4346             :  * candidate list.  All work is delegated to leftjoin(), whose return value is passed on. */
    4347             : gdk_return
    4348        1024 : BATsemijoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4349             :             bool nil_matches, bool max_one, BUN estimate)
    4350             : {
    4351        1024 :         return leftjoin(r1p, r2p, NULL, l, r, sl, sr, nil_matches, /* NULL: no third (r3p) output */
    4352             :                         false, true, false, false, max_one, false, /* the lone true is presumably leftjoin's semi flag -- confirm against leftjoin */
    4353             :                         estimate, __func__,
    4354        1024 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() is only called when the ALGO debug tracer test succeeds; otherwise 0 is passed */
    4355             : }
    4356             : 
    4357             : /* Perform a mark-join over l and r.  Returns one or two new bats with
    4358             :  * the oids of matching tuples.  In addition, returns a bat with "marks"
    4359             :  * (through r3p) that indicate the type of match.  This is an outer join, so returns
    4360             :  * at least one value for each row on the left.  If the second output
    4361             :  * pointer (r2p) is NULL, this is also a semi-join, so returns exactly
    4362             :  * one row for each row on the left.  If there is a match, the mark
    4363             :  * column will be TRUE; if there is no match, the second output is NIL,
    4364             :  * and the mark output is FALSE if there are no NILs in the right input,
    4365             :  * and the left input is also not NIL, otherwise the mark output is
    4366             :  * NIL. */
    4367             : gdk_return
    4368        9642 : BATmarkjoin(BAT **r1p, BAT **r2p, BAT **r3p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4369             :             BUN estimate)
    4370             : {
    4371        9642 :         return leftjoin(r1p, r2p, r3p, l, r, sl, sr, false, true, r2p == NULL, /* presumably nil_matches=false, nil_on_miss=true, semi only when r2p is NULL -- confirm against leftjoin */
    4372             :                         false, false, false, false, estimate, __func__,
    4373        9642 :                         GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0); /* GDKusec() is only called when the ALGO debug tracer test succeeds; otherwise 0 is passed */
    4374             : }
    4375             : 
    4376             : /* Return a candidate list with the list of rows in l whose value also
    4377             :  * occurs in r.  This is just the left output of a semi-join.  Returns NULL when leftjoin() does not report GDK_SUCCEED. */
    4378             : BAT *
    4379        6246 : BATintersect(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool max_one,
    4380             :              BUN estimate)
    4381             : {
    4382        6246 :         BAT *bn; /* receives the single (left) output of leftjoin() */
    4383             : 
    4384        6246 :         if (leftjoin(&bn, NULL, NULL, l, r, sl, sr, nil_matches, /* both NULLs: no second and no third output requested */
    4385             :                      false, true, false, false, max_one, false, /* the lone true is presumably leftjoin's semi flag -- confirm against leftjoin */
    4386             :                      estimate, __func__,
    4387        6246 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4388        6245 :                 return bn;
    4389             :         return NULL;
    4390             : }
    4391             : 
    4392             : /* Return the difference of l and r.  The result is a BAT with the
    4393             :  * oids of those values in l that do not occur in r.  This is what you
    4394             :  * might call an anti-semi-join.  The result is a candidate list.  Returns NULL when leftjoin() does not report GDK_SUCCEED. */
    4395             : BAT *
    4396       92867 : BATdiff(BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, bool not_in,
    4397             :         BUN estimate)
    4398             : {
    4399       92867 :         BAT *bn; /* receives the single (left) output of leftjoin() */
    4400             : 
    4401       92867 :         if (leftjoin(&bn, NULL, NULL, l, r, sl, sr, nil_matches, /* both NULLs: no second and no third output requested */
    4402             :                      false, false, true, not_in, false, false, /* the lone true is presumably leftjoin's only_misses flag -- confirm against leftjoin */
    4403             :                      estimate, __func__,
    4404       92867 :                      GDK_TRACER_TEST(M_DEBUG, ALGO) ? GDKusec() : 0) == GDK_SUCCEED)
    4405       92865 :                 return bn;
    4406             :         return NULL;
    4407             : }
    4408             : /* Join l and r under the comparison operator op (one of the JOIN_* codes), optionally restricted by the candidate lists sl and sr.  JOIN_EQ is forwarded to BATjoin(); the other operators are translated to a MASK_* opcode and handed to thetajoin().  r2p may be NULL.  Returns GDK_FAIL for an unknown operator or when joinparamcheck() rejects the inputs. */
    4409             : gdk_return
    4410       16837 : BATthetajoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, int op, bool nil_matches, BUN estimate)
    4411             : {
    4412       16837 :         int opcode = 0;
    4413       16837 :         lng t0 = 0;
    4414             : 
    4415             :         /* encode operator as a bit mask into opcode; equality is not encoded but dispatched to BATjoin right away */
    4416       16837 :         switch (op) {
    4417           0 :         case JOIN_EQ:
    4418           0 :                 return BATjoin(r1p, r2p, l, r, sl, sr, nil_matches, estimate);
    4419             :         case JOIN_NE:
    4420             :                 opcode = MASK_NE;
    4421             :                 break;
    4422        4225 :         case JOIN_LT:
    4423        4225 :                 opcode = MASK_LT;
    4424        4225 :                 break;
    4425           7 :         case JOIN_LE:
    4426           7 :                 opcode = MASK_LE;
    4427           7 :                 break;
    4428       12525 :         case JOIN_GT:
    4429       12525 :                 opcode = MASK_GT;
    4430       12525 :                 break;
    4431          18 :         case JOIN_GE:
    4432          18 :                 opcode = MASK_GE;
    4433          18 :                 break;
    4434           0 :         default: /* op is none of the JOIN_* codes handled above */
    4435           0 :                 GDKerror("unknown operator %d.\n", op);
    4436           0 :                 return GDK_FAIL;
    4437             :         }
    4438             : 
    4439       16837 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec(); /* presumably samples the start time only when ALGO debug tracing is on; t0 stays 0 otherwise */
    4440       16837 :         *r1p = NULL; /* outputs start out empty; the second one is optional */
    4441       16837 :         if (r2p) {
    4442        4299 :                 *r2p = NULL;
    4443             :         }
    4444       16837 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4445             :                 return GDK_FAIL;
    4446             : 
    4447       16837 :         return thetajoin(r1p, r2p, l, r, sl, sr, opcode, estimate, nil_matches,
    4448             :                          __func__, t0);
    4449             : }
    4450             : /* Perform an equi-join over l and r, optionally restricted by the candidate lists sl and sr.  The matching oids are returned through r1p and r2p (r2p may be NULL).  The function only chooses an algorithm -- selectjoin, mergejoin_void, mergejoin or hashjoin -- based on candidate counts, ordering/density of the inputs and the estimates from joincost(), and may call the chosen routine with the operands swapped. */
    4451             : gdk_return
    4452      218455 : BATjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr, bool nil_matches, BUN estimate)
    4453             : {
    4454      218455 :         struct canditer lci, rci;
    4455      218455 :         bool lhash = false, rhash = false, lcand = false;
    4456      218455 :         bool plhash = false, prhash = false, rcand = false;
    4457      218455 :         bool swap;
    4458      218455 :         bat parent;
    4459      218455 :         double rcost = 0;
    4460      218455 :         double lcost = 0;
    4461      218455 :         gdk_return rc;
    4462      218455 :         lng t0 = 0;
    4463      218455 :         BAT *r2 = NULL; /* scratch output for swapped calls when the caller passed r2p == NULL */
    4464      218455 :         BAT *lp = NULL; /* bat substituted for l (parent or unmasked copy), released at doreturn */
    4465      218455 :         BAT *rp = NULL; /* same for r */
    4466             : 
    4467      218455 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec(); /* presumably samples the start time only when ALGO debug tracing is on; t0 stays 0 otherwise */
    4468             : 
    4469      218455 :         canditer_init(&lci, l, sl);
    4470      218438 :         canditer_init(&rci, r, sr);
    4471             : /* if l (and below, r) is a view whose parent has the same hseqbase, count and atom type, continue with the parent itself; otherwise drop the parent again */
    4472      218440 :         if ((parent = VIEWtparent(l)) != 0) {
    4473       53255 :                 lp = BATdescriptor(parent);
    4474       53256 :                 if (lp == NULL)
    4475             :                         return GDK_FAIL;
    4476       53256 :                 if (l->hseqbase == lp->hseqbase &&
    4477       56686 :                     BATcount(l) == BATcount(lp) &&
    4478       19624 :                     ATOMtype(l->ttype) == ATOMtype(lp->ttype)) {
    4479             :                         l = lp;
    4480             :                 } else {
    4481       43445 :                         BBPunfix(lp->batCacheid);
    4482       43445 :                         lp = NULL;
    4483             :                 }
    4484             :         }
    4485      218440 :         if ((parent = VIEWtparent(r)) != 0) {
    4486      168113 :                 rp = BATdescriptor(parent);
    4487      168067 :                 if (rp == NULL) {
    4488           0 :                         BBPreclaim(lp);
    4489           0 :                         return GDK_FAIL;
    4490             :                 }
    4491      168067 :                 if (r->hseqbase == rp->hseqbase &&
    4492      281782 :                     BATcount(r) == BATcount(rp) &&
    4493      249500 :                     ATOMtype(r->ttype) == ATOMtype(rp->ttype)) {
    4494             :                         r = rp;
    4495             :                 } else {
    4496       43315 :                         BBPunfix(rp->batCacheid);
    4497       43315 :                         rp = NULL;
    4498             :                 }
    4499             :         }
    4500             : /* inputs of type msk, or that are mask candidate lists, are replaced by the result of BATunmask(); that result takes over the lp/rp slot so it is released at doreturn */
    4501      218408 :         if (l->ttype == TYPE_msk || mask_cand(l)) {
    4502           0 :                 l = BATunmask(l);
    4503           0 :                 BBPreclaim(lp);
    4504           0 :                 if (l == NULL) {
    4505           0 :                         BBPreclaim(rp);
    4506           0 :                         return GDK_FAIL;
    4507             :                 }
    4508             :                 lp = l;
    4509             :         }
    4510      218408 :         if (r->ttype == TYPE_msk || mask_cand(r)) {
    4511          24 :                 r = BATunmask(r);
    4512          24 :                 BBPreclaim(rp);
    4513          24 :                 if (r == NULL) {
    4514           0 :                         BBPreclaim(lp);
    4515           0 :                         return GDK_FAIL;
    4516             :                 }
    4517             :                 rp = r;
    4518             :         }
    4519             : 
    4520      218408 :         *r1p = NULL; /* outputs start out empty; the second one is optional */
    4521      218408 :         if (r2p)
    4522      191113 :                 *r2p = NULL;
    4523             : 
    4524      218408 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED) {
    4525           0 :                 rc = GDK_FAIL;
    4526           0 :                 goto doreturn;
    4527             :         }
    4528             : /* either side has no candidates: leave it to nomatch() to produce the result */
    4529      218449 :         if (lci.ncand == 0 || rci.ncand == 0) {
    4530      155559 :                 TRC_DEBUG(ALGO, "BATjoin(l=" ALGOBATFMT ","
    4531             :                           "r=" ALGOBATFMT ",sl=" ALGOOPTBATFMT ","
    4532             :                           "sr=" ALGOOPTBATFMT ",nil_matches=%d)\n",
    4533             :                           ALGOBATPAR(l), ALGOBATPAR(r),
    4534             :                           ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    4535             :                           nil_matches);
    4536      155559 :                 rc = nomatch(r1p, r2p, NULL, l, r, &lci,
    4537             :                              0, false, false, __func__, t0);
    4538      155536 :                 goto doreturn;
    4539             :         }
    4540             : 
    4541       62890 :         swap = false;
    4542             : /* cheap special cases first; each one finishes via doreturn */
    4543       62890 :         if (lci.ncand == 1 || (BATordered(l) && BATordered_rev(l)) || (l->ttype == TYPE_void && is_oid_nil(l->tseqbase))) {
    4544             :                 /* single value to join (one candidate, both ordered and reverse ordered, or void with nil seqbase), use select */
    4545       35842 :                 rc = selectjoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4546             :                                 nil_matches, false, false, false, false,
    4547             :                                 t0, false, __func__);
    4548       35841 :                 goto doreturn;
    4549       27047 :         } else if (rci.ncand == 1 || (BATordered(r) && BATordered_rev(r)) || (r->ttype == TYPE_void && is_oid_nil(r->tseqbase))) {
    4550             :                 /* single value to join on the right, use select with operands and outputs swapped; r2 stands in when the caller gave no r2p */
    4551        9986 :                 rc = selectjoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4552             :                                 nil_matches, false, false, false, false,
    4553             :                                 t0, true, __func__);
    4554        6835 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4555        3683 :                         BBPunfix(r2->batCacheid); /* caller did not ask for this output */
    4556        6835 :                 goto doreturn;
    4557       20214 :         } else if (BATtdense(r) && rci.tpe == cand_dense) {
    4558             :                 /* use special implementation for dense right-hand side */
    4559         950 :                 rc = mergejoin_void(r1p, r2p, NULL, l, r, &lci, &rci,
    4560             :                                     false, false, t0, false, __func__);
    4561         950 :                 goto doreturn;
    4562       19264 :         } else if (BATtdense(l) && lci.tpe == cand_dense) {
    4563             :                 /* dense left-hand side: use the same special implementation with operands and outputs swapped */
    4564          58 :                 rc = mergejoin_void(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4565             :                                     false, false, t0, true, __func__);
    4566          39 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4567          20 :                         BBPunfix(r2->batCacheid); /* caller did not ask for this output */
    4568          39 :                 goto doreturn;
    4569       28168 :         } else if ((BATordered(l) || BATordered_rev(l)) &&
    4570       11702 :                    (BATordered(r) || BATordered_rev(r))) {
    4571             :                 /* both sorted (in either direction) */
    4572        6219 :                 rc = mergejoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4573             :                                nil_matches, false, false, false, false, false, false,
    4574             :                                estimate, t0, false, __func__);
    4575        6218 :                 goto doreturn;
    4576             :         }
    4577             : /* no special case applies: obtain cost estimates for searching in l and in r; a negative cost is treated as failure */
    4578       13006 :         lcost = joincost(l, rci.ncand, &lci, &lhash, &plhash, &lcand);
    4579       13006 :         rcost = joincost(r, lci.ncand, &rci, &rhash, &prhash, &rcand);
    4580       13006 :         if (lcost < 0 || rcost < 0) {
    4581           0 :                 rc = GDK_FAIL;
    4582           0 :                 goto doreturn;
    4583             :         }
    4584             : 
    4585             :         /* if the cost of doing searches on l is lower than the cost
    4586             :          * of doing searches on r, we swap */
    4587       13006 :         swap = (lcost < rcost);
    4588             : 
    4589       26012 :         if ((r->ttype == TYPE_void && r->tvheap != NULL) ||
    4590       26130 :             ((BATordered(r) || BATordered_rev(r)) &&
    4591        4148 :              (lci.ncand * (log2((double) rci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4592             :                 /* r is sorted and it is cheaper to do multiple binary
    4593             :                  * searches than it is to use a hash (also taken when r is void with a tvheap) */
    4594         152 :                 rc = mergejoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4595             :                                nil_matches, false, false, false, false, false, false,
    4596             :                                estimate, t0, false, __func__);
    4597       25708 :         } else if ((l->ttype == TYPE_void && l->tvheap != NULL) ||
    4598       25880 :             ((BATordered(l) || BATordered_rev(l)) &&
    4599        2724 :              (rci.ncand * (log2((double) lci.ncand) + 1) < (swap ? lcost : rcost)))) {
    4600             :                 /* l is sorted and it is cheaper to do multiple binary
    4601             :                  * searches than it is to use a hash; mergejoin is called with operands and outputs swapped */
    4602        1242 :                 rc = mergejoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4603             :                                nil_matches, false, false, false, false, false, false,
    4604             :                                estimate, t0, true, __func__);
    4605         622 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4606           2 :                         BBPunfix(r2->batCacheid); /* caller did not ask for this output */
    4607       12232 :         } else if (swap) { /* hash join with operands swapped, using the values joincost() reported for l */
    4608       12172 :                 rc = hashjoin(r2p ? r2p : &r2, r1p, NULL, r, l, &rci, &lci,
    4609             :                               nil_matches, false, false, false, false, false, false,
    4610             :                               estimate, t0, true, lhash, plhash, lcand,
    4611             :                               __func__);
    4612        6306 :                 if (rc == GDK_SUCCEED && r2p == NULL)
    4613         440 :                         BBPunfix(r2->batCacheid); /* caller did not ask for this output */
    4614             :         } else { /* hash join in the given operand order, using the values joincost() reported for r */
    4615        5926 :                 rc = hashjoin(r1p, r2p, NULL, l, r, &lci, &rci,
    4616             :                               nil_matches, false, false, false, false, false, false,
    4617             :                               estimate, t0, false, rhash, prhash, rcand,
    4618             :                               __func__);
    4619             :         }
    4620      218425 :   doreturn: /* common exit: release whatever was stored in lp/rp above (either may still be NULL) */
    4621      218425 :         BBPreclaim(lp);
    4622      218426 :         BBPreclaim(rp);
    4623             :         return rc;
    4624             : }
    4625             : 
    4626             : gdk_return
    4627           0 : BATbandjoin(BAT **r1p, BAT **r2p, BAT *l, BAT *r, BAT *sl, BAT *sr,
    4628             :             const void *c1, const void *c2, bool linc, bool hinc, BUN estimate)
    4629             : {
                               /*
                                * Band join: produce the OID pairs (lo, ro) taken from l and r
                                * for which
                                *      r - c1 <[=] l <[=] r + c2
                                * holds.  linc selects whether l == r - c1 counts as a match,
                                * hinc does the same for l == r + c2.
                                *
                                * r1p      - output: OIDs from l
                                * r2p      - output: matching OIDs from r; may be NULL, in
                                *            which case only *r1p is produced
                                * l, r     - input BATs; their types must agree (asserted)
                                * sl, sr   - candidate lists handed to canditer_init()
                                * c1, c2   - pointers to the band widths, read as values of
                                *            the base type of l
                                * estimate - size hint passed on to joininitresults()
                                *
                                * Only bte, sht, int, lng, hge (with HAVE_HGE), flt and dbl
                                * are handled; any other base type fails with "unsupported
                                * type".  nil values in l or r never match.
                                *
                                * Returns GDK_SUCCEED or GDK_FAIL.  On the bailout path both
                                * result BATs are reclaimed.
                                * NOTE(review): *r1p / *r2p are not reset to NULL on that
                                * path, so they still hold the reclaimed pointers -- confirm
                                * that callers never look at them after GDK_FAIL.
                                */
    4630           0 :         lng t0 = 0;
    4631           0 :         struct canditer lci, rci;
    4632           0 :         const char *lvals, *rvals;
    4633           0 :         int t;
    4634           0 :         const void *nil = ATOMnilptr(l->ttype);
    4635           0 :         int (*cmp)(const void *, const void *) = ATOMcompare(l->ttype);
    4636           0 :         const char *vl, *vr;
    4637           0 :         oid lastr = 0;          /* last value inserted into r2 */
                               /* number of matches found in r for the current l value */
    4638           0 :         BUN nr;
                               /* OIDs of the l and r candidates currently being compared */
    4639           0 :         oid lo, ro;
    4640           0 :         bool lskipped = false;  /* whether we skipped values in l */
    4641             : 
                               /* t0 is only sampled when ALGO tracing is on; it stays 0 otherwise */
    4642           0 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
    4643             : 
                               /* state used by GDK_CHECK_TIMEOUT in the outer loop below */
    4644           0 :         size_t counter = 0;
    4645           0 :         QryCtx *qry_ctx = MT_thread_get_qry_ctx();
    4646             : 
    4647             : 
    4648           0 :         MT_thread_setalgorithm(__func__);
                               /* clear the outputs so that early failures leave NULL behind */
    4649           0 :         *r1p = NULL;
    4650           0 :         if (r2p) {
    4651           0 :                 *r2p = NULL;
    4652             :         }
    4653           0 :         if (joinparamcheck(l, r, NULL, sl, sr, __func__) != GDK_SUCCEED)
    4654             :                 return GDK_FAIL;
    4655             : 
    4656           0 :         assert(ATOMtype(l->ttype) == ATOMtype(r->ttype));
    4657             : 
                               /* t is the base type; it drives the parameter check below */
    4658           0 :         t = ATOMtype(l->ttype);
    4659           0 :         t = ATOMbasetype(t);
    4660             : 
    4661           0 :         canditer_init(&lci, l, sl);
    4662           0 :         canditer_init(&rci, r, sr);
    4663             : 
                               /* no candidates on either side: nothing can match */
    4664           0 :         if (lci.ncand == 0 || rci.ncand == 0)
    4665           0 :                 return nomatch(r1p, r2p, NULL, l, r, &lci,
    4666             :                                0, false, false, __func__, t0);
    4667             : 
                               /*
                                * Validate the band parameters per type.  No pair can match
                                * when either width is nil, when the band is empty
                                * (-c1 > c2), or when it collapses to a single point
                                * (-c1 == c2) while at least one of the bounds is exclusive.
                                */
    4668           0 :         switch (t) {
    4669           0 :         case TYPE_bte:
    4670           0 :                 if (is_bte_nil(*(const bte *)c1) ||
    4671           0 :                     is_bte_nil(*(const bte *)c2) ||
    4672           0 :                     -*(const bte *)c1 > *(const bte *)c2 ||
    4673           0 :                     ((!hinc || !linc) && -*(const bte *)c1 == *(const bte *)c2))
    4674           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4675             :                                        0, false, false, __func__, t0);
    4676             :                 break;
    4677           0 :         case TYPE_sht:
    4678           0 :                 if (is_sht_nil(*(const sht *)c1) ||
    4679           0 :                     is_sht_nil(*(const sht *)c2) ||
    4680           0 :                     -*(const sht *)c1 > *(const sht *)c2 ||
    4681           0 :                     ((!hinc || !linc) && -*(const sht *)c1 == *(const sht *)c2))
    4682           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4683             :                                        0, false, false, __func__, t0);
    4684             :                 break;
    4685           0 :         case TYPE_int:
    4686           0 :                 if (is_int_nil(*(const int *)c1) ||
    4687           0 :                     is_int_nil(*(const int *)c2) ||
    4688           0 :                     -*(const int *)c1 > *(const int *)c2 ||
    4689           0 :                     ((!hinc || !linc) && -*(const int *)c1 == *(const int *)c2))
    4690           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4691             :                                        0, false, false, __func__, t0);
    4692             :                 break;
    4693           0 :         case TYPE_lng:
    4694           0 :                 if (is_lng_nil(*(const lng *)c1) ||
    4695           0 :                     is_lng_nil(*(const lng *)c2) ||
    4696           0 :                     -*(const lng *)c1 > *(const lng *)c2 ||
    4697           0 :                     ((!hinc || !linc) && -*(const lng *)c1 == *(const lng *)c2))
    4698           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4699             :                                        0, false, false, __func__, t0);
    4700             :                 break;
    4701             : #ifdef HAVE_HGE
    4702           0 :         case TYPE_hge:
    4703           0 :                 if (is_hge_nil(*(const hge *)c1) ||
    4704           0 :                     is_hge_nil(*(const hge *)c2) ||
    4705           0 :                     -*(const hge *)c1 > *(const hge *)c2 ||
    4706           0 :                     ((!hinc || !linc) && -*(const hge *)c1 == *(const hge *)c2))
    4707           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4708             :                                        0, false, false, __func__, t0);
    4709             :                 break;
    4710             : #endif
    4711           0 :         case TYPE_flt:
    4712           0 :                 if (is_flt_nil(*(const flt *)c1) ||
    4713           0 :                     is_flt_nil(*(const flt *)c2) ||
    4714           0 :                     -*(const flt *)c1 > *(const flt *)c2 ||
    4715           0 :                     ((!hinc || !linc) && -*(const flt *)c1 == *(const flt *)c2))
    4716           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4717             :                                        0, false, false, __func__, t0);
    4718             :                 break;
    4719           0 :         case TYPE_dbl:
    4720           0 :                 if (is_dbl_nil(*(const dbl *)c1) ||
    4721           0 :                     is_dbl_nil(*(const dbl *)c2) ||
    4722           0 :                     -*(const dbl *)c1 > *(const dbl *)c2 ||
    4723           0 :                     ((!hinc || !linc) && -*(const dbl *)c1 == *(const dbl *)c2))
    4724           0 :                         return nomatch(r1p, r2p, NULL, l, r, &lci,
    4725             :                                        0, false, false, __func__, t0);
    4726             :                 break;
    4727           0 :         default:
    4728           0 :                 GDKerror("unsupported type\n");
    4729           0 :                 return GDK_FAIL;
    4730             :         }
    4731             : 
                               /* allocate the result BAT(s); BUN_NONE signals failure */
    4732           0 :         BUN maxsize = joininitresults(r1p, r2p, NULL, lci.ncand, rci.ncand, false, false,
    4733             :                                       false, false, false, false, estimate);
    4734           0 :         if (maxsize == BUN_NONE)
    4735             :                 return GDK_FAIL;
    4736           0 :         BAT *r1 = *r1p;
    4737           0 :         BAT *r2 = r2p ? *r2p : NULL;
                               /* from here on every exit must call bat_iterator_end() on both */
    4738           0 :         BATiter li = bat_iterator(l);
    4739           0 :         BATiter ri = bat_iterator(r);
    4740             : 
    4741           0 :         lvals = (const char *) li.base;
    4742           0 :         rvals = (const char *) ri.base;
    4743           0 :         assert(ri.vh == NULL);
    4744             : 
    4745           0 :         assert(lvals != NULL);
    4746           0 :         assert(rvals != NULL);
    4747             : 
                               /*
                                * Start from the most optimistic properties; they are revoked
                                * below as soon as an appended pair contradicts them.
                                */
    4748           0 :         r1->tkey = true;
    4749           0 :         r1->tsorted = true;
    4750           0 :         r1->trevsorted = true;
    4751           0 :         if (r2) {
    4752           0 :                 r2->tkey = true;
    4753           0 :                 r2->tsorted = true;
    4754           0 :                 r2->trevsorted = true;
    4755             :         }
    4756             : 
    4757             :         /* nested loop implementation for band join */
    4758           0 :         for (BUN lidx = 0; lidx < lci.ncand; lidx++) {
    4759           0 :                 GDK_CHECK_TIMEOUT(qry_ctx, counter,
    4760             :                                 GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
    4761           0 :                 lo = canditer_next(&lci);
    4762           0 :                 vl = FVALUE(l, lo - l->hseqbase);
                                       /* a nil on the left never matches anything */
    4763           0 :                 if (cmp(vl, nil) == 0)
    4764           0 :                         continue;
    4765           0 :                 nr = 0;
                                       /* rescan all of r's candidates for every l value */
    4766           0 :                 canditer_reset(&rci);
    4767           0 :                 for (BUN ridx = 0; ridx < rci.ncand; ridx++) {
    4768           0 :                         ro = canditer_next(&rci);
    4769           0 :                         vr = FVALUE(r, ro - r->hseqbase);
                                               /*
                                                * Per-type band test.  In every case v1 = vr - c1
                                                * is the lower and v2 = vr + c2 the upper bound;
                                                * "continue" rejects the pair, "break" leaves the
                                                * switch and lets the pair be appended.
                                                *
                                                * NOTE(review): this dispatches on
                                                * ATOMtype(li.type) whereas the parameter check
                                                * above dispatches on t (the base type), and there
                                                * is no default case.  A type that matches no case
                                                * falls out of the switch and is appended as a
                                                * match -- confirm such a type cannot reach here.
                                                */
    4770           0 :                         switch (ATOMtype(li.type)) {
    4771           0 :                         case TYPE_bte: {
    4772           0 :                                 if (is_bte_nil(*(const bte *) vr))
    4773           0 :                                         continue;
                                                       /* bounds are computed in sht, presumably to rule out overflow */
    4774           0 :                                 sht v1 = (sht) *(const bte *) vr, v2;
    4775           0 :                                 v2 = v1;
    4776           0 :                                 v1 -= *(const bte *)c1;
                                                       /* reject vl below the lower bound, or on it when !linc */
    4777           0 :                                 if (*(const bte *)vl <= v1 &&
    4778           0 :                                     (!linc || *(const bte *)vl != v1))
    4779           0 :                                         continue;
    4780           0 :                                 v2 += *(const bte *)c2;
                                                       /* reject vl above the upper bound, or on it when !hinc */
    4781           0 :                                 if (*(const bte *)vl >= v2 &&
    4782           0 :                                     (!hinc || *(const bte *)vl != v2))
    4783           0 :                                         continue;
    4784             :                                 break;
    4785             :                         }
    4786           0 :                         case TYPE_sht: {
    4787           0 :                                 if (is_sht_nil(*(const sht *) vr))
    4788           0 :                                         continue;
                                                       /* same test as for bte, with the bounds held in int */
    4789           0 :                                 int v1 = (int) *(const sht *) vr, v2;
    4790           0 :                                 v2 = v1;
    4791           0 :                                 v1 -= *(const sht *)c1;
    4792           0 :                                 if (*(const sht *)vl <= v1 &&
    4793           0 :                                     (!linc || *(const sht *)vl != v1))
    4794           0 :                                         continue;
    4795           0 :                                 v2 += *(const sht *)c2;
    4796           0 :                                 if (*(const sht *)vl >= v2 &&
    4797           0 :                                     (!hinc || *(const sht *)vl != v2))
    4798           0 :                                         continue;
    4799             :                                 break;
    4800             :                         }
    4801           0 :                         case TYPE_int: {
    4802           0 :                                 if (is_int_nil(*(const int *) vr))
    4803           0 :                                         continue;
                                                       /* same test again, with the bounds held in lng */
    4804           0 :                                 lng v1 = (lng) *(const int *) vr, v2;
    4805           0 :                                 v2 = v1;
    4806           0 :                                 v1 -= *(const int *)c1;
    4807           0 :                                 if (*(const int *)vl <= v1 &&
    4808           0 :                                     (!linc || *(const int *)vl != v1))
    4809           0 :                                         continue;
    4810           0 :                                 v2 += *(const int *)c2;
    4811           0 :                                 if (*(const int *)vl >= v2 &&
    4812           0 :                                     (!hinc || *(const int *)vl != v2))
    4813           0 :                                         continue;
    4814             :                                 break;
    4815             :                         }
                                               /*
                                                * lng: the bounds are held in hge, __int128 or
                                                * __int128_t, whichever the build provides.  Only
                                                * when none is available does the code fall back
                                                * to the overflow-checked macros.
                                                */
    4816             : #ifdef HAVE_HGE
    4817           0 :                         case TYPE_lng: {
    4818           0 :                                 if (is_lng_nil(*(const lng *) vr))
    4819           0 :                                         continue;
    4820           0 :                                 hge v1 = (hge) *(const lng *) vr, v2;
    4821           0 :                                 v2 = v1;
    4822           0 :                                 v1 -= *(const lng *)c1;
    4823           0 :                                 if (*(const lng *)vl <= v1 &&
    4824           0 :                                     (!linc || *(const lng *)vl != v1))
    4825           0 :                                         continue;
    4826           0 :                                 v2 += *(const lng *)c2;
    4827           0 :                                 if (*(const lng *)vl >= v2 &&
    4828           0 :                                     (!hinc || *(const lng *)vl != v2))
    4829           0 :                                         continue;
    4830             :                                 break;
    4831             :                         }
    4832             : #else
    4833             : #ifdef HAVE___INT128
    4834             :                         case TYPE_lng: {
    4835             :                                 if (is_lng_nil(*(const lng *) vr))
    4836             :                                         continue;
    4837             :                                 __int128 v1 = (__int128) *(const lng *) vr, v2;
    4838             :                                 v2 = v1;
    4839             :                                 v1 -= *(const lng *)c1;
    4840             :                                 if (*(const lng *)vl <= v1 &&
    4841             :                                     (!linc || *(const lng *)vl != v1))
    4842             :                                         continue;
    4843             :                                 v2 += *(const lng *)c2;
    4844             :                                 if (*(const lng *)vl >= v2 &&
    4845             :                                     (!hinc || *(const lng *)vl != v2))
    4846             :                                         continue;
    4847             :                                 break;
    4848             :                         }
    4849             : #else
    4850             : #ifdef HAVE___INT128_T
    4851             :                         case TYPE_lng: {
    4852             :                                 if (is_lng_nil(*(const lng *) vr))
    4853             :                                         continue;
    4854             :                                 __int128_t v1 = (__int128_t) *(const lng *) vr, v2;
    4855             :                                 v2 = v1;
    4856             :                                 v1 -= *(const lng *)c1;
    4857             :                                 if (*(const lng *)vl <= v1 &&
    4858             :                                     (!linc || *(const lng *)vl != v1))
    4859             :                                         continue;
    4860             :                                 v2 += *(const lng *)c2;
    4861             :                                 if (*(const lng *)vl >= v2 &&
    4862             :                                     (!hinc || *(const lng *)vl != v2))
    4863             :                                         continue;
    4864             :                                 break;
    4865             :                         }
    4866             : #else
    4867             :                         case TYPE_lng: {
    4868             :                                 if (is_lng_nil(*(const lng *) vr))
    4869             :                                         continue;
    4870             :                                 lng v1, v2;
                                                       /*
                                                        * The last macro argument is presumably the
                                                        * on-overflow action (see gdk_calc_private.h):
                                                        * with c1 < 0 the pair is rejected, otherwise
                                                        * the lower-bound test is skipped.
                                                        */
    4871             :                                 SUBI_WITH_CHECK(*(const lng *)vr,
    4872             :                                                *(const lng *)c1,
    4873             :                                                lng, v1,
    4874             :                                                GDK_lng_max,
    4875             :                                                do{if(*(const lng*)c1<0)goto nolmatch;else goto lmatch1;}while(false));
    4876             :                                 if (*(const lng *)vl <= v1 &&
    4877             :                                     (!linc || *(const lng *)vl != v1))
    4878             :                                         continue;
    4879             :                                   lmatch1:
                                                       /* mirror image for the upper bound: c2 > 0 rejects, otherwise the test is skipped */
    4880             :                                 ADDI_WITH_CHECK(*(const lng *)vr,
    4881             :                                                *(const lng *)c2,
    4882             :                                                lng, v2,
    4883             :                                                GDK_lng_max,
    4884             :                                                do{if(*(const lng*)c2>0)goto nolmatch;else goto lmatch2;}while(false));
    4885             :                                 if (*(const lng *)vl >= v2 &&
    4886             :                                     (!hinc || *(const lng *)vl != v2))
    4887             :                                         continue;
    4888             :                                   lmatch2:
    4889             :                                 break;
    4890             :                                   nolmatch:
    4891             :                                 continue;
    4892             :                         }
    4893             : #endif
    4894             : #endif
    4895             : #endif
    4896             : #ifdef HAVE_HGE
    4897           0 :                         case TYPE_hge: {
    4898           0 :                                 if (is_hge_nil(*(const hge *) vr))
    4899           0 :                                         continue;
    4900           0 :                                 hge v1, v2;
                                                       /*
                                                        * No wider type is used for hge, so the bounds
                                                        * go through the checked macros; the jump
                                                        * targets follow the same scheme as the lng
                                                        * fallback above.
                                                        */
    4901           0 :                                 SUBI_WITH_CHECK(*(const hge *)vr,
    4902             :                                                *(const hge *)c1,
    4903             :                                                hge, v1,
    4904             :                                                GDK_hge_max,
    4905             :                                                do{if(*(const hge*)c1<0)goto nohmatch;else goto hmatch1;}while(false));
    4906           0 :                                 if (*(const hge *)vl <= v1 &&
    4907           0 :                                     (!linc || *(const hge *)vl != v1))
    4908           0 :                                         continue;
    4909           0 :                                   hmatch1:
    4910           0 :                                 ADDI_WITH_CHECK(*(const hge *)vr,
    4911             :                                                *(const hge *)c2,
    4912             :                                                hge, v2,
    4913             :                                                GDK_hge_max,
    4914             :                                                do{if(*(const hge*)c2>0)goto nohmatch;else goto hmatch2;}while(false));
    4915           0 :                                 if (*(const hge *)vl >= v2 &&
    4916           0 :                                     (!hinc || *(const hge *)vl != v2))
    4917           0 :                                         continue;
    4918           0 :                                   hmatch2:
    4919             :                                 break;
    4920           0 :                                   nohmatch:
    4921           0 :                                 continue;
    4922             :                         }
    4923             : #endif
    4924           0 :                         case TYPE_flt: {
    4925           0 :                                 if (is_flt_nil(*(const flt *) vr))
    4926           0 :                                         continue;
                                                       /* flt bounds are computed in dbl */
    4927           0 :                                 dbl v1 = (dbl) *(const flt *) vr, v2;
    4928           0 :                                 v2 = v1;
    4929           0 :                                 v1 -= *(const flt *)c1;
    4930           0 :                                 if (*(const flt *)vl <= v1 &&
    4931           0 :                                     (!linc || *(const flt *)vl != v1))
    4932           0 :                                         continue;
    4933           0 :                                 v2 += *(const flt *)c2;
    4934           0 :                                 if (*(const flt *)vl >= v2 &&
    4935           0 :                                     (!hinc || *(const flt *)vl != v2))
    4936           0 :                                         continue;
    4937             :                                 break;
    4938             :                         }
    4939           0 :                         case TYPE_dbl: {
    4940           0 :                                 if (is_dbl_nil(*(const dbl *) vr))
    4941           0 :                                         continue;
    4942           0 :                                 dbl v1, v2;
                                                       /* dbl goes through the checked floating-point macros, same jump scheme as hge */
    4943           0 :                                 SUBF_WITH_CHECK(*(const dbl *)vr,
    4944             :                                                *(const dbl *)c1,
    4945             :                                                dbl, v1,
    4946             :                                                GDK_dbl_max,
    4947             :                                                do{if(*(const dbl*)c1<0)goto nodmatch;else goto dmatch1;}while(false));
    4948           0 :                                 if (*(const dbl *)vl <= v1 &&
    4949           0 :                                     (!linc || *(const dbl *)vl != v1))
    4950           0 :                                         continue;
    4951           0 :                                   dmatch1:
    4952           0 :                                 ADDF_WITH_CHECK(*(const dbl *)vr,
    4953             :                                                *(const dbl *)c2,
    4954             :                                                dbl, v2,
    4955             :                                                GDK_dbl_max,
    4956             :                                                do{if(*(const dbl*)c2>0)goto nodmatch;else goto dmatch2;}while(false));
    4957           0 :                                 if (*(const dbl *)vl >= v2 &&
    4958           0 :                                     (!hinc || *(const dbl *)vl != v2))
    4959           0 :                                         continue;
    4960           0 :                                   dmatch2:
    4961             :                                 break;
    4962           0 :                                   nodmatch:
    4963           0 :                                 continue;
    4964             :                         }
    4965             :                         }
                                               /* (lo, ro) is a match: make room for one more pair */
    4966           0 :                         if (maybeextend(r1, r2, NULL, 1, lci.next, lci.ncand, maxsize) != GDK_SUCCEED)
    4967           0 :                                 goto bailout;
                                               /*
                                                * Property upkeep, only needed once the result
                                                * already holds a pair.  lastr is the r OID that
                                                * was appended most recently.
                                                */
    4968           0 :                         if (BATcount(r1) > 0) {
                                                       /* r2 stops being dense when ro does not directly follow lastr */
    4969           0 :                                 if (r2 && lastr + 1 != ro)
    4970           0 :                                         r2->tseqbase = oid_nil;
                                                       /* first match for this l value, i.e. r1 receives a new OID */
    4971           0 :                                 if (nr == 0) {
    4972           0 :                                         r1->trevsorted = false;
    4973           0 :                                         if (r2 == NULL) {
    4974             :                                                 /* nothing */
    4975           0 :                                         } else if (lastr > ro) {
    4976           0 :                                                 r2->tsorted = false;
    4977           0 :                                                 r2->tkey = false;
    4978           0 :                                         } else if (lastr < ro) {
    4979           0 :                                                 r2->trevsorted = false;
    4980             :                                         } else {
    4981           0 :                                                 r2->tkey = false;
    4982             :                                         }
    4983             :                                 }
    4984             :                         }
    4985           0 :                         APPEND(r1, lo);
    4986           0 :                         if (r2) {
    4987           0 :                                 APPEND(r2, ro);
    4988             :                         }
    4989           0 :                         lastr = ro;
    4990           0 :                         nr++;
    4991             :                 }
                                       /*
                                        * Per-l wrap-up: several matches put the same lo into r1
                                        * more than once; no match at all after earlier output
                                        * leaves a gap, which breaks denseness of r1 as soon as a
                                        * later l value does match.
                                        */
    4992           0 :                 if (nr > 1) {
    4993           0 :                         r1->tkey = false;
    4994           0 :                         r1->tseqbase = oid_nil;
    4995           0 :                         if (r2) {
    4996           0 :                                 r2->trevsorted = false;
    4997             :                         }
    4998           0 :                 } else if (nr == 0) {
    4999           0 :                         lskipped = BATcount(r1) > 0;
    5000           0 :                 } else if (lskipped) {
    5001           0 :                         r1->tseqbase = oid_nil;
    5002             :                 }
    5003             :         }
    5004             :         /* also set other bits of heap to correct value to indicate size */
    5005           0 :         BATsetcount(r1, BATcount(r1));
    5006           0 :         if (r2) {
    5007           0 :                 BATsetcount(r2, BATcount(r2));
    5008           0 :                 assert(BATcount(r1) == BATcount(r2));
    5009             :         }
                               /*
                                * A result that is still dense takes its seqbase from the first
                                * stored OID; an empty result gets seqbase 0.
                                */
    5010           0 :         if (BATcount(r1) > 0) {
    5011           0 :                 if (BATtdense(r1))
    5012           0 :                         r1->tseqbase = ((oid *) r1->theap->base)[0];
    5013           0 :                 if (r2 && BATtdense(r2))
    5014           0 :                         r2->tseqbase = ((oid *) r2->theap->base)[0];
    5015             :         } else {
    5016           0 :                 r1->tseqbase = 0;
    5017           0 :                 if (r2) {
    5018           0 :                         r2->tseqbase = 0;
    5019             :                 }
    5020             :         }
    5021           0 :         bat_iterator_end(&li);
    5022           0 :         bat_iterator_end(&ri);
    5023           0 :         TRC_DEBUG(ALGO, "l=" ALGOBATFMT "," "r=" ALGOBATFMT
    5024             :                   ",sl=" ALGOOPTBATFMT "," "sr=" ALGOOPTBATFMT ","
    5025             :                   " -> " ALGOBATFMT "," ALGOOPTBATFMT
    5026             :                   " (" LLFMT "usec)\n",
    5027             :                   ALGOBATPAR(l), ALGOBATPAR(r),
    5028             :                   ALGOOPTBATPAR(sl), ALGOOPTBATPAR(sr),
    5029             :                   ALGOBATPAR(r1), ALGOOPTBATPAR(r2),
    5030             :                   GDKusec() - t0);
    5031             :         return GDK_SUCCEED;
    5032             : 
                               /* reached on timeout or when the result BATs cannot be extended */
    5033           0 :   bailout:
    5034           0 :         bat_iterator_end(&li);
    5035           0 :         bat_iterator_end(&ri);
    5036           0 :         BBPreclaim(r1);
    5037           0 :         BBPreclaim(r2);
    5038             :         return GDK_FAIL;
    5039             : }
    5040             : 
    5041             : gdk_return
    5042         129 : BATrangejoin(BAT **r1p, BAT **r2p, BAT *l, BAT *rl, BAT *rh,
    5043             :              BAT *sl, BAT *sr, bool linc, bool hinc, bool anti, bool symmetric,
    5044             :              BUN estimate)
    5045             : {
                               /*
                                * Range join entry point.  This function only validates the
                                * arguments, deals with the trivial cases and allocates the
                                * result BATs; the matching itself is delegated to rangejoin()
                                * (or to thetajoin() for the anti-join special cases).
                                *
                                * r1p      - output: OIDs from l
                                * r2p      - output: OIDs from the right-hand side; may be NULL
                                * l        - left input
                                * rl, rh   - right inputs, presumably the low and high end of
                                *            each range -- the test itself lives in rangejoin()
                                * sl, sr   - optional candidate lists for l and for rl
                                * linc, hinc, anti, symmetric - passed through to rangejoin()
                                * estimate - size hint passed on to joininitresults()
                                *
                                * Returns GDK_SUCCEED or GDK_FAIL.
                                */
    5046         129 :         struct canditer lci, rci;
    5047         129 :         BAT *r1 = NULL, *r2 = NULL;
    5048         129 :         BUN maxsize;
    5049         129 :         lng t0 = 0;
    5050             : 
                               /* t0 is only sampled when ALGO tracing is on */
    5051         129 :         TRC_DEBUG_IF(ALGO) t0 = GDKusec();
                               /* clear the outputs so that early failures leave NULL behind */
    5052         129 :         *r1p = NULL;
    5053         129 :         if (r2p) {
    5054         106 :                 *r2p = NULL;
    5055             :         }
    5056         129 :         if (joinparamcheck(l, rl, rh, sl, sr, __func__) != GDK_SUCCEED)
    5057             :                 return GDK_FAIL;
                               /* the right-hand candidate iterator is built over rl only */
    5058         129 :         canditer_init(&lci, l, sl);
    5059         129 :         canditer_init(&rci, rl, sr);
                               /*
                                * Nothing can match when either side has no candidates, when l
                                * is a void BAT with a nil seqbase, or when both rl and rh are
                                * void BATs with a nil seqbase.
                                */
    5060         129 :         if (lci.ncand == 0 ||
    5061         122 :             rci.ncand == 0 ||
    5062         114 :             (l->ttype == TYPE_void && is_oid_nil(l->tseqbase)) ||
    5063         114 :             ((rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) &&
    5064           0 :              (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)))) {
    5065             :                 /* trivial: empty input */
    5066          15 :                 return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5067             :                                __func__, t0);
    5068             :         }
                               /*
                                * Only rl is a void BAT with a nil seqbase: a regular join has
                                * no matches, an anti-join is handed to thetajoin() as a
                                * MASK_GT comparison of l against rh.
                                */
    5069         114 :         if (rl->ttype == TYPE_void && is_oid_nil(rl->tseqbase)) {
    5070           0 :                 if (!anti)
    5071           0 :                         return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5072             :                                        __func__, t0);
    5073           0 :                 return thetajoin(r1p, r2p, l, rh, sl, sr, MASK_GT, estimate, false,
    5074             :                                  __func__, t0);
    5075             :         }
                               /* mirror case for rh: the anti-join becomes a MASK_LT comparison against rl */
    5076         114 :         if (rh->ttype == TYPE_void && is_oid_nil(rh->tseqbase)) {
    5077           0 :                 if (!anti)
    5078           0 :                         return nomatch(r1p, r2p, NULL, l, rl, &lci, 0, false, false,
    5079             :                                        __func__, t0);
    5080           0 :                 return thetajoin(r1p, r2p, l, rl, sl, sr, MASK_LT, estimate, false,
    5081             :                                  __func__, t0);
    5082             :         }
    5083             : 
                               /*
                                * Allocate the result BAT(s), sized from the candidate lists
                                * when present and from the inputs otherwise.  r2 is only
                                * requested when the caller supplied r2p.
                                */
    5084         131 :         if ((maxsize = joininitresults(&r1, r2p ? &r2 : NULL, NULL, sl ? BATcount(sl) : BATcount(l), sr ? BATcount(sr) : BATcount(rl), false, false, false, false, false, false, estimate)) == BUN_NONE)
    5085             :                 return GDK_FAIL;
                               /* publish the results before rangejoin() fills them in */
    5086         114 :         *r1p = r1;
    5087         114 :         if (r2p) {
    5088          97 :                 *r2p = r2;
    5089             :         }
                               /* maxsize == 0: the result BATs are returned as allocated */
    5090         114 :         if (maxsize == 0)
    5091             :                 return GDK_SUCCEED;
    5092             : 
    5093             :         /* note, the rangejoin implementation is in gdk_select.c since
    5094             :          * it uses the imprints code there */
    5095         114 :         return rangejoin(r1, r2, l, rl, rh, &lci, &rci, linc, hinc, anti, symmetric, maxsize);
    5096             : }

Generated by: LCOV version 1.14