Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : /* auxiliary functions and structs for imprints */
18 : #include "gdk_imprints.h"
19 :
20 : static inline oid *
21 152048662 : buninsfix(BAT *bn, oid *a, BUN i, oid v, BUN g, BUN m)
22 : {
23 152048662 : if (i == BATcapacity(bn)) {
24 54 : BATsetcount(bn, i);
25 54 : if (BATextend(bn, MIN(BATcapacity(bn) + g, m)) != GDK_SUCCEED)
26 : return NULL;
27 54 : a = (oid *) Tloc(bn, 0);
28 : }
29 152048662 : a[i] = v;
30 152048662 : return a;
31 : }
32 :
33 : BAT *
34 2136436 : virtualize(BAT *bn)
35 : {
36 : /* input must be a valid candidate list or NULL */
37 2136436 : if (bn == NULL)
38 : return NULL;
39 2136435 : if ((bn->ttype != TYPE_void && bn->ttype != TYPE_oid) || !bn->tkey || !bn->tsorted) {
40 0 : fprintf(stderr, "#bn type %d nil %d key %d sorted %d\n",
41 0 : bn->ttype, is_oid_nil(bn->tseqbase),
42 0 : bn->tkey, bn->tsorted);
43 0 : fflush(stderr);
44 : }
45 2137619 : assert(((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) ||
46 : bn->ttype == TYPE_oid) &&
47 : bn->tkey && bn->tsorted);
48 2137619 : assert(BBP_refs(bn->batCacheid) == 1);
49 2137619 : assert(BBP_lrefs(bn->batCacheid) == 0);
50 : /* since bn has unique and strictly ascending values, we can
51 : * easily check whether the column is dense */
52 2137619 : if (bn->ttype == TYPE_oid &&
53 1457090 : (BATcount(bn) <= 1 ||
54 398070 : * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 ==
55 398070 : * (const oid *) Tloc(bn, BATcount(bn) - 1))) {
56 : /* column is dense, replace by virtual oid */
57 1103291 : oid tseq; /* work around bug in Intel compiler */
58 1103291 : if (BATcount(bn) == 0)
59 : tseq = 0;
60 : else
61 426538 : tseq = * (const oid *) Tloc(bn, 0);
62 1103291 : TRC_DEBUG(ALGO, ALGOBATFMT ",seq=" OIDFMT "\n",
63 : ALGOBATPAR(bn), tseq);
64 1103028 : bn->tseqbase = tseq;
65 1103028 : if (VIEWtparent(bn)) {
66 9 : Heap *h = GDKmalloc(sizeof(Heap));
67 9 : if (h == NULL) {
68 0 : BBPunfix(bn->batCacheid);
69 0 : return NULL;
70 : }
71 9 : *h = *bn->theap;
72 9 : settailname(h, BBP_physical(bn->batCacheid), TYPE_oid, 0);
73 9 : h->parentid = bn->batCacheid;
74 9 : h->base = NULL;
75 9 : h->hasfile = false;
76 9 : ATOMIC_INIT(&h->refs, 1);
77 9 : if (bn->theap->parentid != bn->batCacheid)
78 9 : BBPrelease(bn->theap->parentid);
79 9 : HEAPdecref(bn->theap, false);
80 9 : bn->theap = h;
81 : } else {
82 1103019 : HEAPfree(bn->theap, true);
83 : }
84 1103114 : bn->theap->storage = bn->theap->newstorage = STORE_MEM;
85 1103114 : bn->theap->size = 0;
86 1103114 : bn->ttype = TYPE_void;
87 1103114 : bn->twidth = 0;
88 1103114 : bn->tshift = 0;
89 : }
90 :
91 : return bn;
92 : }
93 :
94 : #define HASHloop_bound(bi, h, hb, v, lo, hi) \
95 : for (hb = HASHget(h, HASHprobe((h), v)); \
96 : hb != BUN_NONE; \
97 : hb = HASHgetlink(h,hb)) \
98 : if (hb >= (lo) && hb < (hi) && \
99 : (cmp == NULL || \
100 : (*cmp)(v, BUNtail(bi, hb)) == 0))
101 :
102 : static BAT *
103 66979 : hashselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
104 : const void *tl, BUN maximum, bool havehash, bool phash,
105 : const char **algo)
106 : {
107 66979 : BUN i, cnt;
108 66979 : oid o, *restrict dst;
109 66979 : BUN l, h, d = 0;
110 66979 : oid seq;
111 66979 : int (*cmp)(const void *, const void *);
112 66979 : BAT *b2 = NULL;
113 :
114 66979 : size_t counter = 0;
115 66979 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
116 :
117 67010 : assert(bn->ttype == TYPE_oid);
118 67010 : seq = bi->b->hseqbase;
119 67010 : l = ci->seq - seq;
120 67010 : h = canditer_last(ci) + 1 - seq;
121 :
122 66941 : *algo = "hashselect";
123 66941 : if (phash && (b2 = BATdescriptor(VIEWtparent(bi->b))) != NULL) {
124 65197 : *algo = "hashselect on parent";
125 65197 : TRC_DEBUG(ALGO, ALGOBATFMT
126 : " using parent(" ALGOBATFMT ") "
127 : "for hash\n",
128 : ALGOBATPAR(bi->b),
129 : ALGOBATPAR(b2));
130 65197 : d = bi->baseoff - b2->tbaseoff;
131 65197 : l += d;
132 65197 : h += d;
133 65197 : bat_iterator_end(bi);
134 65151 : *bi = bat_iterator(b2);
135 : }
136 :
137 67302 : if (!havehash) {
138 2 : if (BAThash(bi->b) != GDK_SUCCEED) {
139 0 : BBPreclaim(bn);
140 0 : BBPreclaim(b2);
141 0 : return NULL;
142 : }
143 2 : MT_rwlock_rdlock(&bi->b->thashlock);
144 2 : if (bi->b->thash == NULL) {
145 0 : GDKerror("Hash destroyed before we could use it\n");
146 0 : goto bailout;
147 : }
148 : }
149 134601 : switch (ATOMbasetype(bi->type)) {
150 : case TYPE_bte:
151 : case TYPE_sht:
152 : cmp = NULL; /* no need to compare: "hash" is perfect */
153 : break;
154 67179 : default:
155 67179 : cmp = ATOMcompare(bi->type);
156 67179 : break;
157 : }
158 67302 : dst = (oid *) Tloc(bn, 0);
159 67302 : cnt = 0;
160 67302 : if (ci->tpe != cand_dense) {
161 212328 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
162 188538 : GDK_CHECK_TIMEOUT(qry_ctx, counter,
163 : GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
164 188538 : o = (oid) (i + seq - d);
165 188538 : if (canditer_contains(ci, o)) {
166 141460 : dst = buninsfix(bn, dst, cnt, o,
167 70730 : maximum - BATcapacity(bn),
168 : maximum);
169 70730 : if (dst == NULL)
170 0 : goto bailout;
171 70730 : cnt++;
172 : }
173 : }
174 : } else {
175 176183 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
176 65204 : GDK_CHECK_TIMEOUT(qry_ctx, counter,
177 : GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
178 65204 : o = (oid) (i + seq - d);
179 130418 : dst = buninsfix(bn, dst, cnt, o,
180 65204 : maximum - BATcapacity(bn),
181 : maximum);
182 65214 : if (dst == NULL)
183 0 : goto bailout;
184 65214 : cnt++;
185 : }
186 : }
187 67298 : MT_rwlock_rdunlock(&bi->b->thashlock);
188 67332 : BBPreclaim(b2);
189 67313 : BATsetcount(bn, cnt);
190 67292 : bn->tkey = true;
191 67292 : if (cnt > 1) {
192 : /* hash chains produce results in the order high to
193 : * low, so we just need to reverse */
194 65353 : for (l = 0, h = BATcount(bn) - 1; l < h; l++, h--) {
195 60835 : o = dst[l];
196 60835 : dst[l] = dst[h];
197 60835 : dst[h] = o;
198 : }
199 : }
200 67292 : bn->tsorted = true;
201 67292 : bn->trevsorted = bn->batCount <= 1;
202 67292 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil;
203 67292 : return bn;
204 :
205 0 : bailout:
206 0 : MT_rwlock_rdunlock(&bi->b->thashlock);
207 0 : BBPreclaim(b2);
208 0 : BBPreclaim(bn);
209 0 : return NULL;
210 : }
211 :
212 : /* Imprints select code */
213 :
214 : /* inner check, non-dense canditer */
215 : #define impscheck(TEST,ADD) \
216 : do { \
217 : const oid e = (oid) (i+limit-pr_off+hseq); \
218 : if (im[icnt] & mask) { \
219 : if ((im[icnt] & ~innermask) == 0) { \
220 : while (p < ncand && o < e) { \
221 : v = src[o-hseq]; \
222 : if ((ADD) == NULL) { \
223 : goto bailout; \
224 : } \
225 : cnt++; \
226 : p++; \
227 : o = canditer_next(ci); \
228 : } \
229 : } else { \
230 : while (p < ncand && o < e) { \
231 : v = src[o-hseq]; \
232 : if ((ADD) == NULL) { \
233 : goto bailout; \
234 : } \
235 : cnt += (TEST) != 0; \
236 : p++; \
237 : o = canditer_next(ci); \
238 : } \
239 : } \
240 : } else { \
241 : while (p < ncand && o < e) { \
242 : p++; \
243 : o = canditer_next(ci); \
244 : } \
245 : } \
246 : } while (false)
247 :
248 : /* inner check, dense canditer */
249 : #define impscheck_dense(TEST,ADD) \
250 : do { \
251 : const oid e = (oid) (i+limit-pr_off+hseq); \
252 : if (im[icnt] & mask) { \
253 : if ((im[icnt] & ~innermask) == 0) { \
254 : while (p < ncand && o < e) { \
255 : v = src[o-hseq]; \
256 : if ((ADD) == NULL) { \
257 : goto bailout; \
258 : } \
259 : cnt++; \
260 : p++; \
261 : o = canditer_next_dense(ci); \
262 : } \
263 : } else { \
264 : while (p < ncand && o < e) { \
265 : v = src[o-hseq]; \
266 : if ((ADD) == NULL) { \
267 : goto bailout; \
268 : } \
269 : cnt += (TEST) != 0; \
270 : p++; \
271 : o = canditer_next_dense(ci); \
272 : } \
273 : } \
274 : } else { \
275 : BUN skip_sz = MIN(ncand - p, e - o); \
276 : p += skip_sz; \
277 : o += skip_sz; \
278 : ci->next += skip_sz; \
279 : } \
280 : } while (false)
281 :
282 : /* main loop for imprints */
283 : /*
284 : * icnt is the iterator for imprints
285 : * dcnt is the iterator for dictionary entries
286 : * i is the iterator for the values in imprints
287 : */
288 : #define impsloop(ISDENSE,TEST,ADD) \
289 : do { \
290 : BUN dcnt, icnt, limit, i; \
291 : const cchdc_t *restrict d = (cchdc_t *) imprints->dict; \
292 : const uint8_t rpp = ATOMelmshift(IMPS_PAGE >> bi->shift); \
293 : o = canditer_next(ci); \
294 : for (i = 0, dcnt = 0, icnt = 0, p = 0; \
295 : dcnt < imprints->dictcnt && i <= w - hseq + pr_off && p < ncand; \
296 : dcnt++) { \
297 : GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
298 : limit = ((BUN) d[dcnt].cnt) << rpp; \
299 : while (i + limit <= o - hseq + pr_off) { \
300 : i += limit; \
301 : icnt += d[dcnt].repeat ? 1 : d[dcnt].cnt; \
302 : dcnt++; \
303 : limit = ((BUN) d[dcnt].cnt) << rpp; \
304 : } \
305 : if (!d[dcnt].repeat) { \
306 : const BUN l = icnt + d[dcnt].cnt; \
307 : limit = (BUN) 1 << rpp; \
308 : while (i + limit <= o - hseq + pr_off) { \
309 : icnt++; \
310 : i += limit; \
311 : } \
312 : for (; \
313 : icnt < l && i <= w - hseq + pr_off; \
314 : icnt++) { \
315 : impscheck##ISDENSE(TEST,ADD); \
316 : i += limit; \
317 : } \
318 : } \
319 : else { \
320 : impscheck##ISDENSE(TEST,ADD); \
321 : i += limit; \
322 : icnt++; \
323 : } \
324 : } \
325 : } while (false)
326 :
327 : static inline oid *
328 0 : quickins(oid *dst, BUN cnt, oid o, BAT *bn)
329 : {
330 0 : (void) bn;
331 0 : assert(cnt < BATcapacity(bn));
332 0 : dst[cnt] = o;
333 0 : return dst;
334 : }
335 :
336 : /* construct the mask */
337 : #define impsmask(ISDENSE,TEST,B) \
338 : do { \
339 : const uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
340 : uint##B##_t mask = 0, innermask; \
341 : const int tpe = ATOMbasetype(bi->type); \
342 : const int lbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, tl); \
343 : const int hbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, th); \
344 : /* note: (1<<n)-1 gives a sequence of n one bits */ \
345 : /* to set bits hbin..lbin inclusive, we would do: */ \
346 : /* mask = ((1 << (hbin + 1)) - 1) - ((1 << lbin) - 1); */ \
347 : /* except ((1 << (hbin + 1)) - 1) is not defined if */ \
348 : /* hbin == sizeof(uint##B##_t)*8 - 1 */ \
349 : /* the following does work, however */ \
350 : mask = (((((uint##B##_t) 1 << hbin) - 1) << 1) | 1) - (((uint##B##_t) 1 << lbin) - 1); \
351 : innermask = mask; \
352 : if (vl != minval) \
353 : innermask = IMPSunsetBit(B, innermask, lbin); \
354 : if (vh != maxval) \
355 : innermask = IMPSunsetBit(B, innermask, hbin); \
356 : if (anti) { \
357 : uint##B##_t tmp = mask; \
358 : mask = ~innermask; \
359 : innermask = ~tmp; \
360 : } \
361 : /* if there are nils, we may need to check bin 0 */ \
362 : if (!bi->nonil) \
363 : innermask = IMPSunsetBit(B, innermask, 0); \
364 : \
365 : if (BATcapacity(bn) < maximum) { \
366 : impsloop(ISDENSE, TEST, \
367 : dst = buninsfix(bn, dst, cnt, o, \
368 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
369 : * (dbl) (ncand-p) * 1.1 + 1024), \
370 : maximum)); \
371 : } else { \
372 : impsloop(ISDENSE, TEST, dst = quickins(dst, cnt, o, bn)); \
373 : } \
374 : } while (false)
375 :
376 : #define checkMINMAX(B, TYPE) \
377 : do { \
378 : const BUN *restrict imp_cnt = imprints->stats + 128; \
379 : imp_min = imp_max = nil; \
380 : for (BUN ii = 0; ii < B; ii++) { \
381 : if (is_##TYPE##_nil(imp_min) && imp_cnt[ii]) { \
382 : imp_min = basesrc[imprints->stats[ii]]; \
383 : } \
384 : if (is_##TYPE##_nil(imp_max) && imp_cnt[B-1-ii]) { \
385 : imp_max = basesrc[imprints->stats[64+B-1-ii]]; \
386 : } \
387 : } \
388 : assert(!is_##TYPE##_nil(imp_min) && \
389 : !is_##TYPE##_nil(imp_max)); \
390 : if (anti ? \
391 : vl < imp_min && vh > imp_max : \
392 : vl > imp_max || vh < imp_min) { \
393 : if (pbat) \
394 : BBPunfix(pbat->batCacheid); \
395 : return 0; \
396 : } \
397 : } while (false)
398 :
399 : /* choose number of bits */
400 : #define bitswitch(ISDENSE, TEST, TYPE) \
401 : do { \
402 : BUN ncand = ci->ncand; \
403 : assert(imprints); \
404 : *algo = parent ? "parent imprints select " #TEST " (canditer_next" #ISDENSE ")" : "imprints select " #TEST " (canditer_next" #ISDENSE ")"; \
405 : switch (imprints->bits) { \
406 : case 8: \
407 : checkMINMAX(8, TYPE); \
408 : impsmask(ISDENSE,TEST,8); \
409 : break; \
410 : case 16: \
411 : checkMINMAX(16, TYPE); \
412 : impsmask(ISDENSE,TEST,16); \
413 : break; \
414 : case 32: \
415 : checkMINMAX(32, TYPE); \
416 : impsmask(ISDENSE,TEST,32); \
417 : break; \
418 : case 64: \
419 : checkMINMAX(64, TYPE); \
420 : impsmask(ISDENSE,TEST,64); \
421 : break; \
422 : default: \
423 : MT_UNREACHABLE(); \
424 : } \
425 : } while (false)
426 :
427 : /* scan select without imprints */
428 :
429 : /* core scan select loop with & without candidates */
430 : #define scanloop(NAME,canditer_next,TEST) \
431 : do { \
432 : BUN ncand = ci->ncand; \
433 : *algo = "select: " #NAME " " #TEST " (" #canditer_next ")"; \
434 : if (BATcapacity(bn) < maximum) { \
435 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) { \
436 : o = canditer_next(ci); \
437 : v = src[o-hseq]; \
438 : if (TEST) { \
439 : dst = buninsfix(bn, dst, cnt, o, \
440 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
441 : * (dbl) (ncand-p) * 1.1 + 1024), \
442 : maximum); \
443 : if (dst == NULL) { \
444 : goto bailout; \
445 : } \
446 : cnt++; \
447 : } \
448 : } \
449 : } else { \
450 : TIMEOUT_LOOP(ncand, qry_ctx) { \
451 : o = canditer_next(ci); \
452 : v = src[o-hseq]; \
453 : assert(cnt < BATcapacity(bn)); \
454 : dst[cnt] = o; \
455 : cnt += (TEST) != 0; \
456 : } \
457 : } \
458 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
459 : } while (false)
460 :
461 : /* argument list for type-specific core scan select function call */
462 : #define scanargs \
463 : bi, ci, bn, tl, th, li, hi, equi, anti, lval, hval, lnil, \
464 : cnt, bi->b->hseqbase, dst, maximum, imprints, algo
465 :
466 : #define PREVVALUEbte(x) ((x) - 1)
467 : #define PREVVALUEsht(x) ((x) - 1)
468 : #define PREVVALUEint(x) ((x) - 1)
469 : #define PREVVALUElng(x) ((x) - 1)
470 : #ifdef HAVE_HGE
471 : #define PREVVALUEhge(x) ((x) - 1)
472 : #endif
473 : #define PREVVALUEoid(x) ((x) - 1)
474 : #define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max)
475 : #define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max)
476 :
477 : #define NEXTVALUEbte(x) ((x) + 1)
478 : #define NEXTVALUEsht(x) ((x) + 1)
479 : #define NEXTVALUEint(x) ((x) + 1)
480 : #define NEXTVALUElng(x) ((x) + 1)
481 : #ifdef HAVE_HGE
482 : #define NEXTVALUEhge(x) ((x) + 1)
483 : #endif
484 : #define NEXTVALUEoid(x) ((x) + 1)
485 : #define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max)
486 : #define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max)
487 :
488 : #define MINVALUEbte GDK_bte_min
489 : #define MINVALUEsht GDK_sht_min
490 : #define MINVALUEint GDK_int_min
491 : #define MINVALUElng GDK_lng_min
492 : #ifdef HAVE_HGE
493 : #define MINVALUEhge GDK_hge_min
494 : #endif
495 : #define MINVALUEoid GDK_oid_min
496 : #define MINVALUEflt GDK_flt_min
497 : #define MINVALUEdbl GDK_dbl_min
498 :
499 : #define MAXVALUEbte GDK_bte_max
500 : #define MAXVALUEsht GDK_sht_max
501 : #define MAXVALUEint GDK_int_max
502 : #define MAXVALUElng GDK_lng_max
503 : #ifdef HAVE_HGE
504 : #define MAXVALUEhge GDK_hge_max
505 : #endif
506 : #define MAXVALUEoid GDK_oid_max
507 : #define MAXVALUEflt GDK_flt_max
508 : #define MAXVALUEdbl GDK_dbl_max
509 :
510 : #define choose(NAME, ISDENSE, TEST, TYPE) \
511 : do { \
512 : if (imprints) { \
513 : bitswitch(ISDENSE, TEST, TYPE); \
514 : } else { \
515 : scanloop(NAME, canditer_next##ISDENSE, TEST); \
516 : } \
517 : } while (false)
518 :
519 : /* definition of type-specific core scan select function */
520 : #define scanfunc(NAME, TYPE, ISDENSE) \
521 : static BUN \
522 : NAME##_##TYPE(BATiter *bi, struct canditer *restrict ci, BAT *bn, \
523 : const TYPE *tl, const TYPE *th, bool li, bool hi, \
524 : bool equi, bool anti, bool lval, bool hval, \
525 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst, \
526 : BUN maximum, Imprints *imprints, const char **algo) \
527 : { \
528 : TYPE vl = *tl; \
529 : TYPE vh = *th; \
530 : TYPE imp_min; \
531 : TYPE imp_max; \
532 : TYPE v; \
533 : const TYPE nil = TYPE##_nil; \
534 : const TYPE minval = MINVALUE##TYPE; \
535 : const TYPE maxval = MAXVALUE##TYPE; \
536 : const TYPE *src = (const TYPE *) bi->base; \
537 : const TYPE *basesrc; \
538 : oid o, w; \
539 : BUN p; \
540 : BUN pr_off = 0; \
541 : bat parent = 0; \
542 : BAT *pbat = NULL; \
543 : (void) li; \
544 : (void) hi; \
545 : (void) lval; \
546 : (void) hval; \
547 : assert(li == !anti); \
548 : assert(hi == !anti); \
549 : assert(lval); \
550 : assert(hval); \
551 : size_t counter = 0; \
552 : QryCtx *qry_ctx = MT_thread_get_qry_ctx(); \
553 : if (imprints && imprints->imprints.parentid != bi->b->batCacheid) { \
554 : parent = imprints->imprints.parentid; \
555 : pbat = BATdescriptor(parent); \
556 : if (pbat == NULL) { \
557 : /* can't load parent: don't use imprints */ \
558 : imprints = NULL; \
559 : basesrc = src; \
560 : } else { \
561 : basesrc = (const TYPE *) Tloc(pbat, 0); \
562 : pr_off = (BUN) (src - basesrc); \
563 : } \
564 : } else { \
565 : basesrc = src; \
566 : } \
567 : w = canditer_last(ci); \
568 : if (equi) { \
569 : assert(imprints == NULL); \
570 : if (lnil) \
571 : scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v)); \
572 : else \
573 : scanloop(NAME, canditer_next##ISDENSE, v == vl); \
574 : } else if (anti) { \
575 : if (bi->nonil) { \
576 : choose(NAME, ISDENSE, (v <= vl || v >= vh), TYPE); \
577 : } else { \
578 : choose(NAME, ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh), TYPE); \
579 : } \
580 : } else if (bi->nonil && vl == minval) { \
581 : choose(NAME, ISDENSE, v <= vh, TYPE); \
582 : } else if (vh == maxval) { \
583 : choose(NAME, ISDENSE, v >= vl, TYPE); \
584 : } else { \
585 : choose(NAME, ISDENSE, v >= vl && v <= vh, TYPE); \
586 : } \
587 : if (pbat) \
588 : BBPunfix(pbat->batCacheid); \
589 : return cnt; \
590 : bailout: \
591 : if (pbat) \
592 : BBPunfix(pbat->batCacheid); \
593 : BBPreclaim(bn); \
594 : return BUN_NONE; \
595 : }
596 :
597 : static BUN
598 3205 : fullscan_any(BATiter *bi, struct canditer *restrict ci, BAT *bn,
599 : const void *tl, const void *th,
600 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
601 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
602 : BUN maximum, Imprints *imprints, const char **algo)
603 : {
604 3205 : const void *v;
605 3205 : const void *restrict nil = ATOMnilptr(bi->type);
606 3205 : int (*cmp)(const void *, const void *) = ATOMcompare(bi->type);
607 3205 : oid o;
608 3205 : BUN p, ncand = ci->ncand;
609 3205 : int c;
610 :
611 3205 : (void) maximum;
612 3205 : (void) imprints;
613 3205 : (void) lnil;
614 3205 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
615 :
616 3205 : if (equi) {
617 488 : *algo = "select: fullscan equi";
618 488 : if (ci->tpe == cand_dense) {
619 3824585 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
620 3823584 : o = canditer_next_dense(ci);
621 3823584 : v = BUNtail(*bi, o-hseq);
622 3823584 : if ((*cmp)(tl, v) == 0) {
623 62175 : dst = buninsfix(bn, dst, cnt, o,
624 20718 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
625 20718 : * (dbl) (ncand-p) * 1.1 + 1024),
626 : maximum);
627 20739 : if (dst == NULL) {
628 0 : BBPreclaim(bn);
629 0 : return BUN_NONE;
630 : }
631 20739 : cnt++;
632 : }
633 : }
634 : } else {
635 40661 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
636 39956 : o = canditer_next(ci);
637 39946 : v = BUNtail(*bi, o-hseq);
638 39955 : if ((*cmp)(tl, v) == 0) {
639 61960 : dst = buninsfix(bn, dst, cnt, o,
640 20652 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
641 20652 : * (dbl) (ncand-p) * 1.1 + 1024),
642 : maximum);
643 20656 : if (dst == NULL) {
644 0 : BBPreclaim(bn);
645 0 : return BUN_NONE;
646 : }
647 20656 : cnt++;
648 : }
649 : }
650 : }
651 2717 : } else if (anti) {
652 569 : *algo = "select: fullscan anti";
653 569 : if (ci->tpe == cand_dense) {
654 4946 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
655 3491 : o = canditer_next_dense(ci);
656 3491 : v = BUNtail(*bi, o-hseq);
657 3491 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
658 3355 : ((lval &&
659 3355 : ((c = (*cmp)(tl, v)) > 0 ||
660 2290 : (!li && c == 0))) ||
661 2290 : (hval &&
662 2290 : ((c = (*cmp)(th, v)) < 0 ||
663 505 : (!hi && c == 0))))) {
664 8550 : dst = buninsfix(bn, dst, cnt, o,
665 2850 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
666 2850 : * (dbl) (ncand-p) * 1.1 + 1024),
667 : maximum);
668 2850 : if (dst == NULL) {
669 0 : BBPreclaim(bn);
670 0 : return BUN_NONE;
671 : }
672 2850 : cnt++;
673 : }
674 : }
675 : } else {
676 5563 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
677 5311 : o = canditer_next(ci);
678 5311 : v = BUNtail(*bi, o-hseq);
679 5311 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
680 5311 : ((lval &&
681 5311 : ((c = (*cmp)(tl, v)) > 0 ||
682 3120 : (!li && c == 0))) ||
683 3120 : (hval &&
684 3120 : ((c = (*cmp)(th, v)) < 0 ||
685 1244 : (!hi && c == 0))))) {
686 12201 : dst = buninsfix(bn, dst, cnt, o,
687 4067 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
688 4067 : * (dbl) (ncand-p) * 1.1 + 1024),
689 : maximum);
690 4067 : if (dst == NULL) {
691 0 : BBPreclaim(bn);
692 0 : return BUN_NONE;
693 : }
694 4067 : cnt++;
695 : }
696 : }
697 : }
698 : } else {
699 2148 : *algo = "select: fullscan range";
700 2148 : if (ci->tpe == cand_dense) {
701 5732245 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
702 5725501 : o = canditer_next_dense(ci);
703 5725501 : v = BUNtail(*bi, o-hseq);
704 5733924 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
705 4027 : ((!lval ||
706 4027 : (c = cmp(tl, v)) < 0 ||
707 3885977 : (li && c == 0)) &&
708 3906 : (!hval ||
709 3906 : (c = cmp(th, v)) > 0 ||
710 3662 : (hi && c == 0)))) {
711 11657182 : dst = buninsfix(bn, dst, cnt, o,
712 3881354 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
713 3881354 : * (dbl) (ncand-p) * 1.1 + 1024),
714 : maximum);
715 3894474 : if (dst == NULL) {
716 0 : BBPreclaim(bn);
717 0 : return BUN_NONE;
718 : }
719 3894474 : cnt++;
720 : }
721 : }
722 : } else {
723 1787261 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
724 1786956 : o = canditer_next(ci);
725 1785137 : v = BUNtail(*bi, o-hseq);
726 1793135 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
727 226 : ((!lval ||
728 226 : (c = cmp(tl, v)) < 0 ||
729 1791070 : (li && c == 0)) &&
730 0 : (!hval ||
731 0 : (c = cmp(th, v)) > 0 ||
732 0 : (hi && c == 0)))) {
733 5366445 : dst = buninsfix(bn, dst, cnt, o,
734 1791002 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
735 1791002 : * (dbl) (ncand-p) * 1.1 + 1024),
736 : maximum);
737 1784441 : if (dst == NULL) {
738 0 : BBPreclaim(bn);
739 0 : return BUN_NONE;
740 : }
741 1784441 : cnt++;
742 : }
743 : }
744 : }
745 : }
746 3208 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
747 : return cnt;
748 0 : bailout:
749 0 : BBPreclaim(bn);
750 : return BUN_NONE;
751 : }
752 :
753 : static BUN
754 196732 : fullscan_str(BATiter *bi, struct canditer *restrict ci, BAT *bn,
755 : const char *tl, const char *th,
756 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
757 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
758 : BUN maximum, Imprints *imprints, const char **algo)
759 : {
760 196732 : var_t pos;
761 196732 : BUN p, ncand = ci->ncand;
762 196732 : oid o;
763 196732 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
764 :
765 196777 : if (!equi || !GDK_ELIMDOUBLES(bi->vh))
766 3099 : return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
767 : lval, hval, lnil, cnt, hseq, dst,
768 : maximum, imprints, algo);
769 193678 : if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) {
770 2990 : *algo = "select: fullscan equi strelim (nomatch)";
771 2990 : return 0;
772 : }
773 190694 : if (pos == (var_t) -1) {
774 0 : BBPreclaim(bn);
775 0 : return BUN_NONE;
776 : }
777 190694 : *algo = "select: fullscan equi strelim";
778 190694 : assert(pos >= GDK_VAROFFSET);
779 190694 : switch (bi->width) {
780 155439 : case 1: {
781 155439 : const unsigned char *ptr = (const unsigned char *) bi->base;
782 155439 : pos -= GDK_VAROFFSET;
783 155439 : if (ci->tpe == cand_dense) {
784 22335839 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
785 21869946 : o = canditer_next_dense(ci);
786 21869946 : if (ptr[o - hseq] == pos) {
787 22576568 : dst = buninsfix(bn, dst, cnt, o,
788 7525520 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
789 7525520 : * (dbl) (ncand-p) * 1.1 + 1024),
790 : maximum);
791 7525528 : if (dst == NULL) {
792 0 : BBPreclaim(bn);
793 0 : return BUN_NONE;
794 : }
795 7525528 : cnt++;
796 : }
797 : }
798 : } else {
799 11338833 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
800 11335122 : o = canditer_next(ci);
801 11347988 : if (ptr[o - hseq] == pos) {
802 11597518 : dst = buninsfix(bn, dst, cnt, o,
803 3870129 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
804 3870129 : * (dbl) (ncand-p) * 1.1 + 1024),
805 : maximum);
806 3857260 : if (dst == NULL) {
807 0 : BBPreclaim(bn);
808 0 : return BUN_NONE;
809 : }
810 3857260 : cnt++;
811 : }
812 : }
813 : }
814 : break;
815 : }
816 35251 : case 2: {
817 35251 : const unsigned short *ptr = (const unsigned short *) bi->base;
818 35251 : pos -= GDK_VAROFFSET;
819 35251 : if (ci->tpe == cand_dense) {
820 2323526 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
821 2244283 : o = canditer_next_dense(ci);
822 2244283 : if (ptr[o - hseq] == pos) {
823 153597 : dst = buninsfix(bn, dst, cnt, o,
824 51170 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
825 51170 : * (dbl) (ncand-p) * 1.1 + 1024),
826 : maximum);
827 51257 : if (dst == NULL) {
828 0 : BBPreclaim(bn);
829 0 : return BUN_NONE;
830 : }
831 51257 : cnt++;
832 : }
833 : }
834 : } else {
835 3057233 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
836 3030376 : o = canditer_next(ci);
837 3029695 : if (ptr[o - hseq] == pos) {
838 273550 : dst = buninsfix(bn, dst, cnt, o,
839 90947 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
840 90947 : * (dbl) (ncand-p) * 1.1 + 1024),
841 : maximum);
842 91656 : if (dst == NULL) {
843 0 : BBPreclaim(bn);
844 0 : return BUN_NONE;
845 : }
846 91656 : cnt++;
847 : }
848 : }
849 : }
850 : break;
851 : }
852 : #if SIZEOF_VAR_T == 8
853 2 : case 4: {
854 2 : const unsigned int *ptr = (const unsigned int *) bi->base;
855 2 : if (ci->tpe == cand_dense) {
856 977 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
857 971 : o = canditer_next_dense(ci);
858 971 : if (ptr[o - hseq] == pos) {
859 747 : dst = buninsfix(bn, dst, cnt, o,
860 249 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
861 249 : * (dbl) (ncand-p) * 1.1 + 1024),
862 : maximum);
863 249 : if (dst == NULL) {
864 0 : BBPreclaim(bn);
865 0 : return BUN_NONE;
866 : }
867 249 : cnt++;
868 : }
869 : }
870 : } else {
871 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
872 0 : o = canditer_next(ci);
873 0 : if (ptr[o - hseq] == pos) {
874 0 : dst = buninsfix(bn, dst, cnt, o,
875 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
876 0 : * (dbl) (ncand-p) * 1.1 + 1024),
877 : maximum);
878 0 : if (dst == NULL) {
879 0 : BBPreclaim(bn);
880 0 : return BUN_NONE;
881 : }
882 0 : cnt++;
883 : }
884 : }
885 : }
886 : break;
887 : }
888 : #endif
889 2 : default: {
890 2 : const var_t *ptr = (const var_t *) bi->base;
891 2 : if (ci->tpe == cand_dense) {
892 26 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
893 20 : o = canditer_next_dense(ci);
894 20 : if (ptr[o - hseq] == pos) {
895 6 : dst = buninsfix(bn, dst, cnt, o,
896 2 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
897 2 : * (dbl) (ncand-p) * 1.1 + 1024),
898 : maximum);
899 2 : if (dst == NULL) {
900 0 : BBPreclaim(bn);
901 0 : return BUN_NONE;
902 : }
903 2 : cnt++;
904 : }
905 : }
906 : } else {
907 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
908 0 : o = canditer_next(ci);
909 0 : if (ptr[o - hseq] == pos) {
910 0 : dst = buninsfix(bn, dst, cnt, o,
911 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
912 0 : * (dbl) (ncand-p) * 1.1 + 1024),
913 : maximum);
914 0 : if (dst == NULL) {
915 0 : BBPreclaim(bn);
916 0 : return BUN_NONE;
917 : }
918 0 : cnt++;
919 : }
920 : }
921 : }
922 : break;
923 : }
924 : }
925 190805 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
926 : return cnt;
927 0 : bailout:
928 0 : BBPreclaim(bn);
929 : return BUN_NONE;
930 : }
931 :
932 : /* scan select type switch */
933 : #ifdef HAVE_HGE
934 : #define scanfunc_hge(NAME, ISDENSE) \
935 : scanfunc(NAME, hge, ISDENSE)
936 : #else
937 : #define scanfunc_hge(NAME, ISDENSE)
938 : #endif
939 : #define scan_sel(NAME, ISDENSE) \
940 : scanfunc(NAME, bte, ISDENSE) \
941 : scanfunc(NAME, sht, ISDENSE) \
942 : scanfunc(NAME, int, ISDENSE) \
943 : scanfunc(NAME, flt, ISDENSE) \
944 : scanfunc(NAME, dbl, ISDENSE) \
945 : scanfunc(NAME, lng, ISDENSE) \
946 : scanfunc_hge(NAME, ISDENSE)
947 :
948 : /* scan/imprints select */
949 192335705 : scan_sel(fullscan, )
950 1309163041 : scan_sel(densescan, _dense)
951 :
952 :
953 : static BAT *
954 1265533 : scanselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
955 : const void *tl, const void *th,
956 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
957 : bool lnil, BUN maximum, Imprints *imprints, const char **algo)
958 : {
959 : #ifndef NDEBUG
960 1265533 : int (*cmp)(const void *, const void *);
961 : #endif
962 1265533 : int t;
963 1265533 : BUN cnt = 0;
964 1265533 : oid *restrict dst;
965 :
966 1265533 : assert(bi->b != NULL);
967 1265533 : assert(bn != NULL);
968 1265533 : assert(bn->ttype == TYPE_oid);
969 1265533 : assert(!lval || tl != NULL);
970 1265533 : assert(!hval || th != NULL);
971 1265533 : assert(!equi || (li && hi && !anti));
972 1265533 : assert(!anti || lval || hval);
973 1265533 : assert(bi->type != TYPE_void || equi || bi->nonil);
974 :
975 : #ifndef NDEBUG
976 1265533 : cmp = ATOMcompare(bi->type);
977 : #endif
978 :
979 1265533 : assert(!lval || !hval || (*cmp)(tl, th) <= 0);
980 :
981 1265412 : dst = (oid *) Tloc(bn, 0);
982 :
983 1265412 : t = ATOMbasetype(bi->type);
984 :
985 : /* call type-specific core scan select function */
986 1265412 : switch (t) {
987 33291 : case TYPE_bte:
988 33291 : if (ci->tpe == cand_dense)
989 31709 : cnt = densescan_bte(scanargs);
990 : else
991 1582 : cnt = fullscan_bte(scanargs);
992 : break;
993 7327 : case TYPE_sht:
994 7327 : if (ci->tpe == cand_dense)
995 6862 : cnt = densescan_sht(scanargs);
996 : else
997 465 : cnt = fullscan_sht(scanargs);
998 : break;
999 1023431 : case TYPE_int:
1000 1023431 : if (ci->tpe == cand_dense)
1001 722974 : cnt = densescan_int(scanargs);
1002 : else
1003 300457 : cnt = fullscan_int(scanargs);
1004 : break;
1005 23 : case TYPE_flt:
1006 23 : if (ci->tpe == cand_dense)
1007 22 : cnt = densescan_flt(scanargs);
1008 : else
1009 1 : cnt = fullscan_flt(scanargs);
1010 : break;
1011 147 : case TYPE_dbl:
1012 147 : if (ci->tpe == cand_dense)
1013 138 : cnt = densescan_dbl(scanargs);
1014 : else
1015 9 : cnt = fullscan_dbl(scanargs);
1016 : break;
1017 4199 : case TYPE_lng:
1018 4199 : if (ci->tpe == cand_dense)
1019 4186 : cnt = densescan_lng(scanargs);
1020 : else
1021 13 : cnt = fullscan_lng(scanargs);
1022 : break;
1023 : #ifdef HAVE_HGE
1024 130 : case TYPE_hge:
1025 130 : if (ci->tpe == cand_dense)
1026 121 : cnt = densescan_hge(scanargs);
1027 : else
1028 9 : cnt = fullscan_hge(scanargs);
1029 : break;
1030 : #endif
1031 196756 : case TYPE_str:
1032 196756 : cnt = fullscan_str(scanargs);
1033 196756 : break;
1034 108 : default:
1035 108 : cnt = fullscan_any(scanargs);
1036 108 : break;
1037 : }
1038 1266016 : if (cnt == BUN_NONE) {
1039 : return NULL;
1040 : }
1041 1266015 : assert(bn->batCapacity >= cnt);
1042 :
1043 1266015 : BATsetcount(bn, cnt);
1044 1266174 : bn->tsorted = true;
1045 1266174 : bn->trevsorted = bn->batCount <= 1;
1046 1266174 : bn->tkey = true;
1047 1266174 : bn->tseqbase = cnt == 0 ? 0 : cnt == 1 || cnt == bi->count ? bi->b->hseqbase : oid_nil;
1048 :
1049 1266174 : return bn;
1050 : }
1051 :
1052 : /* Normalize the variables li, hi, lval, hval, possibly changing anti
1053 : * in the process. This works for all (and only) numeric types.
1054 : *
1055 : * Note that the expression x < v is equivalent to x <= v' where v' is
1056 : * the next smaller value in the domain of v (similarly for x > v).
1057 : * Also note that for floating point numbers there actually is such a
1058 : * value. In fact, there is a function in standard C that calculates
1059 : * that value.
1060 : *
1061 : * The result of this macro is:
1062 : * li == !anti, hi == !anti, lval == true, hval == true
1063 : * This means that all ranges that we check for are closed ranges. If
1064 : * a range is one-sided, we fill in the minimum resp. maximum value in
1065 : * the domain so that we create a closed range. */
1066 : #define NORMALIZE(TYPE) \
1067 : do { \
1068 : if (anti && li) { \
1069 : /* -inf < x < vl === -inf < x <= vl-1 */ \
1070 : if (*(TYPE*)tl == MINVALUE##TYPE) { \
1071 : /* -inf < x < MIN || *th <[=] x < +inf */ \
1072 : /* degenerates into half range */ \
1073 : /* *th <[=] x < +inf */ \
1074 : anti = false; \
1075 : tl = th; \
1076 : li = !hi; \
1077 : hval = false; \
1078 : /* further dealt with below */ \
1079 : } else { \
1080 : vl.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)tl); \
1081 : tl = &vl.v_##TYPE; \
1082 : li = false; \
1083 : } \
1084 : } \
1085 : if (anti && hi) { \
1086 : /* vl < x < +inf === vl+1 <= x < +inf */ \
1087 : if (*(TYPE*)th == MAXVALUE##TYPE) { \
1088 : /* -inf < x <[=] *tl || MAX > x > +inf */ \
1089 : /* degenerates into half range */ \
1090 : /* -inf < x <[=] *tl */ \
1091 : anti = false; \
1092 : if (tl == &vl.v_##TYPE) { \
1093 : vh.v_##TYPE = vl.v_##TYPE; \
1094 : th = &vh.v_##TYPE; \
1095 : } else { \
1096 : th = tl; \
1097 : } \
1098 : hi = !li; \
1099 : lval = false; \
1100 : /* further dealt with below */ \
1101 : } else { \
1102 : vh.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)th); \
1103 : th = &vh.v_##TYPE; \
1104 : hi = false; \
1105 : } \
1106 : } \
1107 : if (!anti) { \
1108 : if (lval) { \
1109 : /* range bounded on left */ \
1110 : if (!li) { \
1111 : /* open range on left */ \
1112 : if (*(TYPE*)tl == MAXVALUE##TYPE) { \
1113 : bat_iterator_end(&bi); \
1114 : return BATdense(0, 0, 0); \
1115 : } \
1116 : /* vl < x === vl+1 <= x */ \
1117 : vl.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)tl); \
1118 : li = true; \
1119 : tl = &vl.v_##TYPE; \
1120 : } \
1121 : } else { \
1122 : /* -inf, i.e. smallest value */ \
1123 : vl.v_##TYPE = MINVALUE##TYPE; \
1124 : li = true; \
1125 : tl = &vl.v_##TYPE; \
1126 : lval = true; \
1127 : } \
1128 : if (hval) { \
1129 : /* range bounded on right */ \
1130 : if (!hi) { \
1131 : /* open range on right */ \
1132 : if (*(TYPE*)th == MINVALUE##TYPE) { \
1133 : bat_iterator_end(&bi); \
1134 : return BATdense(0, 0, 0); \
1135 : } \
1136 : /* x < vh === x <= vh-1 */ \
1137 : vh.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)th); \
1138 : hi = true; \
1139 : th = &vh.v_##TYPE; \
1140 : } \
1141 : } else { \
1142 : /* +inf, i.e. largest value */ \
1143 : vh.v_##TYPE = MAXVALUE##TYPE; \
1144 : hi = true; \
1145 : th = &vh.v_##TYPE; \
1146 : hval = true; \
1147 : } \
1148 : if (*(TYPE*)tl > *(TYPE*)th) { \
1149 : bat_iterator_end(&bi); \
1150 : return BATdense(0, 0, 0); \
1151 : } \
1152 : } \
1153 : assert(lval); \
1154 : assert(hval); \
1155 : assert(li != anti); \
1156 : assert(hi != anti); \
1157 : /* if anti is set, we can now check */ \
1158 : /* (x <= *tl || x >= *th) && x != nil */ \
1159 : /* if equi==true, the check is x != *tl && x != nil */ \
1160 : /* if anti is not set, we can check just */ \
1161 : /* *tl <= x && x <= *th */ \
1162 : /* if equi==true, the check is x == *tl */ \
1163 : /* note that this includes the check for != nil */ \
1164 : /* in the case where equi==true, the check is x == *tl */ \
1165 : } while (false)
1166 :
1167 : #if SIZEOF_BUN == SIZEOF_INT
1168 : #define CALC_ESTIMATE(TPE) \
1169 : do { \
1170 : if (*(TPE*)tl < 1) { \
1171 : if ((int) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1172 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1173 : } else { \
1174 : if (-(int) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1175 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1176 : } \
1177 : } while (0)
1178 : #else
1179 : #define CALC_ESTIMATE(TPE) \
1180 : do { \
1181 : if (*(TPE*)tl < 1) { \
1182 : if ((lng) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1183 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1184 : } else { \
1185 : if (-(lng) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1186 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1187 : } \
1188 : } while (0)
1189 : #endif
1190 :
1191 : static enum range_comp_t
1192 1839078 : BATrange(BATiter *bi, const void *tl, const void *th, bool li, bool hi)
1193 : {
1194 1839078 : enum range_comp_t range;
1195 1839078 : const ValRecord *minprop = NULL, *maxprop = NULL;
1196 1839078 : const void *minval = NULL, *maxval = NULL;
1197 1839078 : bool maxincl = true;
1198 1839078 : BAT *pb = NULL;
1199 1839078 : int c;
1200 1839078 : int (*atomcmp) (const void *, const void *) = ATOMcompare(bi->type);
1201 :
1202 1839078 : if (tl && (*atomcmp)(tl, ATOMnilptr(bi->type)) == 0)
1203 21151 : tl = NULL;
1204 1839389 : if (th && (*atomcmp)(th, ATOMnilptr(bi->type)) == 0)
1205 19601 : th = NULL;
1206 1840443 : if (tl == NULL && th == NULL)
1207 : return range_contains; /* looking for everything */
1208 :
1209 1824136 : if (VIEWtparent(bi->b))
1210 1645221 : pb = BATdescriptor(VIEWtparent(bi->b));
1211 :
1212 : /* keep locked while we look at the property values */
1213 1824531 : MT_lock_set(&bi->b->theaplock);
1214 1824402 : if (bi->minpos != BUN_NONE)
1215 47205 : minval = BUNtail(*bi, bi->minpos);
1216 1777197 : else if ((minprop = BATgetprop_nolock(bi->b, GDK_MIN_BOUND)) != NULL)
1217 0 : minval = VALptr(minprop);
1218 1823821 : if (bi->maxpos != BUN_NONE) {
1219 47235 : maxval = BUNtail(*bi, bi->maxpos);
1220 : maxincl = true;
1221 1776586 : } else if ((maxprop = BATgetprop_nolock(bi->b, GDK_MAX_BOUND)) != NULL) {
1222 0 : maxval = VALptr(maxprop);
1223 0 : maxincl = false;
1224 : }
1225 1823998 : bool keep = false; /* keep lock on parent bat? */
1226 1823998 : if (minprop == NULL || maxprop == NULL) {
1227 1823753 : if (pb != NULL) {
1228 1645044 : MT_lock_set(&pb->theaplock);
1229 1645471 : if (minprop == NULL && (minprop = BATgetprop_nolock(pb, GDK_MIN_BOUND)) != NULL) {
1230 13 : keep = true;
1231 13 : minval = VALptr(minprop);
1232 : }
1233 1645396 : if (maxprop == NULL && (maxprop = BATgetprop_nolock(pb, GDK_MAX_BOUND)) != NULL) {
1234 9 : keep = true;
1235 9 : maxval = VALptr(maxprop);
1236 9 : maxincl = true;
1237 : }
1238 1645491 : if (!keep) {
1239 1645130 : MT_lock_unset(&pb->theaplock);
1240 : }
1241 : }
1242 : }
1243 :
1244 1824900 : if (minprop == NULL && maxprop == NULL) {
1245 : range = range_inside; /* strictly: unknown */
1246 13 : } else if (maxprop &&
1247 5 : tl &&
1248 5 : ((c = atomcmp(tl, maxval)) > 0 ||
1249 0 : ((!maxincl || !li) && c == 0))) {
1250 : range = range_after;
1251 10 : } else if (minprop &&
1252 7 : th &&
1253 7 : ((c = atomcmp(th, minval)) < 0 ||
1254 6 : (!hi && c == 0))) {
1255 : range = range_before;
1256 8 : } else if (tl == NULL) {
1257 4 : if (minprop == NULL) {
1258 0 : c = atomcmp(th, maxval);
1259 0 : if (c < 0 || ((maxincl || !hi) && c == 0))
1260 : range = range_atstart;
1261 : else
1262 1 : range = range_contains;
1263 : } else {
1264 4 : c = atomcmp(th, minval);
1265 4 : if (c < 0 || (!hi && c == 0))
1266 : range = range_before;
1267 4 : else if (maxprop == NULL)
1268 : range = range_atstart;
1269 : else {
1270 2 : c = atomcmp(th, maxval);
1271 2 : if (c < 0 || ((maxincl || !hi) && c == 0))
1272 : range = range_atstart;
1273 : else
1274 1 : range = range_contains;
1275 : }
1276 : }
1277 4 : } else if (th == NULL) {
1278 3 : if (maxprop == NULL) {
1279 2 : c = atomcmp(tl, minval);
1280 2 : if (c >= 0)
1281 : range = range_atend;
1282 : else
1283 1 : range = range_contains;
1284 : } else {
1285 1 : c = atomcmp(tl, maxval);
1286 1 : if (c > 0 || ((!maxincl || !li) && c == 0))
1287 : range = range_after;
1288 1 : else if (minprop == NULL)
1289 : range = range_atend;
1290 : else {
1291 1 : c = atomcmp(tl, minval);
1292 1 : if (c >= 0)
1293 : range = range_atend;
1294 : else
1295 1 : range = range_contains;
1296 : }
1297 : }
1298 1 : } else if (minprop == NULL) {
1299 0 : c = atomcmp(th, maxval);
1300 0 : if (c < 0 || ((maxincl || !hi) && c == 0))
1301 : range = range_inside;
1302 : else
1303 2 : range = range_atend;
1304 1 : } else if (maxprop == NULL) {
1305 0 : c = atomcmp(tl, minval);
1306 0 : if (c >= 0)
1307 : range = range_inside;
1308 : else
1309 4 : range = range_atstart;
1310 : } else {
1311 1 : c = atomcmp(tl, minval);
1312 1 : if (c >= 0) {
1313 1 : c = atomcmp(th, maxval);
1314 1 : if (c < 0 || ((maxincl || !hi) && c == 0))
1315 : range = range_inside;
1316 : else
1317 2 : range = range_atend;
1318 : } else {
1319 0 : c = atomcmp(th, maxval);
1320 0 : if (c < 0 || ((maxincl || !hi) && c == 0))
1321 : range = range_atstart;
1322 : else
1323 1 : range = range_contains;
1324 : }
1325 : }
1326 :
1327 1824900 : MT_lock_unset(&bi->b->theaplock);
1328 1824812 : if (pb) {
1329 1645600 : if (keep)
1330 13 : MT_lock_unset(&pb->theaplock);
1331 1645600 : BBPreclaim(pb);
1332 : }
1333 :
1334 : return range;
1335 : }
1336 :
1337 : /* generic range select
1338 : *
1339 : * Return a BAT with the OID values of b for qualifying tuples. The
1340 : * return BAT is sorted (i.e. in the same order as the input BAT).
1341 : *
1342 : * If s is non-NULL, it is a list of candidates. s must be sorted.
1343 : *
1344 : * tl may not be NULL, li, hi, and anti must be either 0 or 1.
1345 : *
1346 : * If th is NULL, hi is ignored.
1347 : *
1348 : * If anti is 0, qualifying tuples are those whose value is between tl
1349 : * and th (as in x >[=] tl && x <[=] th, where equality depends on li
1350 : * and hi--so if tl > th, nothing will be returned). If li or hi is
1351 : * 1, the respective boundary is inclusive, otherwise exclusive. If
1352 : * th is NULL it is taken to be equal to tl, turning this into an
1353 : * equi- or point-select. Note that for a point select to return
1354 : * anything, li (and hi if th was not NULL) must be 1. There is a
1355 : * special case if tl is nil and th is NULL. This is the only way to
1356 : * select for nil values.
1357 : *
1358 : * If anti is 1, the result is the complement of what the result would
1359 : * be if anti were 0, except that nils are filtered out.
1360 : *
1361 : * In brief:
1362 : * - if tl==nil and th==NULL and anti==0, return all nils (only way to
1363 : * get nils);
1364 : * - it tl==nil and th==nil, return all but nils;
1365 : * - if tl==nil and th!=NULL, no lower bound;
1366 : * - if th==NULL or tl==th, point (equi) select;
1367 : * - if th==nil, no upper bound
1368 : *
1369 : * A complete breakdown of the various arguments follows. Here, v, v1
1370 : * and v2 are values from the appropriate domain, and
1371 : * v != nil, v1 != nil, v2 != nil, v1 < v2.
1372 : * tl th li hi anti result list of OIDs for values
1373 : * -----------------------------------------------------------------
1374 : * nil NULL true ignored false x == nil (only way to get nil)
1375 : * nil NULL false ignored false NOTHING
1376 : * nil NULL ignored ignored true x != nil
1377 : * nil nil ignored ignored false x != nil
1378 : * nil nil ignored ignored true NOTHING
1379 : * nil v ignored false false x < v
1380 : * nil v ignored true false x <= v
1381 : * nil v ignored false true x >= v
1382 : * nil v ignored true true x > v
1383 : * v nil false ignored false x > v
1384 : * v nil true ignored false x >= v
1385 : * v nil false ignored true x <= v
1386 : * v nil true ignored true x < v
1387 : * v NULL false ignored false NOTHING
1388 : * v NULL true ignored false x == v
1389 : * v NULL false ignored true x != nil
1390 : * v NULL true ignored true x != v
1391 : * v v false false false NOTHING
1392 : * v v true false false NOTHING
1393 : * v v false true false NOTHING
1394 : * v v true true false x == v
1395 : * v v false false true x != nil
1396 : * v v true false true x != nil
1397 : * v v false true true x != nil
1398 : * v v true true true x != v
1399 : * v1 v2 false false false v1 < x < v2
1400 : * v1 v2 true false false v1 <= x < v2
1401 : * v1 v2 false true false v1 < x <= v2
1402 : * v1 v2 true true false v1 <= x <= v2
1403 : * v1 v2 false false true x <= v1 or x >= v2
1404 : * v1 v2 true false true x < v1 or x >= v2
1405 : * v1 v2 false true true x <= v1 or x > v2
1406 : * v1 v2 true true true x < v1 or x > v2
1407 : * v2 v1 ignored ignored false NOTHING
1408 : * v2 v1 ignored ignored true x != nil
1409 : */
1410 : BAT *
1411 3144105 : BATselect(BAT *b, BAT *s, const void *tl, const void *th,
1412 : bool li, bool hi, bool anti)
1413 : {
1414 3144105 : bool lval; /* low value used for comparison */
1415 3144105 : bool lnil; /* low value is nil */
1416 3144105 : bool hval; /* high value used for comparison */
1417 3144105 : bool equi; /* select for single value (not range) */
1418 3144105 : bool wanthash = false; /* use hash (equi must be true) */
1419 3144105 : bool havehash = false; /* we have a hash (and the hashlock) */
1420 3144105 : bool phash = false; /* use hash on parent BAT (if view) */
1421 3144105 : int t; /* data type */
1422 3144105 : bat parent; /* b's parent bat (if b is a view) */
1423 3144105 : const void *nil;
1424 3144105 : BAT *bn;
1425 3144105 : struct canditer ci;
1426 3144105 : BUN estimate = BUN_NONE, maximum = BUN_NONE;
1427 3144105 : oid vwl = 0, vwh = 0;
1428 3144105 : lng vwo = 0;
1429 3144105 : Heap *oidxh = NULL;
1430 3144105 : const char *algo;
1431 3144105 : union {
1432 : bte v_bte;
1433 : sht v_sht;
1434 : int v_int;
1435 : lng v_lng;
1436 : #ifdef HAVE_HGE
1437 : hge v_hge;
1438 : #endif
1439 : flt v_flt;
1440 : dbl v_dbl;
1441 : oid v_oid;
1442 : } vl, vh;
1443 3144105 : enum range_comp_t range;
1444 3144105 : const bool notnull = BATgetprop(b, GDK_NOT_NULL) != NULL;
1445 3147961 : lng t0 = GDKusec();
1446 :
1447 3147191 : BATcheck(b, NULL);
1448 3147191 : if (tl == NULL) {
1449 0 : GDKerror("tl value required");
1450 0 : return NULL;
1451 : }
1452 :
1453 3147191 : if (s && s->ttype != TYPE_msk && !s->tsorted) {
1454 0 : GDKerror("invalid argument: s must be sorted.\n");
1455 0 : return NULL;
1456 : }
1457 :
1458 3147191 : canditer_init(&ci, b, s);
1459 3145376 : if (ci.ncand == 0) {
1460 : /* trivially empty result */
1461 1299756 : MT_thread_setalgorithm("select: trivially empty");
1462 1300567 : bn = BATdense(0, 0, 0);
1463 1297697 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1464 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1465 : " (" LLFMT " usec): "
1466 : "trivially empty\n",
1467 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1468 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1469 1297697 : return bn;
1470 : }
1471 :
1472 1845620 : BATiter bi = bat_iterator(b);
1473 :
1474 1846167 : t = bi.type;
1475 1846167 : nil = ATOMnilptr(t);
1476 : /* can we use the base type? */
1477 1846167 : t = ATOMbasetype(t);
1478 1846167 : lnil = nil && ATOMcmp(t, tl, nil) == 0; /* low value = nil? */
1479 :
1480 1846131 : if (!lnil && th != NULL && (!li || !hi) && !anti && ATOMcmp(t, tl, th) == 0) {
1481 : /* upper and lower bound of range are equal and we
1482 : * want an interval that's open on at least one
1483 : * side */
1484 60 : MT_thread_setalgorithm("select: empty interval");
1485 60 : bn = BATdense(0, 0, 0);
1486 60 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1487 : ",s=" ALGOOPTBATFMT ",li=%d,hi=%d,anti=%d -> "
1488 : ALGOOPTBATFMT " (" LLFMT " usec): "
1489 : "empty interval\n",
1490 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1491 : li, hi, anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1492 60 : bat_iterator_end(&bi);
1493 60 : return bn;
1494 : }
1495 :
1496 1846074 : lval = !lnil || th == NULL; /* low value used for comparison */
1497 1846074 : equi = th == NULL || (lval && ATOMcmp(t, tl, th) == 0); /* point select? */
1498 1846127 : if (equi) {
1499 1806993 : assert(lval);
1500 1806993 : if (th == NULL)
1501 1623882 : hi = li;
1502 : th = tl;
1503 : hval = true;
1504 39134 : } else if (nil == NULL) {
1505 : hval = true;
1506 : } else {
1507 39134 : hval = ATOMcmp(t, th, nil) != 0;
1508 : }
1509 1846141 : if (anti) {
1510 74940 : if (lval != hval) {
1511 : /* one of the end points is nil and the other
1512 : * isn't: swap sub-ranges */
1513 47 : const void *tv;
1514 47 : bool ti;
1515 47 : assert(!equi);
1516 47 : ti = li;
1517 47 : li = !hi;
1518 47 : hi = !ti;
1519 47 : tv = tl;
1520 47 : tl = th;
1521 47 : th = tv;
1522 47 : ti = lval;
1523 47 : lval = hval;
1524 47 : hval = ti;
1525 47 : lnil = ATOMcmp(t, tl, nil) == 0;
1526 47 : anti = false;
1527 47 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1528 : ",s=" ALGOOPTBATFMT ",anti=%d "
1529 : "anti: switch ranges...\n",
1530 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1531 : anti);
1532 74893 : } else if (!lval && !hval) {
1533 : /* antiselect for nil-nil range: all non-nil
1534 : * values are in range; we must return all
1535 : * other non-nil values, i.e. nothing */
1536 26 : MT_thread_setalgorithm("select: anti: nil-nil range, nonil");
1537 26 : bn = BATdense(0, 0, 0);
1538 26 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1539 : ",s=" ALGOOPTBATFMT ",anti=%d -> "
1540 : ALGOOPTBATFMT " (" LLFMT " usec): "
1541 : "anti: nil-nil range, nonil\n",
1542 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1543 : anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1544 26 : bat_iterator_end(&bi);
1545 26 : return bn;
1546 74867 : } else if (equi && lnil) {
1547 : /* antiselect for nil value: turn into range
1548 : * select for nil-nil range (i.e. everything
1549 : * but nil) */
1550 8128 : equi = false;
1551 8128 : anti = false;
1552 8128 : lval = false;
1553 8128 : hval = false;
1554 8128 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1555 : ",s=" ALGOOPTBATFMT ",anti=0 "
1556 : "anti-nil...\n",
1557 : ALGOBATPAR(b), ALGOOPTBATPAR(s));
1558 66739 : } else if (equi) {
1559 39694 : equi = false;
1560 39694 : if (!(li && hi)) {
1561 : /* antiselect for nothing: turn into
1562 : * range select for nil-nil range
1563 : * (i.e. everything but nil) */
1564 12 : anti = false;
1565 12 : lval = false;
1566 12 : hval = false;
1567 12 : TRC_DEBUG(ALGO, "b="
1568 : ALGOBATFMT ",s="
1569 : ALGOOPTBATFMT ",anti=0 "
1570 : "anti-nothing...\n",
1571 : ALGOBATPAR(b),
1572 : ALGOOPTBATPAR(s));
1573 : }
1574 27045 : } else if (ATOMcmp(t, tl, th) > 0) {
1575 : /* empty range: turn into range select for
1576 : * nil-nil range (i.e. everything but nil) */
1577 27 : equi = false;
1578 27 : anti = false;
1579 27 : lval = false;
1580 27 : hval = false;
1581 27 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1582 : ",s=" ALGOOPTBATFMT ",anti=0 "
1583 : "anti-nil...\n",
1584 : ALGOBATPAR(b), ALGOOPTBATPAR(s));
1585 : }
1586 : }
1587 :
1588 : /* if equi set, then so are both lval and hval */
1589 1846115 : assert(!equi || (lval && hval));
1590 :
1591 1846115 : if (hval && (equi ? !li || !hi : ATOMcmp(t, tl, th) > 0)) {
1592 : /* empty range */
1593 30 : MT_thread_setalgorithm("select: empty range");
1594 30 : bn = BATdense(0, 0, 0);
1595 30 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1596 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1597 : " (" LLFMT " usec) "
1598 : "empty range\n",
1599 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1600 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1601 30 : bat_iterator_end(&bi);
1602 30 : return bn;
1603 : }
1604 1846078 : if (equi && lnil && (notnull || bi.nonil)) {
1605 : /* return all nils, but there aren't any */
1606 5824 : MT_thread_setalgorithm("select: equi-nil, nonil");
1607 5824 : bn = BATdense(0, 0, 0);
1608 5824 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1609 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1610 : " (" LLFMT " usec): "
1611 : "equi-nil, nonil\n",
1612 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1613 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1614 5824 : bat_iterator_end(&bi);
1615 5824 : return bn;
1616 : }
1617 :
1618 1840254 : if (!equi && !lval && !hval && lnil && (notnull || bi.nonil)) {
1619 : /* return all non-nils from a BAT that doesn't have
1620 : * any: i.e. return everything */
1621 202 : MT_thread_setalgorithm("select: everything, nonil");
1622 202 : bn = canditer_slice(&ci, 0, ci.ncand);
1623 202 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1624 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1625 : " (" LLFMT " usec): "
1626 : "everything, nonil\n",
1627 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1628 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1629 202 : bat_iterator_end(&bi);
1630 202 : return bn;
1631 : }
1632 :
1633 : /* figure out how the searched for range compares with the known
1634 : * minimum and maximum values */
1635 1864150 : range = BATrange(&bi, lval ? tl : NULL, hval ? th : NULL, li, hi);
1636 1839398 : if (anti) {
1637 66649 : switch (range) {
1638 0 : case range_contains:
1639 : /* MIN..MAX range of values in BAT fully inside
1640 : * tl..th range, so nothing left over for
1641 : * anti */
1642 0 : MT_thread_setalgorithm("select: nothing, out of range");
1643 0 : bn = BATdense(0, 0, 0);
1644 0 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1645 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1646 : " (" LLFMT " usec): "
1647 : "nothing, out of range\n",
1648 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1649 0 : bat_iterator_end(&bi);
1650 0 : return bn;
1651 0 : case range_before:
1652 : case range_after:
1653 0 : if (notnull || b->tnonil) {
1654 : /* search range does not overlap with BAT range,
1655 : * and there are no nils, so we can return
1656 : * everything */
1657 0 : MT_thread_setalgorithm("select: everything, anti, nonil");
1658 0 : bn = canditer_slice(&ci, 0, ci.ncand);
1659 0 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1660 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1661 : " (" LLFMT " usec): "
1662 : "everything, nonil\n",
1663 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1664 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1665 0 : bat_iterator_end(&bi);
1666 0 : return bn;
1667 : }
1668 : break;
1669 : default:
1670 : break;
1671 : }
1672 1772749 : } else if (!equi || !lnil) {
1673 1764348 : switch (range) {
1674 5 : case range_before:
1675 : case range_after:
1676 : /* range we're looking for either completely
1677 : * before or complete after the range of values
1678 : * in the BAT */
1679 5 : MT_thread_setalgorithm("select: nothing, out of range");
1680 5 : bn = BATdense(0, 0, 0);
1681 5 : TRC_DEBUG(ALGO, "b="
1682 : ALGOBATFMT ",s="
1683 : ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1684 : " (" LLFMT " usec): "
1685 : "nothing, out of range\n",
1686 : ALGOBATPAR(b),
1687 : ALGOOPTBATPAR(s), anti,
1688 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1689 5 : bat_iterator_end(&bi);
1690 5 : return bn;
1691 7978 : case range_contains:
1692 7978 : if (notnull || b->tnonil) {
1693 : /* search range contains BAT range, and
1694 : * there are no nils, so we can return
1695 : * everything */
1696 11 : MT_thread_setalgorithm("select: everything, nonil");
1697 11 : bn = canditer_slice(&ci, 0, ci.ncand);
1698 11 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1699 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1700 : " (" LLFMT " usec): "
1701 : "everything, nonil\n",
1702 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1703 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1704 11 : bat_iterator_end(&bi);
1705 11 : return bn;
1706 : }
1707 : break;
1708 : default:
1709 : break;
1710 : }
1711 : }
1712 :
1713 1839382 : if (ATOMtype(bi.type) == TYPE_oid) {
1714 54016 : NORMALIZE(oid);
1715 : } else {
1716 1809434 : switch (t) {
1717 123371 : case TYPE_bte:
1718 123899 : NORMALIZE(bte);
1719 : break;
1720 10329 : case TYPE_sht:
1721 10520 : NORMALIZE(sht);
1722 : break;
1723 1341205 : case TYPE_int:
1724 1349440 : NORMALIZE(int);
1725 : break;
1726 3186 : case TYPE_lng:
1727 5491 : NORMALIZE(lng);
1728 : break;
1729 : #ifdef HAVE_HGE
1730 420 : case TYPE_hge:
1731 761 : NORMALIZE(hge);
1732 : break;
1733 : #endif
1734 141 : case TYPE_flt:
1735 326 : NORMALIZE(flt);
1736 : break;
1737 499 : case TYPE_dbl:
1738 943 : NORMALIZE(dbl);
1739 : break;
1740 : }
1741 : }
1742 :
1743 1839374 : parent = VIEWtparent(b);
1744 1651040 : assert(parent >= 0);
1745 1839374 : BAT *pb;
1746 1839374 : BATiter pbi;
1747 1839374 : if (parent > 0)
1748 1651045 : pb = BATdescriptor(parent);
1749 : else
1750 : pb = NULL;
1751 1840305 : pbi = bat_iterator(pb);
1752 : /* use hash only for equi-join, and then only if b or its
1753 : * parent already has a hash, or if b or its parent is
1754 : * persistent and the total size wouldn't be too large; check
1755 : * for existence of hash last since that may involve I/O */
1756 1838045 : if (equi) {
1757 1752855 : double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
1758 1752920 : if (cost > 0 && cost < ci.ncand) {
1759 94365 : wanthash = true;
1760 94365 : if (havehash) {
1761 67316 : if (phash) {
1762 65193 : MT_rwlock_rdlock(&pb->thashlock);
1763 65217 : if (pb->thash == NULL) {
1764 0 : MT_rwlock_rdunlock(&pb->thashlock);
1765 0 : havehash = false;
1766 : }
1767 : } else {
1768 2123 : MT_rwlock_rdlock(&b->thashlock);
1769 2122 : if (b->thash == NULL) {
1770 0 : MT_rwlock_rdunlock(&b->thashlock);
1771 0 : havehash = false;
1772 : }
1773 : }
1774 : }
1775 94388 : if (wanthash && !havehash && b->batRole != PERSISTENT) {
1776 88 : MT_lock_set(&b->theaplock);
1777 88 : if (++b->selcnt > 1000)
1778 0 : b->selcnt = 1000; /* limit value */
1779 : else
1780 : wanthash = false;
1781 88 : MT_lock_unset(&b->theaplock);
1782 : }
1783 : } else {
1784 1658555 : wanthash = havehash = false;
1785 : }
1786 : }
1787 :
1788 : /* at this point, if havehash is set, we have the hash lock
1789 : * the lock is on the parent if phash is set, on b itself if not
1790 : * also, if havehash is set, then so is wanthash (but not v.v.) */
1791 :
1792 1838133 : if (!havehash) {
1793 : /* make sure tsorted and trevsorted flags are set, but
1794 : * we only need to know if we're not yet sure that we're
1795 : * going for the hash (i.e. it already exists) */
1796 1772306 : (void) BATordered(b);
1797 1773734 : (void) BATordered_rev(b);
1798 : /* reinitialize iterator since properties may have changed */
1799 1773674 : bat_iterator_end(&bi);
1800 1773577 : bi = bat_iterator(b);
1801 : }
1802 :
1803 : /* If there is an order index or it is a view and the parent
1804 : * has an ordered index, and the bat is not tsorted or
1805 : * trevstorted then use the order index. And there is no cand
1806 : * list or if there is one, it is dense.
1807 : * TODO: we do not support anti-select with order index */
1808 1839501 : bool poidx = false;
1809 1839501 : if (!anti &&
1810 1787691 : !havehash &&
1811 1720160 : !bi.sorted &&
1812 1366638 : !bi.revsorted &&
1813 1242081 : ci.tpe == cand_dense) {
1814 945662 : BAT *view = NULL;
1815 945662 : (void) BATcheckorderidx(b);
1816 945881 : MT_lock_set(&b->batIdxLock);
1817 945811 : if ((oidxh = b->torderidx) != NULL)
1818 42 : HEAPincref(oidxh);
1819 945811 : MT_lock_unset(&b->batIdxLock);
1820 945809 : if (oidxh == NULL && pb != NULL) {
1821 878154 : (void) BATcheckorderidx(pb);
1822 878164 : MT_lock_set(&pb->batIdxLock);
1823 878148 : if ((oidxh = pb->torderidx) != NULL) {
1824 9 : HEAPincref(oidxh);
1825 9 : view = b;
1826 9 : b = pb;
1827 : }
1828 878148 : MT_lock_unset(&pb->batIdxLock);
1829 : }
1830 945820 : if (oidxh) {
1831 : /* Is query selective enough to use the ordered index ? */
1832 : /* TODO: Test if this heuristic works in practice */
1833 : /*if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < ((BUN)1000 < bi.count/1000 ? (BUN)1000: bi.count/1000))*/
1834 51 : if ((ORDERfnd(b, oidxh, th) - ORDERfnd(b, oidxh, tl)) < bi.count/3) {
1835 22 : if (view) {
1836 3 : bat_iterator_end(&bi);
1837 3 : bi = bat_iterator(b);
1838 3 : poidx = true; /* using parent oidx */
1839 3 : vwo = (lng) (view->tbaseoff - bi.baseoff);
1840 3 : vwl = b->hseqbase + (oid) vwo + ci.seq - view->hseqbase;
1841 3 : vwh = vwl + canditer_last(&ci) - ci.seq;
1842 3 : vwo = (lng) view->hseqbase - (lng) b->hseqbase - vwo;
1843 3 : TRC_DEBUG(ALGO, "Switch from " ALGOBATFMT " to " ALGOBATFMT " " OIDFMT "-" OIDFMT " hseq " LLFMT "\n", ALGOBATPAR(view), ALGOBATPAR(b), vwl, vwh, vwo);
1844 : } else {
1845 19 : vwl = ci.seq;
1846 19 : vwh = canditer_last(&ci);
1847 : }
1848 : } else {
1849 29 : if (view) {
1850 6 : b = view;
1851 6 : view = NULL;
1852 : }
1853 29 : HEAPdecref(oidxh, false);
1854 29 : oidxh = NULL;
1855 : }
1856 : }
1857 : }
1858 :
1859 1839659 : if (!havehash && (bi.sorted || bi.revsorted || oidxh != NULL)) {
1860 505820 : BUN low = 0;
1861 505820 : BUN high = bi.count;
1862 :
1863 505820 : if (BATtdensebi(&bi)) {
1864 : /* positional */
1865 : /* we expect nonil to be set, in which case we
1866 : * already know that we're not dealing with a
1867 : * nil equiselect (dealt with above) */
1868 25246 : oid h, l;
1869 25246 : assert(bi.nonil);
1870 25246 : assert(bi.sorted);
1871 25246 : assert(oidxh == NULL);
1872 25246 : algo = "select: dense";
1873 25246 : h = * (oid *) th + hi;
1874 25246 : if (h > bi.tseq)
1875 25395 : h -= bi.tseq;
1876 : else
1877 : h = 0;
1878 25246 : if ((BUN) h < high)
1879 : high = (BUN) h;
1880 :
1881 25246 : l = *(oid *) tl + !li;
1882 25246 : if (l > bi.tseq)
1883 18123 : l -= bi.tseq;
1884 : else
1885 : l = 0;
1886 18123 : if ((BUN) l > low)
1887 18123 : low = (BUN) l;
1888 18123 : if (low > high)
1889 : low = high;
1890 480574 : } else if (bi.sorted) {
1891 354120 : assert(oidxh == NULL);
1892 354120 : algo = "select: sorted";
1893 354120 : if (lval) {
1894 350559 : if (li)
1895 342394 : low = SORTfndfirst(b, tl);
1896 : else
1897 8165 : low = SORTfndlast(b, tl);
1898 : } else {
1899 : /* skip over nils at start of column */
1900 3561 : low = SORTfndlast(b, nil);
1901 : }
1902 354638 : if (hval) {
1903 351097 : if (hi)
1904 342919 : high = SORTfndlast(b, th);
1905 : else
1906 8178 : high = SORTfndfirst(b, th);
1907 : }
1908 126454 : } else if (bi.revsorted) {
1909 126432 : assert(oidxh == NULL);
1910 126432 : algo = "select: reverse sorted";
1911 126432 : if (lval) {
1912 126413 : if (li)
1913 126142 : high = SORTfndlast(b, tl);
1914 : else
1915 271 : high = SORTfndfirst(b, tl);
1916 : } else {
1917 : /* skip over nils at end of column */
1918 19 : high = SORTfndfirst(b, nil);
1919 : }
1920 126448 : if (hval) {
1921 126409 : if (hi)
1922 126138 : low = SORTfndfirst(b, th);
1923 : else
1924 271 : low = SORTfndlast(b, th);
1925 : }
1926 : } else {
1927 22 : assert(oidxh != NULL);
1928 22 : algo = poidx ? "select: parent orderidx" : "select: orderidx";
1929 22 : if (lval) {
1930 22 : if (li)
1931 22 : low = ORDERfndfirst(b, oidxh, tl);
1932 : else
1933 0 : low = ORDERfndlast(b, oidxh, tl);
1934 : } else {
1935 : /* skip over nils at start of column */
1936 0 : low = ORDERfndlast(b, oidxh, nil);
1937 : }
1938 22 : if (hval) {
1939 22 : if (hi)
1940 22 : high = ORDERfndlast(b, oidxh, th);
1941 : else
1942 0 : high = ORDERfndfirst(b, oidxh, th);
1943 : }
1944 : }
1945 506085 : if (anti) {
1946 28292 : assert(oidxh == NULL);
1947 28292 : if (bi.sorted) {
1948 26429 : BUN first = SORTfndlast(b, nil);
1949 : /* match: [first..low) + [high..last) */
1950 26442 : bn = canditer_slice2val(&ci,
1951 : first + b->hseqbase,
1952 : low + b->hseqbase,
1953 26442 : high + b->hseqbase,
1954 : oid_nil);
1955 : } else {
1956 1863 : BUN last = SORTfndfirst(b, nil);
1957 : /* match: [first..low) + [high..last) */
1958 1863 : bn = canditer_slice2val(&ci,
1959 : oid_nil,
1960 : low + b->hseqbase,
1961 : high + b->hseqbase,
1962 1863 : last + b->hseqbase);
1963 : }
1964 : } else {
1965 477793 : if (bi.sorted || bi.revsorted) {
1966 477771 : assert(oidxh == NULL);
1967 : /* match: [low..high) */
1968 477771 : bn = canditer_sliceval(&ci,
1969 : low + b->hseqbase,
1970 477771 : high + b->hseqbase);
1971 : } else {
1972 22 : BUN i;
1973 22 : BUN cnt = 0;
1974 22 : const oid *rs;
1975 22 : oid *rbn;
1976 :
1977 22 : rs = (const oid *) oidxh->base + ORDERIDXOFF;
1978 22 : rs += low;
1979 22 : bn = COLnew(0, TYPE_oid, high-low, TRANSIENT);
1980 22 : if (bn == NULL) {
1981 0 : HEAPdecref(oidxh, false);
1982 0 : bat_iterator_end(&bi);
1983 0 : bat_iterator_end(&pbi);
1984 0 : BBPreclaim(pb);
1985 0 : return NULL;
1986 : }
1987 :
1988 22 : rbn = (oid *) Tloc((bn), 0);
1989 :
1990 154 : for (i = low; i < high; i++) {
1991 132 : if (vwl <= *rs && *rs <= vwh) {
1992 125 : *rbn++ = (oid) ((lng) *rs + vwo);
1993 125 : cnt++;
1994 : }
1995 132 : rs++;
1996 : }
1997 22 : HEAPdecref(oidxh, false);
1998 22 : BATsetcount(bn, cnt);
1999 :
2000 : /* output must be sorted */
2001 22 : GDKqsort(Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid, false, false);
2002 22 : bn->tsorted = true;
2003 22 : bn->trevsorted = bn->batCount <= 1;
2004 22 : bn->tkey = true;
2005 22 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
2006 22 : bn->tnil = false;
2007 22 : bn->tnonil = true;
2008 22 : if (s) {
2009 4 : s = BATintersectcand(bn, s);
2010 4 : BBPunfix(bn->batCacheid);
2011 4 : bn = s;
2012 : }
2013 : }
2014 : }
2015 :
2016 506403 : bn = virtualize(bn);
2017 505442 : MT_thread_setalgorithm(algo);
2018 506477 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",anti=%s -> "
2019 : ALGOOPTBATFMT " %s (" LLFMT " usec)\n",
2020 : ALGOBATPAR(b), anti ? "true" : "false",
2021 : ALGOOPTBATPAR(bn), algo,
2022 : GDKusec() - t0);
2023 :
2024 506477 : bat_iterator_end(&bi);
2025 505779 : bat_iterator_end(&pbi);
2026 505918 : BBPreclaim(pb);
2027 505947 : return bn;
2028 : }
2029 :
2030 66970 : assert(oidxh == NULL);
2031 : /* upper limit for result size */
2032 1333839 : maximum = ci.ncand;
2033 1333839 : if (equi && havehash) {
2034 : /* we can look in the hash struct to see whether all
2035 : * values are distinct and set estimate accordingly */
2036 67303 : if (phash) {
2037 65181 : if (pb->thash->nunique == pbi.count)
2038 : estimate = 1;
2039 2122 : } else if (b->thash->nunique == bi.count)
2040 : estimate = 1;
2041 : }
2042 1323697 : if (estimate == BUN_NONE && (bi.key || (pb != NULL && pbi.key))) {
2043 : /* exact result size in special cases */
2044 446 : if (equi) {
2045 : estimate = 1;
2046 67 : } else if (!anti && lval && hval) {
2047 23 : switch (t) {
2048 0 : case TYPE_bte:
2049 0 : estimate = (BUN) (*(bte *) th - *(bte *) tl);
2050 0 : break;
2051 0 : case TYPE_sht:
2052 0 : estimate = (BUN) (*(sht *) th - *(sht *) tl);
2053 0 : break;
2054 23 : case TYPE_int:
2055 23 : CALC_ESTIMATE(int);
2056 : break;
2057 0 : case TYPE_lng:
2058 0 : CALC_ESTIMATE(lng);
2059 : break;
2060 : #ifdef HAVE_HGE
2061 0 : case TYPE_hge:
2062 0 : CALC_ESTIMATE(hge);
2063 : break;
2064 : #endif
2065 : }
2066 23 : if (estimate == BUN_NONE)
2067 0 : estimate += li + hi - 1;
2068 : }
2069 : }
2070 : /* refine upper limit by exact size (if known) */
2071 1333839 : maximum = MIN(maximum, estimate);
2072 1333839 : if (wanthash && !havehash && estimate == BUN_NONE) {
2073 : /* no exact result size, but we need estimate to
2074 : * choose between hash- & scan-select (if we already
2075 : * have a hash, it's a no-brainer: we use it) */
2076 24619 : if (ci.ncand <= 10000) {
2077 : /* "small" input: don't bother about more accurate
2078 : * estimate */
2079 : estimate = maximum;
2080 : } else {
2081 : /* layman's quick "pseudo-sample" of 1000 tuples,
2082 : * i.e., 333 from begin, middle & end of BAT */
2083 2 : BUN smpl_cnt = 0, slct_cnt = 0, pos, skip, delta;
2084 2 : BAT *smpl, *slct;
2085 :
2086 2 : delta = 1000 / 3 / 2;
2087 2 : skip = (bi.count - (2 * delta)) / 2;
2088 8 : for (pos = delta; pos < bi.count; pos += skip) {
2089 6 : smpl = BATslice(b, pos - delta, pos + delta);
2090 6 : if (smpl) {
2091 6 : slct = BATselect(smpl, NULL, tl,
2092 : th, li, hi, anti);
2093 6 : if (slct) {
2094 6 : smpl_cnt += BATcount(smpl);
2095 6 : slct_cnt += BATcount(slct);
2096 6 : BBPreclaim(slct);
2097 : }
2098 6 : BBPreclaim(smpl);
2099 : }
2100 : }
2101 2 : if (smpl_cnt > 0 && slct_cnt > 0) {
2102 : /* linear extrapolation plus 10% margin */
2103 1 : estimate = (BUN) ((dbl) slct_cnt / (dbl) smpl_cnt
2104 1 : * (dbl) bi.count * 1.1);
2105 1 : } else if (smpl_cnt > 0 && slct_cnt == 0) {
2106 : /* estimate low enough to trigger hash select */
2107 1 : estimate = (ci.ncand / 100) - 1;
2108 : }
2109 : }
2110 24619 : wanthash = estimate < ci.ncand / 100;
2111 : }
2112 1333839 : if (estimate == BUN_NONE) {
2113 : /* no better estimate possible/required:
2114 : * (pre-)allocate 1M tuples, i.e., avoid/delay extend
2115 : * without too much overallocation */
2116 1298589 : estimate = 1000000;
2117 : }
2118 : /* limit estimation by upper limit */
2119 1333839 : estimate = MIN(estimate, maximum);
2120 :
2121 1333839 : bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
2122 1333705 : if (bn == NULL) {
2123 0 : if (havehash) {
2124 0 : if (phash)
2125 0 : MT_rwlock_rdunlock(&pb->thashlock);
2126 : else
2127 0 : MT_rwlock_rdunlock(&b->thashlock);
2128 : }
2129 0 : bat_iterator_end(&bi);
2130 0 : bat_iterator_end(&pbi);
2131 0 : BBPreclaim(pb);
2132 0 : return NULL;
2133 : }
2134 :
2135 1333705 : if (wanthash) {
2136 : /* hashselect unlocks the hash lock */
2137 67184 : bn = hashselect(&bi, &ci, bn, tl, maximum, havehash, phash, &algo);
2138 : } else {
2139 1266521 : assert(!havehash);
2140 : /* use imprints if
2141 : * i) bat is persistent, or parent is persistent
2142 : * ii) it is not an equi-select, and
2143 : * iii) imprints are supported.
2144 : */
2145 1266521 : Imprints *imprints = NULL;
2146 1266521 : if (!equi &&
2147 : /* DISABLES CODE */ (0) && imprintable(bi.type) &&
2148 : (!bi.transient ||
2149 : (pb != NULL && !pbi.transient)) &&
2150 : BATimprints(b) == GDK_SUCCEED) {
2151 : if (pb != NULL) {
2152 : MT_lock_set(&pb->batIdxLock);
2153 : imprints = pb->timprints;
2154 : if (imprints != NULL)
2155 : IMPSincref(imprints);
2156 : else
2157 : imprints = NULL;
2158 : MT_lock_unset(&pb->batIdxLock);
2159 : } else {
2160 : MT_lock_set(&b->batIdxLock);
2161 : imprints = b->timprints;
2162 : if (imprints != NULL)
2163 : IMPSincref(imprints);
2164 : else
2165 : imprints = NULL;
2166 1266521 : MT_lock_unset(&b->batIdxLock);
2167 : }
2168 : }
2169 1266521 : GDKclrerr();
2170 1266136 : bn = scanselect(&bi, &ci, bn, tl, th, li, hi, equi, anti,
2171 : lval, hval, lnil, maximum, imprints, &algo);
2172 1266136 : if (imprints)
2173 : IMPSdecref(imprints, false);
2174 : }
2175 1333376 : bat_iterator_end(&bi);
2176 1334175 : bat_iterator_end(&pbi);
2177 1334169 : BBPreclaim(pb);
2178 :
2179 1333988 : bn = virtualize(bn);
2180 1333511 : MT_thread_setalgorithm(algo);
2181 1333143 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT",anti=%s -> " ALGOOPTBATFMT
2182 : " %s (" LLFMT " usec)\n",
2183 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
2184 : anti ? "true" : "false",
2185 : ALGOOPTBATPAR(bn), algo,
2186 : GDKusec() - t0);
2187 :
2188 : return bn;
2189 : }
2190 :
2191 : /* theta select
2192 : *
2193 : * Returns a BAT with the OID values of b for qualifying tuples. The
2194 : * return BAT is sorted (i.e. in the same order as the input BAT).
2195 : *
2196 : * If s is not NULL, it is a list of candidates. s must be sorted.
2197 : *
2198 : * Theta select returns all values from b which are less/greater than
2199 : * or (not) equal to the provided value depending on the value of op.
2200 : * Op is a string with one of the values: "=", "==", "<", "<=", ">",
2201 : * ">=", "<>", "!=" (the first two are equivalent and the last two are
2202 : * equivalent). Theta select never returns nils.
2203 : *
2204 : * If value is nil, the result is empty.
2205 : */
2206 : BAT *
2207 382229 : BATthetaselect(BAT *b, BAT *s, const void *val, const char *op)
2208 : {
2209 382229 : const void *nil;
2210 :
2211 382229 : BATcheck(b, NULL);
2212 382229 : BATcheck(val, NULL);
2213 382229 : BATcheck(op, NULL);
2214 :
2215 382229 : nil = ATOMnilptr(b->ttype);
2216 382229 : if (ATOMcmp(b->ttype, val, nil) == 0)
2217 51 : return BATdense(0, 0, 0);
2218 382231 : if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
2219 : /* "=" or "==" */
2220 303005 : return BATselect(b, s, val, NULL, true, true, false);
2221 : }
2222 79226 : if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
2223 : /* "!=" (equivalent to "<>") */
2224 73792 : return BATselect(b, s, val, NULL, true, true, true);
2225 : }
2226 5434 : if (op[0] == '<') {
2227 896 : if (op[1] == 0) {
2228 : /* "<" */
2229 728 : return BATselect(b, s, nil, val, false, false, false);
2230 : }
2231 168 : if (op[1] == '=' && op[2] == 0) {
2232 : /* "<=" */
2233 164 : return BATselect(b, s, nil, val, false, true, false);
2234 : }
2235 4 : if (op[1] == '>' && op[2] == 0) {
2236 : /* "<>" (equivalent to "!=") */
2237 6 : return BATselect(b, s, val, NULL, true, true, true);
2238 : }
2239 : }
2240 4536 : if (op[0] == '>') {
2241 4536 : if (op[1] == 0) {
2242 : /* ">" */
2243 4032 : return BATselect(b, s, val, nil, false, false, false);
2244 : }
2245 504 : if (op[1] == '=' && op[2] == 0) {
2246 : /* ">=" */
2247 504 : return BATselect(b, s, val, nil, true, false, false);
2248 : }
2249 : }
2250 0 : GDKerror("unknown operator.\n");
2251 0 : return NULL;
2252 : }
2253 :
2254 : #define VALUE(s, x) (s##vars ? \
2255 : s##vars + VarHeapVal(s##vals, (x), s##width) : \
2256 : s##vals + ((x) * s##width))
2257 : #define FVALUE(s, x) (s##vals + ((x) * s##width))
2258 :
2259 : #define LTany(a,b) ((*cmp)(a, b) < 0)
2260 : #define EQany(a,b) ((*cmp)(a, b) == 0)
2261 : #define is_any_nil(v) ((v) == NULL || (*cmp)((v), nil) == 0)
2262 :
2263 : #define less3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(a, b) || (i && EQ##t(a, b)))
2264 : #define grtr3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(b, a) || (i && EQ##t(a, b)))
2265 : #define or3(a,b) ((a) == 1 || (b) == 1 ? 1 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 0)
2266 : #define and3(a,b) ((a) == 0 || (b) == 0 ? 0 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 1)
2267 : #define not3(a) (is_bit_nil(a) ? bit_nil : !(a))
2268 :
2269 : #define between3(v, lo, linc, hi, hinc, TYPE) \
2270 : and3(grtr3(v, lo, linc, TYPE), less3(v, hi, hinc, TYPE))
2271 :
2272 : #define BETWEEN(v, lo, linc, hi, hinc, TYPE) \
2273 : (is_##TYPE##_nil(v) \
2274 : ? bit_nil \
2275 : : (bit) (anti \
2276 : ? (symmetric \
2277 : ? not3(or3(between3(v, lo, linc, hi, hinc, TYPE), \
2278 : between3(v, hi, hinc, lo, linc, TYPE))) \
2279 : : not3(between3(v, lo, linc, hi, hinc, TYPE))) \
2280 : : (symmetric \
2281 : ? or3(between3(v, lo, linc, hi, hinc, TYPE), \
2282 : between3(v, hi, hinc, lo, linc, TYPE)) \
2283 : : between3(v, lo, linc, hi, hinc, TYPE))))
2284 :
2285 : gdk_return
2286 113 : rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh,
2287 : struct canditer *lci, struct canditer *rci,
2288 : bool linc, bool hinc, bool anti, bool symmetric, BUN maxsize)
2289 : {
2290 113 : if (!anti && !symmetric) {
2291 : /* we'll need these */
2292 102 : (void) BATordered(l);
2293 107 : (void) BATordered_rev(l);
2294 : }
2295 118 : BATiter li = bat_iterator(l);
2296 117 : BATiter rli = bat_iterator(rl);
2297 118 : BATiter rhi = bat_iterator(rh);
2298 118 : const char *rlvals, *rhvals;
2299 118 : const char *lvars, *rlvars, *rhvars;
2300 118 : int rlwidth, rhwidth;
2301 118 : int lwidth;
2302 118 : const void *nil = ATOMnilptr(li.type);
2303 118 : int (*cmp)(const void *, const void *) = ATOMcompare(li.type);
2304 118 : int t;
2305 118 : BUN cnt, ncnt, lncand = lci->ncand, rncand = rci->ncand;
2306 118 : oid *restrict dst1, *restrict dst2;
2307 118 : const void *vrl, *vrh;
2308 118 : oid ro;
2309 118 : oid rlval = oid_nil, rhval = oid_nil;
2310 118 : int sorted = 0; /* which output column is sorted */
2311 118 : BAT *tmp = NULL;
2312 118 : const char *algo = NULL;
2313 118 : Heap *oidxh = NULL;
2314 :
2315 118 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
2316 :
2317 354 : assert(ATOMtype(li.type) == ATOMtype(rli.type));
2318 236 : assert(ATOMtype(li.type) == ATOMtype(rhi.type));
2319 118 : assert(BATcount(rl) == BATcount(rh));
2320 118 : assert(rl->hseqbase == rh->hseqbase);
2321 118 : assert(r1->ttype == TYPE_oid);
2322 118 : assert(r2 == NULL || r2->ttype == TYPE_oid);
2323 99 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2324 118 : assert(li.type != TYPE_void || !is_oid_nil(l->tseqbase));
2325 118 : assert(rli.type != TYPE_void || !is_oid_nil(rl->tseqbase));
2326 118 : assert(rhi.type != TYPE_void || !is_oid_nil(rh->tseqbase));
2327 :
2328 118 : TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
2329 : "rl=" ALGOBATFMT ",rh=" ALGOBATFMT ","
2330 : "sl=" ALGOOPTBATFMT ",sr=" ALGOOPTBATFMT ","
2331 : "anti=%s,symmetric=%s\n",
2332 : ALGOBATPAR(l),
2333 : ALGOBATPAR(rl),
2334 : ALGOBATPAR(rh),
2335 : ALGOOPTBATPAR(lci->s),
2336 : ALGOOPTBATPAR(rci->s),
2337 : anti ? "true" : "false",
2338 : symmetric ? "true" : "false");
2339 :
2340 118 : rlvals = rli.type == TYPE_void ? NULL : (const char *) rli.base;
2341 118 : rhvals = rhi.type == TYPE_void ? NULL : (const char *) rhi.base;
2342 118 : lwidth = li.width;
2343 118 : rlwidth = rli.width;
2344 118 : rhwidth = rhi.width;
2345 118 : dst1 = (oid *) Tloc(r1, 0);
2346 118 : dst2 = r2 ? (oid *) Tloc(r2, 0) : NULL;
2347 :
2348 118 : t = ATOMtype(li.type);
2349 118 : t = ATOMbasetype(t);
2350 :
2351 118 : if (li.vh && li.type) {
2352 17 : assert(rli.vh && rli.type);
2353 17 : assert(rhi.vh && rhi.type);
2354 17 : lvars = li.vh->base;
2355 17 : rlvars = rli.vh->base;
2356 17 : rhvars = rhi.vh->base;
2357 : } else {
2358 101 : assert(rli.vh == NULL);
2359 101 : assert(rhi.vh == NULL);
2360 : lvars = rlvars = rhvars = NULL;
2361 : }
2362 :
2363 118 : if (!anti && !symmetric && !li.sorted && !li.revsorted) {
2364 12 : (void) BATcheckorderidx(l);
2365 12 : MT_lock_set(&l->batIdxLock);
2366 12 : if ((oidxh = l->torderidx) != NULL)
2367 0 : HEAPincref(oidxh);
2368 12 : MT_lock_unset(&l->batIdxLock);
2369 : #if 0 /* needs checking */
2370 : if (oidxh == NULL && VIEWtparent(l)) {
2371 : /* if enabled, need to fix/unfix parent bat */
2372 : BAT *pb = BBP_desc(VIEWtparent(l));
2373 : (void) BATcheckorderidx(pb);
2374 : MT_lock_set(&pb->batIdxLock);
2375 : if ((oidxh = pb->torderidx) != NULL) {
2376 : HEAPincref(oidxh);
2377 : l = pb;
2378 : }
2379 : MT_lock_unset(&pb->batIdxLock);
2380 : }
2381 : #endif
2382 : }
2383 :
2384 118 : vrl = &rlval;
2385 118 : vrh = &rhval;
2386 118 : if (!anti && !symmetric && (li.sorted || li.revsorted || oidxh)) {
2387 : /* left column is sorted, use binary search */
2388 94 : sorted = 2;
2389 602 : TIMEOUT_LOOP(rncand, qry_ctx) {
2390 413 : BUN low, high;
2391 :
2392 413 : ro = canditer_next(rci);
2393 404 : if (rlvals) {
2394 404 : vrl = VALUE(rl, ro - rl->hseqbase);
2395 : } else {
2396 : /* TYPE_void */
2397 0 : rlval = ro - rl->hseqbase + rl->tseqbase;
2398 : }
2399 404 : if (rhvals) {
2400 404 : vrh = VALUE(rh, ro - rh->hseqbase);
2401 : } else {
2402 : /* TYPE_void */
2403 0 : rhval = ro - rh->hseqbase + rh->tseqbase;
2404 : }
2405 405 : if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0)
2406 9 : continue;
2407 387 : if (li.sorted) {
2408 387 : if (linc)
2409 280 : low = SORTfndfirst(l, vrl);
2410 : else
2411 107 : low = SORTfndlast(l, vrl);
2412 410 : if (hinc)
2413 349 : high = SORTfndlast(l, vrh);
2414 : else
2415 61 : high = SORTfndfirst(l, vrh);
2416 0 : } else if (li.revsorted) {
2417 0 : if (hinc)
2418 0 : low = SORTfndfirst(l, vrh);
2419 : else
2420 0 : low = SORTfndlast(l, vrh);
2421 0 : if (linc)
2422 0 : high = SORTfndlast(l, vrl);
2423 : else
2424 0 : high = SORTfndfirst(l, vrl);
2425 : } else {
2426 0 : assert(oidxh);
2427 0 : if (linc)
2428 0 : low = ORDERfndfirst(l, oidxh, vrl);
2429 : else
2430 0 : low = ORDERfndlast(l, oidxh, vrl);
2431 0 : if (hinc)
2432 0 : high = ORDERfndlast(l, oidxh, vrh);
2433 : else
2434 0 : high = ORDERfndfirst(l, oidxh, vrh);
2435 : }
2436 411 : if (high <= low)
2437 224 : continue;
2438 187 : if (li.sorted || li.revsorted) {
2439 187 : low = canditer_search(lci, low + l->hseqbase, true);
2440 187 : high = canditer_search(lci, high + l->hseqbase, true);
2441 187 : assert(high >= low);
2442 :
2443 187 : if (BATcapacity(r1) < BATcount(r1) + high - low) {
2444 0 : cnt = BATcount(r1) + high - low + 1024;
2445 0 : if (cnt > maxsize)
2446 : cnt = maxsize;
2447 0 : BATsetcount(r1, BATcount(r1));
2448 0 : if (BATextend(r1, cnt) != GDK_SUCCEED)
2449 0 : goto bailout;
2450 0 : dst1 = (oid *) Tloc(r1, 0);
2451 0 : if (r2) {
2452 0 : BATsetcount(r2, BATcount(r2));
2453 0 : if (BATextend(r2, cnt) != GDK_SUCCEED)
2454 0 : goto bailout;
2455 0 : assert(BATcapacity(r1) == BATcapacity(r2));
2456 0 : dst2 = (oid *) Tloc(r2, 0);
2457 : }
2458 : }
2459 187 : canditer_setidx(lci, low);
2460 1026 : while (low < high) {
2461 842 : dst1[r1->batCount++] = canditer_next(lci);
2462 839 : if (r2) {
2463 827 : dst2[r2->batCount++] = ro;
2464 : }
2465 839 : low++;
2466 : }
2467 : } else {
2468 0 : const oid *ord;
2469 :
2470 0 : assert(oidxh);
2471 0 : ord = (const oid *) oidxh->base + ORDERIDXOFF;
2472 :
2473 0 : if (BATcapacity(r1) < BATcount(r1) + high - low) {
2474 0 : cnt = BATcount(r1) + high - low + 1024;
2475 0 : if (cnt > maxsize)
2476 : cnt = maxsize;
2477 0 : BATsetcount(r1, BATcount(r1));
2478 0 : if (BATextend(r1, cnt) != GDK_SUCCEED)
2479 0 : goto bailout;
2480 0 : dst1 = (oid *) Tloc(r1, 0);
2481 0 : if (r2) {
2482 0 : BATsetcount(r2, BATcount(r2));
2483 0 : if (BATextend(r2, cnt) != GDK_SUCCEED)
2484 0 : goto bailout;
2485 0 : assert(BATcapacity(r1) == BATcapacity(r2));
2486 0 : dst2 = (oid *) Tloc(r2, 0);
2487 : }
2488 : }
2489 :
2490 0 : while (low < high) {
2491 0 : if (canditer_contains(lci, ord[low])) {
2492 0 : dst1[r1->batCount++] = ord[low];
2493 0 : if (r2) {
2494 0 : dst2[r2->batCount++] = ro;
2495 : }
2496 : }
2497 0 : low++;
2498 : }
2499 : }
2500 : }
2501 95 : if (oidxh)
2502 0 : HEAPdecref(oidxh, false);
2503 95 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
2504 95 : cnt = BATcount(r1);
2505 95 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2506 24 : } else if (/* DISABLES CODE */ (0) &&
2507 : !anti && !symmetric &&
2508 : imprintable(li.type) &&
2509 : (BATcount(rl) > 2 ||
2510 : !li.transient ||
2511 : (VIEWtparent(l) != 0 &&
2512 : /* if enabled, need to fix/unfix parent bat */
2513 : /* batTransient access needs to be protected */
2514 : !(tmp = BBP_desc(VIEWtparent(l)))->batTransient) ||
2515 : BATcheckimprints(l)) &&
2516 : BATimprints(l) == GDK_SUCCEED) {
2517 : /* implementation using imprints on left column
2518 : *
2519 : * we use imprints if we can (the type is right for
2520 : * imprints) and either the left bat is persistent or
2521 : * already has imprints, or the right bats are long
2522 : * enough (for creating imprints being worth it) */
2523 : Imprints *imprints = NULL;
2524 :
2525 : if (tmp) {
2526 : MT_lock_set(&tmp->batIdxLock);
2527 : imprints = tmp->timprints;
2528 : if (imprints != NULL)
2529 : IMPSincref(imprints);
2530 : else
2531 : imprints = NULL;
2532 : MT_lock_unset(&tmp->batIdxLock);
2533 : } else {
2534 : MT_lock_set(&l->batIdxLock);
2535 : imprints = l->timprints;
2536 : if (imprints != NULL)
2537 : IMPSincref(imprints);
2538 : else
2539 : imprints = NULL;
2540 : MT_lock_unset(&l->batIdxLock);
2541 : }
2542 : /* in the unlikely case that the imprints were removed
2543 : * before we could get at them, take the slow, nested
2544 : * loop implementation */
2545 : if (imprints == NULL)
2546 : goto nestedloop;
2547 :
2548 : sorted = 2;
2549 : cnt = 0;
2550 : TIMEOUT_LOOP_IDX_DECL(i, rncand, qry_ctx) {
2551 : maxsize = cnt + (rncand - i) * lncand;
2552 : ro = canditer_next(rci);
2553 : if (rlvals) {
2554 : vrl = FVALUE(rl, ro - rl->hseqbase);
2555 : } else {
2556 : /* TYPE_void */
2557 : rlval = ro - rl->hseqbase + rl->tseqbase;
2558 : }
2559 : if (rhvals) {
2560 : vrh = FVALUE(rh, ro - rh->hseqbase);
2561 : } else {
2562 : /* TYPE_void */
2563 : rhval = ro - rl->hseqbase + rl->tseqbase;
2564 : }
2565 : dst1 = (oid *) Tloc(r1, 0);
2566 : canditer_reset(lci);
2567 : switch (t) {
2568 : case TYPE_bte: {
2569 : bte vl, vh;
2570 : if (is_bte_nil((vl = *(bte *) vrl)))
2571 : continue;
2572 : if (is_bte_nil((vh = *(bte *) vrh)))
2573 : continue;
2574 : if (!linc) {
2575 : if (vl == MAXVALUEbte)
2576 : continue;
2577 : vl = NEXTVALUEbte(vl);
2578 : }
2579 : if (!hinc) {
2580 : if (vh == MINVALUEbte)
2581 : continue;
2582 : vh = PREVVALUEbte(vh);
2583 : }
2584 : if (vl > vh)
2585 : continue;
2586 : ncnt = fullscan_bte(&li, lci, r1, &vl, &vh,
2587 : true, true, false,
2588 : false, true, true,
2589 : false, cnt,
2590 : l->hseqbase, dst1,
2591 : maxsize,
2592 : imprints, &algo);
2593 : break;
2594 : }
2595 : case TYPE_sht: {
2596 : sht vl, vh;
2597 : if (is_sht_nil((vl = *(sht *) vrl)))
2598 : continue;
2599 : if (is_sht_nil((vh = *(sht *) vrh)))
2600 : continue;
2601 : if (!linc) {
2602 : if (vl == MAXVALUEsht)
2603 : continue;
2604 : vl = NEXTVALUEsht(vl);
2605 : }
2606 : if (!hinc) {
2607 : if (vh == MINVALUEsht)
2608 : continue;
2609 : vh = PREVVALUEsht(vh);
2610 : }
2611 : if (vl > vh)
2612 : continue;
2613 : ncnt = fullscan_sht(&li, lci, r1, &vl, &vh,
2614 : true, true, false,
2615 : false, true, true,
2616 : false, cnt,
2617 : l->hseqbase, dst1,
2618 : maxsize,
2619 : imprints, &algo);
2620 : break;
2621 : }
2622 : case TYPE_int:
2623 : #if SIZEOF_OID == SIZEOF_INT
2624 : case TYPE_oid:
2625 : #endif
2626 : {
2627 : int vl, vh;
2628 : if (is_int_nil((vl = *(int *) vrl)))
2629 : continue;
2630 : if (is_int_nil((vh = *(int *) vrh)))
2631 : continue;
2632 : if (!linc) {
2633 : if (vl == MAXVALUEint)
2634 : continue;
2635 : vl = NEXTVALUEint(vl);
2636 : }
2637 : if (!hinc) {
2638 : #if SIZEOF_OID == SIZEOF_INT
2639 : if (t == TYPE_oid) {
2640 : if (vh == MINVALUEoid)
2641 : continue;
2642 : vh = PREVVALUEoid(vh);
2643 : } else
2644 : #endif
2645 : {
2646 : if (vh == MINVALUEint)
2647 : continue;
2648 : vh = PREVVALUEint(vh);
2649 : }
2650 : }
2651 : if (vl > vh)
2652 : continue;
2653 : ncnt = fullscan_int(&li, lci, r1, &vl, &vh,
2654 : true, true, false,
2655 : false, true, true,
2656 : false, cnt,
2657 : l->hseqbase, dst1,
2658 : maxsize,
2659 : imprints, &algo);
2660 : break;
2661 : }
2662 : case TYPE_lng:
2663 : #if SIZEOF_OID == SIZEOF_LNG
2664 : case TYPE_oid:
2665 : #endif
2666 : {
2667 : lng vl, vh;
2668 : if (is_lng_nil((vl = *(lng *) vrl)))
2669 : continue;
2670 : if (is_lng_nil((vh = *(lng *) vrh)))
2671 : continue;
2672 : if (!linc) {
2673 : if (vl == MAXVALUElng)
2674 : continue;
2675 : vl = NEXTVALUElng(vl);
2676 : }
2677 : if (!hinc) {
2678 : #if SIZEOF_OID == SIZEOF_LNG
2679 : if (t == TYPE_oid) {
2680 : if (vh == MINVALUEoid)
2681 : continue;
2682 : vh = PREVVALUEoid(vh);
2683 : } else
2684 : #endif
2685 : {
2686 : if (vh == MINVALUElng)
2687 : continue;
2688 : vh = PREVVALUElng(vh);
2689 : }
2690 : }
2691 : if (vl > vh)
2692 : continue;
2693 : ncnt = fullscan_lng(&li, lci, r1, &vl, &vh,
2694 : true, true, false,
2695 : false, true, true,
2696 : false, cnt,
2697 : l->hseqbase, dst1,
2698 : maxsize,
2699 : imprints, &algo);
2700 : break;
2701 : }
2702 : #ifdef HAVE_HGE
2703 : case TYPE_hge: {
2704 : hge vl, vh;
2705 : if (is_hge_nil((vl = *(hge *) vrl)))
2706 : continue;
2707 : if (is_hge_nil((vh = *(hge *) vrh)))
2708 : continue;
2709 : if (!linc) {
2710 : if (vl == MAXVALUEhge)
2711 : continue;
2712 : vl = NEXTVALUEhge(vl);
2713 : }
2714 : if (!hinc) {
2715 : if (vh == MINVALUEhge)
2716 : continue;
2717 : vh = PREVVALUEhge(vh);
2718 : }
2719 : if (vl > vh)
2720 : continue;
2721 : ncnt = fullscan_hge(&li, lci, r1, &vl, &vh,
2722 : true, true, false,
2723 : false, true, true,
2724 : false, cnt,
2725 : l->hseqbase, dst1,
2726 : maxsize,
2727 : imprints, &algo);
2728 : break;
2729 : }
2730 : #endif
2731 : case TYPE_flt: {
2732 : flt vl, vh;
2733 : vl = *(flt *) vrl;
2734 : if (is_flt_nil(vl))
2735 : continue;
2736 : vh = *(flt *) vrh;
2737 : if (is_flt_nil(vh))
2738 : continue;
2739 : if (!linc) {
2740 : if (vl == MAXVALUEflt)
2741 : continue;
2742 : vl = NEXTVALUEflt(vl);
2743 : }
2744 : if (!hinc) {
2745 : if (vh == MINVALUEflt)
2746 : continue;
2747 : vh = PREVVALUEflt(vh);
2748 : }
2749 : if (vl > vh)
2750 : continue;
2751 : ncnt = fullscan_flt(&li, lci, r1, &vl, &vh,
2752 : true, true, false,
2753 : false, true, true,
2754 : false, cnt,
2755 : l->hseqbase, dst1,
2756 : maxsize,
2757 : imprints, &algo);
2758 : break;
2759 : }
2760 : case TYPE_dbl: {
2761 : dbl vl, vh;
2762 : vl = *(dbl *) vrl;
2763 : if (is_dbl_nil(vl))
2764 : continue;
2765 : vh = *(dbl *) vrh;
2766 : if (is_dbl_nil(vh))
2767 : continue;
2768 : if (!linc) {
2769 : if (vl == MAXVALUEdbl)
2770 : continue;
2771 : vl = NEXTVALUEdbl(vl);
2772 : }
2773 : if (!hinc) {
2774 : if (vh == MINVALUEdbl)
2775 : continue;
2776 : vh = PREVVALUEdbl(vh);
2777 : }
2778 : if (vl > vh)
2779 : continue;
2780 : ncnt = fullscan_dbl(&li, lci, r1, &vl, &vh,
2781 : true, true, false,
2782 : false, true, true,
2783 : false, cnt,
2784 : l->hseqbase, dst1,
2785 : maxsize,
2786 : imprints, &algo);
2787 : break;
2788 : }
2789 : default:
2790 : ncnt = BUN_NONE;
2791 : GDKerror("unsupported type\n");
2792 : MT_UNREACHABLE();
2793 : }
2794 : if (ncnt == BUN_NONE) {
2795 : IMPSdecref(imprints, false);
2796 : goto bailout;
2797 : }
2798 : assert(ncnt >= cnt || ncnt == 0);
2799 : if (ncnt == cnt || ncnt == 0)
2800 : continue;
2801 : if (r2) {
2802 : if (BATcapacity(r2) < ncnt) {
2803 : BATsetcount(r2, cnt);
2804 : if (BATextend(r2, BATcapacity(r1)) != GDK_SUCCEED) {
2805 : IMPSdecref(imprints, false);
2806 : goto bailout;
2807 : }
2808 : dst2 = (oid *) Tloc(r2, 0);
2809 : }
2810 : while (cnt < ncnt)
2811 : dst2[cnt++] = ro;
2812 : } else {
2813 : cnt = ncnt;
2814 : }
2815 : }
2816 : IMPSdecref(imprints, false);
2817 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
2818 : } else {
2819 24 : nestedloop:;
2820 : /* nested loop implementation */
2821 24 : const void *vl;
2822 24 : const char *lvals;
2823 24 : oid lval;
2824 :
2825 24 : GDKclrerr(); /* not interested in BATimprints errors */
2826 23 : sorted = 1;
2827 23 : lvals = li.type == TYPE_void ? NULL : (const char *) li.base;
2828 23 : vl = &lval;
2829 280 : TIMEOUT_LOOP(lncand, qry_ctx) {
2830 234 : oid lo;
2831 :
2832 234 : lo = canditer_next(lci);
2833 234 : if (lvals) {
2834 234 : vl = VALUE(l, lo - l->hseqbase);
2835 235 : if (cmp(vl, nil) == 0)
2836 8 : continue;
2837 : } else {
2838 0 : lval = lo - l->hseqbase + l->tseqbase;
2839 : }
2840 225 : canditer_reset(rci);
2841 25972 : for (BUN j = 0; j < rncand; j++) {
2842 25744 : ro = canditer_next(rci);
2843 19425 : if (rlvals) {
2844 19425 : vrl = VALUE(rl, ro - rl->hseqbase);
2845 : } else {
2846 : /* TYPE_void */
2847 0 : rlval = ro - rl->hseqbase + rl->tseqbase;
2848 : }
2849 19425 : if (rhvals) {
2850 19425 : vrh = VALUE(rh, ro - rh->hseqbase);
2851 : } else {
2852 : /* TYPE_void */
2853 0 : rhval = ro - rh->hseqbase + rh->tseqbase;
2854 : }
2855 19642 : if (BETWEEN(vl, vrl, linc, vrh, hinc, any) != 1)
2856 16063 : continue;
2857 9684 : if (BATcount(r1) == BATcapacity(r1)) {
2858 2 : BUN newcap = BATgrows(r1);
2859 2 : if (newcap > maxsize)
2860 : newcap = maxsize;
2861 2 : BATsetcount(r1, BATcount(r1));
2862 2 : if (BATextend(r1, newcap) != GDK_SUCCEED)
2863 0 : goto bailout;
2864 2 : dst1 = (oid *) Tloc(r1, 0);
2865 2 : if (r2) {
2866 2 : BATsetcount(r2, BATcount(r2));
2867 2 : if (BATextend(r2, newcap) != GDK_SUCCEED)
2868 0 : goto bailout;
2869 2 : assert(BATcapacity(r1) == BATcapacity(r2));
2870 2 : dst2 = (oid *) Tloc(r2, 0);
2871 : }
2872 : }
2873 9684 : dst1[r1->batCount++] = lo;
2874 9684 : if (r2) {
2875 9676 : dst2[r2->batCount++] = ro;
2876 : }
2877 : }
2878 : }
2879 23 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
2880 23 : cnt = BATcount(r1);
2881 23 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2882 : }
2883 :
2884 : /* also set other bits of heap to correct value to indicate size */
2885 118 : BATsetcount(r1, cnt);
2886 :
2887 : /* set properties using an extra scan (usually not complete) */
2888 113 : dst1 = (oid *) Tloc(r1, 0);
2889 113 : r1->tkey = true;
2890 113 : r1->tsorted = true;
2891 113 : r1->trevsorted = true;
2892 113 : r1->tseqbase = 0;
2893 113 : r1->tnil = false;
2894 113 : r1->tnonil = true;
2895 592 : for (ncnt = 1; ncnt < cnt; ncnt++) {
2896 490 : if (dst1[ncnt - 1] == dst1[ncnt]) {
2897 427 : r1->tseqbase = oid_nil;
2898 427 : r1->tkey = false;
2899 63 : } else if (dst1[ncnt - 1] < dst1[ncnt]) {
2900 60 : r1->trevsorted = false;
2901 60 : if (dst1[ncnt - 1] + 1 != dst1[ncnt])
2902 5 : r1->tseqbase = oid_nil;
2903 : } else {
2904 3 : assert(sorted != 1);
2905 3 : r1->tsorted = false;
2906 3 : r1->tseqbase = oid_nil;
2907 3 : r1->tkey = false;
2908 : }
2909 537 : if (!(r1->trevsorted | BATtdense(r1) | r1->tkey | ((sorted != 1) & r1->tsorted)))
2910 : break;
2911 : }
2912 113 : if (BATtdense(r1))
2913 88 : r1->tseqbase = cnt > 0 ? dst1[0] : 0;
2914 113 : if (r2) {
2915 95 : BATsetcount(r2, cnt);
2916 94 : dst2 = (oid *) Tloc(r2, 0);
2917 94 : r2->tkey = true;
2918 94 : r2->tsorted = true;
2919 94 : r2->trevsorted = true;
2920 94 : r2->tseqbase = 0;
2921 94 : r2->tnil = false;
2922 94 : r2->tnonil = true;
2923 567 : for (ncnt = 1; ncnt < cnt; ncnt++) {
2924 486 : if (dst2[ncnt - 1] == dst2[ncnt]) {
2925 42 : r2->tseqbase = oid_nil;
2926 42 : r2->tkey = false;
2927 444 : } else if (dst2[ncnt - 1] < dst2[ncnt]) {
2928 434 : r2->trevsorted = false;
2929 434 : if (dst2[ncnt - 1] + 1 != dst2[ncnt])
2930 129 : r2->tseqbase = oid_nil;
2931 : } else {
2932 10 : assert(sorted != 2);
2933 10 : r2->tsorted = false;
2934 10 : r2->tseqbase = oid_nil;
2935 10 : r2->tkey = false;
2936 : }
2937 659 : if (!(r2->trevsorted | BATtdense(r2) | r2->tkey | ((sorted != 2) & r2->tsorted)))
2938 : break;
2939 : }
2940 94 : if (BATtdense(r2))
2941 64 : r2->tseqbase = cnt > 0 ? dst2[0] : 0;
2942 : }
2943 112 : TRC_DEBUG(ALGO, "l=%s,rl=%s,rh=%s -> "
2944 : "(" ALGOBATFMT "," ALGOOPTBATFMT ")\n",
2945 : BATgetId(l), BATgetId(rl), BATgetId(rh),
2946 : ALGOBATPAR(r1), ALGOOPTBATPAR(r2));
2947 112 : bat_iterator_end(&li);
2948 116 : bat_iterator_end(&rli);
2949 114 : bat_iterator_end(&rhi);
2950 114 : return GDK_SUCCEED;
2951 :
2952 0 : bailout:
2953 0 : bat_iterator_end(&li);
2954 0 : bat_iterator_end(&rli);
2955 0 : bat_iterator_end(&rhi);
2956 0 : BBPreclaim(r1);
2957 0 : BBPreclaim(r2);
2958 : return GDK_FAIL;
2959 : }
|