Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : /* auxiliary functions and structs for imprints */
18 : #include "gdk_imprints.h"
19 :
20 : static inline oid *
21 155328441 : buninsfix(BAT *bn, oid *a, BUN i, oid v, BUN g, BUN m)
22 : {
23 155328441 : if (i == BATcapacity(bn)) {
24 66 : BATsetcount(bn, i);
25 66 : if (BATextend(bn, MIN(BATcapacity(bn) + g, m)) != GDK_SUCCEED)
26 : return NULL;
27 66 : a = (oid *) Tloc(bn, 0);
28 : }
29 155328441 : a[i] = v;
30 155328441 : return a;
31 : }
32 :
33 : BAT *
34 1976786 : virtualize(BAT *bn)
35 : {
36 : /* input must be a valid candidate list or NULL */
37 1976786 : if (bn == NULL)
38 : return NULL;
39 1976786 : if ((bn->ttype != TYPE_void && bn->ttype != TYPE_oid) || !bn->tkey || !bn->tsorted) {
40 0 : fprintf(stderr, "#bn type %d nil %d key %d sorted %d\n",
41 0 : bn->ttype, is_oid_nil(bn->tseqbase),
42 0 : bn->tkey, bn->tsorted);
43 0 : fflush(stderr);
44 : }
45 1976698 : assert(((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) ||
46 : bn->ttype == TYPE_oid) &&
47 : bn->tkey && bn->tsorted);
48 1976698 : assert(BBP_refs(bn->batCacheid) == 1);
49 1976698 : assert(BBP_lrefs(bn->batCacheid) == 0);
50 : /* since bn has unique and strictly ascending values, we can
51 : * easily check whether the column is dense */
52 1976698 : if (bn->ttype == TYPE_oid &&
53 1399925 : (BATcount(bn) <= 1 ||
54 401834 : * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 ==
55 401834 : * (const oid *) Tloc(bn, BATcount(bn) - 1))) {
56 : /* column is dense, replace by virtual oid */
57 1041194 : oid tseq; /* work around bug in Intel compiler */
58 1041194 : if (BATcount(bn) == 0)
59 : tseq = 0;
60 : else
61 413252 : tseq = * (const oid *) Tloc(bn, 0);
62 1041194 : TRC_DEBUG(ALGO, ALGOBATFMT ",seq=" OIDFMT "\n",
63 : ALGOBATPAR(bn), tseq);
64 1041206 : bn->tseqbase = tseq;
65 1041206 : if (VIEWtparent(bn)) {
66 7 : Heap *h = GDKmalloc(sizeof(Heap));
67 7 : if (h == NULL) {
68 0 : BBPunfix(bn->batCacheid);
69 0 : return NULL;
70 : }
71 7 : *h = *bn->theap;
72 7 : settailname(h, BBP_physical(bn->batCacheid), TYPE_oid, 0);
73 7 : h->parentid = bn->batCacheid;
74 7 : h->base = NULL;
75 7 : h->hasfile = false;
76 7 : ATOMIC_INIT(&h->refs, 1);
77 7 : if (bn->theap->parentid != bn->batCacheid)
78 7 : BBPrelease(bn->theap->parentid);
79 7 : HEAPdecref(bn->theap, false);
80 7 : bn->theap = h;
81 : } else {
82 1041199 : HEAPfree(bn->theap, true);
83 : }
84 1041219 : bn->theap->storage = bn->theap->newstorage = STORE_MEM;
85 1041219 : bn->theap->size = 0;
86 1041219 : bn->ttype = TYPE_void;
87 1041219 : bn->twidth = 0;
88 1041219 : bn->tshift = 0;
89 : }
90 :
91 : return bn;
92 : }
93 :
94 : #define HASHloop_bound(bi, h, hb, v, lo, hi) \
95 : for (hb = HASHget(h, HASHprobe((h), v)); \
96 : hb != BUN_NONE; \
97 : hb = HASHgetlink(h,hb)) \
98 : if (hb >= (lo) && hb < (hi) && \
99 : (cmp == NULL || \
100 : (*cmp)(v, BUNtail(bi, hb)) == 0))
101 :
102 : static BAT *
103 49742 : hashselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
104 : const void *tl, BUN maximum, bool havehash, bool phash,
105 : const char **algo)
106 : {
107 49742 : BUN i, cnt;
108 49742 : oid o, *restrict dst;
109 49742 : BUN l, h, d = 0;
110 49742 : oid seq;
111 49742 : int (*cmp)(const void *, const void *);
112 49742 : BAT *b2 = NULL;
113 :
114 49742 : size_t counter = 0;
115 49742 : lng timeoffset = 0;
116 49742 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
117 49741 : if (qry_ctx != NULL) {
118 37782 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
119 : }
120 :
121 49741 : assert(bn->ttype == TYPE_oid);
122 49741 : seq = bi->b->hseqbase;
123 49741 : l = ci->seq - seq;
124 49741 : h = canditer_last(ci) + 1 - seq;
125 :
126 49741 : *algo = "hashselect";
127 49741 : if (phash && (b2 = BATdescriptor(VIEWtparent(bi->b))) != NULL) {
128 48518 : *algo = "hashselect on parent";
129 48518 : TRC_DEBUG(ALGO, ALGOBATFMT
130 : " using parent(" ALGOBATFMT ") "
131 : "for hash\n",
132 : ALGOBATPAR(bi->b),
133 : ALGOBATPAR(b2));
134 48518 : d = bi->baseoff - b2->tbaseoff;
135 48518 : l += d;
136 48518 : h += d;
137 48518 : bat_iterator_end(bi);
138 48520 : *bi = bat_iterator(b2);
139 : }
140 :
141 49741 : if (!havehash) {
142 1 : if (BAThash(bi->b) != GDK_SUCCEED) {
143 0 : BBPreclaim(bn);
144 0 : BBPreclaim(b2);
145 0 : return NULL;
146 : }
147 1 : MT_rwlock_rdlock(&bi->b->thashlock);
148 1 : if (bi->b->thash == NULL) {
149 0 : GDKerror("Hash destroyed before we could use it\n");
150 0 : goto bailout;
151 : }
152 : }
153 99479 : switch (ATOMbasetype(bi->type)) {
154 : case TYPE_bte:
155 : case TYPE_sht:
156 : cmp = NULL; /* no need to compare: "hash" is perfect */
157 : break;
158 49712 : default:
159 49712 : cmp = ATOMcompare(bi->type);
160 49712 : break;
161 : }
162 49741 : dst = (oid *) Tloc(bn, 0);
163 49741 : cnt = 0;
164 49741 : if (ci->tpe != cand_dense) {
165 186068 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
166 169810 : GDK_CHECK_TIMEOUT(timeoffset, counter,
167 : GOTO_LABEL_TIMEOUT_HANDLER(bailout));
168 169810 : o = (oid) (i + seq - d);
169 169810 : if (canditer_contains(ci, o)) {
170 119358 : dst = buninsfix(bn, dst, cnt, o,
171 59679 : maximum - BATcapacity(bn),
172 : maximum);
173 59679 : if (dst == NULL)
174 0 : goto bailout;
175 59679 : cnt++;
176 : }
177 : }
178 : } else {
179 168663 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
180 75647 : GDK_CHECK_TIMEOUT(timeoffset, counter,
181 : GOTO_LABEL_TIMEOUT_HANDLER(bailout));
182 75647 : o = (oid) (i + seq - d);
183 151310 : dst = buninsfix(bn, dst, cnt, o,
184 75647 : maximum - BATcapacity(bn),
185 : maximum);
186 75663 : if (dst == NULL)
187 0 : goto bailout;
188 75663 : cnt++;
189 : }
190 : }
191 49737 : MT_rwlock_rdunlock(&bi->b->thashlock);
192 49742 : BBPreclaim(b2);
193 49740 : BATsetcount(bn, cnt);
194 49732 : bn->tkey = true;
195 49732 : if (cnt > 1) {
196 : /* hash chains produce results in the order high to
197 : * low, so we just need to reverse */
198 60590 : for (l = 0, h = BATcount(bn) - 1; l < h; l++, h--) {
199 57068 : o = dst[l];
200 57068 : dst[l] = dst[h];
201 57068 : dst[h] = o;
202 : }
203 : }
204 49732 : bn->tsorted = true;
205 49732 : bn->trevsorted = bn->batCount <= 1;
206 49732 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil;
207 49732 : return bn;
208 :
209 0 : bailout:
210 0 : MT_rwlock_rdunlock(&bi->b->thashlock);
211 0 : BBPreclaim(b2);
212 0 : BBPreclaim(bn);
213 0 : return NULL;
214 : }
215 :
216 : /* Imprints select code */
217 :
218 : /* inner check, non-dense canditer */
219 : #define impscheck(TEST,ADD) \
220 : do { \
221 : const oid e = (oid) (i+limit-pr_off+hseq); \
222 : if (im[icnt] & mask) { \
223 : if ((im[icnt] & ~innermask) == 0) { \
224 : while (p < ncand && o < e) { \
225 : v = src[o-hseq]; \
226 : if ((ADD) == NULL) { \
227 : goto bailout; \
228 : } \
229 : cnt++; \
230 : p++; \
231 : o = canditer_next(ci); \
232 : } \
233 : } else { \
234 : while (p < ncand && o < e) { \
235 : v = src[o-hseq]; \
236 : if ((ADD) == NULL) { \
237 : goto bailout; \
238 : } \
239 : cnt += (TEST) != 0; \
240 : p++; \
241 : o = canditer_next(ci); \
242 : } \
243 : } \
244 : } else { \
245 : while (p < ncand && o < e) { \
246 : p++; \
247 : o = canditer_next(ci); \
248 : } \
249 : } \
250 : } while (false)
251 :
252 : /* inner check, dense canditer */
253 : #define impscheck_dense(TEST,ADD) \
254 : do { \
255 : const oid e = (oid) (i+limit-pr_off+hseq); \
256 : if (im[icnt] & mask) { \
257 : if ((im[icnt] & ~innermask) == 0) { \
258 : while (p < ncand && o < e) { \
259 : v = src[o-hseq]; \
260 : if ((ADD) == NULL) { \
261 : goto bailout; \
262 : } \
263 : cnt++; \
264 : p++; \
265 : o = canditer_next_dense(ci); \
266 : } \
267 : } else { \
268 : while (p < ncand && o < e) { \
269 : v = src[o-hseq]; \
270 : if ((ADD) == NULL) { \
271 : goto bailout; \
272 : } \
273 : cnt += (TEST) != 0; \
274 : p++; \
275 : o = canditer_next_dense(ci); \
276 : } \
277 : } \
278 : } else { \
279 : BUN skip_sz = MIN(ncand - p, e - o); \
280 : p += skip_sz; \
281 : o += skip_sz; \
282 : ci->next += skip_sz; \
283 : } \
284 : } while (false)
285 :
286 : /* main loop for imprints */
287 : /*
288 : * icnt is the iterator for imprints
289 : * dcnt is the iterator for dictionary entries
290 : * i is the iterator for the values in imprints
291 : */
292 : #define impsloop(ISDENSE,TEST,ADD) \
293 : do { \
294 : BUN dcnt, icnt, limit, i; \
295 : const cchdc_t *restrict d = (cchdc_t *) imprints->dict; \
296 : const uint8_t rpp = ATOMelmshift(IMPS_PAGE >> bi->shift); \
297 : o = canditer_next(ci); \
298 : for (i = 0, dcnt = 0, icnt = 0, p = 0; \
299 : dcnt < imprints->dictcnt && i <= w - hseq + pr_off && p < ncand; \
300 : dcnt++) { \
301 : GDK_CHECK_TIMEOUT(timeoffset, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
302 : limit = ((BUN) d[dcnt].cnt) << rpp; \
303 : while (i + limit <= o - hseq + pr_off) { \
304 : i += limit; \
305 : icnt += d[dcnt].repeat ? 1 : d[dcnt].cnt; \
306 : dcnt++; \
307 : limit = ((BUN) d[dcnt].cnt) << rpp; \
308 : } \
309 : if (!d[dcnt].repeat) { \
310 : const BUN l = icnt + d[dcnt].cnt; \
311 : limit = (BUN) 1 << rpp; \
312 : while (i + limit <= o - hseq + pr_off) { \
313 : icnt++; \
314 : i += limit; \
315 : } \
316 : for (; \
317 : icnt < l && i <= w - hseq + pr_off; \
318 : icnt++) { \
319 : impscheck##ISDENSE(TEST,ADD); \
320 : i += limit; \
321 : } \
322 : } \
323 : else { \
324 : impscheck##ISDENSE(TEST,ADD); \
325 : i += limit; \
326 : icnt++; \
327 : } \
328 : } \
329 : } while (false)
330 :
331 : static inline oid *
332 0 : quickins(oid *dst, BUN cnt, oid o, BAT *bn)
333 : {
334 0 : (void) bn;
335 0 : assert(cnt < BATcapacity(bn));
336 0 : dst[cnt] = o;
337 0 : return dst;
338 : }
339 :
340 : /* construct the mask */
341 : #define impsmask(ISDENSE,TEST,B) \
342 : do { \
343 : const uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
344 : uint##B##_t mask = 0, innermask; \
345 : const int tpe = ATOMbasetype(bi->type); \
346 : const int lbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, tl); \
347 : const int hbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, th); \
348 : /* note: (1<<n)-1 gives a sequence of n one bits */ \
349 : /* to set bits hbin..lbin inclusive, we would do: */ \
350 : /* mask = ((1 << (hbin + 1)) - 1) - ((1 << lbin) - 1); */ \
351 : /* except ((1 << (hbin + 1)) - 1) is not defined if */ \
352 : /* hbin == sizeof(uint##B##_t)*8 - 1 */ \
353 : /* the following does work, however */ \
354 : mask = (((((uint##B##_t) 1 << hbin) - 1) << 1) | 1) - (((uint##B##_t) 1 << lbin) - 1); \
355 : innermask = mask; \
356 : if (vl != minval) \
357 : innermask = IMPSunsetBit(B, innermask, lbin); \
358 : if (vh != maxval) \
359 : innermask = IMPSunsetBit(B, innermask, hbin); \
360 : if (anti) { \
361 : uint##B##_t tmp = mask; \
362 : mask = ~innermask; \
363 : innermask = ~tmp; \
364 : } \
365 : /* if there are nils, we may need to check bin 0 */ \
366 : if (!bi->nonil) \
367 : innermask = IMPSunsetBit(B, innermask, 0); \
368 : \
369 : if (BATcapacity(bn) < maximum) { \
370 : impsloop(ISDENSE, TEST, \
371 : dst = buninsfix(bn, dst, cnt, o, \
372 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
373 : * (dbl) (ncand-p) * 1.1 + 1024), \
374 : maximum)); \
375 : } else { \
376 : impsloop(ISDENSE, TEST, dst = quickins(dst, cnt, o, bn)); \
377 : } \
378 : } while (false)
379 :
380 : #define checkMINMAX(B, TYPE) \
381 : do { \
382 : const BUN *restrict imp_cnt = imprints->stats + 128; \
383 : imp_min = imp_max = nil; \
384 : for (BUN ii = 0; ii < B; ii++) { \
385 : if (is_##TYPE##_nil(imp_min) && imp_cnt[ii]) { \
386 : imp_min = basesrc[imprints->stats[ii]]; \
387 : } \
388 : if (is_##TYPE##_nil(imp_max) && imp_cnt[B-1-ii]) { \
389 : imp_max = basesrc[imprints->stats[64+B-1-ii]]; \
390 : } \
391 : } \
392 : assert(!is_##TYPE##_nil(imp_min) && \
393 : !is_##TYPE##_nil(imp_max)); \
394 : if (anti ? \
395 : vl < imp_min && vh > imp_max : \
396 : vl > imp_max || vh < imp_min) { \
397 : if (pbat) \
398 : BBPunfix(pbat->batCacheid); \
399 : return 0; \
400 : } \
401 : } while (false)
402 :
403 : /* choose number of bits */
404 : #define bitswitch(ISDENSE, TEST, TYPE) \
405 : do { \
406 : BUN ncand = ci->ncand; \
407 : assert(imprints); \
408 : *algo = parent ? "parent imprints select " #TEST " (canditer_next" #ISDENSE ")" : "imprints select " #TEST " (canditer_next" #ISDENSE ")"; \
409 : switch (imprints->bits) { \
410 : case 8: \
411 : checkMINMAX(8, TYPE); \
412 : impsmask(ISDENSE,TEST,8); \
413 : break; \
414 : case 16: \
415 : checkMINMAX(16, TYPE); \
416 : impsmask(ISDENSE,TEST,16); \
417 : break; \
418 : case 32: \
419 : checkMINMAX(32, TYPE); \
420 : impsmask(ISDENSE,TEST,32); \
421 : break; \
422 : case 64: \
423 : checkMINMAX(64, TYPE); \
424 : impsmask(ISDENSE,TEST,64); \
425 : break; \
426 : default: \
427 : MT_UNREACHABLE(); \
428 : } \
429 : } while (false)
430 :
431 : /* scan select without imprints */
432 :
433 : /* core scan select loop with & without candidates */
434 : #define scanloop(NAME,canditer_next,TEST) \
435 : do { \
436 : BUN ncand = ci->ncand; \
437 : *algo = "select: " #NAME " " #TEST " (" #canditer_next ")"; \
438 : if (BATcapacity(bn) < maximum) { \
439 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) { \
440 : o = canditer_next(ci); \
441 : v = src[o-hseq]; \
442 : if (TEST) { \
443 : dst = buninsfix(bn, dst, cnt, o, \
444 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
445 : * (dbl) (ncand-p) * 1.1 + 1024), \
446 : maximum); \
447 : if (dst == NULL) { \
448 : goto bailout; \
449 : } \
450 : cnt++; \
451 : } \
452 : } \
453 : } else { \
454 : TIMEOUT_LOOP(ncand, timeoffset) { \
455 : o = canditer_next(ci); \
456 : v = src[o-hseq]; \
457 : assert(cnt < BATcapacity(bn)); \
458 : dst[cnt] = o; \
459 : cnt += (TEST) != 0; \
460 : } \
461 : } \
462 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout)); \
463 : } while (false)
464 :
465 : /* argument list for type-specific core scan select function call */
466 : #define scanargs \
467 : bi, ci, bn, tl, th, li, hi, equi, anti, lval, hval, lnil, \
468 : cnt, bi->b->hseqbase, dst, maximum, imprints, algo
469 :
470 : #define PREVVALUEbte(x) ((x) - 1)
471 : #define PREVVALUEsht(x) ((x) - 1)
472 : #define PREVVALUEint(x) ((x) - 1)
473 : #define PREVVALUElng(x) ((x) - 1)
474 : #ifdef HAVE_HGE
475 : #define PREVVALUEhge(x) ((x) - 1)
476 : #endif
477 : #define PREVVALUEoid(x) ((x) - 1)
478 : #define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max)
479 : #define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max)
480 :
481 : #define NEXTVALUEbte(x) ((x) + 1)
482 : #define NEXTVALUEsht(x) ((x) + 1)
483 : #define NEXTVALUEint(x) ((x) + 1)
484 : #define NEXTVALUElng(x) ((x) + 1)
485 : #ifdef HAVE_HGE
486 : #define NEXTVALUEhge(x) ((x) + 1)
487 : #endif
488 : #define NEXTVALUEoid(x) ((x) + 1)
489 : #define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max)
490 : #define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max)
491 :
492 : #define MINVALUEbte GDK_bte_min
493 : #define MINVALUEsht GDK_sht_min
494 : #define MINVALUEint GDK_int_min
495 : #define MINVALUElng GDK_lng_min
496 : #ifdef HAVE_HGE
497 : #define MINVALUEhge GDK_hge_min
498 : #endif
499 : #define MINVALUEoid GDK_oid_min
500 : #define MINVALUEflt GDK_flt_min
501 : #define MINVALUEdbl GDK_dbl_min
502 :
503 : #define MAXVALUEbte GDK_bte_max
504 : #define MAXVALUEsht GDK_sht_max
505 : #define MAXVALUEint GDK_int_max
506 : #define MAXVALUElng GDK_lng_max
507 : #ifdef HAVE_HGE
508 : #define MAXVALUEhge GDK_hge_max
509 : #endif
510 : #define MAXVALUEoid GDK_oid_max
511 : #define MAXVALUEflt GDK_flt_max
512 : #define MAXVALUEdbl GDK_dbl_max
513 :
514 : #define choose(NAME, ISDENSE, TEST, TYPE) \
515 : do { \
516 : if (imprints) { \
517 : bitswitch(ISDENSE, TEST, TYPE); \
518 : } else { \
519 : scanloop(NAME, canditer_next##ISDENSE, TEST); \
520 : } \
521 : } while (false)
522 :
523 : /* definition of type-specific core scan select function */
524 : #define scanfunc(NAME, TYPE, ISDENSE) \
525 : static BUN \
526 : NAME##_##TYPE(BATiter *bi, struct canditer *restrict ci, BAT *bn, \
527 : const TYPE *tl, const TYPE *th, bool li, bool hi, \
528 : bool equi, bool anti, bool lval, bool hval, \
529 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst, \
530 : BUN maximum, Imprints *imprints, const char **algo) \
531 : { \
532 : TYPE vl = *tl; \
533 : TYPE vh = *th; \
534 : TYPE imp_min; \
535 : TYPE imp_max; \
536 : TYPE v; \
537 : const TYPE nil = TYPE##_nil; \
538 : const TYPE minval = MINVALUE##TYPE; \
539 : const TYPE maxval = MAXVALUE##TYPE; \
540 : const TYPE *src = (const TYPE *) bi->base; \
541 : const TYPE *basesrc; \
542 : oid o, w; \
543 : BUN p; \
544 : BUN pr_off = 0; \
545 : bat parent = 0; \
546 : BAT *pbat = NULL; \
547 : (void) li; \
548 : (void) hi; \
549 : (void) lval; \
550 : (void) hval; \
551 : assert(li == !anti); \
552 : assert(hi == !anti); \
553 : assert(lval); \
554 : assert(hval); \
555 : size_t counter = 0; \
556 : lng timeoffset = 0; \
557 : QryCtx *qry_ctx = MT_thread_get_qry_ctx(); \
558 : if (qry_ctx != NULL) { \
559 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0; \
560 : } \
561 : if (imprints && imprints->imprints.parentid != bi->b->batCacheid) { \
562 : parent = imprints->imprints.parentid; \
563 : pbat = BATdescriptor(parent); \
564 : if (pbat == NULL) { \
565 : /* can't load parent: don't use imprints */ \
566 : imprints = NULL; \
567 : basesrc = src; \
568 : } else { \
569 : basesrc = (const TYPE *) Tloc(pbat, 0); \
570 : pr_off = (BUN) (src - basesrc); \
571 : } \
572 : } else { \
573 : basesrc = src; \
574 : } \
575 : w = canditer_last(ci); \
576 : if (equi) { \
577 : assert(imprints == NULL); \
578 : if (lnil) \
579 : scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v)); \
580 : else \
581 : scanloop(NAME, canditer_next##ISDENSE, v == vl); \
582 : } else if (anti) { \
583 : if (bi->nonil) { \
584 : choose(NAME, ISDENSE, (v <= vl || v >= vh), TYPE); \
585 : } else { \
586 : choose(NAME, ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh), TYPE); \
587 : } \
588 : } else if (bi->nonil && vl == minval) { \
589 : choose(NAME, ISDENSE, v <= vh, TYPE); \
590 : } else if (vh == maxval) { \
591 : choose(NAME, ISDENSE, v >= vl, TYPE); \
592 : } else { \
593 : choose(NAME, ISDENSE, v >= vl && v <= vh, TYPE); \
594 : } \
595 : if (pbat) \
596 : BBPunfix(pbat->batCacheid); \
597 : return cnt; \
598 : bailout: \
599 : if (pbat) \
600 : BBPunfix(pbat->batCacheid); \
601 : BBPreclaim(bn); \
602 : return BUN_NONE; \
603 : }
604 :
605 : static BUN
606 3200 : fullscan_any(BATiter *bi, struct canditer *restrict ci, BAT *bn,
607 : const void *tl, const void *th,
608 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
609 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
610 : BUN maximum, Imprints *imprints, const char **algo)
611 : {
612 3200 : const void *v;
613 3200 : const void *restrict nil = ATOMnilptr(bi->type);
614 3200 : int (*cmp)(const void *, const void *) = ATOMcompare(bi->type);
615 3200 : oid o;
616 3200 : BUN p, ncand = ci->ncand;
617 3200 : int c;
618 :
619 3200 : (void) maximum;
620 3200 : (void) imprints;
621 3200 : (void) lnil;
622 3200 : lng timeoffset = 0;
623 3200 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
624 3200 : if (qry_ctx != NULL) {
625 1274 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
626 : }
627 :
628 3200 : if (equi) {
629 473 : *algo = "select: fullscan equi";
630 473 : if (ci->tpe == cand_dense) {
631 3970483 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
632 3969491 : o = canditer_next_dense(ci);
633 3969491 : v = BUNtail(*bi, o-hseq);
634 3969491 : if ((*cmp)(tl, v) == 0) {
635 62702 : dst = buninsfix(bn, dst, cnt, o,
636 20902 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
637 20902 : * (dbl) (ncand-p) * 1.1 + 1024),
638 : maximum);
639 20898 : if (dst == NULL) {
640 0 : BBPreclaim(bn);
641 0 : return BUN_NONE;
642 : }
643 20898 : cnt++;
644 : }
645 : }
646 : } else {
647 39923 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
648 39254 : o = canditer_next(ci);
649 39254 : v = BUNtail(*bi, o-hseq);
650 39254 : if ((*cmp)(tl, v) == 0) {
651 62163 : dst = buninsfix(bn, dst, cnt, o,
652 20721 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
653 20721 : * (dbl) (ncand-p) * 1.1 + 1024),
654 : maximum);
655 20721 : if (dst == NULL) {
656 0 : BBPreclaim(bn);
657 0 : return BUN_NONE;
658 : }
659 20721 : cnt++;
660 : }
661 : }
662 : }
663 2727 : } else if (anti) {
664 556 : *algo = "select: fullscan anti";
665 556 : if (ci->tpe == cand_dense) {
666 4923 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
667 3459 : o = canditer_next_dense(ci);
668 3459 : v = BUNtail(*bi, o-hseq);
669 3459 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
670 3322 : ((lval &&
671 3323 : ((c = (*cmp)(tl, v)) > 0 ||
672 2277 : (!li && c == 0))) ||
673 2277 : (hval &&
674 2277 : ((c = (*cmp)(th, v)) < 0 ||
675 499 : (!hi && c == 0))))) {
676 8469 : dst = buninsfix(bn, dst, cnt, o,
677 2823 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
678 2823 : * (dbl) (ncand-p) * 1.1 + 1024),
679 : maximum);
680 2823 : if (dst == NULL) {
681 0 : BBPreclaim(bn);
682 0 : return BUN_NONE;
683 : }
684 2823 : cnt++;
685 : }
686 : }
687 : } else {
688 3480 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
689 3276 : o = canditer_next(ci);
690 3276 : v = BUNtail(*bi, o-hseq);
691 3276 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
692 3276 : ((lval &&
693 3276 : ((c = (*cmp)(tl, v)) > 0 ||
694 2573 : (!li && c == 0))) ||
695 2573 : (hval &&
696 2573 : ((c = (*cmp)(th, v)) < 0 ||
697 1367 : (!hi && c == 0))))) {
698 5727 : dst = buninsfix(bn, dst, cnt, o,
699 1909 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
700 1909 : * (dbl) (ncand-p) * 1.1 + 1024),
701 : maximum);
702 1909 : if (dst == NULL) {
703 0 : BBPreclaim(bn);
704 0 : return BUN_NONE;
705 : }
706 1909 : cnt++;
707 : }
708 : }
709 : }
710 : } else {
711 2171 : *algo = "select: fullscan range";
712 2171 : if (ci->tpe == cand_dense) {
713 6913945 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
714 6907122 : o = canditer_next_dense(ci);
715 6907122 : v = BUNtail(*bi, o-hseq);
716 6906056 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
717 4036 : ((!lval ||
718 4036 : (c = cmp(tl, v)) < 0 ||
719 4640181 : (li && c == 0)) &&
720 3918 : (!hval ||
721 3918 : (c = cmp(th, v)) > 0 ||
722 3662 : (hi && c == 0)))) {
723 13906932 : dst = buninsfix(bn, dst, cnt, o,
724 4635549 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
725 4635549 : * (dbl) (ncand-p) * 1.1 + 1024),
726 : maximum);
727 4635834 : if (dst == NULL) {
728 0 : BBPreclaim(bn);
729 0 : return BUN_NONE;
730 : }
731 4635834 : cnt++;
732 : }
733 : }
734 : } else {
735 1432965 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
736 1432677 : o = canditer_next(ci);
737 1433130 : v = BUNtail(*bi, o-hseq);
738 1433019 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
739 226 : ((!lval ||
740 226 : (c = cmp(tl, v)) < 0 ||
741 1429511 : (li && c == 0)) &&
742 0 : (!hval ||
743 0 : (c = cmp(th, v)) > 0 ||
744 0 : (hi && c == 0)))) {
745 4288177 : dst = buninsfix(bn, dst, cnt, o,
746 1429443 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
747 1429443 : * (dbl) (ncand-p) * 1.1 + 1024),
748 : maximum);
749 1429291 : if (dst == NULL) {
750 0 : BBPreclaim(bn);
751 0 : return BUN_NONE;
752 : }
753 1429291 : cnt++;
754 : }
755 : }
756 : }
757 : }
758 3199 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
759 : return cnt;
760 0 : bailout:
761 0 : BBPreclaim(bn);
762 : return BUN_NONE;
763 : }
764 :
765 : static BUN
766 179996 : fullscan_str(BATiter *bi, struct canditer *restrict ci, BAT *bn,
767 : const char *tl, const char *th,
768 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
769 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
770 : BUN maximum, Imprints *imprints, const char **algo)
771 : {
772 179996 : var_t pos;
773 179996 : BUN p, ncand = ci->ncand;
774 179996 : oid o;
775 179996 : lng timeoffset = 0;
776 179996 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
777 179993 : if (qry_ctx != NULL) {
778 65004 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
779 : }
780 :
781 179993 : if (!equi || !GDK_ELIMDOUBLES(bi->vh))
782 3090 : return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
783 : lval, hval, lnil, cnt, hseq, dst,
784 : maximum, imprints, algo);
785 176903 : if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) {
786 3009 : *algo = "select: fullscan equi strelim (nomatch)";
787 3009 : return 0;
788 : }
789 173898 : if (pos == (var_t) -1) {
790 0 : BBPreclaim(bn);
791 0 : return BUN_NONE;
792 : }
793 173898 : *algo = "select: fullscan equi strelim";
794 173898 : assert(pos >= GDK_VAROFFSET);
795 173898 : switch (bi->width) {
796 151833 : case 1: {
797 151833 : const unsigned char *ptr = (const unsigned char *) bi->base;
798 151833 : pos -= GDK_VAROFFSET;
799 151833 : if (ci->tpe == cand_dense) {
800 24710674 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
801 24255364 : o = canditer_next_dense(ci);
802 24255364 : if (ptr[o - hseq] == pos) {
803 24178216 : dst = buninsfix(bn, dst, cnt, o,
804 8059404 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
805 8059404 : * (dbl) (ncand-p) * 1.1 + 1024),
806 : maximum);
807 8059408 : if (dst == NULL) {
808 0 : BBPreclaim(bn);
809 0 : return BUN_NONE;
810 : }
811 8059408 : cnt++;
812 : }
813 : }
814 : } else {
815 13294216 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
816 13290680 : o = canditer_next(ci);
817 13280654 : if (ptr[o - hseq] == pos) {
818 12706935 : dst = buninsfix(bn, dst, cnt, o,
819 4232303 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
820 4232303 : * (dbl) (ncand-p) * 1.1 + 1024),
821 : maximum);
822 4242329 : if (dst == NULL) {
823 0 : BBPreclaim(bn);
824 0 : return BUN_NONE;
825 : }
826 4242329 : cnt++;
827 : }
828 : }
829 : }
830 : break;
831 : }
832 22063 : case 2: {
833 22063 : const unsigned short *ptr = (const unsigned short *) bi->base;
834 22063 : pos -= GDK_VAROFFSET;
835 22063 : if (ci->tpe == cand_dense) {
836 2681993 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
837 2629353 : o = canditer_next_dense(ci);
838 2629353 : if (ptr[o - hseq] == pos) {
839 159303 : dst = buninsfix(bn, dst, cnt, o,
840 53100 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
841 53100 : * (dbl) (ncand-p) * 1.1 + 1024),
842 : maximum);
843 53103 : if (dst == NULL) {
844 0 : BBPreclaim(bn);
845 0 : return BUN_NONE;
846 : }
847 53103 : cnt++;
848 : }
849 : }
850 : } else {
851 2920141 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
852 2906450 : o = canditer_next(ci);
853 2906322 : if (ptr[o - hseq] == pos) {
854 244992 : dst = buninsfix(bn, dst, cnt, o,
855 81622 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
856 81622 : * (dbl) (ncand-p) * 1.1 + 1024),
857 : maximum);
858 81748 : if (dst == NULL) {
859 0 : BBPreclaim(bn);
860 0 : return BUN_NONE;
861 : }
862 81748 : cnt++;
863 : }
864 : }
865 : }
866 : break;
867 : }
868 : #if SIZEOF_VAR_T == 8
869 1 : case 4: {
870 1 : const unsigned int *ptr = (const unsigned int *) bi->base;
871 1 : if (ci->tpe == cand_dense) {
872 973 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
873 970 : o = canditer_next_dense(ci);
874 970 : if (ptr[o - hseq] == pos) {
875 744 : dst = buninsfix(bn, dst, cnt, o,
876 248 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
877 248 : * (dbl) (ncand-p) * 1.1 + 1024),
878 : maximum);
879 248 : if (dst == NULL) {
880 0 : BBPreclaim(bn);
881 0 : return BUN_NONE;
882 : }
883 248 : cnt++;
884 : }
885 : }
886 : } else {
887 0 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
888 0 : o = canditer_next(ci);
889 0 : if (ptr[o - hseq] == pos) {
890 0 : dst = buninsfix(bn, dst, cnt, o,
891 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
892 0 : * (dbl) (ncand-p) * 1.1 + 1024),
893 : maximum);
894 0 : if (dst == NULL) {
895 0 : BBPreclaim(bn);
896 0 : return BUN_NONE;
897 : }
898 0 : cnt++;
899 : }
900 : }
901 : }
902 : break;
903 : }
904 : #endif
905 1 : default: {
906 1 : const var_t *ptr = (const var_t *) bi->base;
907 1 : if (ci->tpe == cand_dense) {
908 13 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
909 10 : o = canditer_next_dense(ci);
910 10 : if (ptr[o - hseq] == pos) {
911 3 : dst = buninsfix(bn, dst, cnt, o,
912 1 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
913 1 : * (dbl) (ncand-p) * 1.1 + 1024),
914 : maximum);
915 1 : if (dst == NULL) {
916 0 : BBPreclaim(bn);
917 0 : return BUN_NONE;
918 : }
919 1 : cnt++;
920 : }
921 : }
922 : } else {
923 0 : TIMEOUT_LOOP_IDX(p, ncand, timeoffset) {
924 0 : o = canditer_next(ci);
925 0 : if (ptr[o - hseq] == pos) {
926 0 : dst = buninsfix(bn, dst, cnt, o,
927 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
928 0 : * (dbl) (ncand-p) * 1.1 + 1024),
929 : maximum);
930 0 : if (dst == NULL) {
931 0 : BBPreclaim(bn);
932 0 : return BUN_NONE;
933 : }
934 0 : cnt++;
935 : }
936 : }
937 : }
938 : break;
939 : }
940 : }
941 173897 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
942 : return cnt;
943 0 : bailout:
944 0 : BBPreclaim(bn);
945 : return BUN_NONE;
946 : }
947 :
948 : /* scan select type switch */
949 : #ifdef HAVE_HGE
950 : #define scanfunc_hge(NAME, ISDENSE) \
951 : scanfunc(NAME, hge, ISDENSE)
952 : #else
953 : #define scanfunc_hge(NAME, ISDENSE)
954 : #endif
955 : #define scan_sel(NAME, ISDENSE) \
956 : scanfunc(NAME, bte, ISDENSE) \
957 : scanfunc(NAME, sht, ISDENSE) \
958 : scanfunc(NAME, int, ISDENSE) \
959 : scanfunc(NAME, flt, ISDENSE) \
960 : scanfunc(NAME, dbl, ISDENSE) \
961 : scanfunc(NAME, lng, ISDENSE) \
962 : scanfunc_hge(NAME, ISDENSE)
963 :
964 : /* scan/imprints select */
965 183188354 : scan_sel(fullscan, )
966 1359241705 : scan_sel(densescan, _dense)
967 :
968 :
969 : static BAT *
970 1233033 : scanselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
971 : const void *tl, const void *th,
972 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
973 : bool lnil, BUN maximum, Imprints *imprints, const char **algo)
974 : {
975 : #ifndef NDEBUG
976 1233033 : int (*cmp)(const void *, const void *);
977 : #endif
978 1233033 : int t;
979 1233033 : BUN cnt = 0;
980 1233033 : oid *restrict dst;
981 :
982 1233033 : assert(bi->b != NULL);
983 1233033 : assert(bn != NULL);
984 1233033 : assert(bn->ttype == TYPE_oid);
985 1233033 : assert(!lval || tl != NULL);
986 1233033 : assert(!hval || th != NULL);
987 1233033 : assert(!equi || (li && hi && !anti));
988 1233033 : assert(!anti || lval || hval);
989 1233033 : assert(bi->type != TYPE_void || equi || bi->nonil);
990 :
991 : #ifndef NDEBUG
992 1233033 : cmp = ATOMcompare(bi->type);
993 : #endif
994 :
995 1233033 : assert(!lval || !hval || (*cmp)(tl, th) <= 0);
996 :
997 1233024 : dst = (oid *) Tloc(bn, 0);
998 :
999 1233024 : t = ATOMbasetype(bi->type);
1000 :
1001 : /* call type-specific core scan select function */
1002 1233024 : switch (t) {
1003 39506 : case TYPE_bte:
1004 39506 : if (ci->tpe == cand_dense)
1005 37398 : cnt = densescan_bte(scanargs);
1006 : else
1007 2108 : cnt = fullscan_bte(scanargs);
1008 : break;
1009 6934 : case TYPE_sht:
1010 6934 : if (ci->tpe == cand_dense)
1011 6672 : cnt = densescan_sht(scanargs);
1012 : else
1013 262 : cnt = fullscan_sht(scanargs);
1014 : break;
1015 1003945 : case TYPE_int:
1016 1003945 : if (ci->tpe == cand_dense)
1017 713010 : cnt = densescan_int(scanargs);
1018 : else
1019 290935 : cnt = fullscan_int(scanargs);
1020 : break;
1021 21 : case TYPE_flt:
1022 21 : if (ci->tpe == cand_dense)
1023 20 : cnt = densescan_flt(scanargs);
1024 : else
1025 1 : cnt = fullscan_flt(scanargs);
1026 : break;
1027 140 : case TYPE_dbl:
1028 140 : if (ci->tpe == cand_dense)
1029 139 : cnt = densescan_dbl(scanargs);
1030 : else
1031 1 : cnt = fullscan_dbl(scanargs);
1032 : break;
1033 2172 : case TYPE_lng:
1034 2172 : if (ci->tpe == cand_dense)
1035 2163 : cnt = densescan_lng(scanargs);
1036 : else
1037 9 : cnt = fullscan_lng(scanargs);
1038 : break;
1039 : #ifdef HAVE_HGE
1040 203 : case TYPE_hge:
1041 203 : if (ci->tpe == cand_dense)
1042 193 : cnt = densescan_hge(scanargs);
1043 : else
1044 10 : cnt = fullscan_hge(scanargs);
1045 : break;
1046 : #endif
1047 179994 : case TYPE_str:
1048 179994 : cnt = fullscan_str(scanargs);
1049 179994 : break;
1050 109 : default:
1051 109 : cnt = fullscan_any(scanargs);
1052 109 : break;
1053 : }
1054 1233023 : if (cnt == BUN_NONE) {
1055 : return NULL;
1056 : }
1057 1233023 : assert(bn->batCapacity >= cnt);
1058 :
1059 1233023 : BATsetcount(bn, cnt);
1060 1233014 : bn->tsorted = true;
1061 1233014 : bn->trevsorted = bn->batCount <= 1;
1062 1233014 : bn->tkey = true;
1063 1233014 : bn->tseqbase = cnt == 0 ? 0 : cnt == 1 || cnt == bi->count ? bi->b->hseqbase : oid_nil;
1064 :
1065 1233014 : return bn;
1066 : }
1067 :
1068 : /* Normalize the variables li, hi, lval, hval, possibly changing anti
1069 : * in the process. This works for all (and only) numeric types.
1070 : *
1071 : * Note that the expression x < v is equivalent to x <= v' where v' is
1072 : * the next smaller value in the domain of v (similarly for x > v).
1073 : * Also note that for floating point numbers there actually is such a
1074 : * value. In fact, there is a function in standard C that calculates
1075 : * that value.
1076 : *
1077 : * The result of this macro is:
1078 : * li == !anti, hi == !anti, lval == true, hval == true
1079 : * This means that all ranges that we check for are closed ranges. If
1080 : * a range is one-sided, we fill in the minimum resp. maximum value in
1081 : * the domain so that we create a closed range. */
1082 : #define NORMALIZE(TYPE) \
1083 : do { \
1084 : if (anti && li) { \
1085 : /* -inf < x < vl === -inf < x <= vl-1 */ \
1086 : if (*(TYPE*)tl == MINVALUE##TYPE) { \
1087 : /* -inf < x < MIN || *th <[=] x < +inf */ \
1088 : /* degenerates into half range */ \
1089 : /* *th <[=] x < +inf */ \
1090 : anti = false; \
1091 : tl = th; \
1092 : li = !hi; \
1093 : hval = false; \
1094 : /* further dealt with below */ \
1095 : } else { \
1096 : vl.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)tl); \
1097 : tl = &vl.v_##TYPE; \
1098 : li = false; \
1099 : } \
1100 : } \
1101 : if (anti && hi) { \
1102 : /* vl < x < +inf === vl+1 <= x < +inf */ \
1103 : if (*(TYPE*)th == MAXVALUE##TYPE) { \
1104 : /* -inf < x <[=] *tl || MAX > x > +inf */ \
1105 : /* degenerates into half range */ \
1106 : /* -inf < x <[=] *tl */ \
1107 : anti = false; \
1108 : if (tl == &vl.v_##TYPE) { \
1109 : vh.v_##TYPE = vl.v_##TYPE; \
1110 : th = &vh.v_##TYPE; \
1111 : } else { \
1112 : th = tl; \
1113 : } \
1114 : hi = !li; \
1115 : lval = false; \
1116 : /* further dealt with below */ \
1117 : } else { \
1118 : vh.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)th); \
1119 : th = &vh.v_##TYPE; \
1120 : hi = false; \
1121 : } \
1122 : } \
1123 : if (!anti) { \
1124 : if (lval) { \
1125 : /* range bounded on left */ \
1126 : if (!li) { \
1127 : /* open range on left */ \
1128 : if (*(TYPE*)tl == MAXVALUE##TYPE) { \
1129 : bat_iterator_end(&bi); \
1130 : return BATdense(0, 0, 0); \
1131 : } \
1132 : /* vl < x === vl+1 <= x */ \
1133 : vl.v_##TYPE = NEXTVALUE##TYPE(*(TYPE*)tl); \
1134 : li = true; \
1135 : tl = &vl.v_##TYPE; \
1136 : } \
1137 : } else { \
1138 : /* -inf, i.e. smallest value */ \
1139 : vl.v_##TYPE = MINVALUE##TYPE; \
1140 : li = true; \
1141 : tl = &vl.v_##TYPE; \
1142 : lval = true; \
1143 : } \
1144 : if (hval) { \
1145 : /* range bounded on right */ \
1146 : if (!hi) { \
1147 : /* open range on right */ \
1148 : if (*(TYPE*)th == MINVALUE##TYPE) { \
1149 : bat_iterator_end(&bi); \
1150 : return BATdense(0, 0, 0); \
1151 : } \
1152 : /* x < vh === x <= vh-1 */ \
1153 : vh.v_##TYPE = PREVVALUE##TYPE(*(TYPE*)th); \
1154 : hi = true; \
1155 : th = &vh.v_##TYPE; \
1156 : } \
1157 : } else { \
1158 : /* +inf, i.e. largest value */ \
1159 : vh.v_##TYPE = MAXVALUE##TYPE; \
1160 : hi = true; \
1161 : th = &vh.v_##TYPE; \
1162 : hval = true; \
1163 : } \
1164 : if (*(TYPE*)tl > *(TYPE*)th) { \
1165 : bat_iterator_end(&bi); \
1166 : return BATdense(0, 0, 0); \
1167 : } \
1168 : } \
1169 : assert(lval); \
1170 : assert(hval); \
1171 : assert(li != anti); \
1172 : assert(hi != anti); \
1173 : /* if anti is set, we can now check */ \
1174 : /* (x <= *tl || x >= *th) && x != nil */ \
1175 : /* if equi==true, the check is x != *tl && x != nil */ \
1176 : /* if anti is not set, we can check just */ \
1177 : /* *tl <= x && x <= *th */ \
1178 : /* if equi==true, the check is x == *tl */ \
1179 : /* note that this includes the check for != nil */ \
1180 : /* in the case where equi==true, the check is x == *tl */ \
1181 : } while (false)
1182 :
1183 : #if SIZEOF_BUN == SIZEOF_INT
1184 : #define CALC_ESTIMATE(TPE) \
1185 : do { \
1186 : if (*(TPE*)tl < 1) { \
1187 : if ((int) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1188 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1189 : } else { \
1190 : if (-(int) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1191 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1192 : } \
1193 : } while (0)
1194 : #else
1195 : #define CALC_ESTIMATE(TPE) \
1196 : do { \
1197 : if (*(TPE*)tl < 1) { \
1198 : if ((lng) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1199 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1200 : } else { \
1201 : if (-(lng) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1202 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1203 : } \
1204 : } while (0)
1205 : #endif
1206 :
1207 : static enum range_comp_t
1208 1714942 : BATrange(BATiter *bi, const void *tl, const void *th, bool li, bool hi)
1209 : {
1210 1714942 : enum range_comp_t range;
1211 1714942 : const ValRecord *minprop = NULL, *maxprop = NULL;
1212 1714942 : const void *minval = NULL, *maxval = NULL;
1213 1714942 : bool maxincl = true;
1214 1714942 : BAT *pb = NULL;
1215 1714942 : int c;
1216 1714942 : int (*atomcmp) (const void *, const void *) = ATOMcompare(bi->type);
1217 :
1218 1714942 : if (tl && (*atomcmp)(tl, ATOMnilptr(bi->type)) == 0)
1219 17026 : tl = NULL;
1220 1714922 : if (th && (*atomcmp)(th, ATOMnilptr(bi->type)) == 0)
1221 16563 : th = NULL;
1222 1714915 : if (tl == NULL && th == NULL)
1223 : return range_contains; /* looking for everything */
1224 :
1225 1701248 : if (VIEWtparent(bi->b))
1226 1571337 : pb = BATdescriptor(VIEWtparent(bi->b));
1227 :
1228 : /* keep locked while we look at the property values */
1229 1701247 : MT_lock_set(&bi->b->theaplock);
1230 1701195 : if (bi->minpos != BUN_NONE)
1231 38775 : minval = BUNtail(*bi, bi->minpos);
1232 1662420 : else if ((minprop = BATgetprop_nolock(bi->b, GDK_MIN_BOUND)) != NULL)
1233 0 : minval = VALptr(minprop);
1234 1701194 : if (bi->maxpos != BUN_NONE) {
1235 38816 : maxval = BUNtail(*bi, bi->maxpos);
1236 : maxincl = true;
1237 1662378 : } else if ((maxprop = BATgetprop_nolock(bi->b, GDK_MAX_BOUND)) != NULL) {
1238 0 : maxval = VALptr(maxprop);
1239 0 : maxincl = false;
1240 : }
1241 1701185 : bool keep = false; /* keep lock on parent bat? */
1242 1701185 : if (minprop == NULL || maxprop == NULL) {
1243 1701201 : if (pb != NULL) {
1244 1571286 : MT_lock_set(&pb->theaplock);
1245 1571301 : if (minprop == NULL && (minprop = BATgetprop_nolock(pb, GDK_MIN_BOUND)) != NULL) {
1246 13 : keep = true;
1247 13 : minval = VALptr(minprop);
1248 : }
1249 1571293 : if (maxprop == NULL && (maxprop = BATgetprop_nolock(pb, GDK_MAX_BOUND)) != NULL) {
1250 9 : keep = true;
1251 9 : maxval = VALptr(maxprop);
1252 9 : maxincl = true;
1253 : }
1254 1571273 : if (!keep) {
1255 1571255 : MT_lock_unset(&pb->theaplock);
1256 : }
1257 : }
1258 : }
1259 :
1260 1701201 : if (minprop == NULL && maxprop == NULL) {
1261 : range = range_inside; /* strictly: unknown */
1262 13 : } else if (maxprop &&
1263 5 : tl &&
1264 5 : ((c = atomcmp(tl, maxval)) > 0 ||
1265 0 : ((!maxincl || !li) && c == 0))) {
1266 : range = range_after;
1267 10 : } else if (minprop &&
1268 7 : th &&
1269 7 : ((c = atomcmp(th, minval)) < 0 ||
1270 6 : (!hi && c == 0))) {
1271 : range = range_before;
1272 8 : } else if (tl == NULL) {
1273 4 : if (minprop == NULL) {
1274 0 : c = atomcmp(th, maxval);
1275 0 : if (c < 0 || ((maxincl || !hi) && c == 0))
1276 : range = range_atstart;
1277 : else
1278 1 : range = range_contains;
1279 : } else {
1280 4 : c = atomcmp(th, minval);
1281 4 : if (c < 0 || (!hi && c == 0))
1282 : range = range_before;
1283 4 : else if (maxprop == NULL)
1284 : range = range_atstart;
1285 : else {
1286 2 : c = atomcmp(th, maxval);
1287 2 : if (c < 0 || ((maxincl || !hi) && c == 0))
1288 : range = range_atstart;
1289 : else
1290 1 : range = range_contains;
1291 : }
1292 : }
1293 4 : } else if (th == NULL) {
1294 3 : if (maxprop == NULL) {
1295 2 : c = atomcmp(tl, minval);
1296 2 : if (c >= 0)
1297 : range = range_atend;
1298 : else
1299 1 : range = range_contains;
1300 : } else {
1301 1 : c = atomcmp(tl, maxval);
1302 1 : if (c > 0 || ((!maxincl || !li) && c == 0))
1303 : range = range_after;
1304 1 : else if (minprop == NULL)
1305 : range = range_atend;
1306 : else {
1307 1 : c = atomcmp(tl, minval);
1308 1 : if (c >= 0)
1309 : range = range_atend;
1310 : else
1311 1 : range = range_contains;
1312 : }
1313 : }
1314 1 : } else if (minprop == NULL) {
1315 0 : c = atomcmp(th, maxval);
1316 0 : if (c < 0 || ((maxincl || !hi) && c == 0))
1317 : range = range_inside;
1318 : else
1319 2 : range = range_atend;
1320 1 : } else if (maxprop == NULL) {
1321 0 : c = atomcmp(tl, minval);
1322 0 : if (c >= 0)
1323 : range = range_inside;
1324 : else
1325 4 : range = range_atstart;
1326 : } else {
1327 1 : c = atomcmp(tl, minval);
1328 1 : if (c >= 0) {
1329 1 : c = atomcmp(th, maxval);
1330 1 : if (c < 0 || ((maxincl || !hi) && c == 0))
1331 : range = range_inside;
1332 : else
1333 2 : range = range_atend;
1334 : } else {
1335 0 : c = atomcmp(th, maxval);
1336 0 : if (c < 0 || ((maxincl || !hi) && c == 0))
1337 : range = range_atstart;
1338 : else
1339 1 : range = range_contains;
1340 : }
1341 : }
1342 :
1343 1701201 : MT_lock_unset(&bi->b->theaplock);
1344 1701241 : if (pb) {
1345 1571327 : if (keep)
1346 13 : MT_lock_unset(&pb->theaplock);
1347 1571327 : BBPreclaim(pb);
1348 : }
1349 :
1350 : return range;
1351 : }
1352 :
1353 : /* generic range select
1354 : *
1355 : * Return a BAT with the OID values of b for qualifying tuples. The
1356 : * return BAT is sorted (i.e. in the same order as the input BAT).
1357 : *
1358 : * If s is non-NULL, it is a list of candidates. s must be sorted.
1359 : *
1360 : * tl may not be NULL, li, hi, and anti must be either 0 or 1.
1361 : *
1362 : * If th is NULL, hi is ignored.
1363 : *
1364 : * If anti is 0, qualifying tuples are those whose value is between tl
1365 : * and th (as in x >[=] tl && x <[=] th, where equality depends on li
1366 : * and hi--so if tl > th, nothing will be returned). If li or hi is
1367 : * 1, the respective boundary is inclusive, otherwise exclusive. If
1368 : * th is NULL it is taken to be equal to tl, turning this into an
1369 : * equi- or point-select. Note that for a point select to return
1370 : * anything, li (and hi if th was not NULL) must be 1. There is a
1371 : * special case if tl is nil and th is NULL. This is the only way to
1372 : * select for nil values.
1373 : *
1374 : * If anti is 1, the result is the complement of what the result would
1375 : * be if anti were 0, except that nils are filtered out.
1376 : *
1377 : * In brief:
1378 : * - if tl==nil and th==NULL and anti==0, return all nils (only way to
1379 : * get nils);
1380 : * - it tl==nil and th==nil, return all but nils;
1381 : * - if tl==nil and th!=NULL, no lower bound;
1382 : * - if th==NULL or tl==th, point (equi) select;
1383 : * - if th==nil, no upper bound
1384 : *
1385 : * A complete breakdown of the various arguments follows. Here, v, v1
1386 : * and v2 are values from the appropriate domain, and
1387 : * v != nil, v1 != nil, v2 != nil, v1 < v2.
1388 : * tl th li hi anti result list of OIDs for values
1389 : * -----------------------------------------------------------------
1390 : * nil NULL true ignored false x == nil (only way to get nil)
1391 : * nil NULL false ignored false NOTHING
1392 : * nil NULL ignored ignored true x != nil
1393 : * nil nil ignored ignored false x != nil
1394 : * nil nil ignored ignored true NOTHING
1395 : * nil v ignored false false x < v
1396 : * nil v ignored true false x <= v
1397 : * nil v ignored false true x >= v
1398 : * nil v ignored true true x > v
1399 : * v nil false ignored false x > v
1400 : * v nil true ignored false x >= v
1401 : * v nil false ignored true x <= v
1402 : * v nil true ignored true x < v
1403 : * v NULL false ignored false NOTHING
1404 : * v NULL true ignored false x == v
1405 : * v NULL false ignored true x != nil
1406 : * v NULL true ignored true x != v
1407 : * v v false false false NOTHING
1408 : * v v true false false NOTHING
1409 : * v v false true false NOTHING
1410 : * v v true true false x == v
1411 : * v v false false true x != nil
1412 : * v v true false true x != nil
1413 : * v v false true true x != nil
1414 : * v v true true true x != v
1415 : * v1 v2 false false false v1 < x < v2
1416 : * v1 v2 true false false v1 <= x < v2
1417 : * v1 v2 false true false v1 < x <= v2
1418 : * v1 v2 true true false v1 <= x <= v2
1419 : * v1 v2 false false true x <= v1 or x >= v2
1420 : * v1 v2 true false true x < v1 or x >= v2
1421 : * v1 v2 false true true x <= v1 or x > v2
1422 : * v1 v2 true true true x < v1 or x > v2
1423 : * v2 v1 ignored ignored false NOTHING
1424 : * v2 v1 ignored ignored true x != nil
1425 : */
1426 : BAT *
1427 2759938 : BATselect(BAT *b, BAT *s, const void *tl, const void *th,
1428 : bool li, bool hi, bool anti)
1429 : {
1430 2759938 : bool lval; /* low value used for comparison */
1431 2759938 : bool lnil; /* low value is nil */
1432 2759938 : bool hval; /* high value used for comparison */
1433 2759938 : bool equi; /* select for single value (not range) */
1434 2759938 : bool wanthash = false; /* use hash (equi must be true) */
1435 2759938 : bool havehash = false; /* we have a hash (and the hashlock) */
1436 2759938 : bool phash = false; /* use hash on parent BAT (if view) */
1437 2759938 : int t; /* data type */
1438 2759938 : bat parent; /* b's parent bat (if b is a view) */
1439 2759938 : const void *nil;
1440 2759938 : BAT *bn;
1441 2759938 : struct canditer ci;
1442 2759938 : BUN estimate = BUN_NONE, maximum = BUN_NONE;
1443 2759938 : oid vwl = 0, vwh = 0;
1444 2759938 : lng vwo = 0;
1445 2759938 : Heap *oidxh = NULL;
1446 2759938 : const char *algo;
1447 2759938 : union {
1448 : bte v_bte;
1449 : sht v_sht;
1450 : int v_int;
1451 : lng v_lng;
1452 : #ifdef HAVE_HGE
1453 : hge v_hge;
1454 : #endif
1455 : flt v_flt;
1456 : dbl v_dbl;
1457 : oid v_oid;
1458 : } vl, vh;
1459 2759938 : enum range_comp_t range;
1460 2759938 : const bool notnull = BATgetprop(b, GDK_NOT_NULL) != NULL;
1461 2759918 : lng t0 = GDKusec();
1462 :
1463 2759932 : BATcheck(b, NULL);
1464 2759932 : if (tl == NULL) {
1465 0 : GDKerror("tl value required");
1466 0 : return NULL;
1467 : }
1468 :
1469 2759932 : if (s && s->ttype != TYPE_msk && !s->tsorted) {
1470 0 : GDKerror("invalid argument: s must be sorted.\n");
1471 0 : return NULL;
1472 : }
1473 :
1474 2759932 : canditer_init(&ci, b, s);
1475 2759911 : if (ci.ncand == 0) {
1476 : /* trivially empty result */
1477 1038658 : MT_thread_setalgorithm("select: trivially empty");
1478 1038624 : bn = BATdense(0, 0, 0);
1479 1038598 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1480 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1481 : " (" LLFMT " usec): "
1482 : "trivially empty\n",
1483 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1484 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1485 1038598 : return bn;
1486 : }
1487 :
1488 1721253 : BATiter bi = bat_iterator(b);
1489 :
1490 1721260 : t = bi.type;
1491 1721260 : nil = ATOMnilptr(t);
1492 : /* can we use the base type? */
1493 1721260 : t = ATOMbasetype(t);
1494 1721260 : lnil = nil && ATOMcmp(t, tl, nil) == 0; /* low value = nil? */
1495 :
1496 1721245 : if (!lnil && th != NULL && (!li || !hi) && !anti && ATOMcmp(t, tl, th) == 0) {
1497 : /* upper and lower bound of range are equal and we
1498 : * want an interval that's open on at least one
1499 : * side */
1500 41 : MT_thread_setalgorithm("select: empty interval");
1501 41 : bn = BATdense(0, 0, 0);
1502 41 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1503 : ",s=" ALGOOPTBATFMT ",li=%d,hi=%d,anti=%d -> "
1504 : ALGOOPTBATFMT " (" LLFMT " usec): "
1505 : "empty interval\n",
1506 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1507 : li, hi, anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1508 41 : bat_iterator_end(&bi);
1509 41 : return bn;
1510 : }
1511 :
1512 1721204 : lval = !lnil || th == NULL; /* low value used for comparison */
1513 1721204 : equi = th == NULL || (lval && ATOMcmp(t, tl, th) == 0); /* point select? */
1514 1721204 : if (equi) {
1515 1693593 : assert(lval);
1516 1693593 : if (th == NULL)
1517 1543280 : hi = li;
1518 : th = tl;
1519 : hval = true;
1520 27611 : } else if (nil == NULL) {
1521 : hval = true;
1522 : } else {
1523 27611 : hval = ATOMcmp(t, th, nil) != 0;
1524 : }
1525 1721206 : if (anti) {
1526 63551 : if (lval != hval) {
1527 : /* one of the end points is nil and the other
1528 : * isn't: swap sub-ranges */
1529 41 : const void *tv;
1530 41 : bool ti;
1531 41 : assert(!equi);
1532 41 : ti = li;
1533 41 : li = !hi;
1534 41 : hi = !ti;
1535 41 : tv = tl;
1536 41 : tl = th;
1537 41 : th = tv;
1538 41 : ti = lval;
1539 41 : lval = hval;
1540 41 : hval = ti;
1541 41 : lnil = ATOMcmp(t, tl, nil) == 0;
1542 41 : anti = false;
1543 41 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1544 : ",s=" ALGOOPTBATFMT ",anti=%d "
1545 : "anti: switch ranges...\n",
1546 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1547 : anti);
1548 63510 : } else if (!lval && !hval) {
1549 : /* antiselect for nil-nil range: all non-nil
1550 : * values are in range; we must return all
1551 : * other non-nil values, i.e. nothing */
1552 23 : MT_thread_setalgorithm("select: anti: nil-nil range, nonil");
1553 23 : bn = BATdense(0, 0, 0);
1554 23 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1555 : ",s=" ALGOOPTBATFMT ",anti=%d -> "
1556 : ALGOOPTBATFMT " (" LLFMT " usec): "
1557 : "anti: nil-nil range, nonil\n",
1558 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1559 : anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1560 23 : bat_iterator_end(&bi);
1561 23 : return bn;
1562 63487 : } else if (equi && lnil) {
1563 : /* antiselect for nil value: turn into range
1564 : * select for nil-nil range (i.e. everything
1565 : * but nil) */
1566 6052 : equi = false;
1567 6052 : anti = false;
1568 6052 : lval = false;
1569 6052 : hval = false;
1570 6052 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1571 : ",s=" ALGOOPTBATFMT ",anti=0 "
1572 : "anti-nil...\n",
1573 : ALGOBATPAR(b), ALGOOPTBATPAR(s));
1574 57435 : } else if (equi) {
1575 38644 : equi = false;
1576 38644 : if (!(li && hi)) {
1577 : /* antiselect for nothing: turn into
1578 : * range select for nil-nil range
1579 : * (i.e. everything but nil) */
1580 12 : anti = false;
1581 12 : lval = false;
1582 12 : hval = false;
1583 12 : TRC_DEBUG(ALGO, "b="
1584 : ALGOBATFMT ",s="
1585 : ALGOOPTBATFMT ",anti=0 "
1586 : "anti-nothing...\n",
1587 : ALGOBATPAR(b),
1588 : ALGOOPTBATPAR(s));
1589 : }
1590 18791 : } else if (ATOMcmp(t, tl, th) > 0) {
1591 : /* empty range: turn into range select for
1592 : * nil-nil range (i.e. everything but nil) */
1593 24 : equi = false;
1594 24 : anti = false;
1595 24 : lval = false;
1596 24 : hval = false;
1597 24 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1598 : ",s=" ALGOOPTBATFMT ",anti=0 "
1599 : "anti-nil...\n",
1600 : ALGOBATPAR(b), ALGOOPTBATPAR(s));
1601 : }
1602 : }
1603 :
1604 : /* if equi set, then so are both lval and hval */
1605 1721183 : assert(!equi || (lval && hval));
1606 :
1607 1721183 : if (hval && (equi ? !li || !hi : ATOMcmp(t, tl, th) > 0)) {
1608 : /* empty range */
1609 30 : MT_thread_setalgorithm("select: empty range");
1610 30 : bn = BATdense(0, 0, 0);
1611 30 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1612 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1613 : " (" LLFMT " usec) "
1614 : "empty range\n",
1615 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1616 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1617 30 : bat_iterator_end(&bi);
1618 30 : return bn;
1619 : }
1620 1721154 : if (equi && lnil && (notnull || bi.nonil)) {
1621 : /* return all nils, but there aren't any */
1622 5972 : MT_thread_setalgorithm("select: equi-nil, nonil");
1623 5972 : bn = BATdense(0, 0, 0);
1624 5972 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1625 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1626 : " (" LLFMT " usec): "
1627 : "equi-nil, nonil\n",
1628 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1629 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1630 5972 : bat_iterator_end(&bi);
1631 5972 : return bn;
1632 : }
1633 :
1634 1715182 : if (!equi && !lval && !hval && lnil && (notnull || bi.nonil)) {
1635 : /* return all non-nils from a BAT that doesn't have
1636 : * any: i.e. return everything */
1637 256 : MT_thread_setalgorithm("select: everything, nonil");
1638 256 : bn = canditer_slice(&ci, 0, ci.ncand);
1639 256 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1640 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1641 : " (" LLFMT " usec): "
1642 : "everything, nonil\n",
1643 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1644 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1645 256 : bat_iterator_end(&bi);
1646 256 : return bn;
1647 : }
1648 :
1649 : /* figure out how the searched for range compares with the known
1650 : * minimum and maximum values */
1651 1732869 : range = BATrange(&bi, lval ? tl : NULL, hval ? th : NULL, li, hi);
1652 1714930 : if (anti) {
1653 57401 : switch (range) {
1654 0 : case range_contains:
1655 : /* MIN..MAX range of values in BAT fully inside
1656 : * tl..th range, so nothing left over for
1657 : * anti */
1658 0 : MT_thread_setalgorithm("select: nothing, out of range");
1659 0 : bn = BATdense(0, 0, 0);
1660 0 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1661 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1662 : " (" LLFMT " usec): "
1663 : "nothing, out of range\n",
1664 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1665 0 : bat_iterator_end(&bi);
1666 0 : return bn;
1667 0 : case range_before:
1668 : case range_after:
1669 0 : if (notnull || b->tnonil) {
1670 : /* search range does not overlap with BAT range,
1671 : * and there are no nils, so we can return
1672 : * everything */
1673 0 : MT_thread_setalgorithm("select: everything, anti, nonil");
1674 0 : bn = canditer_slice(&ci, 0, ci.ncand);
1675 0 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1676 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1677 : " (" LLFMT " usec): "
1678 : "everything, nonil\n",
1679 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1680 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1681 0 : bat_iterator_end(&bi);
1682 0 : return bn;
1683 : }
1684 : break;
1685 : default:
1686 : break;
1687 : }
1688 1657529 : } else if (!equi || !lnil) {
1689 1649711 : switch (range) {
1690 5 : case range_before:
1691 : case range_after:
1692 : /* range we're looking for either completely
1693 : * before or complete after the range of values
1694 : * in the BAT */
1695 5 : MT_thread_setalgorithm("select: nothing, out of range");
1696 5 : bn = BATdense(0, 0, 0);
1697 5 : TRC_DEBUG(ALGO, "b="
1698 : ALGOBATFMT ",s="
1699 : ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1700 : " (" LLFMT " usec): "
1701 : "nothing, out of range\n",
1702 : ALGOBATPAR(b),
1703 : ALGOOPTBATPAR(s), anti,
1704 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1705 5 : bat_iterator_end(&bi);
1706 5 : return bn;
1707 5845 : case range_contains:
1708 5845 : if (notnull || b->tnonil) {
1709 : /* search range contains BAT range, and
1710 : * there are no nils, so we can return
1711 : * everything */
1712 8 : MT_thread_setalgorithm("select: everything, nonil");
1713 8 : bn = canditer_slice(&ci, 0, ci.ncand);
1714 8 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1715 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1716 : " (" LLFMT " usec): "
1717 : "everything, nonil\n",
1718 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1719 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1720 8 : bat_iterator_end(&bi);
1721 8 : return bn;
1722 : }
1723 : break;
1724 : default:
1725 : break;
1726 : }
1727 : }
1728 :
1729 1714917 : if (ATOMtype(bi.type) == TYPE_oid) {
1730 38202 : NORMALIZE(oid);
1731 : } else {
1732 1693387 : switch (t) {
1733 81043 : case TYPE_bte:
1734 81234 : NORMALIZE(bte);
1735 : break;
1736 8995 : case TYPE_sht:
1737 9130 : NORMALIZE(sht);
1738 : break;
1739 1286999 : case TYPE_int:
1740 1293559 : NORMALIZE(int);
1741 : break;
1742 2920 : case TYPE_lng:
1743 5154 : NORMALIZE(lng);
1744 : break;
1745 : #ifdef HAVE_HGE
1746 401 : case TYPE_hge:
1747 867 : NORMALIZE(hge);
1748 : break;
1749 : #endif
1750 100 : case TYPE_flt:
1751 225 : NORMALIZE(flt);
1752 : break;
1753 413 : case TYPE_dbl:
1754 799 : NORMALIZE(dbl);
1755 : break;
1756 : }
1757 : }
1758 :
1759 1714913 : parent = VIEWtparent(b);
1760 1575171 : assert(parent >= 0);
1761 1714913 : BAT *pb;
1762 1714913 : BATiter pbi;
1763 1714913 : if (parent > 0)
1764 1575166 : pb = BATdescriptor(parent);
1765 : else
1766 : pb = NULL;
1767 1714913 : pbi = bat_iterator(pb);
1768 : /* use hash only for equi-join, and then only if b or its
1769 : * parent already has a hash, or if b or its parent is
1770 : * persistent and the total size wouldn't be too large; check
1771 : * for existence of hash last since that may involve I/O */
1772 1714927 : if (equi) {
1773 1642936 : double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
1774 1642905 : if (cost > 0 && cost < ci.ncand) {
1775 76216 : wanthash = true;
1776 76216 : if (havehash) {
1777 49741 : if (phash) {
1778 48525 : MT_rwlock_rdlock(&pb->thashlock);
1779 48523 : if (pb->thash == NULL) {
1780 0 : MT_rwlock_rdunlock(&pb->thashlock);
1781 0 : havehash = false;
1782 : }
1783 : } else {
1784 1216 : MT_rwlock_rdlock(&b->thashlock);
1785 1216 : if (b->thash == NULL) {
1786 0 : MT_rwlock_rdunlock(&b->thashlock);
1787 0 : havehash = false;
1788 : }
1789 : }
1790 : }
1791 76214 : if (wanthash && !havehash && b->batRole != PERSISTENT) {
1792 60 : MT_lock_set(&b->theaplock);
1793 60 : if (++b->selcnt > 1000)
1794 0 : b->selcnt = 1000; /* limit value */
1795 : else
1796 : wanthash = false;
1797 60 : MT_lock_unset(&b->theaplock);
1798 : }
1799 : } else {
1800 1566689 : wanthash = havehash = false;
1801 : }
1802 : }
1803 :
1804 : /* at this point, if havehash is set, we have the hash lock
1805 : * the lock is on the parent if phash is set, on b itself if not
1806 : * also, if havehash is set, then so is wanthash (but not v.v.) */
1807 :
1808 1714894 : if (!havehash) {
1809 : /* make sure tsorted and trevsorted flags are set, but
1810 : * we only need to know if we're not yet sure that we're
1811 : * going for the hash (i.e. it already exists) */
1812 1665137 : (void) BATordered(b);
1813 1665163 : (void) BATordered_rev(b);
1814 : /* reinitialize iterator since properties may have changed */
1815 1665172 : bat_iterator_end(&bi);
1816 1665170 : bi = bat_iterator(b);
1817 : }
1818 :
1819 : /* If there is an order index or it is a view and the parent
1820 : * has an ordered index, and the bat is not tsorted or
1821 : * trevstorted then use the order index. And there is no cand
1822 : * list or if there is one, it is dense.
1823 : * TODO: we do not support anti-select with order index */
1824 1714916 : bool poidx = false;
1825 1714916 : if (!anti &&
1826 1667174 : !havehash &&
1827 1617435 : !bi.sorted &&
1828 1325969 : !bi.revsorted &&
1829 1207886 : ci.tpe == cand_dense) {
1830 925272 : BAT *view = NULL;
1831 925272 : (void) BATcheckorderidx(b);
1832 925270 : MT_lock_set(&b->batIdxLock);
1833 925267 : if ((oidxh = b->torderidx) != NULL)
1834 42 : HEAPincref(oidxh);
1835 925267 : MT_lock_unset(&b->batIdxLock);
1836 925268 : if (oidxh == NULL && pb != NULL) {
1837 854916 : (void) BATcheckorderidx(pb);
1838 854914 : MT_lock_set(&pb->batIdxLock);
1839 854910 : if ((oidxh = pb->torderidx) != NULL) {
1840 34 : HEAPincref(oidxh);
1841 34 : view = b;
1842 34 : b = pb;
1843 : }
1844 854910 : MT_lock_unset(&pb->batIdxLock);
1845 : }
1846 925261 : if (oidxh) {
1847 : /* Is query selective enough to use the ordered index ? */
1848 : /* TODO: Test if this heuristic works in practice */
1849 : /*if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < ((BUN)1000 < bi.count/1000 ? (BUN)1000: bi.count/1000))*/
1850 76 : if ((ORDERfnd(b, oidxh, th) - ORDERfnd(b, oidxh, tl)) < bi.count/3) {
1851 41 : if (view) {
1852 22 : bat_iterator_end(&bi);
1853 22 : bi = bat_iterator(b);
1854 22 : poidx = true; /* using parent oidx */
1855 22 : vwo = (lng) (view->tbaseoff - bi.baseoff);
1856 22 : vwl = b->hseqbase + (oid) vwo + ci.seq - view->hseqbase;
1857 22 : vwh = vwl + canditer_last(&ci) - ci.seq;
1858 22 : vwo = (lng) view->hseqbase - (lng) b->hseqbase - vwo;
1859 22 : TRC_DEBUG(ALGO, "Switch from " ALGOBATFMT " to " ALGOBATFMT " " OIDFMT "-" OIDFMT " hseq " LLFMT "\n", ALGOBATPAR(view), ALGOBATPAR(b), vwl, vwh, vwo);
1860 : } else {
1861 19 : vwl = ci.seq;
1862 19 : vwh = canditer_last(&ci);
1863 : }
1864 : } else {
1865 35 : if (view) {
1866 12 : b = view;
1867 12 : view = NULL;
1868 : }
1869 35 : HEAPdecref(oidxh, false);
1870 35 : oidxh = NULL;
1871 : }
1872 : }
1873 : }
1874 :
1875 1714905 : if (!havehash && (bi.sorted || bi.revsorted || oidxh != NULL)) {
1876 432133 : BUN low = 0;
1877 432133 : BUN high = bi.count;
1878 :
1879 432133 : if (BATtdensebi(&bi)) {
1880 : /* positional */
1881 : /* we expect nonil to be set, in which case we
1882 : * already know that we're not dealing with a
1883 : * nil equiselect (dealt with above) */
1884 19089 : oid h, l;
1885 19089 : assert(bi.nonil);
1886 19089 : assert(bi.sorted);
1887 19089 : assert(oidxh == NULL);
1888 19089 : algo = "select: dense";
1889 19089 : h = * (oid *) th + hi;
1890 19089 : if (h > bi.tseq)
1891 19090 : h -= bi.tseq;
1892 : else
1893 : h = 0;
1894 19089 : if ((BUN) h < high)
1895 : high = (BUN) h;
1896 :
1897 19089 : l = *(oid *) tl + !li;
1898 19089 : if (l > bi.tseq)
1899 13613 : l -= bi.tseq;
1900 : else
1901 : l = 0;
1902 13613 : if ((BUN) l > low)
1903 13613 : low = (BUN) l;
1904 13613 : if (low > high)
1905 : low = high;
1906 413044 : } else if (bi.sorted) {
1907 292772 : assert(oidxh == NULL);
1908 292772 : algo = "select: sorted";
1909 292772 : if (lval) {
1910 291040 : if (li)
1911 287342 : low = SORTfndfirst(b, tl);
1912 : else
1913 3698 : low = SORTfndlast(b, tl);
1914 : } else {
1915 : /* skip over nils at start of column */
1916 1732 : low = SORTfndlast(b, nil);
1917 : }
1918 292769 : if (hval) {
1919 291030 : if (hi)
1920 287324 : high = SORTfndlast(b, th);
1921 : else
1922 3706 : high = SORTfndfirst(b, th);
1923 : }
1924 120272 : } else if (bi.revsorted) {
1925 120231 : assert(oidxh == NULL);
1926 120231 : algo = "select: reverse sorted";
1927 120231 : if (lval) {
1928 120198 : if (li)
1929 120073 : high = SORTfndlast(b, tl);
1930 : else
1931 125 : high = SORTfndfirst(b, tl);
1932 : } else {
1933 : /* skip over nils at end of column */
1934 33 : high = SORTfndfirst(b, nil);
1935 : }
1936 120231 : if (hval) {
1937 120198 : if (hi)
1938 120073 : low = SORTfndfirst(b, th);
1939 : else
1940 125 : low = SORTfndlast(b, th);
1941 : }
1942 : } else {
1943 41 : assert(oidxh != NULL);
1944 41 : algo = poidx ? "select: parent orderidx" : "select: orderidx";
1945 41 : if (lval) {
1946 41 : if (li)
1947 41 : low = ORDERfndfirst(b, oidxh, tl);
1948 : else
1949 0 : low = ORDERfndlast(b, oidxh, tl);
1950 : } else {
1951 : /* skip over nils at start of column */
1952 0 : low = ORDERfndlast(b, oidxh, nil);
1953 : }
1954 41 : if (hval) {
1955 41 : if (hi)
1956 41 : high = ORDERfndlast(b, oidxh, th);
1957 : else
1958 0 : high = ORDERfndfirst(b, oidxh, th);
1959 : }
1960 : }
1961 432129 : if (anti) {
1962 22542 : assert(oidxh == NULL);
1963 22542 : if (bi.sorted) {
1964 20397 : BUN first = SORTfndlast(b, nil);
1965 : /* match: [first..low) + [high..last) */
1966 20397 : bn = canditer_slice2val(&ci,
1967 : first + b->hseqbase,
1968 : low + b->hseqbase,
1969 20397 : high + b->hseqbase,
1970 : oid_nil);
1971 : } else {
1972 2145 : BUN last = SORTfndfirst(b, nil);
1973 : /* match: [first..low) + [high..last) */
1974 2145 : bn = canditer_slice2val(&ci,
1975 : oid_nil,
1976 : low + b->hseqbase,
1977 : high + b->hseqbase,
1978 2145 : last + b->hseqbase);
1979 : }
1980 : } else {
1981 409587 : if (bi.sorted || bi.revsorted) {
1982 409546 : assert(oidxh == NULL);
1983 : /* match: [low..high) */
1984 409546 : bn = canditer_sliceval(&ci,
1985 : low + b->hseqbase,
1986 409546 : high + b->hseqbase);
1987 : } else {
1988 41 : BUN i;
1989 41 : BUN cnt = 0;
1990 41 : const oid *rs;
1991 41 : oid *rbn;
1992 :
1993 41 : rs = (const oid *) oidxh->base + ORDERIDXOFF;
1994 41 : rs += low;
1995 41 : bn = COLnew(0, TYPE_oid, high-low, TRANSIENT);
1996 41 : if (bn == NULL) {
1997 0 : HEAPdecref(oidxh, false);
1998 0 : bat_iterator_end(&bi);
1999 0 : bat_iterator_end(&pbi);
2000 0 : BBPreclaim(pb);
2001 0 : return NULL;
2002 : }
2003 :
2004 41 : rbn = (oid *) Tloc((bn), 0);
2005 :
2006 255 : for (i = low; i < high; i++) {
2007 214 : if (vwl <= *rs && *rs <= vwh) {
2008 153 : *rbn++ = (oid) ((lng) *rs + vwo);
2009 153 : cnt++;
2010 : }
2011 214 : rs++;
2012 : }
2013 41 : HEAPdecref(oidxh, false);
2014 41 : BATsetcount(bn, cnt);
2015 :
2016 : /* output must be sorted */
2017 41 : GDKqsort(Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid, false, false);
2018 41 : bn->tsorted = true;
2019 41 : bn->trevsorted = bn->batCount <= 1;
2020 41 : bn->tkey = true;
2021 41 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
2022 41 : bn->tnil = false;
2023 41 : bn->tnonil = true;
2024 41 : if (s) {
2025 23 : s = BATintersectcand(bn, s);
2026 23 : BBPunfix(bn->batCacheid);
2027 23 : bn = s;
2028 : }
2029 : }
2030 : }
2031 :
2032 432134 : bn = virtualize(bn);
2033 432130 : MT_thread_setalgorithm(algo);
2034 432120 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",anti=%s -> "
2035 : ALGOOPTBATFMT " %s (" LLFMT " usec)\n",
2036 : ALGOBATPAR(b), anti ? "true" : "false",
2037 : ALGOOPTBATPAR(bn), algo,
2038 : GDKusec() - t0);
2039 :
2040 432120 : bat_iterator_end(&bi);
2041 432140 : bat_iterator_end(&pbi);
2042 432139 : BBPreclaim(pb);
2043 432137 : return bn;
2044 : }
2045 :
2046 49752 : assert(oidxh == NULL);
2047 : /* upper limit for result size */
2048 1282772 : maximum = ci.ncand;
2049 1282772 : if (equi && havehash) {
2050 : /* we can look in the hash struct to see whether all
2051 : * values are distinct and set estimate accordingly */
2052 49742 : if (phash) {
2053 48526 : if (pb->thash->nunique == pbi.count)
2054 : estimate = 1;
2055 1216 : } else if (b->thash->nunique == bi.count)
2056 : estimate = 1;
2057 : }
2058 1274651 : if (estimate == BUN_NONE && (bi.key || (pb != NULL && pbi.key))) {
2059 : /* exact result size in special cases */
2060 368 : if (equi) {
2061 : estimate = 1;
2062 48 : } else if (!anti && lval && hval) {
2063 12 : switch (t) {
2064 0 : case TYPE_bte:
2065 0 : estimate = (BUN) (*(bte *) th - *(bte *) tl);
2066 0 : break;
2067 0 : case TYPE_sht:
2068 0 : estimate = (BUN) (*(sht *) th - *(sht *) tl);
2069 0 : break;
2070 12 : case TYPE_int:
2071 12 : CALC_ESTIMATE(int);
2072 : break;
2073 0 : case TYPE_lng:
2074 0 : CALC_ESTIMATE(lng);
2075 : break;
2076 : #ifdef HAVE_HGE
2077 0 : case TYPE_hge:
2078 0 : CALC_ESTIMATE(hge);
2079 : break;
2080 : #endif
2081 : }
2082 12 : if (estimate == BUN_NONE)
2083 0 : estimate += li + hi - 1;
2084 : }
2085 : }
2086 : /* refine upper limit by exact size (if known) */
2087 1282772 : maximum = MIN(maximum, estimate);
2088 1282772 : if (wanthash && !havehash && estimate == BUN_NONE) {
2089 : /* no exact result size, but we need estimate to
2090 : * choose between hash- & scan-select (if we already
2091 : * have a hash, it's a no-brainer: we use it) */
2092 24591 : if (ci.ncand <= 10000) {
2093 : /* "small" input: don't bother about more accurate
2094 : * estimate */
2095 : estimate = maximum;
2096 : } else {
2097 : /* layman's quick "pseudo-sample" of 1000 tuples,
2098 : * i.e., 333 from begin, middle & end of BAT */
2099 1 : BUN smpl_cnt = 0, slct_cnt = 0, pos, skip, delta;
2100 1 : BAT *smpl, *slct;
2101 :
2102 1 : delta = 1000 / 3 / 2;
2103 1 : skip = (bi.count - (2 * delta)) / 2;
2104 4 : for (pos = delta; pos < bi.count; pos += skip) {
2105 3 : smpl = BATslice(b, pos - delta, pos + delta);
2106 3 : if (smpl) {
2107 3 : slct = BATselect(smpl, NULL, tl,
2108 : th, li, hi, anti);
2109 3 : if (slct) {
2110 3 : smpl_cnt += BATcount(smpl);
2111 3 : slct_cnt += BATcount(slct);
2112 3 : BBPreclaim(slct);
2113 : }
2114 3 : BBPreclaim(smpl);
2115 : }
2116 : }
2117 1 : if (smpl_cnt > 0 && slct_cnt > 0) {
2118 : /* linear extrapolation plus 10% margin */
2119 0 : estimate = (BUN) ((dbl) slct_cnt / (dbl) smpl_cnt
2120 0 : * (dbl) bi.count * 1.1);
2121 1 : } else if (smpl_cnt > 0 && slct_cnt == 0) {
2122 : /* estimate low enough to trigger hash select */
2123 1 : estimate = (ci.ncand / 100) - 1;
2124 : }
2125 : }
2126 24591 : wanthash = estimate < ci.ncand / 100;
2127 : }
2128 1282772 : if (estimate == BUN_NONE) {
2129 : /* no better estimate possible/required:
2130 : * (pre-)allocate 1M tuples, i.e., avoid/delay extend
2131 : * without too much overallocation */
2132 1249732 : estimate = 1000000;
2133 : }
2134 : /* limit estimation by upper limit */
2135 1282772 : estimate = MIN(estimate, maximum);
2136 :
2137 1282772 : bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
2138 1282774 : if (bn == NULL) {
2139 0 : if (havehash) {
2140 0 : if (phash)
2141 0 : MT_rwlock_rdunlock(&pb->thashlock);
2142 : else
2143 0 : MT_rwlock_rdunlock(&b->thashlock);
2144 : }
2145 0 : bat_iterator_end(&bi);
2146 0 : bat_iterator_end(&pbi);
2147 0 : BBPreclaim(pb);
2148 0 : return NULL;
2149 : }
2150 :
2151 1282774 : if (wanthash) {
2152 : /* hashselect unlocks the hash lock */
2153 49742 : bn = hashselect(&bi, &ci, bn, tl, maximum, havehash, phash, &algo);
2154 : } else {
2155 1233032 : assert(!havehash);
2156 : /* use imprints if
2157 : * i) bat is persistent, or parent is persistent
2158 : * ii) it is not an equi-select, and
2159 : * iii) imprints are supported.
2160 : */
2161 1233032 : Imprints *imprints = NULL;
2162 1233032 : if (!equi &&
2163 : /* DISABLES CODE */ (0) && imprintable(bi.type) &&
2164 : (!bi.transient ||
2165 : (pb != NULL && !pbi.transient)) &&
2166 : BATimprints(b) == GDK_SUCCEED) {
2167 : if (pb != NULL) {
2168 : MT_lock_set(&pb->batIdxLock);
2169 : imprints = pb->timprints;
2170 : if (imprints != NULL)
2171 : IMPSincref(imprints);
2172 : else
2173 : imprints = NULL;
2174 : MT_lock_unset(&pb->batIdxLock);
2175 : } else {
2176 : MT_lock_set(&b->batIdxLock);
2177 : imprints = b->timprints;
2178 : if (imprints != NULL)
2179 : IMPSincref(imprints);
2180 : else
2181 : imprints = NULL;
2182 1233032 : MT_lock_unset(&b->batIdxLock);
2183 : }
2184 : }
2185 1233032 : GDKclrerr();
2186 1233023 : bn = scanselect(&bi, &ci, bn, tl, th, li, hi, equi, anti,
2187 : lval, hval, lnil, maximum, imprints, &algo);
2188 1233023 : if (imprints)
2189 : IMPSdecref(imprints, false);
2190 : }
2191 1282753 : bat_iterator_end(&bi);
2192 1282755 : bat_iterator_end(&pbi);
2193 1282766 : BBPreclaim(pb);
2194 :
2195 1282759 : bn = virtualize(bn);
2196 1282767 : MT_thread_setalgorithm(algo);
2197 1282763 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT",anti=%s -> " ALGOOPTBATFMT
2198 : " %s (" LLFMT " usec)\n",
2199 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
2200 : anti ? "true" : "false",
2201 : ALGOOPTBATPAR(bn), algo,
2202 : GDKusec() - t0);
2203 :
2204 : return bn;
2205 : }
2206 :
2207 : /* theta select
2208 : *
2209 : * Returns a BAT with the OID values of b for qualifying tuples. The
2210 : * return BAT is sorted (i.e. in the same order as the input BAT).
2211 : *
2212 : * If s is not NULL, it is a list of candidates. s must be sorted.
2213 : *
2214 : * Theta select returns all values from b which are less/greater than
2215 : * or (not) equal to the provided value depending on the value of op.
2216 : * Op is a string with one of the values: "=", "==", "<", "<=", ">",
2217 : * ">=", "<>", "!=" (the first two are equivalent and the last two are
2218 : * equivalent). Theta select never returns nils.
2219 : *
2220 : * If value is nil, the result is empty.
2221 : */
2222 : BAT *
2223 291918 : BATthetaselect(BAT *b, BAT *s, const void *val, const char *op)
2224 : {
2225 291918 : const void *nil;
2226 :
2227 291918 : BATcheck(b, NULL);
2228 291918 : BATcheck(val, NULL);
2229 291918 : BATcheck(op, NULL);
2230 :
2231 291918 : nil = ATOMnilptr(b->ttype);
2232 291918 : if (ATOMcmp(b->ttype, val, nil) == 0)
2233 40 : return BATdense(0, 0, 0);
2234 291865 : if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
2235 : /* "=" or "==" */
2236 217460 : return BATselect(b, s, val, NULL, true, true, false);
2237 : }
2238 74405 : if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
2239 : /* "!=" (equivalent to "<>") */
2240 70383 : return BATselect(b, s, val, NULL, true, true, true);
2241 : }
2242 4022 : if (op[0] == '<') {
2243 691 : if (op[1] == 0) {
2244 : /* "<" */
2245 553 : return BATselect(b, s, nil, val, false, false, false);
2246 : }
2247 138 : if (op[1] == '=' && op[2] == 0) {
2248 : /* "<=" */
2249 132 : return BATselect(b, s, nil, val, false, true, false);
2250 : }
2251 6 : if (op[1] == '>' && op[2] == 0) {
2252 : /* "<>" (equivalent to "!=") */
2253 6 : return BATselect(b, s, val, NULL, true, true, true);
2254 : }
2255 : }
2256 3331 : if (op[0] == '>') {
2257 3331 : if (op[1] == 0) {
2258 : /* ">" */
2259 2974 : return BATselect(b, s, val, nil, false, false, false);
2260 : }
2261 357 : if (op[1] == '=' && op[2] == 0) {
2262 : /* ">=" */
2263 357 : return BATselect(b, s, val, nil, true, false, false);
2264 : }
2265 : }
2266 0 : GDKerror("unknown operator.\n");
2267 0 : return NULL;
2268 : }
2269 :
2270 : #define VALUE(s, x) (s##vars ? \
2271 : s##vars + VarHeapVal(s##vals, (x), s##width) : \
2272 : s##vals + ((x) * s##width))
2273 : #define FVALUE(s, x) (s##vals + ((x) * s##width))
2274 :
2275 : #define LTany(a,b) ((*cmp)(a, b) < 0)
2276 : #define EQany(a,b) ((*cmp)(a, b) == 0)
2277 : #define is_any_nil(v) ((v) == NULL || (*cmp)((v), nil) == 0)
2278 :
2279 : #define less3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(a, b) || (i && EQ##t(a, b)))
2280 : #define grtr3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(b, a) || (i && EQ##t(a, b)))
2281 : #define or3(a,b) ((a) == 1 || (b) == 1 ? 1 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 0)
2282 : #define and3(a,b) ((a) == 0 || (b) == 0 ? 0 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 1)
2283 : #define not3(a) (is_bit_nil(a) ? bit_nil : !(a))
2284 :
2285 : #define between3(v, lo, linc, hi, hinc, TYPE) \
2286 : and3(grtr3(v, lo, linc, TYPE), less3(v, hi, hinc, TYPE))
2287 :
2288 : #define BETWEEN(v, lo, linc, hi, hinc, TYPE) \
2289 : (is_##TYPE##_nil(v) \
2290 : ? bit_nil \
2291 : : (bit) (anti \
2292 : ? (symmetric \
2293 : ? not3(or3(between3(v, lo, linc, hi, hinc, TYPE), \
2294 : between3(v, hi, hinc, lo, linc, TYPE))) \
2295 : : not3(between3(v, lo, linc, hi, hinc, TYPE))) \
2296 : : (symmetric \
2297 : ? or3(between3(v, lo, linc, hi, hinc, TYPE), \
2298 : between3(v, hi, hinc, lo, linc, TYPE)) \
2299 : : between3(v, lo, linc, hi, hinc, TYPE))))
2300 :
2301 : gdk_return
2302 120 : rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh,
2303 : struct canditer *lci, struct canditer *rci,
2304 : bool linc, bool hinc, bool anti, bool symmetric, BUN maxsize)
2305 : {
2306 120 : if (!anti && !symmetric) {
2307 : /* we'll need these */
2308 109 : (void) BATordered(l);
2309 109 : (void) BATordered_rev(l);
2310 : }
2311 120 : BATiter li = bat_iterator(l);
2312 120 : BATiter rli = bat_iterator(rl);
2313 120 : BATiter rhi = bat_iterator(rh);
2314 120 : const char *rlvals, *rhvals;
2315 120 : const char *lvars, *rlvars, *rhvars;
2316 120 : int rlwidth, rhwidth;
2317 120 : int lwidth;
2318 120 : const void *nil = ATOMnilptr(li.type);
2319 120 : int (*cmp)(const void *, const void *) = ATOMcompare(li.type);
2320 120 : int t;
2321 120 : BUN cnt, ncnt, lncand = lci->ncand, rncand = rci->ncand;
2322 120 : oid *restrict dst1, *restrict dst2;
2323 120 : const void *vrl, *vrh;
2324 120 : oid ro;
2325 120 : oid rlval = oid_nil, rhval = oid_nil;
2326 120 : int sorted = 0; /* which output column is sorted */
2327 120 : BAT *tmp = NULL;
2328 120 : const char *algo = NULL;
2329 120 : Heap *oidxh = NULL;
2330 :
2331 120 : lng timeoffset = 0;
2332 120 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
2333 120 : if (qry_ctx != NULL) {
2334 120 : timeoffset = (qry_ctx->starttime && qry_ctx->querytimeout) ? (qry_ctx->starttime + qry_ctx->querytimeout) : 0;
2335 : }
2336 :
2337 360 : assert(ATOMtype(li.type) == ATOMtype(rli.type));
2338 240 : assert(ATOMtype(li.type) == ATOMtype(rhi.type));
2339 120 : assert(BATcount(rl) == BATcount(rh));
2340 120 : assert(rl->hseqbase == rh->hseqbase);
2341 120 : assert(r1->ttype == TYPE_oid);
2342 120 : assert(r2 == NULL || r2->ttype == TYPE_oid);
2343 103 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2344 120 : assert(li.type != TYPE_void || !is_oid_nil(l->tseqbase));
2345 120 : assert(rli.type != TYPE_void || !is_oid_nil(rl->tseqbase));
2346 120 : assert(rhi.type != TYPE_void || !is_oid_nil(rh->tseqbase));
2347 :
2348 120 : TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
2349 : "rl=" ALGOBATFMT ",rh=" ALGOBATFMT ","
2350 : "sl=" ALGOOPTBATFMT ",sr=" ALGOOPTBATFMT ","
2351 : "anti=%s,symmetric=%s\n",
2352 : ALGOBATPAR(l),
2353 : ALGOBATPAR(rl),
2354 : ALGOBATPAR(rh),
2355 : ALGOOPTBATPAR(lci->s),
2356 : ALGOOPTBATPAR(rci->s),
2357 : anti ? "true" : "false",
2358 : symmetric ? "true" : "false");
2359 :
2360 120 : rlvals = rli.type == TYPE_void ? NULL : (const char *) rli.base;
2361 120 : rhvals = rhi.type == TYPE_void ? NULL : (const char *) rhi.base;
2362 120 : lwidth = li.width;
2363 120 : rlwidth = rli.width;
2364 120 : rhwidth = rhi.width;
2365 120 : dst1 = (oid *) Tloc(r1, 0);
2366 120 : dst2 = r2 ? (oid *) Tloc(r2, 0) : NULL;
2367 :
2368 120 : t = ATOMtype(li.type);
2369 120 : t = ATOMbasetype(t);
2370 :
2371 120 : if (li.vh && li.type) {
2372 18 : assert(rli.vh && rli.type);
2373 18 : assert(rhi.vh && rhi.type);
2374 18 : lvars = li.vh->base;
2375 18 : rlvars = rli.vh->base;
2376 18 : rhvars = rhi.vh->base;
2377 : } else {
2378 102 : assert(rli.vh == NULL);
2379 102 : assert(rhi.vh == NULL);
2380 : lvars = rlvars = rhvars = NULL;
2381 : }
2382 :
2383 120 : if (!anti && !symmetric && !li.sorted && !li.revsorted) {
2384 10 : (void) BATcheckorderidx(l);
2385 10 : MT_lock_set(&l->batIdxLock);
2386 10 : if ((oidxh = l->torderidx) != NULL)
2387 0 : HEAPincref(oidxh);
2388 10 : MT_lock_unset(&l->batIdxLock);
2389 : #if 0 /* needs checking */
2390 : if (oidxh == NULL && VIEWtparent(l)) {
2391 : /* if enabled, need to fix/unfix parent bat */
2392 : BAT *pb = BBP_cache(VIEWtparent(l));
2393 : (void) BATcheckorderidx(pb);
2394 : MT_lock_set(&pb->batIdxLock);
2395 : if ((oidxh = pb->torderidx) != NULL) {
2396 : HEAPincref(oidxh);
2397 : l = pb;
2398 : }
2399 : MT_lock_unset(&pb->batIdxLock);
2400 : }
2401 : #endif
2402 : }
2403 :
2404 120 : vrl = &rlval;
2405 120 : vrh = &rhval;
2406 120 : if (!anti && !symmetric && (li.sorted || li.revsorted || oidxh)) {
2407 : /* left column is sorted, use binary search */
2408 99 : sorted = 2;
2409 454 : TIMEOUT_LOOP(rncand, timeoffset) {
2410 256 : BUN low, high;
2411 :
2412 256 : ro = canditer_next(rci);
2413 256 : if (rlvals) {
2414 256 : vrl = VALUE(rl, ro - rl->hseqbase);
2415 : } else {
2416 : /* TYPE_void */
2417 0 : rlval = ro - rl->hseqbase + rl->tseqbase;
2418 : }
2419 256 : if (rhvals) {
2420 256 : vrh = VALUE(rh, ro - rh->hseqbase);
2421 : } else {
2422 : /* TYPE_void */
2423 0 : rhval = ro - rh->hseqbase + rh->tseqbase;
2424 : }
2425 256 : if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0)
2426 9 : continue;
2427 247 : if (li.sorted) {
2428 228 : if (linc)
2429 167 : low = SORTfndfirst(l, vrl);
2430 : else
2431 61 : low = SORTfndlast(l, vrl);
2432 228 : if (hinc)
2433 167 : high = SORTfndlast(l, vrh);
2434 : else
2435 61 : high = SORTfndfirst(l, vrh);
2436 19 : } else if (li.revsorted) {
2437 19 : if (hinc)
2438 19 : low = SORTfndfirst(l, vrh);
2439 : else
2440 0 : low = SORTfndlast(l, vrh);
2441 19 : if (linc)
2442 3 : high = SORTfndlast(l, vrl);
2443 : else
2444 16 : high = SORTfndfirst(l, vrl);
2445 : } else {
2446 0 : assert(oidxh);
2447 0 : if (linc)
2448 0 : low = ORDERfndfirst(l, oidxh, vrl);
2449 : else
2450 0 : low = ORDERfndlast(l, oidxh, vrl);
2451 0 : if (hinc)
2452 0 : high = ORDERfndlast(l, oidxh, vrh);
2453 : else
2454 0 : high = ORDERfndfirst(l, oidxh, vrh);
2455 : }
2456 247 : if (high <= low)
2457 131 : continue;
2458 116 : if (li.sorted || li.revsorted) {
2459 116 : low = canditer_search(lci, low + l->hseqbase, true);
2460 116 : high = canditer_search(lci, high + l->hseqbase, true);
2461 116 : assert(high >= low);
2462 :
2463 116 : if (BATcapacity(r1) < BATcount(r1) + high - low) {
2464 0 : cnt = BATcount(r1) + high - low + 1024;
2465 0 : if (cnt > maxsize)
2466 : cnt = maxsize;
2467 0 : BATsetcount(r1, BATcount(r1));
2468 0 : if (BATextend(r1, cnt) != GDK_SUCCEED)
2469 0 : goto bailout;
2470 0 : dst1 = (oid *) Tloc(r1, 0);
2471 0 : if (r2) {
2472 0 : BATsetcount(r2, BATcount(r2));
2473 0 : if (BATextend(r2, cnt) != GDK_SUCCEED)
2474 0 : goto bailout;
2475 0 : assert(BATcapacity(r1) == BATcapacity(r2));
2476 0 : dst2 = (oid *) Tloc(r2, 0);
2477 : }
2478 : }
2479 116 : canditer_setidx(lci, low);
2480 607 : while (low < high) {
2481 491 : dst1[r1->batCount++] = canditer_next(lci);
2482 491 : if (r2) {
2483 479 : dst2[r2->batCount++] = ro;
2484 : }
2485 491 : low++;
2486 : }
2487 : } else {
2488 0 : const oid *ord;
2489 :
2490 0 : assert(oidxh);
2491 0 : ord = (const oid *) oidxh->base + ORDERIDXOFF;
2492 :
2493 0 : if (BATcapacity(r1) < BATcount(r1) + high - low) {
2494 0 : cnt = BATcount(r1) + high - low + 1024;
2495 0 : if (cnt > maxsize)
2496 : cnt = maxsize;
2497 0 : BATsetcount(r1, BATcount(r1));
2498 0 : if (BATextend(r1, cnt) != GDK_SUCCEED)
2499 0 : goto bailout;
2500 0 : dst1 = (oid *) Tloc(r1, 0);
2501 0 : if (r2) {
2502 0 : BATsetcount(r2, BATcount(r2));
2503 0 : if (BATextend(r2, cnt) != GDK_SUCCEED)
2504 0 : goto bailout;
2505 0 : assert(BATcapacity(r1) == BATcapacity(r2));
2506 0 : dst2 = (oid *) Tloc(r2, 0);
2507 : }
2508 : }
2509 :
2510 0 : while (low < high) {
2511 0 : if (canditer_contains(lci, ord[low])) {
2512 0 : dst1[r1->batCount++] = ord[low];
2513 0 : if (r2) {
2514 0 : dst2[r2->batCount++] = ro;
2515 : }
2516 : }
2517 0 : low++;
2518 : }
2519 : }
2520 : }
2521 99 : if (oidxh)
2522 0 : HEAPdecref(oidxh, false);
2523 99 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
2524 99 : cnt = BATcount(r1);
2525 99 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2526 21 : } else if (/* DISABLES CODE */ (0) &&
2527 : !anti && !symmetric &&
2528 : imprintable(li.type) &&
2529 : (BATcount(rl) > 2 ||
2530 : !li.transient ||
2531 : (VIEWtparent(l) != 0 &&
2532 : /* if enabled, need to fix/unfix parent bat */
2533 : (tmp = BBP_cache(VIEWtparent(l))) != NULL &&
2534 : /* batTransient access needs to be protected */
2535 : !tmp->batTransient) ||
2536 : BATcheckimprints(l)) &&
2537 : BATimprints(l) == GDK_SUCCEED) {
2538 : /* implementation using imprints on left column
2539 : *
2540 : * we use imprints if we can (the type is right for
2541 : * imprints) and either the left bat is persistent or
2542 : * already has imprints, or the right bats are long
2543 : * enough (for creating imprints being worth it) */
2544 : Imprints *imprints = NULL;
2545 :
2546 : if (tmp) {
2547 : MT_lock_set(&tmp->batIdxLock);
2548 : imprints = tmp->timprints;
2549 : if (imprints != NULL)
2550 : IMPSincref(imprints);
2551 : else
2552 : imprints = NULL;
2553 : MT_lock_unset(&tmp->batIdxLock);
2554 : } else {
2555 : MT_lock_set(&l->batIdxLock);
2556 : imprints = l->timprints;
2557 : if (imprints != NULL)
2558 : IMPSincref(imprints);
2559 : else
2560 : imprints = NULL;
2561 : MT_lock_unset(&l->batIdxLock);
2562 : }
2563 : /* in the unlikely case that the imprints were removed
2564 : * before we could get at them, take the slow, nested
2565 : * loop implementation */
2566 : if (imprints == NULL)
2567 : goto nestedloop;
2568 :
2569 : sorted = 2;
2570 : cnt = 0;
2571 : TIMEOUT_LOOP_IDX_DECL(i, rncand, timeoffset) {
2572 : maxsize = cnt + (rncand - i) * lncand;
2573 : ro = canditer_next(rci);
2574 : if (rlvals) {
2575 : vrl = FVALUE(rl, ro - rl->hseqbase);
2576 : } else {
2577 : /* TYPE_void */
2578 : rlval = ro - rl->hseqbase + rl->tseqbase;
2579 : }
2580 : if (rhvals) {
2581 : vrh = FVALUE(rh, ro - rh->hseqbase);
2582 : } else {
2583 : /* TYPE_void */
2584 : rhval = ro - rl->hseqbase + rl->tseqbase;
2585 : }
2586 : dst1 = (oid *) Tloc(r1, 0);
2587 : canditer_reset(lci);
2588 : switch (t) {
2589 : case TYPE_bte: {
2590 : bte vl, vh;
2591 : if (is_bte_nil((vl = *(bte *) vrl)))
2592 : continue;
2593 : if (is_bte_nil((vh = *(bte *) vrh)))
2594 : continue;
2595 : if (!linc) {
2596 : if (vl == MAXVALUEbte)
2597 : continue;
2598 : vl = NEXTVALUEbte(vl);
2599 : }
2600 : if (!hinc) {
2601 : if (vh == MINVALUEbte)
2602 : continue;
2603 : vh = PREVVALUEbte(vh);
2604 : }
2605 : if (vl > vh)
2606 : continue;
2607 : ncnt = fullscan_bte(&li, lci, r1, &vl, &vh,
2608 : true, true, false,
2609 : false, true, true,
2610 : false, cnt,
2611 : l->hseqbase, dst1,
2612 : maxsize,
2613 : imprints, &algo);
2614 : break;
2615 : }
2616 : case TYPE_sht: {
2617 : sht vl, vh;
2618 : if (is_sht_nil((vl = *(sht *) vrl)))
2619 : continue;
2620 : if (is_sht_nil((vh = *(sht *) vrh)))
2621 : continue;
2622 : if (!linc) {
2623 : if (vl == MAXVALUEsht)
2624 : continue;
2625 : vl = NEXTVALUEsht(vl);
2626 : }
2627 : if (!hinc) {
2628 : if (vh == MINVALUEsht)
2629 : continue;
2630 : vh = PREVVALUEsht(vh);
2631 : }
2632 : if (vl > vh)
2633 : continue;
2634 : ncnt = fullscan_sht(&li, lci, r1, &vl, &vh,
2635 : true, true, false,
2636 : false, true, true,
2637 : false, cnt,
2638 : l->hseqbase, dst1,
2639 : maxsize,
2640 : imprints, &algo);
2641 : break;
2642 : }
2643 : case TYPE_int:
2644 : #if SIZEOF_OID == SIZEOF_INT
2645 : case TYPE_oid:
2646 : #endif
2647 : {
2648 : int vl, vh;
2649 : if (is_int_nil((vl = *(int *) vrl)))
2650 : continue;
2651 : if (is_int_nil((vh = *(int *) vrh)))
2652 : continue;
2653 : if (!linc) {
2654 : if (vl == MAXVALUEint)
2655 : continue;
2656 : vl = NEXTVALUEint(vl);
2657 : }
2658 : if (!hinc) {
2659 : #if SIZEOF_OID == SIZEOF_INT
2660 : if (t == TYPE_oid) {
2661 : if (vh == MINVALUEoid)
2662 : continue;
2663 : vh = PREVVALUEoid(vh);
2664 : } else
2665 : #endif
2666 : {
2667 : if (vh == MINVALUEint)
2668 : continue;
2669 : vh = PREVVALUEint(vh);
2670 : }
2671 : }
2672 : if (vl > vh)
2673 : continue;
2674 : ncnt = fullscan_int(&li, lci, r1, &vl, &vh,
2675 : true, true, false,
2676 : false, true, true,
2677 : false, cnt,
2678 : l->hseqbase, dst1,
2679 : maxsize,
2680 : imprints, &algo);
2681 : break;
2682 : }
2683 : case TYPE_lng:
2684 : #if SIZEOF_OID == SIZEOF_LNG
2685 : case TYPE_oid:
2686 : #endif
2687 : {
2688 : lng vl, vh;
2689 : if (is_lng_nil((vl = *(lng *) vrl)))
2690 : continue;
2691 : if (is_lng_nil((vh = *(lng *) vrh)))
2692 : continue;
2693 : if (!linc) {
2694 : if (vl == MAXVALUElng)
2695 : continue;
2696 : vl = NEXTVALUElng(vl);
2697 : }
2698 : if (!hinc) {
2699 : #if SIZEOF_OID == SIZEOF_LNG
2700 : if (t == TYPE_oid) {
2701 : if (vh == MINVALUEoid)
2702 : continue;
2703 : vh = PREVVALUEoid(vh);
2704 : } else
2705 : #endif
2706 : {
2707 : if (vh == MINVALUElng)
2708 : continue;
2709 : vh = PREVVALUElng(vh);
2710 : }
2711 : }
2712 : if (vl > vh)
2713 : continue;
2714 : ncnt = fullscan_lng(&li, lci, r1, &vl, &vh,
2715 : true, true, false,
2716 : false, true, true,
2717 : false, cnt,
2718 : l->hseqbase, dst1,
2719 : maxsize,
2720 : imprints, &algo);
2721 : break;
2722 : }
2723 : #ifdef HAVE_HGE
2724 : case TYPE_hge: {
2725 : hge vl, vh;
2726 : if (is_hge_nil((vl = *(hge *) vrl)))
2727 : continue;
2728 : if (is_hge_nil((vh = *(hge *) vrh)))
2729 : continue;
2730 : if (!linc) {
2731 : if (vl == MAXVALUEhge)
2732 : continue;
2733 : vl = NEXTVALUEhge(vl);
2734 : }
2735 : if (!hinc) {
2736 : if (vh == MINVALUEhge)
2737 : continue;
2738 : vh = PREVVALUEhge(vh);
2739 : }
2740 : if (vl > vh)
2741 : continue;
2742 : ncnt = fullscan_hge(&li, lci, r1, &vl, &vh,
2743 : true, true, false,
2744 : false, true, true,
2745 : false, cnt,
2746 : l->hseqbase, dst1,
2747 : maxsize,
2748 : imprints, &algo);
2749 : break;
2750 : }
2751 : #endif
2752 : case TYPE_flt: {
2753 : flt vl, vh;
2754 : vl = *(flt *) vrl;
2755 : if (is_flt_nil(vl))
2756 : continue;
2757 : vh = *(flt *) vrh;
2758 : if (is_flt_nil(vh))
2759 : continue;
2760 : if (!linc) {
2761 : if (vl == MAXVALUEflt)
2762 : continue;
2763 : vl = NEXTVALUEflt(vl);
2764 : }
2765 : if (!hinc) {
2766 : if (vh == MINVALUEflt)
2767 : continue;
2768 : vh = PREVVALUEflt(vh);
2769 : }
2770 : if (vl > vh)
2771 : continue;
2772 : ncnt = fullscan_flt(&li, lci, r1, &vl, &vh,
2773 : true, true, false,
2774 : false, true, true,
2775 : false, cnt,
2776 : l->hseqbase, dst1,
2777 : maxsize,
2778 : imprints, &algo);
2779 : break;
2780 : }
2781 : case TYPE_dbl: {
2782 : dbl vl, vh;
2783 : vl = *(dbl *) vrl;
2784 : if (is_dbl_nil(vl))
2785 : continue;
2786 : vh = *(dbl *) vrh;
2787 : if (is_dbl_nil(vh))
2788 : continue;
2789 : if (!linc) {
2790 : if (vl == MAXVALUEdbl)
2791 : continue;
2792 : vl = NEXTVALUEdbl(vl);
2793 : }
2794 : if (!hinc) {
2795 : if (vh == MINVALUEdbl)
2796 : continue;
2797 : vh = PREVVALUEdbl(vh);
2798 : }
2799 : if (vl > vh)
2800 : continue;
2801 : ncnt = fullscan_dbl(&li, lci, r1, &vl, &vh,
2802 : true, true, false,
2803 : false, true, true,
2804 : false, cnt,
2805 : l->hseqbase, dst1,
2806 : maxsize,
2807 : imprints, &algo);
2808 : break;
2809 : }
2810 : default:
2811 : ncnt = BUN_NONE;
2812 : GDKerror("unsupported type\n");
2813 : MT_UNREACHABLE();
2814 : }
2815 : if (ncnt == BUN_NONE) {
2816 : IMPSdecref(imprints, false);
2817 : goto bailout;
2818 : }
2819 : assert(ncnt >= cnt || ncnt == 0);
2820 : if (ncnt == cnt || ncnt == 0)
2821 : continue;
2822 : if (r2) {
2823 : if (BATcapacity(r2) < ncnt) {
2824 : BATsetcount(r2, cnt);
2825 : if (BATextend(r2, BATcapacity(r1)) != GDK_SUCCEED) {
2826 : IMPSdecref(imprints, false);
2827 : goto bailout;
2828 : }
2829 : dst2 = (oid *) Tloc(r2, 0);
2830 : }
2831 : while (cnt < ncnt)
2832 : dst2[cnt++] = ro;
2833 : } else {
2834 : cnt = ncnt;
2835 : }
2836 : }
2837 : IMPSdecref(imprints, false);
2838 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
2839 : } else {
2840 21 : nestedloop:;
2841 : /* nested loop implementation */
2842 21 : const void *vl;
2843 21 : const char *lvals;
2844 21 : oid lval;
2845 :
2846 21 : GDKclrerr(); /* not interested in BATimprints errors */
2847 21 : sorted = 1;
2848 21 : lvals = li.type == TYPE_void ? NULL : (const char *) li.base;
2849 21 : vl = &lval;
2850 289 : TIMEOUT_LOOP(lncand, timeoffset) {
2851 247 : oid lo;
2852 :
2853 247 : lo = canditer_next(lci);
2854 247 : if (lvals) {
2855 247 : vl = VALUE(l, lo - l->hseqbase);
2856 247 : if (cmp(vl, nil) == 0)
2857 8 : continue;
2858 : } else {
2859 0 : lval = lo - l->hseqbase + l->tseqbase;
2860 : }
2861 239 : canditer_reset(rci);
2862 36663 : for (BUN j = 0; j < rncand; j++) {
2863 36424 : ro = canditer_next(rci);
2864 33194 : if (rlvals) {
2865 33194 : vrl = VALUE(rl, ro - rl->hseqbase);
2866 : } else {
2867 : /* TYPE_void */
2868 0 : rlval = ro - rl->hseqbase + rl->tseqbase;
2869 : }
2870 33194 : if (rhvals) {
2871 33194 : vrh = VALUE(rh, ro - rh->hseqbase);
2872 : } else {
2873 : /* TYPE_void */
2874 0 : rhval = ro - rh->hseqbase + rh->tseqbase;
2875 : }
2876 33216 : if (BETWEEN(vl, vrl, linc, vrh, hinc, any) != 1)
2877 23675 : continue;
2878 12749 : if (BATcount(r1) == BATcapacity(r1)) {
2879 2 : BUN newcap = BATgrows(r1);
2880 2 : if (newcap > maxsize)
2881 : newcap = maxsize;
2882 2 : BATsetcount(r1, BATcount(r1));
2883 2 : if (BATextend(r1, newcap) != GDK_SUCCEED)
2884 0 : goto bailout;
2885 2 : dst1 = (oid *) Tloc(r1, 0);
2886 2 : if (r2) {
2887 2 : BATsetcount(r2, BATcount(r2));
2888 2 : if (BATextend(r2, newcap) != GDK_SUCCEED)
2889 0 : goto bailout;
2890 2 : assert(BATcapacity(r1) == BATcapacity(r2));
2891 2 : dst2 = (oid *) Tloc(r2, 0);
2892 : }
2893 : }
2894 12749 : dst1[r1->batCount++] = lo;
2895 12749 : if (r2) {
2896 12740 : dst2[r2->batCount++] = ro;
2897 : }
2898 : }
2899 : }
2900 21 : TIMEOUT_CHECK(timeoffset, GOTO_LABEL_TIMEOUT_HANDLER(bailout));
2901 21 : cnt = BATcount(r1);
2902 21 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2903 : }
2904 :
2905 : /* also set other bits of heap to correct value to indicate size */
2906 120 : BATsetcount(r1, cnt);
2907 :
2908 : /* set properties using an extra scan (usually not complete) */
2909 120 : dst1 = (oid *) Tloc(r1, 0);
2910 120 : r1->tkey = true;
2911 120 : r1->tsorted = true;
2912 120 : r1->trevsorted = true;
2913 120 : r1->tseqbase = 0;
2914 120 : r1->tnil = false;
2915 120 : r1->tnonil = true;
2916 432 : for (ncnt = 1; ncnt < cnt; ncnt++) {
2917 318 : if (dst1[ncnt - 1] == dst1[ncnt]) {
2918 253 : r1->tseqbase = oid_nil;
2919 253 : r1->tkey = false;
2920 65 : } else if (dst1[ncnt - 1] < dst1[ncnt]) {
2921 62 : r1->trevsorted = false;
2922 62 : if (dst1[ncnt - 1] + 1 != dst1[ncnt])
2923 5 : r1->tseqbase = oid_nil;
2924 : } else {
2925 3 : assert(sorted != 1);
2926 3 : r1->tsorted = false;
2927 3 : r1->tseqbase = oid_nil;
2928 3 : r1->tkey = false;
2929 : }
2930 371 : if (!(r1->trevsorted | BATtdense(r1) | r1->tkey | ((sorted != 1) & r1->tsorted)))
2931 : break;
2932 : }
2933 120 : if (BATtdense(r1))
2934 95 : r1->tseqbase = cnt > 0 ? dst1[0] : 0;
2935 120 : if (r2) {
2936 103 : BATsetcount(r2, cnt);
2937 103 : dst2 = (oid *) Tloc(r2, 0);
2938 103 : r2->tkey = true;
2939 103 : r2->tsorted = true;
2940 103 : r2->trevsorted = true;
2941 103 : r2->tseqbase = 0;
2942 103 : r2->tnil = false;
2943 103 : r2->tnonil = true;
2944 415 : for (ncnt = 1; ncnt < cnt; ncnt++) {
2945 320 : if (dst2[ncnt - 1] == dst2[ncnt]) {
2946 44 : r2->tseqbase = oid_nil;
2947 44 : r2->tkey = false;
2948 276 : } else if (dst2[ncnt - 1] < dst2[ncnt]) {
2949 269 : r2->trevsorted = false;
2950 269 : if (dst2[ncnt - 1] + 1 != dst2[ncnt])
2951 82 : r2->tseqbase = oid_nil;
2952 : } else {
2953 7 : assert(sorted != 2);
2954 7 : r2->tsorted = false;
2955 7 : r2->tseqbase = oid_nil;
2956 7 : r2->tkey = false;
2957 : }
2958 419 : if (!(r2->trevsorted | BATtdense(r2) | r2->tkey | ((sorted != 2) & r2->tsorted)))
2959 : break;
2960 : }
2961 103 : if (BATtdense(r2))
2962 71 : r2->tseqbase = cnt > 0 ? dst2[0] : 0;
2963 : }
2964 120 : TRC_DEBUG(ALGO, "l=%s,rl=%s,rh=%s -> "
2965 : "(" ALGOBATFMT "," ALGOOPTBATFMT ")\n",
2966 : BATgetId(l), BATgetId(rl), BATgetId(rh),
2967 : ALGOBATPAR(r1), ALGOOPTBATPAR(r2));
2968 120 : bat_iterator_end(&li);
2969 120 : bat_iterator_end(&rli);
2970 120 : bat_iterator_end(&rhi);
2971 120 : return GDK_SUCCEED;
2972 :
2973 0 : bailout:
2974 0 : bat_iterator_end(&li);
2975 0 : bat_iterator_end(&rli);
2976 0 : bat_iterator_end(&rhi);
2977 0 : BBPreclaim(r1);
2978 0 : BBPreclaim(r2);
2979 : return GDK_FAIL;
2980 : }
|