Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : /* auxiliary functions and structs for imprints */
18 : #include "gdk_imprints.h"
19 :
20 : static inline oid *
21 115725418 : buninsfix(BAT *bn, oid *a, BUN i, oid v, BUN g, BUN m)
22 : {
23 115725418 : if (i == BATcapacity(bn)) {
24 50 : BATsetcount(bn, i);
25 50 : if (BATextend(bn, MIN(BATcapacity(bn) + g, m)) != GDK_SUCCEED)
26 : return NULL;
27 50 : a = (oid *) Tloc(bn, 0);
28 : }
29 115725418 : a[i] = v;
30 115725418 : return a;
31 : }
32 :
33 : BAT *
34 1963767 : virtualize(BAT *bn)
35 : {
36 : /* input must be a valid candidate list or NULL */
37 1963767 : if (bn == NULL)
38 : return NULL;
39 1963767 : if ((bn->ttype != TYPE_void && bn->ttype != TYPE_oid) || !bn->tkey || !bn->tsorted) {
40 0 : fprintf(stderr, "#bn type %d nil %d key %d sorted %d\n",
41 0 : bn->ttype, is_oid_nil(bn->tseqbase),
42 0 : bn->tkey, bn->tsorted);
43 0 : fflush(stderr);
44 : }
45 1963727 : assert(((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) ||
46 : bn->ttype == TYPE_oid) &&
47 : bn->tkey && bn->tsorted);
48 1963727 : assert(BBP_refs(bn->batCacheid) == 1);
49 1963727 : assert(BBP_lrefs(bn->batCacheid) == 0);
50 : /* since bn has unique and strictly ascending values, we can
51 : * easily check whether the column is dense */
52 1963727 : if (bn->ttype == TYPE_oid &&
53 1448358 : (BATcount(bn) <= 1 ||
54 414790 : * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 ==
55 414790 : * (const oid *) Tloc(bn, BATcount(bn) - 1))) {
56 : /* column is dense, replace by virtual oid */
57 1075756 : oid tseq; /* work around bug in Intel compiler */
58 1075756 : if (BATcount(bn) == 0)
59 : tseq = 0;
60 : else
61 427998 : tseq = * (const oid *) Tloc(bn, 0);
62 1075756 : TRC_DEBUG(ALGO, ALGOBATFMT ",seq=" OIDFMT "\n",
63 : ALGOBATPAR(bn), tseq);
64 1075765 : bn->tseqbase = tseq;
65 1075765 : if (VIEWtparent(bn)) {
66 15 : Heap *h = GDKmalloc(sizeof(Heap));
67 15 : if (h == NULL) {
68 0 : BBPunfix(bn->batCacheid);
69 0 : return NULL;
70 : }
71 15 : *h = *bn->theap;
72 15 : settailname(h, BBP_physical(bn->batCacheid), TYPE_oid, 0);
73 15 : h->parentid = bn->batCacheid;
74 15 : h->base = NULL;
75 15 : h->hasfile = false;
76 15 : ATOMIC_INIT(&h->refs, 1);
77 15 : if (bn->theap->parentid != bn->batCacheid)
78 15 : BBPrelease(bn->theap->parentid);
79 15 : HEAPdecref(bn->theap, false);
80 15 : bn->theap = h;
81 : } else {
82 1075750 : HEAPfree(bn->theap, true);
83 : }
84 1075768 : bn->theap->storage = bn->theap->newstorage = STORE_MEM;
85 1075768 : bn->theap->size = 0;
86 1075768 : bn->ttype = TYPE_void;
87 1075768 : bn->twidth = 0;
88 1075768 : bn->tshift = 0;
89 : }
90 :
91 : return bn;
92 : }
93 :
94 : #define HASHloop_bound(bi, h, hb, v, lo, hi) \
95 : for (hb = HASHget(h, HASHprobe((h), v)); \
96 : hb != BUN_NONE; \
97 : hb = HASHgetlink(h,hb)) \
98 : if (hb >= (lo) && hb < (hi) && \
99 : (cmp == NULL || \
100 : (*cmp)(v, BUNtail(bi, hb)) == 0))
101 :
102 : static BAT *
103 53062 : hashselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
104 : const void *tl, BUN maximum, bool havehash, bool phash,
105 : const char **algo)
106 : {
107 53062 : BUN i, cnt;
108 53062 : oid o, *restrict dst;
109 53062 : BUN l, h, d = 0;
110 53062 : oid seq;
111 53062 : int (*cmp)(const void *, const void *);
112 53062 : BAT *b2 = NULL;
113 53062 : BATiter pbi = {0};
114 :
115 53062 : size_t counter = 0;
116 53062 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
117 :
118 53065 : assert(bn->ttype == TYPE_oid);
119 53065 : seq = bi->b->hseqbase;
120 53065 : l = ci->seq - seq;
121 53065 : h = canditer_last(ci) + 1 - seq;
122 :
123 53067 : *algo = "hashselect";
124 53067 : if (phash && (b2 = BATdescriptor(VIEWtparent(bi->b))) != NULL) {
125 52544 : *algo = "hashselect on parent";
126 52544 : TRC_DEBUG(ALGO, ALGOBATFMT
127 : " using parent(" ALGOBATFMT ") "
128 : "for hash\n",
129 : ALGOBATPAR(bi->b),
130 : ALGOBATPAR(b2));
131 52544 : d = bi->baseoff - b2->tbaseoff;
132 52544 : l += d;
133 52544 : h += d;
134 52544 : pbi = bat_iterator(b2);
135 52544 : bi = &pbi;
136 : } else {
137 : phash = false;
138 : }
139 :
140 53064 : if (!havehash) {
141 2 : if (BAThash(bi->b) != GDK_SUCCEED) {
142 0 : BBPreclaim(bn);
143 0 : BBPreclaim(b2);
144 0 : if (phash)
145 0 : bat_iterator_end(&pbi);
146 0 : return NULL;
147 : }
148 2 : MT_rwlock_rdlock(&bi->b->thashlock);
149 2 : if (bi->b->thash == NULL) {
150 0 : GDKerror("Hash destroyed before we could use it\n");
151 0 : goto bailout;
152 : }
153 : }
154 106125 : switch (ATOMbasetype(bi->type)) {
155 : case TYPE_bte:
156 : case TYPE_sht:
157 : cmp = NULL; /* no need to compare: "hash" is perfect */
158 : break;
159 53063 : default:
160 53063 : cmp = ATOMcompare(bi->type);
161 53063 : break;
162 : }
163 53064 : dst = (oid *) Tloc(bn, 0);
164 53064 : cnt = 0;
165 53064 : if (ci->tpe != cand_dense) {
166 40036 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
167 15831 : GDK_CHECK_TIMEOUT(qry_ctx, counter,
168 : GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
169 15831 : o = (oid) (i + seq - d);
170 15831 : if (canditer_contains(ci, o)) {
171 30830 : dst = buninsfix(bn, dst, cnt, o,
172 15415 : maximum - BATcapacity(bn),
173 : maximum);
174 15415 : if (dst == NULL)
175 0 : goto bailout;
176 15415 : cnt++;
177 : }
178 : }
179 : } else {
180 161389 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
181 73567 : GDK_CHECK_TIMEOUT(qry_ctx, counter,
182 : GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
183 73567 : o = (oid) (i + seq - d);
184 147102 : dst = buninsfix(bn, dst, cnt, o,
185 73567 : maximum - BATcapacity(bn),
186 : maximum);
187 73535 : if (dst == NULL)
188 0 : goto bailout;
189 73535 : cnt++;
190 : }
191 : }
192 53060 : MT_rwlock_rdunlock(&bi->b->thashlock);
193 53064 : BBPreclaim(b2);
194 53064 : BATsetcount(bn, cnt);
195 53064 : bn->tkey = true;
196 53064 : if (cnt > 1) {
197 : /* hash chains produce results in the order high to
198 : * low, so we just need to reverse */
199 35233 : for (l = 0, h = BATcount(bn) - 1; l < h; l++, h--) {
200 32279 : o = dst[l];
201 32279 : dst[l] = dst[h];
202 32279 : dst[h] = o;
203 : }
204 : }
205 53064 : bn->tsorted = true;
206 53064 : bn->trevsorted = bn->batCount <= 1;
207 53064 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil;
208 53064 : if (phash)
209 52543 : bat_iterator_end(&pbi);
210 : return bn;
211 :
212 0 : bailout:
213 0 : MT_rwlock_rdunlock(&bi->b->thashlock);
214 0 : if (phash)
215 0 : bat_iterator_end(&pbi);
216 0 : BBPreclaim(b2);
217 0 : BBPreclaim(bn);
218 0 : return NULL;
219 : }
220 :
221 : /* Imprints select code */
222 :
223 : /* inner check, non-dense canditer */
224 : #define impscheck(TEST,ADD) \
225 : do { \
226 : const oid e = (oid) (i+limit-pr_off+hseq); \
227 : if (im[icnt] & mask) { \
228 : if ((im[icnt] & ~innermask) == 0) { \
229 : while (p < ncand && o < e) { \
230 : v = src[o-hseq]; \
231 : if ((ADD) == NULL) { \
232 : goto bailout; \
233 : } \
234 : cnt++; \
235 : p++; \
236 : o = canditer_next(ci); \
237 : } \
238 : } else { \
239 : while (p < ncand && o < e) { \
240 : v = src[o-hseq]; \
241 : if ((ADD) == NULL) { \
242 : goto bailout; \
243 : } \
244 : cnt += (TEST) != 0; \
245 : p++; \
246 : o = canditer_next(ci); \
247 : } \
248 : } \
249 : } else { \
250 : while (p < ncand && o < e) { \
251 : p++; \
252 : o = canditer_next(ci); \
253 : } \
254 : } \
255 : } while (false)
256 :
257 : /* inner check, dense canditer */
258 : #define impscheck_dense(TEST,ADD) \
259 : do { \
260 : const oid e = (oid) (i+limit-pr_off+hseq); \
261 : if (im[icnt] & mask) { \
262 : if ((im[icnt] & ~innermask) == 0) { \
263 : while (p < ncand && o < e) { \
264 : v = src[o-hseq]; \
265 : if ((ADD) == NULL) { \
266 : goto bailout; \
267 : } \
268 : cnt++; \
269 : p++; \
270 : o = canditer_next_dense(ci); \
271 : } \
272 : } else { \
273 : while (p < ncand && o < e) { \
274 : v = src[o-hseq]; \
275 : if ((ADD) == NULL) { \
276 : goto bailout; \
277 : } \
278 : cnt += (TEST) != 0; \
279 : p++; \
280 : o = canditer_next_dense(ci); \
281 : } \
282 : } \
283 : } else { \
284 : BUN skip_sz = MIN(ncand - p, e - o); \
285 : p += skip_sz; \
286 : o += skip_sz; \
287 : ci->next += skip_sz; \
288 : } \
289 : } while (false)
290 :
291 : /* main loop for imprints */
292 : /*
293 : * icnt is the iterator for imprints
294 : * dcnt is the iterator for dictionary entries
295 : * i is the iterator for the values in imprints
296 : */
297 : #define impsloop(ISDENSE,TEST,ADD) \
298 : do { \
299 : BUN dcnt, icnt, limit, i; \
300 : const cchdc_t *restrict d = (cchdc_t *) imprints->dict; \
301 : const uint8_t rpp = ATOMelmshift(IMPS_PAGE >> bi->shift); \
302 : o = canditer_next(ci); \
303 : for (i = 0, dcnt = 0, icnt = 0, p = 0; \
304 : dcnt < imprints->dictcnt && i <= w - hseq + pr_off && p < ncand; \
305 : dcnt++) { \
306 : GDK_CHECK_TIMEOUT(qry_ctx, counter, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
307 : limit = ((BUN) d[dcnt].cnt) << rpp; \
308 : while (i + limit <= o - hseq + pr_off) { \
309 : i += limit; \
310 : icnt += d[dcnt].repeat ? 1 : d[dcnt].cnt; \
311 : dcnt++; \
312 : limit = ((BUN) d[dcnt].cnt) << rpp; \
313 : } \
314 : if (!d[dcnt].repeat) { \
315 : const BUN l = icnt + d[dcnt].cnt; \
316 : limit = (BUN) 1 << rpp; \
317 : while (i + limit <= o - hseq + pr_off) { \
318 : icnt++; \
319 : i += limit; \
320 : } \
321 : for (; \
322 : icnt < l && i <= w - hseq + pr_off; \
323 : icnt++) { \
324 : impscheck##ISDENSE(TEST,ADD); \
325 : i += limit; \
326 : } \
327 : } \
328 : else { \
329 : impscheck##ISDENSE(TEST,ADD); \
330 : i += limit; \
331 : icnt++; \
332 : } \
333 : } \
334 : } while (false)
335 :
336 : static inline oid *
337 0 : quickins(oid *dst, BUN cnt, oid o, BAT *bn)
338 : {
339 0 : (void) bn;
340 0 : assert(cnt < BATcapacity(bn));
341 0 : dst[cnt] = o;
342 0 : return dst;
343 : }
344 :
345 : /* construct the mask */
346 : #define impsmask(ISDENSE,TEST,B) \
347 : do { \
348 : const uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
349 : uint##B##_t mask = 0, innermask; \
350 : const int tpe = ATOMbasetype(bi->type); \
351 : const int lbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, &vl); \
352 : const int hbin = IMPSgetbin(tpe, imprints->bits, imprints->bins, &vh); \
353 : /* note: (1<<n)-1 gives a sequence of n one bits */ \
354 : /* to set bits hbin..lbin inclusive, we would do: */ \
355 : /* mask = ((1 << (hbin + 1)) - 1) - ((1 << lbin) - 1); */ \
356 : /* except ((1 << (hbin + 1)) - 1) is not defined if */ \
357 : /* hbin == sizeof(uint##B##_t)*8 - 1 */ \
358 : /* the following does work, however */ \
359 : mask = (((((uint##B##_t) 1 << hbin) - 1) << 1) | 1) - (((uint##B##_t) 1 << lbin) - 1); \
360 : innermask = mask; \
361 : if (vl != minval) \
362 : innermask = IMPSunsetBit(B, innermask, lbin); \
363 : if (vh != maxval) \
364 : innermask = IMPSunsetBit(B, innermask, hbin); \
365 : if (anti) { \
366 : uint##B##_t tmp = mask; \
367 : mask = ~innermask; \
368 : innermask = ~tmp; \
369 : } \
370 : /* if there are nils, we may need to check bin 0 */ \
371 : if (!bi->nonil) \
372 : innermask = IMPSunsetBit(B, innermask, 0); \
373 : \
374 : if (BATcapacity(bn) < maximum) { \
375 : impsloop(ISDENSE, TEST, \
376 : dst = buninsfix(bn, dst, cnt, o, \
377 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
378 : * (dbl) (ncand-p) * 1.1 + 1024), \
379 : maximum)); \
380 : } else { \
381 : impsloop(ISDENSE, TEST, dst = quickins(dst, cnt, o, bn)); \
382 : } \
383 : } while (false)
384 :
385 : #define checkMINMAX(B, TYPE) \
386 : do { \
387 : const BUN *restrict imp_cnt = imprints->stats + 128; \
388 : imp_min = imp_max = nil; \
389 : for (BUN ii = 0; ii < B; ii++) { \
390 : if (is_##TYPE##_nil(imp_min) && imp_cnt[ii]) { \
391 : imp_min = basesrc[imprints->stats[ii]]; \
392 : break; \
393 : } \
394 : } \
395 : for (BUN ii = B; ii != 0; ii--) { \
396 : if (is_##TYPE##_nil(imp_max) && imp_cnt[ii-1]) { \
397 : imp_max = basesrc[imprints->stats[64+ii-1]]; \
398 : break; \
399 : } \
400 : } \
401 : assert(!is_##TYPE##_nil(imp_min) && \
402 : !is_##TYPE##_nil(imp_max)); \
403 : if (anti ? \
404 : vl < imp_min && vh > imp_max : \
405 : vl > imp_max || vh < imp_min) { \
406 : if (pbat) \
407 : BBPunfix(pbat->batCacheid); \
408 : return 0; \
409 : } \
410 : } while (false)
411 :
412 : /* choose number of bits */
413 : #define bitswitch(ISDENSE, TEST, TYPE) \
414 : do { \
415 : BUN ncand = ci->ncand; \
416 : assert(imprints); \
417 : *algo = parent ? "parent imprints select " #TEST " (canditer_next" #ISDENSE ")" : "imprints select " #TEST " (canditer_next" #ISDENSE ")"; \
418 : switch (imprints->bits) { \
419 : case 8: \
420 : checkMINMAX(8, TYPE); \
421 : impsmask(ISDENSE,TEST,8); \
422 : break; \
423 : case 16: \
424 : checkMINMAX(16, TYPE); \
425 : impsmask(ISDENSE,TEST,16); \
426 : break; \
427 : case 32: \
428 : checkMINMAX(32, TYPE); \
429 : impsmask(ISDENSE,TEST,32); \
430 : break; \
431 : case 64: \
432 : checkMINMAX(64, TYPE); \
433 : impsmask(ISDENSE,TEST,64); \
434 : break; \
435 : default: \
436 : MT_UNREACHABLE(); \
437 : } \
438 : } while (false)
439 :
440 : /* scan select without imprints */
441 :
442 : /* core scan select loop with & without candidates */
443 : #define scanloop(NAME,canditer_next,TEST) \
444 : do { \
445 : BUN ncand = ci->ncand; \
446 : *algo = "select: " #NAME " " #TEST " (" #canditer_next ")"; \
447 : if (BATcapacity(bn) < maximum) { \
448 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) { \
449 : o = canditer_next(ci); \
450 : v = src[o-hseq]; \
451 : if (TEST) { \
452 : dst = buninsfix(bn, dst, cnt, o, \
453 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
454 : * (dbl) (ncand-p) * 1.1 + 1024), \
455 : maximum); \
456 : if (dst == NULL) { \
457 : goto bailout; \
458 : } \
459 : cnt++; \
460 : } \
461 : } \
462 : } else { \
463 : TIMEOUT_LOOP(ncand, qry_ctx) { \
464 : o = canditer_next(ci); \
465 : v = src[o-hseq]; \
466 : assert(cnt < BATcapacity(bn)); \
467 : dst[cnt] = o; \
468 : cnt += (TEST) != 0; \
469 : } \
470 : } \
471 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
472 : } while (false)
473 :
474 : /* argument list for type-specific core scan select function call */
475 : #define scanargs \
476 : bi, ci, bn, tl, th, li, hi, equi, anti, lval, hval, lnil, \
477 : cnt, bi->b->hseqbase, dst, maximum, imprints, algo
478 :
479 : #define PREVVALUEbte(x) ((x) - 1)
480 : #define PREVVALUEsht(x) ((x) - 1)
481 : #define PREVVALUEint(x) ((x) - 1)
482 : #define PREVVALUElng(x) ((x) - 1)
483 : #ifdef HAVE_HGE
484 : #define PREVVALUEhge(x) ((x) - 1)
485 : #endif
486 : #define PREVVALUEoid(x) ((x) - 1)
487 : #define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max)
488 : #define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max)
489 :
490 : #define NEXTVALUEbte(x) ((x) + 1)
491 : #define NEXTVALUEsht(x) ((x) + 1)
492 : #define NEXTVALUEint(x) ((x) + 1)
493 : #define NEXTVALUElng(x) ((x) + 1)
494 : #ifdef HAVE_HGE
495 : #define NEXTVALUEhge(x) ((x) + 1)
496 : #endif
497 : #define NEXTVALUEoid(x) ((x) + 1)
498 : #define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max)
499 : #define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max)
500 :
501 : #define MINVALUEbte GDK_bte_min
502 : #define MINVALUEsht GDK_sht_min
503 : #define MINVALUEint GDK_int_min
504 : #define MINVALUElng GDK_lng_min
505 : #ifdef HAVE_HGE
506 : #define MINVALUEhge GDK_hge_min
507 : #endif
508 : #define MINVALUEoid GDK_oid_min
509 : #define MINVALUEflt GDK_flt_min
510 : #define MINVALUEdbl GDK_dbl_min
511 :
512 : #define MAXVALUEbte GDK_bte_max
513 : #define MAXVALUEsht GDK_sht_max
514 : #define MAXVALUEint GDK_int_max
515 : #define MAXVALUElng GDK_lng_max
516 : #ifdef HAVE_HGE
517 : #define MAXVALUEhge GDK_hge_max
518 : #endif
519 : #define MAXVALUEoid GDK_oid_max
520 : #define MAXVALUEflt GDK_flt_max
521 : #define MAXVALUEdbl GDK_dbl_max
522 :
523 : #define choose(NAME, ISDENSE, TEST, TYPE) \
524 : do { \
525 : if (imprints) { \
526 : bitswitch(ISDENSE, TEST, TYPE); \
527 : } else { \
528 : scanloop(NAME, canditer_next##ISDENSE, TEST); \
529 : } \
530 : } while (false)
531 :
532 : /* definition of type-specific core scan select function */
533 : #define scanfunc(NAME, TYPE, ISDENSE) \
534 : static BUN \
535 : NAME##_##TYPE(BATiter *bi, struct canditer *restrict ci, BAT *bn, \
536 : const TYPE *tl, const TYPE *th, bool li, bool hi, \
537 : bool equi, bool anti, bool lval, bool hval, \
538 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst, \
539 : BUN maximum, Imprints *imprints, const char **algo) \
540 : { \
541 : TYPE vl = *tl; \
542 : TYPE vh = *th; \
543 : TYPE imp_min; \
544 : TYPE imp_max; \
545 : TYPE v; \
546 : const TYPE nil = TYPE##_nil; \
547 : const TYPE minval = MINVALUE##TYPE; \
548 : const TYPE maxval = MAXVALUE##TYPE; \
549 : const TYPE *src = (const TYPE *) bi->base; \
550 : const TYPE *basesrc; \
551 : oid o, w; \
552 : BUN p; \
553 : BUN pr_off = 0; \
554 : bat parent = 0; \
555 : BAT *pbat = NULL; \
556 : (void) li; \
557 : (void) hi; \
558 : (void) lval; \
559 : (void) hval; \
560 : size_t counter = 0; \
561 : QryCtx *qry_ctx = MT_thread_get_qry_ctx(); \
562 : if (imprints && imprints->imprints.parentid != bi->b->batCacheid) { \
563 : parent = imprints->imprints.parentid; \
564 : pbat = BATdescriptor(parent); \
565 : if (pbat == NULL) { \
566 : /* can't load parent: don't use imprints */ \
567 : imprints = NULL; \
568 : basesrc = src; \
569 : } else { \
570 : basesrc = (const TYPE *) Tloc(pbat, 0); \
571 : pr_off = (BUN) (src - basesrc); \
572 : } \
573 : } else { \
574 : basesrc = src; \
575 : } \
576 : /* Normalize the variables li, hi, lval, hval, possibly */ \
577 : /* changing anti in the process. This works for all */ \
578 : /* (and only) numeric types. */ \
579 : \
580 : /* Note that the expression x < v is equivalent to x <= */ \
581 : /* v' where v' is the next smaller value in the domain */ \
582 : /* of v (similarly for x > v). Also note that for */ \
583 : /* floating point numbers there actually is such a */ \
584 : /* value. In fact, there is a function in standard C */ \
585 : /* that calculates that value. */ \
586 : \
587 : /* The result is: */ \
588 : /* li == !anti, hi == !anti, lval == true, hval == true */ \
589 : /* This means that all ranges that we check for are */ \
590 : /* closed ranges. If a range is one-sided, we fill in */ \
591 : /* the minimum resp. maximum value in the domain so that */ \
592 : /* we create a closed range. */ \
593 : if (anti && li) { \
594 : /* -inf < x < vl === -inf < x <= vl-1 */ \
595 : if (vl == MINVALUE##TYPE) { \
596 : /* -inf < x < MIN || *th <[=] x < +inf */ \
597 : /* degenerates into half range */ \
598 : /* *th <[=] x < +inf */ \
599 : anti = false; \
600 : vl = vh; \
601 : li = !hi; \
602 : hval = false; \
603 : /* further dealt with below */ \
604 : } else { \
605 : vl = PREVVALUE##TYPE(vl); \
606 : li = false; \
607 : } \
608 : } \
609 : if (anti && hi) { \
610 : /* vl < x < +inf === vl+1 <= x < +inf */ \
611 : if (vh == MAXVALUE##TYPE) { \
612 : /* -inf < x <[=] *tl || MAX > x > +inf */ \
613 : /* degenerates into half range */ \
614 : /* -inf < x <[=] *tl */ \
615 : anti = false; \
616 : vh = vl; \
617 : hi = !li; \
618 : lval = false; \
619 : /* further dealt with below */ \
620 : } else { \
621 : vh = NEXTVALUE##TYPE(vh); \
622 : hi = false; \
623 : } \
624 : } \
625 : if (!anti) { \
626 : if (lval) { \
627 : /* range bounded on left */ \
628 : if (!li) { \
629 : /* open range on left */ \
630 : if (vl == MAXVALUE##TYPE) { \
631 : *algo = "select: empty range"; \
632 : return 0; \
633 : } \
634 : /* vl < x === vl+1 <= x */ \
635 : vl = NEXTVALUE##TYPE(vl); \
636 : li = true; \
637 : } \
638 : } else { \
639 : /* -inf, i.e. smallest value */ \
640 : vl = MINVALUE##TYPE; \
641 : li = true; \
642 : lval = true; \
643 : } \
644 : if (hval) { \
645 : /* range bounded on right */ \
646 : if (!hi) { \
647 : /* open range on right */ \
648 : if (vh == MINVALUE##TYPE) { \
649 : *algo = "select: empty range"; \
650 : return 0; \
651 : } \
652 : /* x < vh === x <= vh-1 */ \
653 : vh = PREVVALUE##TYPE(vh); \
654 : hi = true; \
655 : } \
656 : } else { \
657 : /* +inf, i.e. largest value */ \
658 : vh = MAXVALUE##TYPE; \
659 : hi = true; \
660 : hval = true; \
661 : } \
662 : if (vl > vh) { \
663 : *algo = "select: empty range"; \
664 : return 0; \
665 : } \
666 : } \
667 : /* if anti is set, we can now check */ \
668 : /* (x <= vl || x >= vh) && x != nil */ \
669 : /* if anti is not set, we can check just */ \
670 : /* vl <= x && x <= vh */ \
671 : /* if equi==true, the check is x == vl */ \
672 : /* note that this includes the check for != nil */ \
673 : assert(li == !anti); \
674 : assert(hi == !anti); \
675 : assert(lval); \
676 : assert(hval); \
677 : w = canditer_last(ci); \
678 : if (equi) { \
679 : assert(imprints == NULL); \
680 : if (lnil) \
681 : scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v)); \
682 : else \
683 : scanloop(NAME, canditer_next##ISDENSE, v == vl); \
684 : } else if (anti) { \
685 : if (bi->nonil) { \
686 : choose(NAME, ISDENSE, (v <= vl || v >= vh), TYPE); \
687 : } else { \
688 : choose(NAME, ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh), TYPE); \
689 : } \
690 : } else if (bi->nonil && vl == minval) { \
691 : choose(NAME, ISDENSE, v <= vh, TYPE); \
692 : } else if (vh == maxval) { \
693 : choose(NAME, ISDENSE, v >= vl, TYPE); \
694 : } else { \
695 : choose(NAME, ISDENSE, v >= vl && v <= vh, TYPE); \
696 : } \
697 : if (pbat) \
698 : BBPunfix(pbat->batCacheid); \
699 : return cnt; \
700 : bailout: \
701 : if (pbat) \
702 : BBPunfix(pbat->batCacheid); \
703 : BBPreclaim(bn); \
704 : return BUN_NONE; \
705 : }
706 :
707 : static BUN
708 553 : fullscan_any(BATiter *bi, struct canditer *restrict ci, BAT *bn,
709 : const void *tl, const void *th,
710 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
711 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
712 : BUN maximum, Imprints *imprints, const char **algo)
713 : {
714 553 : const void *v;
715 553 : const void *restrict nil = ATOMnilptr(bi->type);
716 553 : int (*cmp)(const void *, const void *) = ATOMcompare(bi->type);
717 553 : oid o;
718 553 : BUN p, ncand = ci->ncand;
719 553 : int c;
720 :
721 553 : (void) maximum;
722 553 : (void) imprints;
723 553 : (void) lnil;
724 553 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
725 :
726 553 : if (equi) {
727 536 : *algo = "select: fullscan equi";
728 536 : if (ci->tpe == cand_dense) {
729 10319457 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
730 10317796 : o = canditer_next_dense(ci);
731 10317796 : v = BUNtail(*bi, o-hseq);
732 10317299 : if ((*cmp)(tl, v) == 0) {
733 6844937 : dst = buninsfix(bn, dst, cnt, o,
734 2280964 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
735 2280964 : * (dbl) (ncand-p) * 1.1 + 1024),
736 : maximum);
737 2283009 : if (dst == NULL) {
738 0 : BBPreclaim(bn);
739 0 : return BUN_NONE;
740 : }
741 2283009 : cnt++;
742 : }
743 : }
744 : } else {
745 1930641 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
746 1929852 : o = canditer_next(ci);
747 1929868 : v = BUNtail(*bi, o-hseq);
748 1929824 : if ((*cmp)(tl, v) == 0) {
749 60108 : dst = buninsfix(bn, dst, cnt, o,
750 20035 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
751 20035 : * (dbl) (ncand-p) * 1.1 + 1024),
752 : maximum);
753 20038 : if (dst == NULL) {
754 0 : BBPreclaim(bn);
755 0 : return BUN_NONE;
756 : }
757 20038 : cnt++;
758 : }
759 : }
760 : }
761 17 : } else if (anti) {
762 2 : *algo = "select: fullscan anti";
763 2 : if (ci->tpe == cand_dense) {
764 14 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
765 8 : o = canditer_next_dense(ci);
766 8 : v = BUNtail(*bi, o-hseq);
767 8 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
768 8 : ((lval &&
769 8 : ((c = (*cmp)(tl, v)) > 0 ||
770 6 : (!li && c == 0))) ||
771 6 : (hval &&
772 6 : ((c = (*cmp)(th, v)) < 0 ||
773 4 : (!hi && c == 0))))) {
774 12 : dst = buninsfix(bn, dst, cnt, o,
775 4 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
776 4 : * (dbl) (ncand-p) * 1.1 + 1024),
777 : maximum);
778 4 : if (dst == NULL) {
779 0 : BBPreclaim(bn);
780 0 : return BUN_NONE;
781 : }
782 4 : cnt++;
783 : }
784 : }
785 : } else {
786 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
787 0 : o = canditer_next(ci);
788 0 : v = BUNtail(*bi, o-hseq);
789 0 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
790 0 : ((lval &&
791 0 : ((c = (*cmp)(tl, v)) > 0 ||
792 0 : (!li && c == 0))) ||
793 0 : (hval &&
794 0 : ((c = (*cmp)(th, v)) < 0 ||
795 0 : (!hi && c == 0))))) {
796 0 : dst = buninsfix(bn, dst, cnt, o,
797 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
798 0 : * (dbl) (ncand-p) * 1.1 + 1024),
799 : maximum);
800 0 : if (dst == NULL) {
801 0 : BBPreclaim(bn);
802 0 : return BUN_NONE;
803 : }
804 0 : cnt++;
805 : }
806 : }
807 : }
808 : } else {
809 15 : *algo = "select: fullscan range";
810 15 : if (ci->tpe == cand_dense) {
811 4946 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
812 4907 : o = canditer_next_dense(ci);
813 4907 : v = BUNtail(*bi, o-hseq);
814 4907 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
815 4036 : ((!lval ||
816 4036 : (c = cmp(tl, v)) < 0 ||
817 4907 : (li && c == 0)) &&
818 3915 : (!hval ||
819 3915 : (c = cmp(th, v)) > 0 ||
820 3662 : (hi && c == 0)))) {
821 825 : dst = buninsfix(bn, dst, cnt, o,
822 275 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
823 275 : * (dbl) (ncand-p) * 1.1 + 1024),
824 : maximum);
825 275 : if (dst == NULL) {
826 0 : BBPreclaim(bn);
827 0 : return BUN_NONE;
828 : }
829 275 : cnt++;
830 : }
831 : }
832 : } else {
833 232 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
834 226 : o = canditer_next(ci);
835 226 : v = BUNtail(*bi, o-hseq);
836 226 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
837 226 : ((!lval ||
838 226 : (c = cmp(tl, v)) < 0 ||
839 226 : (li && c == 0)) &&
840 0 : (!hval ||
841 0 : (c = cmp(th, v)) > 0 ||
842 0 : (hi && c == 0)))) {
843 474 : dst = buninsfix(bn, dst, cnt, o,
844 158 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
845 158 : * (dbl) (ncand-p) * 1.1 + 1024),
846 : maximum);
847 158 : if (dst == NULL) {
848 0 : BBPreclaim(bn);
849 0 : return BUN_NONE;
850 : }
851 158 : cnt++;
852 : }
853 : }
854 : }
855 : }
856 553 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
857 : return cnt;
858 0 : bailout:
859 0 : BBPreclaim(bn);
860 : return BUN_NONE;
861 : }
862 :
863 : static BUN
864 188709 : fullscan_str(BATiter *bi, struct canditer *restrict ci, BAT *bn,
865 : const char *tl, const char *th,
866 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
867 : bool lnil, BUN cnt, const oid hseq, oid *restrict dst,
868 : BUN maximum, Imprints *imprints, const char **algo)
869 : {
870 188709 : var_t pos;
871 188709 : BUN p, ncand = ci->ncand;
872 188709 : oid o;
873 188709 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
874 :
875 188706 : if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) &&
876 238 : strcmp(tl, str_nil) != 0 &&
877 119 : strLocate(bi->vh, str_nil) == (var_t) -2) {
878 : /* anti-equi select for non-nil value, and there are no
879 : * nils, so we can use fast path; trigger by setting
880 : * nonil */
881 119 : bi->nonil = true;
882 : }
883 188706 : if (!((equi ||
884 537 : (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) &&
885 188689 : GDK_ELIMDOUBLES(bi->vh)))
886 451 : return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
887 : lval, hval, lnil, cnt, hseq, dst,
888 : maximum, imprints, algo);
889 188255 : if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) {
890 799 : if (anti) {
891 : /* return the whole shebang */
892 20 : *algo = "select: fullscan anti-equi strelim (all)";
893 20 : if (BATextend(bn, ncand) != GDK_SUCCEED) {
894 0 : BBPreclaim(bn);
895 0 : return BUN_NONE;
896 : }
897 20 : dst = Tloc(bn, 0);
898 1258 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
899 1218 : dst[p] = canditer_next(ci);
900 : }
901 20 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
902 : return ncand;
903 : }
904 779 : *algo = "select: fullscan equi strelim (nomatch)";
905 779 : return 0;
906 : }
907 187458 : if (pos == (var_t) -1) {
908 0 : *algo = NULL;
909 0 : BBPreclaim(bn);
910 0 : return BUN_NONE;
911 : }
912 187458 : *algo = anti ? "select: fullscan anti-equi strelim" : "select: fullscan equi strelim";
913 187458 : assert(pos >= GDK_VAROFFSET);
914 187458 : switch (bi->width) {
915 163367 : case 1: {
916 163367 : const unsigned char *ptr = (const unsigned char *) bi->base;
917 163367 : pos -= GDK_VAROFFSET;
918 163367 : if (ci->tpe == cand_dense) {
919 162532 : if (anti) {
920 2724 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
921 2286 : o = canditer_next_dense(ci);
922 2286 : if (ptr[o - hseq] != pos) {
923 6240 : dst = buninsfix(bn, dst, cnt, o,
924 2080 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
925 2080 : * (dbl) (ncand-p) * 1.1 + 1024),
926 : maximum);
927 2080 : if (dst == NULL) {
928 0 : BBPreclaim(bn);
929 0 : return BUN_NONE;
930 : }
931 2080 : cnt++;
932 : }
933 : }
934 : } else {
935 29094435 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
936 28605144 : o = canditer_next_dense(ci);
937 28605144 : if (ptr[o - hseq] == pos) {
938 27093651 : dst = buninsfix(bn, dst, cnt, o,
939 9031217 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
940 9031217 : * (dbl) (ncand-p) * 1.1 + 1024),
941 : maximum);
942 9031217 : if (dst == NULL) {
943 0 : BBPreclaim(bn);
944 0 : return BUN_NONE;
945 : }
946 9031217 : cnt++;
947 : }
948 : }
949 : }
950 : } else {
951 835 : if (anti) {
952 1254 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
953 1194 : o = canditer_next(ci);
954 1194 : if (ptr[o - hseq] != pos) {
955 297 : dst = buninsfix(bn, dst, cnt, o,
956 99 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
957 99 : * (dbl) (ncand-p) * 1.1 + 1024),
958 : maximum);
959 99 : if (dst == NULL) {
960 0 : BBPreclaim(bn);
961 0 : return BUN_NONE;
962 : }
963 99 : cnt++;
964 : }
965 : }
966 : } else {
967 12739573 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
968 12735941 : o = canditer_next(ci);
969 12733380 : if (ptr[o - hseq] == pos) {
970 12076916 : dst = buninsfix(bn, dst, cnt, o,
971 4024785 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
972 4024785 : * (dbl) (ncand-p) * 1.1 + 1024),
973 : maximum);
974 4027346 : if (dst == NULL) {
975 0 : BBPreclaim(bn);
976 0 : return BUN_NONE;
977 : }
978 4027346 : cnt++;
979 : }
980 : }
981 : }
982 : }
983 : break;
984 : }
985 24089 : case 2: {
986 24089 : const unsigned short *ptr = (const unsigned short *) bi->base;
987 24089 : pos -= GDK_VAROFFSET;
988 24089 : if (ci->tpe == cand_dense) {
989 18966 : if (anti) {
990 1759 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
991 874 : o = canditer_next_dense(ci);
992 874 : if (ptr[o - hseq] != pos) {
993 1842 : dst = buninsfix(bn, dst, cnt, o,
994 614 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
995 614 : * (dbl) (ncand-p) * 1.1 + 1024),
996 : maximum);
997 614 : if (dst == NULL) {
998 0 : BBPreclaim(bn);
999 0 : return BUN_NONE;
1000 : }
1001 614 : cnt++;
1002 : }
1003 : }
1004 : } else {
1005 2850386 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1006 2794270 : o = canditer_next_dense(ci);
1007 2794270 : if (ptr[o - hseq] == pos) {
1008 174300 : dst = buninsfix(bn, dst, cnt, o,
1009 58100 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1010 58100 : * (dbl) (ncand-p) * 1.1 + 1024),
1011 : maximum);
1012 58100 : if (dst == NULL) {
1013 0 : BBPreclaim(bn);
1014 0 : return BUN_NONE;
1015 : }
1016 58100 : cnt++;
1017 : }
1018 : }
1019 : }
1020 : } else {
1021 5123 : if (anti) {
1022 2115 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1023 1998 : o = canditer_next(ci);
1024 1998 : if (ptr[o - hseq] != pos) {
1025 5130 : dst = buninsfix(bn, dst, cnt, o,
1026 1710 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1027 1710 : * (dbl) (ncand-p) * 1.1 + 1024),
1028 : maximum);
1029 1710 : if (dst == NULL) {
1030 0 : BBPreclaim(bn);
1031 0 : return BUN_NONE;
1032 : }
1033 1710 : cnt++;
1034 : }
1035 : }
1036 : } else {
1037 2758028 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1038 2742729 : o = canditer_next(ci);
1039 2742658 : if (ptr[o - hseq] == pos) {
1040 255880 : dst = buninsfix(bn, dst, cnt, o,
1041 85270 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1042 85270 : * (dbl) (ncand-p) * 1.1 + 1024),
1043 : maximum);
1044 85340 : if (dst == NULL) {
1045 0 : BBPreclaim(bn);
1046 0 : return BUN_NONE;
1047 : }
1048 85340 : cnt++;
1049 : }
1050 : }
1051 : }
1052 : }
1053 : break;
1054 : }
1055 : #if SIZEOF_VAR_T == 8
1056 1 : case 4: {
1057 1 : const unsigned int *ptr = (const unsigned int *) bi->base;
1058 1 : if (ci->tpe == cand_dense) {
1059 1 : if (anti) {
1060 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1061 0 : o = canditer_next_dense(ci);
1062 0 : if (ptr[o - hseq] != pos) {
1063 0 : dst = buninsfix(bn, dst, cnt, o,
1064 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1065 0 : * (dbl) (ncand-p) * 1.1 + 1024),
1066 : maximum);
1067 0 : if (dst == NULL) {
1068 0 : BBPreclaim(bn);
1069 0 : return BUN_NONE;
1070 : }
1071 0 : cnt++;
1072 : }
1073 : }
1074 : } else {
1075 973 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1076 970 : o = canditer_next_dense(ci);
1077 970 : if (ptr[o - hseq] == pos) {
1078 744 : dst = buninsfix(bn, dst, cnt, o,
1079 248 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1080 248 : * (dbl) (ncand-p) * 1.1 + 1024),
1081 : maximum);
1082 248 : if (dst == NULL) {
1083 0 : BBPreclaim(bn);
1084 0 : return BUN_NONE;
1085 : }
1086 248 : cnt++;
1087 : }
1088 : }
1089 : }
1090 : } else {
1091 0 : if (anti) {
1092 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1093 0 : o = canditer_next(ci);
1094 0 : if (ptr[o - hseq] != pos) {
1095 0 : dst = buninsfix(bn, dst, cnt, o,
1096 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1097 0 : * (dbl) (ncand-p) * 1.1 + 1024),
1098 : maximum);
1099 0 : if (dst == NULL) {
1100 0 : BBPreclaim(bn);
1101 0 : return BUN_NONE;
1102 : }
1103 0 : cnt++;
1104 : }
1105 : }
1106 : } else {
1107 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1108 0 : o = canditer_next(ci);
1109 0 : if (ptr[o - hseq] == pos) {
1110 0 : dst = buninsfix(bn, dst, cnt, o,
1111 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1112 0 : * (dbl) (ncand-p) * 1.1 + 1024),
1113 : maximum);
1114 0 : if (dst == NULL) {
1115 0 : BBPreclaim(bn);
1116 0 : return BUN_NONE;
1117 : }
1118 0 : cnt++;
1119 : }
1120 : }
1121 : }
1122 : }
1123 : break;
1124 : }
1125 : #endif
1126 1 : default: {
1127 1 : const var_t *ptr = (const var_t *) bi->base;
1128 1 : if (ci->tpe == cand_dense) {
1129 1 : if (anti) {
1130 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1131 0 : o = canditer_next_dense(ci);
1132 0 : if (ptr[o - hseq] != pos) {
1133 0 : dst = buninsfix(bn, dst, cnt, o,
1134 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1135 0 : * (dbl) (ncand-p) * 1.1 + 1024),
1136 : maximum);
1137 0 : if (dst == NULL) {
1138 0 : BBPreclaim(bn);
1139 0 : return BUN_NONE;
1140 : }
1141 0 : cnt++;
1142 : }
1143 : }
1144 : } else {
1145 13 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1146 10 : o = canditer_next_dense(ci);
1147 10 : if (ptr[o - hseq] == pos) {
1148 3 : dst = buninsfix(bn, dst, cnt, o,
1149 1 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1150 1 : * (dbl) (ncand-p) * 1.1 + 1024),
1151 : maximum);
1152 1 : if (dst == NULL) {
1153 0 : BBPreclaim(bn);
1154 0 : return BUN_NONE;
1155 : }
1156 1 : cnt++;
1157 : }
1158 : }
1159 : }
1160 : } else {
1161 0 : if (anti) {
1162 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1163 0 : o = canditer_next(ci);
1164 0 : if (ptr[o - hseq] != pos) {
1165 0 : dst = buninsfix(bn, dst, cnt, o,
1166 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1167 0 : * (dbl) (ncand-p) * 1.1 + 1024),
1168 : maximum);
1169 0 : if (dst == NULL) {
1170 0 : BBPreclaim(bn);
1171 0 : return BUN_NONE;
1172 : }
1173 0 : cnt++;
1174 : }
1175 : }
1176 : } else {
1177 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
1178 0 : o = canditer_next(ci);
1179 0 : if (ptr[o - hseq] == pos) {
1180 0 : dst = buninsfix(bn, dst, cnt, o,
1181 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
1182 0 : * (dbl) (ncand-p) * 1.1 + 1024),
1183 : maximum);
1184 0 : if (dst == NULL) {
1185 0 : BBPreclaim(bn);
1186 0 : return BUN_NONE;
1187 : }
1188 0 : cnt++;
1189 : }
1190 : }
1191 : }
1192 : }
1193 : break;
1194 : }
1195 : }
1196 187457 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
1197 : return cnt;
1198 0 : bailout:
1199 0 : BBPreclaim(bn);
1200 : return BUN_NONE;
1201 : }
1202 :
1203 : /* scan select type switch */
1204 : #ifdef HAVE_HGE
1205 : #define scanfunc_hge(NAME, ISDENSE) \
1206 : scanfunc(NAME, hge, ISDENSE)
1207 : #else
1208 : #define scanfunc_hge(NAME, ISDENSE)
1209 : #endif
1210 : #define scan_sel(NAME, ISDENSE) \
1211 : scanfunc(NAME, bte, ISDENSE) \
1212 : scanfunc(NAME, sht, ISDENSE) \
1213 : scanfunc(NAME, int, ISDENSE) \
1214 : scanfunc(NAME, flt, ISDENSE) \
1215 : scanfunc(NAME, dbl, ISDENSE) \
1216 : scanfunc(NAME, lng, ISDENSE) \
1217 : scanfunc_hge(NAME, ISDENSE)
1218 :
1219 : /* scan/imprints select */
1220 189393014 : scan_sel(fullscan, )
1221 1336843231 : scan_sel(densescan, _dense)
1222 :
1223 : #if 0
1224 : /* some programs that produce editor tags files don't recognize the
1225 : * scanselect function because before it are the scan_del macro
1226 : * calls that don't look like function definitions or variable
1227 : * declarations, hence we have this hidden away function to realign the
1228 : * tags program */
1229 : void
1230 : realign_tags(void)
1231 : {
1232 : }
1233 : #endif
1234 :
1235 : static BAT *
1236 1266164 : scanselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
1237 : const void *tl, const void *th,
1238 : bool li, bool hi, bool equi, bool anti, bool lval, bool hval,
1239 : bool lnil, BUN maximum, Imprints *imprints, const char **algo)
1240 : {
1241 : #ifndef NDEBUG
1242 1266164 : int (*cmp)(const void *, const void *);
1243 : #endif
1244 1266164 : int t;
1245 1266164 : BUN cnt = 0;
1246 1266164 : oid *restrict dst;
1247 :
1248 1266164 : assert(bi->b != NULL);
1249 1266164 : assert(bn != NULL);
1250 1266164 : assert(bn->ttype == TYPE_oid);
1251 1266164 : assert(!lval || tl != NULL);
1252 1266164 : assert(!hval || th != NULL);
1253 1266164 : assert(!equi || (li && hi && !anti));
1254 1266164 : assert(!anti || lval || hval);
1255 1266164 : assert(bi->type != TYPE_void || equi || bi->nonil);
1256 :
1257 : #ifndef NDEBUG
1258 1266164 : cmp = ATOMcompare(bi->type);
1259 : #endif
1260 :
1261 1266164 : assert(!lval || !hval || tl == th || (*cmp)(tl, th) <= 0);
1262 :
1263 1266164 : dst = (oid *) Tloc(bn, 0);
1264 :
1265 1266164 : t = ATOMbasetype(bi->type);
1266 :
1267 : /* call type-specific core scan select function */
1268 1266164 : switch (t) {
1269 39927 : case TYPE_bte:
1270 39927 : if (ci->tpe == cand_dense)
1271 37662 : cnt = densescan_bte(scanargs);
1272 : else
1273 2265 : cnt = fullscan_bte(scanargs);
1274 : break;
1275 6318 : case TYPE_sht:
1276 6318 : if (ci->tpe == cand_dense)
1277 5670 : cnt = densescan_sht(scanargs);
1278 : else
1279 648 : cnt = fullscan_sht(scanargs);
1280 : break;
1281 1028719 : case TYPE_int:
1282 1028719 : if (ci->tpe == cand_dense)
1283 731774 : cnt = densescan_int(scanargs);
1284 : else
1285 296945 : cnt = fullscan_int(scanargs);
1286 : break;
1287 16 : case TYPE_flt:
1288 16 : if (ci->tpe == cand_dense)
1289 15 : cnt = densescan_flt(scanargs);
1290 : else
1291 1 : cnt = fullscan_flt(scanargs);
1292 : break;
1293 104 : case TYPE_dbl:
1294 104 : if (ci->tpe == cand_dense)
1295 95 : cnt = densescan_dbl(scanargs);
1296 : else
1297 9 : cnt = fullscan_dbl(scanargs);
1298 : break;
1299 2139 : case TYPE_lng:
1300 2139 : if (ci->tpe == cand_dense)
1301 2129 : cnt = densescan_lng(scanargs);
1302 : else
1303 10 : cnt = fullscan_lng(scanargs);
1304 : break;
1305 : #ifdef HAVE_HGE
1306 132 : case TYPE_hge:
1307 132 : if (ci->tpe == cand_dense)
1308 124 : cnt = densescan_hge(scanargs);
1309 : else
1310 8 : cnt = fullscan_hge(scanargs);
1311 : break;
1312 : #endif
1313 188707 : case TYPE_str:
1314 188707 : cnt = fullscan_str(scanargs);
1315 188707 : break;
1316 102 : default:
1317 102 : cnt = fullscan_any(scanargs);
1318 102 : break;
1319 : }
1320 1266138 : if (cnt == BUN_NONE) {
1321 : return NULL;
1322 : }
1323 1266138 : assert(bn->batCapacity >= cnt);
1324 :
1325 1266138 : BATsetcount(bn, cnt);
1326 1266131 : bn->tsorted = true;
1327 1266131 : bn->trevsorted = bn->batCount <= 1;
1328 1266131 : bn->tkey = true;
1329 1266131 : bn->tseqbase = cnt == 0 ? 0 : cnt == 1 || cnt == bi->count ? bi->b->hseqbase : oid_nil;
1330 :
1331 1266131 : return bn;
1332 : }
1333 :
1334 : #if SIZEOF_BUN == SIZEOF_INT
1335 : #define CALC_ESTIMATE(TPE) \
1336 : do { \
1337 : if (*(TPE*)tl < 1) { \
1338 : if ((int) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1339 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1340 : } else { \
1341 : if (-(int) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1342 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1343 : } \
1344 : } while (0)
1345 : #else
1346 : #define CALC_ESTIMATE(TPE) \
1347 : do { \
1348 : if (*(TPE*)tl < 1) { \
1349 : if ((lng) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1350 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1351 : } else { \
1352 : if (-(lng) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1353 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1354 : } \
1355 : } while (0)
1356 : #endif
1357 :
1358 : static enum range_comp_t
1359 1758700 : BATrange(BATiter *bi, const void *tl, const void *th, bool li, bool hi)
1360 : {
1361 1758700 : enum range_comp_t range;
1362 1758700 : const ValRecord *minprop = NULL, *maxprop = NULL;
1363 1758700 : const void *minval = NULL, *maxval = NULL;
1364 1758700 : bool maxincl = true;
1365 1758700 : BAT *pb = NULL;
1366 1758700 : int c;
1367 1758700 : int (*atomcmp) (const void *, const void *) = ATOMcompare(bi->type);
1368 1758700 : BATiter bi2 = *bi;
1369 :
1370 1758700 : if (tl && (*atomcmp)(tl, ATOMnilptr(bi->type)) == 0)
1371 10931 : tl = NULL;
1372 1758682 : if (th && (*atomcmp)(th, ATOMnilptr(bi->type)) == 0)
1373 7990 : th = NULL;
1374 1758674 : if (tl == NULL && th == NULL)
1375 : return range_contains; /* looking for everything */
1376 :
1377 1753864 : if (VIEWtparent(bi->b))
1378 1614721 : pb = BATdescriptor(VIEWtparent(bi->b));
1379 :
1380 : /* keep locked while we look at the property values */
1381 1753870 : MT_lock_set(&bi->b->theaplock);
1382 1753877 : if (bi->sorted && (bi->nonil || atomcmp(BUNtail(*bi, 0), ATOMnilptr(bi->type)) != 0))
1383 283468 : minval = BUNtail(*bi, 0);
1384 1470409 : else if (bi->revsorted && (bi->nonil || atomcmp(BUNtail(*bi, bi->count - 1), ATOMnilptr(bi->type)) != 0))
1385 120490 : minval = BUNtail(*bi, bi->count - 1);
1386 1349919 : else if (bi->minpos != BUN_NONE)
1387 26475 : minval = BUNtail(*bi, bi->minpos);
1388 1323444 : else if ((minprop = BATgetprop_nolock(bi->b, GDK_MIN_BOUND)) != NULL)
1389 0 : minval = VALptr(minprop);
1390 1753863 : if (bi->sorted && (bi->nonil || atomcmp(BUNtail(bi2, bi->count - 1), ATOMnilptr(bi->type)) != 0)) {
1391 283918 : maxval = BUNtail(bi2, bi->count - 1);
1392 : maxincl = true;
1393 1469961 : } else if (bi->revsorted && (bi->nonil || atomcmp(BUNtail(bi2, 0), ATOMnilptr(bi->type)) != 0)) {
1394 120633 : maxval = BUNtail(bi2, 0);
1395 : maxincl = true;
1396 1349328 : } else if (bi->maxpos != BUN_NONE) {
1397 26423 : maxval = BUNtail(bi2, bi->maxpos);
1398 : maxincl = true;
1399 1322905 : } else if ((maxprop = BATgetprop_nolock(bi->b, GDK_MAX_BOUND)) != NULL) {
1400 0 : maxval = VALptr(maxprop);
1401 0 : maxincl = false;
1402 : }
1403 1753878 : bool keep = false; /* keep lock on parent bat? */
1404 1753878 : if (minval == NULL || maxval == NULL) {
1405 1323445 : if (pb != NULL) {
1406 1261691 : MT_lock_set(&pb->theaplock);
1407 1261689 : if (minval == NULL && (minprop = BATgetprop_nolock(pb, GDK_MIN_BOUND)) != NULL) {
1408 6 : keep = true;
1409 6 : minval = VALptr(minprop);
1410 : }
1411 1261686 : if (maxval == NULL && (maxprop = BATgetprop_nolock(pb, GDK_MAX_BOUND)) != NULL) {
1412 2 : keep = true;
1413 2 : maxval = VALptr(maxprop);
1414 2 : maxincl = true;
1415 : }
1416 1261684 : if (!keep) {
1417 1261675 : MT_lock_unset(&pb->theaplock);
1418 : }
1419 : }
1420 : }
1421 :
1422 1753861 : if (minval == NULL && maxval == NULL) {
1423 : range = range_inside; /* strictly: unknown */
1424 430978 : } else if (maxval &&
1425 425021 : tl &&
1426 425025 : ((c = atomcmp(tl, maxval)) > 0 ||
1427 723 : ((!maxincl || !li) && c == 0))) {
1428 : range = range_after;
1429 371272 : } else if (minval &&
1430 370161 : th &&
1431 370160 : ((c = atomcmp(th, minval)) < 0 ||
1432 352499 : (!hi && c == 0))) {
1433 : range = range_before;
1434 353557 : } else if (tl == NULL) {
1435 5819 : if (minval == NULL) {
1436 16 : c = atomcmp(th, maxval);
1437 16 : if (c < 0 || ((maxincl || !hi) && c == 0))
1438 : range = range_atstart;
1439 : else
1440 6030 : range = range_contains;
1441 : } else {
1442 5803 : c = atomcmp(th, minval);
1443 5803 : if (c < 0 || (!hi && c == 0))
1444 : range = range_before;
1445 5803 : else if (maxval == NULL)
1446 : range = range_atstart;
1447 : else {
1448 5801 : c = atomcmp(th, maxval);
1449 5801 : if (c < 0 || ((maxincl || !hi) && c == 0))
1450 : range = range_atstart;
1451 : else
1452 6030 : range = range_contains;
1453 : }
1454 : }
1455 347738 : } else if (th == NULL) {
1456 758 : if (maxval == NULL) {
1457 2 : c = atomcmp(tl, minval);
1458 2 : if (c >= 0)
1459 : range = range_atend;
1460 : else
1461 6030 : range = range_contains;
1462 : } else {
1463 756 : c = atomcmp(tl, maxval);
1464 756 : if (c > 0 || ((!maxincl || !li) && c == 0))
1465 : range = range_after;
1466 756 : else if (minval == NULL)
1467 : range = range_atend;
1468 : else {
1469 740 : c = atomcmp(tl, minval);
1470 740 : if (c >= 0)
1471 : range = range_atend;
1472 : else
1473 6030 : range = range_contains;
1474 : }
1475 : }
1476 346980 : } else if (minval == NULL) {
1477 339 : c = atomcmp(th, maxval);
1478 339 : if (c < 0 || ((maxincl || !hi) && c == 0))
1479 : range = range_inside;
1480 : else
1481 14456 : range = range_atend;
1482 346641 : } else if (maxval == NULL) {
1483 2 : c = atomcmp(tl, minval);
1484 2 : if (c >= 0)
1485 : range = range_inside;
1486 : else
1487 258 : range = range_atstart;
1488 : } else {
1489 346639 : c = atomcmp(tl, minval);
1490 346639 : if (c >= 0) {
1491 346378 : c = atomcmp(th, maxval);
1492 346385 : if (c < 0 || ((maxincl || !hi) && c == 0))
1493 : range = range_inside;
1494 : else
1495 14456 : range = range_atend;
1496 : } else {
1497 261 : c = atomcmp(th, maxval);
1498 261 : if (c < 0 || ((maxincl || !hi) && c == 0))
1499 : range = range_atstart;
1500 : else
1501 6030 : range = range_contains;
1502 : }
1503 : }
1504 :
1505 1753865 : MT_lock_unset(&bi->b->theaplock);
1506 1753857 : if (pb) {
1507 1614711 : if (keep)
1508 6 : MT_lock_unset(&pb->theaplock);
1509 1614711 : BBPreclaim(pb);
1510 : }
1511 :
1512 : return range;
1513 : }
1514 :
1515 : /* generic range select
1516 : *
1517 : * Return a BAT with the OID values of b for qualifying tuples. The
1518 : * return BAT is sorted (i.e. in the same order as the input BAT).
1519 : *
1520 : * If s is non-NULL, it is a list of candidates. s must be sorted.
1521 : *
1522 : * tl may not be NULL, li, hi, and anti must be either 0 or 1.
1523 : *
1524 : * If th is NULL, hi is ignored.
1525 : *
1526 : * If anti is 0, qualifying tuples are those whose value is between tl
1527 : * and th (as in x >[=] tl && x <[=] th, where equality depends on li
1528 : * and hi--so if tl > th, nothing will be returned). If li or hi is
1529 : * 1, the respective boundary is inclusive, otherwise exclusive. If
1530 : * th is NULL it is taken to be equal to tl, turning this into an
1531 : * equi- or point-select. Note that for a point select to return
1532 : * anything, li (and hi if th was not NULL) must be 1. There is a
1533 : * special case if tl is nil and th is NULL. This is the only way to
1534 : * select for nil values.
1535 : *
1536 : * If anti is 1, the result is the complement of what the result would
1537 : * be if anti were 0, except that nils are filtered out.
1538 : *
1539 : * In brief:
1540 : * - if tl==nil and th==NULL and anti==0, return all nils (only way to
1541 : * get nils);
1542 : * - it tl==nil and th==nil, return all but nils;
1543 : * - if tl==nil and th!=NULL, no lower bound;
1544 : * - if th==NULL or tl==th, point (equi) select;
1545 : * - if th==nil, no upper bound
1546 : *
1547 : * A complete breakdown of the various arguments follows. Here, v, v1
1548 : * and v2 are values from the appropriate domain, and
1549 : * v != nil, v1 != nil, v2 != nil, v1 < v2.
1550 : * tl th li hi anti result list of OIDs for values
1551 : * -----------------------------------------------------------------
1552 : * nil NULL true ignored false x == nil (only way to get nil)
1553 : * nil NULL false ignored false NOTHING
1554 : * nil NULL ignored ignored true x != nil
1555 : * nil nil ignored ignored false x != nil
1556 : * nil nil ignored ignored true NOTHING
1557 : * nil v ignored false false x < v
1558 : * nil v ignored true false x <= v
1559 : * nil v ignored false true x >= v
1560 : * nil v ignored true true x > v
1561 : * v nil false ignored false x > v
1562 : * v nil true ignored false x >= v
1563 : * v nil false ignored true x <= v
1564 : * v nil true ignored true x < v
1565 : * v NULL false ignored false NOTHING
1566 : * v NULL true ignored false x == v
1567 : * v NULL false ignored true x != nil
1568 : * v NULL true ignored true x != v
1569 : * v v false false false NOTHING
1570 : * v v true false false NOTHING
1571 : * v v false true false NOTHING
1572 : * v v true true false x == v
1573 : * v v false false true x != nil
1574 : * v v true false true x != nil
1575 : * v v false true true x != nil
1576 : * v v true true true x != v
1577 : * v1 v2 false false false v1 < x < v2
1578 : * v1 v2 true false false v1 <= x < v2
1579 : * v1 v2 false true false v1 < x <= v2
1580 : * v1 v2 true true false v1 <= x <= v2
1581 : * v1 v2 false false true x <= v1 or x >= v2
1582 : * v1 v2 true false true x < v1 or x >= v2
1583 : * v1 v2 false true true x <= v1 or x > v2
1584 : * v1 v2 true true true x < v1 or x > v2
1585 : * v2 v1 ignored ignored false NOTHING
1586 : * v2 v1 ignored ignored true x != nil
1587 : */
1588 : BAT *
1589 2874119 : BATselect(BAT *b, BAT *s, const void *tl, const void *th,
1590 : bool li, bool hi, bool anti)
1591 : {
1592 2874119 : bool lval; /* low value used for comparison */
1593 2874119 : bool lnil; /* low value is nil */
1594 2874119 : bool hval; /* high value used for comparison */
1595 2874119 : bool equi; /* select for single value (not range) */
1596 2874119 : bool antiequi = false; /* select for all but single value */
1597 2874119 : bool wanthash = false; /* use hash (equi must be true) */
1598 2874119 : bool havehash = false; /* we have a hash (and the hashlock) */
1599 2874119 : bool phash = false; /* use hash on parent BAT (if view) */
1600 2874119 : int t; /* data type */
1601 2874119 : bat parent; /* b's parent bat (if b is a view) */
1602 2874119 : const void *nil;
1603 2874119 : BAT *bn;
1604 2874119 : struct canditer ci;
1605 2874119 : BUN estimate = BUN_NONE, maximum = BUN_NONE;
1606 2874119 : oid vwl = 0, vwh = 0;
1607 2874119 : lng vwo = 0;
1608 2874119 : Heap *oidxh = NULL;
1609 2874119 : const char *algo;
1610 2874119 : enum range_comp_t range;
1611 2874119 : const bool notnull = BATgetprop(b, GDK_NOT_NULL) != NULL;
1612 2874084 : lng t0 = GDKusec();
1613 :
1614 2874128 : BATcheck(b, NULL);
1615 2874128 : if (tl == NULL) {
1616 0 : GDKerror("tl value required");
1617 0 : return NULL;
1618 : }
1619 :
1620 2874128 : if (s && s->ttype != TYPE_msk && !s->tsorted) {
1621 0 : GDKerror("invalid argument: s must be sorted.\n");
1622 0 : return NULL;
1623 : }
1624 :
1625 2874128 : canditer_init(&ci, b, s);
1626 2874103 : if (ci.ncand == 0) {
1627 : /* trivially empty result */
1628 1094919 : MT_thread_setalgorithm("select: trivially empty");
1629 1094881 : bn = BATdense(0, 0, 0);
1630 1094891 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1631 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1632 : " (" LLFMT " usec): "
1633 : "trivially empty\n",
1634 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1635 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1636 1094891 : return bn;
1637 : }
1638 :
1639 1779184 : BATiter bi = bat_iterator(b);
1640 :
1641 1779185 : t = bi.type;
1642 1779185 : nil = ATOMnilptr(t);
1643 : /* can we use the base type? */
1644 1779185 : t = ATOMbasetype(t);
1645 1779185 : lnil = nil && ATOMcmp(t, tl, nil) == 0; /* low value = nil? */
1646 :
1647 1779202 : if (!lnil && th != NULL && (!li || !hi) && !anti && ATOMcmp(t, tl, th) == 0) {
1648 : /* upper and lower bound of range are equal and we
1649 : * want an interval that's open on at least one
1650 : * side */
1651 40 : MT_thread_setalgorithm("select: empty interval");
1652 40 : bn = BATdense(0, 0, 0);
1653 40 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1654 : ",s=" ALGOOPTBATFMT ",li=%d,hi=%d,anti=%d -> "
1655 : ALGOOPTBATFMT " (" LLFMT " usec): "
1656 : "empty interval\n",
1657 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1658 : li, hi, anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1659 40 : bat_iterator_end(&bi);
1660 40 : return bn;
1661 : }
1662 :
1663 1779165 : lval = !lnil || th == NULL; /* low value used for comparison */
1664 1779165 : equi = th == NULL || (lval && ATOMcmp(t, tl, th) == 0); /* point select? */
1665 1779162 : if (equi) {
1666 1747403 : assert(lval);
1667 1747403 : if (th == NULL)
1668 1591571 : hi = li;
1669 : th = tl;
1670 : hval = true;
1671 31759 : } else if (nil == NULL) {
1672 : hval = true;
1673 : } else {
1674 31759 : hval = ATOMcmp(t, th, nil) != 0;
1675 : }
1676 1779160 : if (anti) {
1677 63401 : if (lval != hval) {
1678 : /* one of the end points is nil and the other
1679 : * isn't: swap sub-ranges */
1680 41 : const void *tv;
1681 41 : bool ti;
1682 41 : assert(!equi);
1683 41 : ti = li;
1684 41 : li = !hi;
1685 41 : hi = !ti;
1686 41 : tv = tl;
1687 41 : tl = th;
1688 41 : th = tv;
1689 41 : ti = lval;
1690 41 : lval = hval;
1691 41 : hval = ti;
1692 41 : lnil = ATOMcmp(t, tl, nil) == 0;
1693 41 : anti = false;
1694 41 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1695 : ",s=" ALGOOPTBATFMT ",anti=%d "
1696 : "anti: switch ranges...\n",
1697 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1698 : anti);
1699 63360 : } else if (!lval && !hval) {
1700 : /* antiselect for nil-nil range: all non-nil
1701 : * values are in range; we must return all
1702 : * other non-nil values, i.e. nothing */
1703 23 : MT_thread_setalgorithm("select: anti: nil-nil range, nonil");
1704 23 : bn = BATdense(0, 0, 0);
1705 23 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1706 : ",s=" ALGOOPTBATFMT ",anti=%d -> "
1707 : ALGOOPTBATFMT " (" LLFMT " usec): "
1708 : "anti: nil-nil range, nonil\n",
1709 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1710 : anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1711 23 : bat_iterator_end(&bi);
1712 23 : return bn;
1713 63337 : } else if ((equi && (lnil || !(li && hi))) || ATOMcmp(t, tl, th) > 0) {
1714 : /* various ways to select for everything except nil */
1715 :
1716 5496 : bn = BATselect(b, s, nil, NULL, true, true, false);
1717 5495 : if (bn == NULL) {
1718 0 : bat_iterator_end(&bi);
1719 0 : return NULL;
1720 : }
1721 5495 : BAT *bn2;
1722 5495 : if (s) {
1723 2334 : bn2 = BATdiffcand(s, bn);
1724 : } else {
1725 3161 : bn2 = BATnegcands2(ci.seq, bi.count, bn);
1726 : }
1727 5495 : bat_iterator_end(&bi);
1728 5495 : BBPreclaim(bn);
1729 5495 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1730 : ",s=" ALGOOPTBATFMT ",anti=1,equi=%d -> "
1731 : ALGOOPTBATFMT " (" LLFMT " usec): "
1732 : "everything except nil\n",
1733 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1734 : equi, ALGOOPTBATPAR(bn2), GDKusec() - t0);
1735 5495 : return bn2; /* also if NULL */
1736 : } else {
1737 : antiequi = equi;
1738 : equi = false;
1739 : }
1740 : }
1741 :
1742 : /* if equi set, then so are both lval and hval */
1743 1773640 : assert(!equi || (lval && hval));
1744 :
1745 1773640 : if (hval && (equi ? !li || !hi : ATOMcmp(t, tl, th) > 0)) {
1746 : /* empty range */
1747 31 : MT_thread_setalgorithm("select: empty range");
1748 31 : bn = BATdense(0, 0, 0);
1749 31 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1750 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1751 : " (" LLFMT " usec) "
1752 : "empty range\n",
1753 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1754 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1755 31 : bat_iterator_end(&bi);
1756 31 : return bn;
1757 : }
1758 1773607 : if (equi && lnil && (notnull || bi.nonil)) {
1759 : /* return all nils, but there aren't any */
1760 14912 : MT_thread_setalgorithm("select: equi-nil, nonil");
1761 14912 : bn = BATdense(0, 0, 0);
1762 14912 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1763 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1764 : " (" LLFMT " usec): "
1765 : "equi-nil, nonil\n",
1766 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1767 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1768 14912 : bat_iterator_end(&bi);
1769 14912 : return bn;
1770 : }
1771 :
1772 1758695 : if (!equi && !lval && !hval && lnil && (notnull || bi.nonil)) {
1773 : /* return all non-nils from a BAT that doesn't have
1774 : * any: i.e. return everything */
1775 0 : MT_thread_setalgorithm("select: everything, nonil");
1776 0 : bn = canditer_slice(&ci, 0, ci.ncand);
1777 0 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1778 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1779 : " (" LLFMT " usec): "
1780 : "everything, nonil\n",
1781 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1782 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1783 0 : bat_iterator_end(&bi);
1784 0 : return bn;
1785 : }
1786 :
1787 : /* figure out how the searched for range compares with the known
1788 : * minimum and maximum values */
1789 1768020 : range = BATrange(&bi, lval ? tl : NULL, hval ? th : NULL, li, hi);
1790 1758692 : if (anti) {
1791 57840 : switch (range) {
1792 14 : case range_contains:
1793 : /* MIN..MAX range of values in BAT fully inside
1794 : * tl..th range, so nothing left over for
1795 : * anti */
1796 14 : MT_thread_setalgorithm("select: nothing, out of range");
1797 14 : bn = BATdense(0, 0, 0);
1798 14 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1799 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1800 : " (" LLFMT " usec): "
1801 : "nothing, out of range\n",
1802 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1803 14 : bat_iterator_end(&bi);
1804 14 : return bn;
1805 4652 : case range_before:
1806 : case range_after:
1807 4652 : if (notnull || b->tnonil) {
1808 : /* search range does not overlap with BAT range,
1809 : * and there are no nils, so we can return
1810 : * everything */
1811 4539 : MT_thread_setalgorithm("select: everything, anti, nonil");
1812 4539 : bn = canditer_slice(&ci, 0, ci.ncand);
1813 4539 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1814 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1815 : " (" LLFMT " usec): "
1816 : "everything, nonil\n",
1817 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1818 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1819 4539 : bat_iterator_end(&bi);
1820 4539 : return bn;
1821 : }
1822 : break;
1823 : default:
1824 : break;
1825 : }
1826 1700852 : } else if (!equi || !lnil) {
1827 1696057 : switch (range) {
1828 72765 : case range_before:
1829 : case range_after:
1830 : /* range we're looking for either completely
1831 : * before or complete after the range of values
1832 : * in the BAT */
1833 72765 : MT_thread_setalgorithm("select: nothing, out of range");
1834 72763 : bn = BATdense(0, 0, 0);
1835 72765 : TRC_DEBUG(ALGO, "b="
1836 : ALGOBATFMT ",s="
1837 : ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1838 : " (" LLFMT " usec): "
1839 : "nothing, out of range\n",
1840 : ALGOBATPAR(b),
1841 : ALGOOPTBATPAR(s), anti,
1842 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1843 72765 : bat_iterator_end(&bi);
1844 72765 : return bn;
1845 6028 : case range_contains:
1846 6028 : if (notnull || b->tnonil) {
1847 : /* search range contains BAT range, and
1848 : * there are no nils, so we can return
1849 : * everything */
1850 5975 : MT_thread_setalgorithm("select: everything, nonil");
1851 5975 : bn = canditer_slice(&ci, 0, ci.ncand);
1852 5975 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1853 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1854 : " (" LLFMT " usec): "
1855 : "everything, nonil\n",
1856 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1857 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1858 5975 : bat_iterator_end(&bi);
1859 5975 : return bn;
1860 : }
1861 : break;
1862 : default:
1863 : break;
1864 : }
1865 : }
1866 :
1867 1675399 : parent = VIEWtparent(b);
1868 1554372 : assert(parent >= 0);
1869 1675399 : BAT *pb;
1870 1675399 : BATiter pbi;
1871 1675399 : if (parent > 0)
1872 1554371 : pb = BATdescriptor(parent);
1873 : else
1874 : pb = NULL;
1875 1675387 : pbi = bat_iterator(pb);
1876 : /* use hash only for equi-join if the bat is not sorted, but
1877 : * only if b or its parent already has a hash, or if b or its
1878 : * parent is persistent and the total size wouldn't be too
1879 : * large; check for existence of hash last since that may
1880 : * involve I/O */
1881 1675410 : if ((equi || antiequi) && !bi.sorted && !bi.revsorted) {
1882 1344644 : double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
1883 1344626 : if (cost > 0 && cost < ci.ncand) {
1884 77759 : wanthash = true;
1885 77759 : if (havehash) {
1886 53065 : if (phash) {
1887 52549 : MT_rwlock_rdlock(&pb->thashlock);
1888 52547 : if (pb->thash == NULL) {
1889 0 : MT_rwlock_rdunlock(&pb->thashlock);
1890 0 : havehash = false;
1891 : }
1892 : } else {
1893 516 : MT_rwlock_rdlock(&b->thashlock);
1894 516 : if (b->thash == NULL) {
1895 0 : MT_rwlock_rdunlock(&b->thashlock);
1896 0 : havehash = false;
1897 : }
1898 : }
1899 : }
1900 77757 : if (wanthash && !havehash && b->batRole != PERSISTENT) {
1901 0 : MT_lock_set(&b->theaplock);
1902 0 : if (++b->selcnt > 1000)
1903 0 : b->selcnt = 1000; /* limit value */
1904 : else
1905 : wanthash = false;
1906 0 : MT_lock_unset(&b->theaplock);
1907 : }
1908 : } else {
1909 1266867 : wanthash = havehash = false;
1910 : }
1911 : }
1912 :
1913 : /* at this point, if havehash is set, we have the hash lock
1914 : * the lock is on the parent if phash is set, on b itself if not
1915 : * also, if havehash is set, then so is wanthash (but not v.v.) */
1916 :
1917 1675390 : if (!havehash) {
1918 : /* make sure tsorted and trevsorted flags are set, but
1919 : * we only need to know if we're not yet sure that we're
1920 : * going for the hash (i.e. it already exists) */
1921 1622323 : (void) BATordered(b);
1922 1622332 : (void) BATordered_rev(b);
1923 : /* reinitialize iterator since properties may have changed */
1924 1622331 : bat_iterator_end(&bi);
1925 1622340 : bi = bat_iterator(b);
1926 : }
1927 :
1928 : /* If there is an order index or it is a view and the parent
1929 : * has an ordered index, and the bat is not tsorted or
1930 : * trevstorted then use the order index. And there is no cand
1931 : * list or if there is one, it is dense.
1932 : * TODO: we do not support anti-select with order index */
1933 1675387 : bool poidx = false;
1934 1675387 : if (!anti &&
1935 1622092 : !havehash &&
1936 1572551 : !bi.sorted &&
1937 1363489 : !bi.revsorted &&
1938 1245199 : ci.tpe == cand_dense) {
1939 953029 : BAT *view = NULL;
1940 953029 : (void) BATcheckorderidx(b);
1941 953028 : MT_lock_set(&b->batIdxLock);
1942 953028 : if ((oidxh = b->torderidx) != NULL)
1943 39 : HEAPincref(oidxh);
1944 953028 : MT_lock_unset(&b->batIdxLock);
1945 953031 : if (oidxh == NULL && pb != NULL) {
1946 885294 : (void) BATcheckorderidx(pb);
1947 885295 : MT_lock_set(&pb->batIdxLock);
1948 885290 : if ((oidxh = pb->torderidx) != NULL) {
1949 34 : HEAPincref(oidxh);
1950 34 : view = b;
1951 34 : b = pb;
1952 : }
1953 885290 : MT_lock_unset(&pb->batIdxLock);
1954 : }
1955 953024 : if (oidxh) {
1956 : /* Is query selective enough to use the ordered index ? */
1957 : /* TODO: Test if this heuristic works in practice */
1958 : /*if ((ORDERfnd(b, th) - ORDERfnd(b, tl)) < ((BUN)1000 < bi.count/1000 ? (BUN)1000: bi.count/1000))*/
1959 73 : if ((ORDERfnd(b, oidxh, th) - ORDERfnd(b, oidxh, tl)) < bi.count/3) {
1960 31 : if (view) {
1961 16 : bat_iterator_end(&bi);
1962 16 : bi = bat_iterator(b);
1963 16 : poidx = true; /* using parent oidx */
1964 16 : vwo = (lng) (view->tbaseoff - bi.baseoff);
1965 16 : vwl = b->hseqbase + (oid) vwo + ci.seq - view->hseqbase;
1966 16 : vwh = vwl + canditer_last(&ci) - ci.seq;
1967 16 : vwo = (lng) view->hseqbase - (lng) b->hseqbase - vwo;
1968 16 : TRC_DEBUG(ALGO, "Switch from " ALGOBATFMT " to " ALGOBATFMT " " OIDFMT "-" OIDFMT " hseq " LLFMT "\n", ALGOBATPAR(view), ALGOBATPAR(b), vwl, vwh, vwo);
1969 : } else {
1970 15 : vwl = ci.seq;
1971 15 : vwh = canditer_last(&ci);
1972 : }
1973 : } else {
1974 42 : if (view) {
1975 18 : b = view;
1976 18 : view = NULL;
1977 : }
1978 42 : HEAPdecref(oidxh, false);
1979 42 : oidxh = NULL;
1980 : }
1981 : }
1982 : }
1983 :
1984 1675382 : if (bi.sorted || bi.revsorted || (!havehash && oidxh != NULL)) {
1985 356163 : BUN low = 0;
1986 356163 : BUN high = bi.count;
1987 :
1988 356163 : if (BATtdensebi(&bi)) {
1989 : /* positional */
1990 : /* we expect nonil to be set, in which case we
1991 : * already know that we're not dealing with a
1992 : * nil equiselect (dealt with above) */
1993 17641 : assert(bi.nonil);
1994 17641 : assert(bi.sorted);
1995 17641 : assert(oidxh == NULL);
1996 17641 : algo = "select: dense";
1997 17641 : if (hval) {
1998 17650 : oid h = * (oid *) th + hi;
1999 17650 : if (h > bi.tseq)
2000 17650 : h -= bi.tseq;
2001 : else
2002 : h = 0;
2003 17650 : if ((BUN) h < high)
2004 17641 : high = (BUN) h;
2005 : }
2006 :
2007 17641 : if (lval) {
2008 17650 : oid l = *(oid *) tl + !li;
2009 17650 : if (l > bi.tseq)
2010 3986 : l -= bi.tseq;
2011 : else
2012 : l = 0;
2013 3986 : if ((BUN) l > low)
2014 3986 : low = (BUN) l;
2015 3986 : if (low > high)
2016 : low = high;
2017 : }
2018 338522 : } else if (bi.sorted) {
2019 218021 : assert(oidxh == NULL);
2020 218021 : algo = "select: sorted";
2021 218021 : if (lval) {
2022 217899 : if (li)
2023 217034 : low = SORTfndfirst(b, tl);
2024 : else
2025 865 : low = SORTfndlast(b, tl);
2026 : } else {
2027 : /* skip over nils at start of column */
2028 122 : low = SORTfndlast(b, nil);
2029 : }
2030 218017 : if (hval) {
2031 216674 : if (hi)
2032 215964 : high = SORTfndlast(b, th);
2033 : else
2034 710 : high = SORTfndfirst(b, th);
2035 : }
2036 120501 : } else if (bi.revsorted) {
2037 120470 : assert(oidxh == NULL);
2038 120470 : algo = "select: reverse sorted";
2039 120470 : if (lval) {
2040 120441 : if (li)
2041 120400 : high = SORTfndlast(b, tl);
2042 : else
2043 41 : high = SORTfndfirst(b, tl);
2044 : } else {
2045 : /* skip over nils at end of column */
2046 29 : high = SORTfndfirst(b, nil);
2047 : }
2048 120470 : if (hval) {
2049 120414 : if (hi)
2050 120359 : low = SORTfndfirst(b, th);
2051 : else
2052 55 : low = SORTfndlast(b, th);
2053 : }
2054 : } else {
2055 31 : assert(oidxh != NULL);
2056 31 : algo = poidx ? "select: parent orderidx" : "select: orderidx";
2057 31 : if (lval) {
2058 24 : if (li)
2059 24 : low = ORDERfndfirst(b, oidxh, tl);
2060 : else
2061 0 : low = ORDERfndlast(b, oidxh, tl);
2062 : } else {
2063 : /* skip over nils at start of column */
2064 7 : low = ORDERfndlast(b, oidxh, nil);
2065 : }
2066 31 : if (hval) {
2067 28 : if (hi)
2068 24 : high = ORDERfndlast(b, oidxh, th);
2069 : else
2070 4 : high = ORDERfndfirst(b, oidxh, th);
2071 : }
2072 : }
2073 356160 : if (anti) {
2074 28773 : assert(oidxh == NULL);
2075 28773 : if (bi.sorted) {
2076 26593 : BUN first = SORTfndlast(b, nil);
2077 : /* match: [first..low) + [high..last) */
2078 26593 : bn = canditer_slice2val(&ci,
2079 : first + b->hseqbase,
2080 : low + b->hseqbase,
2081 26593 : high + b->hseqbase,
2082 : oid_nil);
2083 : } else {
2084 2180 : BUN last = SORTfndfirst(b, nil);
2085 : /* match: [first..low) + [high..last) */
2086 2180 : bn = canditer_slice2val(&ci,
2087 : oid_nil,
2088 : low + b->hseqbase,
2089 : high + b->hseqbase,
2090 2180 : last + b->hseqbase);
2091 : }
2092 : } else {
2093 327387 : if (bi.sorted || bi.revsorted) {
2094 327356 : assert(oidxh == NULL);
2095 : /* match: [low..high) */
2096 327356 : bn = canditer_sliceval(&ci,
2097 : low + b->hseqbase,
2098 327356 : high + b->hseqbase);
2099 : } else {
2100 31 : BUN i;
2101 31 : BUN cnt = 0;
2102 31 : const oid *rs;
2103 31 : oid *rbn;
2104 :
2105 31 : rs = (const oid *) oidxh->base + ORDERIDXOFF;
2106 31 : rs += low;
2107 31 : bn = COLnew(0, TYPE_oid, high-low, TRANSIENT);
2108 31 : if (bn == NULL) {
2109 0 : HEAPdecref(oidxh, false);
2110 0 : bat_iterator_end(&bi);
2111 0 : bat_iterator_end(&pbi);
2112 0 : BBPreclaim(pb);
2113 0 : return NULL;
2114 : }
2115 :
2116 31 : rbn = (oid *) Tloc((bn), 0);
2117 :
2118 147 : for (i = low; i < high; i++) {
2119 116 : if (vwl <= *rs && *rs <= vwh) {
2120 61 : *rbn++ = (oid) ((lng) *rs + vwo);
2121 61 : cnt++;
2122 : }
2123 116 : rs++;
2124 : }
2125 31 : HEAPdecref(oidxh, false);
2126 31 : BATsetcount(bn, cnt);
2127 :
2128 : /* output must be sorted */
2129 31 : GDKqsort(Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid, false, false);
2130 31 : bn->tsorted = true;
2131 31 : bn->trevsorted = bn->batCount <= 1;
2132 31 : bn->tkey = true;
2133 31 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
2134 31 : bn->tnil = false;
2135 31 : bn->tnonil = true;
2136 31 : if (s) {
2137 16 : s = BATintersectcand(bn, s);
2138 16 : BBPunfix(bn->batCacheid);
2139 16 : bn = s;
2140 : }
2141 : }
2142 : }
2143 :
2144 356157 : bn = virtualize(bn);
2145 356149 : MT_thread_setalgorithm(algo);
2146 356139 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",anti=%s -> "
2147 : ALGOOPTBATFMT " %s (" LLFMT " usec)\n",
2148 : ALGOBATPAR(b), anti ? "true" : "false",
2149 : ALGOOPTBATPAR(bn), algo,
2150 : GDKusec() - t0);
2151 :
2152 356139 : bat_iterator_end(&bi);
2153 356168 : bat_iterator_end(&pbi);
2154 356169 : BBPreclaim(pb);
2155 356162 : return bn;
2156 : }
2157 :
2158 53055 : assert(oidxh == NULL);
2159 : /* upper limit for result size */
2160 1319219 : maximum = ci.ncand;
2161 1319219 : if ((equi || antiequi) && havehash) {
2162 : /* we can look in the hash struct to see whether all
2163 : * values are distinct and set estimate accordingly */
2164 53065 : if (phash) {
2165 52549 : if (pb->thash->nunique == pbi.count)
2166 : estimate = 1;
2167 516 : } else if (b->thash->nunique == bi.count)
2168 : estimate = 1;
2169 : }
2170 1313922 : if (estimate == BUN_NONE && (bi.key || (pb != NULL && pbi.key))) {
2171 : /* exact result size in special cases */
2172 326 : if (equi || (antiequi && wanthash)) {
2173 : estimate = 1;
2174 9 : } else if (!anti && lval && hval) {
2175 0 : switch (t) {
2176 0 : case TYPE_bte:
2177 0 : estimate = (BUN) (*(bte *) th - *(bte *) tl);
2178 0 : break;
2179 0 : case TYPE_sht:
2180 0 : estimate = (BUN) (*(sht *) th - *(sht *) tl);
2181 0 : break;
2182 0 : case TYPE_int:
2183 0 : CALC_ESTIMATE(int);
2184 : break;
2185 0 : case TYPE_lng:
2186 0 : CALC_ESTIMATE(lng);
2187 : break;
2188 : #ifdef HAVE_HGE
2189 0 : case TYPE_hge:
2190 0 : CALC_ESTIMATE(hge);
2191 : break;
2192 : #endif
2193 : }
2194 0 : if (estimate == BUN_NONE)
2195 0 : estimate += li + hi - 1;
2196 : }
2197 : }
2198 : /* refine upper limit by exact size (if known) */
2199 1319219 : maximum = MIN(maximum, estimate);
2200 1319219 : if (wanthash && !havehash && estimate == BUN_NONE) {
2201 : /* no exact result size, but we need estimate to
2202 : * choose between hash- & scan-select (if we already
2203 : * have a hash, it's a no-brainer: we use it) */
2204 24661 : if (ci.ncand <= 10000) {
2205 : /* "small" input: don't bother about more accurate
2206 : * estimate */
2207 : estimate = maximum;
2208 : } else {
2209 : /* layman's quick "pseudo-sample" of 1000 tuples,
2210 : * i.e., 333 from begin, middle & end of BAT */
2211 2 : BUN smpl_cnt = 0, slct_cnt = 0, pos, skip, delta;
2212 2 : BAT *smpl, *slct;
2213 :
2214 2 : delta = 1000 / 3 / 2;
2215 2 : skip = (bi.count - (2 * delta)) / 2;
2216 8 : for (pos = delta; pos < bi.count; pos += skip) {
2217 6 : smpl = BATslice(b, pos - delta, pos + delta);
2218 6 : if (smpl) {
2219 6 : slct = BATselect(smpl, NULL, tl,
2220 : th, li, hi, anti);
2221 6 : if (slct) {
2222 6 : smpl_cnt += BATcount(smpl);
2223 6 : slct_cnt += BATcount(slct);
2224 6 : BBPreclaim(slct);
2225 : }
2226 6 : BBPreclaim(smpl);
2227 : }
2228 : }
2229 2 : if (smpl_cnt > 0 && slct_cnt > 0) {
2230 : /* linear extrapolation plus 10% margin */
2231 1 : estimate = (BUN) ((dbl) slct_cnt / (dbl) smpl_cnt
2232 1 : * (dbl) bi.count * 1.1);
2233 1 : } else if (smpl_cnt > 0 && slct_cnt == 0) {
2234 : /* estimate low enough to trigger hash select */
2235 1 : estimate = (ci.ncand / 100) - 1;
2236 : }
2237 : }
2238 24661 : wanthash = estimate < ci.ncand / 100;
2239 : }
2240 1319219 : if (estimate == BUN_NONE) {
2241 : /* no better estimate possible/required:
2242 : * (pre-)allocate 1M tuples, i.e., avoid/delay extend
2243 : * without too much overallocation */
2244 1288935 : estimate = 1000000;
2245 : }
2246 : /* limit estimation by upper limit */
2247 1319219 : estimate = MIN(estimate, maximum);
2248 :
2249 1319219 : bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
2250 1319213 : if (bn == NULL) {
2251 0 : if (havehash) {
2252 0 : if (phash)
2253 0 : MT_rwlock_rdunlock(&pb->thashlock);
2254 : else
2255 0 : MT_rwlock_rdunlock(&b->thashlock);
2256 : }
2257 0 : bat_iterator_end(&bi);
2258 0 : bat_iterator_end(&pbi);
2259 0 : BBPreclaim(pb);
2260 0 : return NULL;
2261 : }
2262 :
2263 1319213 : if (wanthash) {
2264 : /* hashselect unlocks the hash lock */
2265 53060 : bn = hashselect(&bi, &ci, bn, tl, maximum, havehash, phash, &algo);
2266 53067 : if (bn && antiequi) {
2267 3517 : BAT *bn2;
2268 3517 : if (s) {
2269 3517 : bn2 = BATdiffcand(s, bn);
2270 : } else {
2271 0 : bn2 = BATnegcands2(ci.seq, bi.count, bn);
2272 : }
2273 3517 : BBPreclaim(bn);
2274 3517 : bn = bn2;
2275 3517 : if (!bi.nonil) {
2276 16 : bn2 = BATselect(b, s, nil, NULL, true, true, false);
2277 16 : if (bn2 == NULL) {
2278 0 : BBPreclaim(bn);
2279 0 : return NULL;
2280 : }
2281 16 : BAT *bn3 = BATdiffcand(bn, bn2);
2282 16 : BBPreclaim(bn2);
2283 16 : BBPreclaim(bn);
2284 : bn = bn3;
2285 : }
2286 : }
2287 : } else {
2288 1266153 : assert(!havehash);
2289 : /* use imprints if
2290 : * i) bat is persistent, or parent is persistent
2291 : * ii) it is not an equi-select, and
2292 : * iii) imprints are supported.
2293 : */
2294 1266153 : Imprints *imprints = NULL;
2295 1266153 : if (!equi &&
2296 : /* DISABLES CODE */ (0) && imprintable(bi.type) &&
2297 : (!bi.transient ||
2298 : (pb != NULL && !pbi.transient)) &&
2299 : BATimprints(b) == GDK_SUCCEED) {
2300 : if (pb != NULL) {
2301 : MT_lock_set(&pb->batIdxLock);
2302 : imprints = pb->timprints;
2303 : if (imprints != NULL)
2304 : IMPSincref(imprints);
2305 : MT_lock_unset(&pb->batIdxLock);
2306 : } else {
2307 : MT_lock_set(&b->batIdxLock);
2308 : imprints = b->timprints;
2309 : if (imprints != NULL)
2310 : IMPSincref(imprints);
2311 1266153 : MT_lock_unset(&b->batIdxLock);
2312 : }
2313 : }
2314 1266153 : GDKclrerr();
2315 1266145 : bn = scanselect(&bi, &ci, bn, tl, th, li, hi, equi, anti,
2316 : lval, hval, lnil, maximum, imprints, &algo);
2317 1266145 : if (imprints)
2318 : IMPSdecref(imprints, false);
2319 : }
2320 1319199 : bat_iterator_end(&bi);
2321 1319225 : bat_iterator_end(&pbi);
2322 1319223 : BBPreclaim(pb);
2323 :
2324 1319214 : bn = virtualize(bn);
2325 1319219 : MT_thread_setalgorithm(algo);
2326 1319206 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT",anti=%s -> " ALGOOPTBATFMT
2327 : " %s (" LLFMT " usec)\n",
2328 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
2329 : anti ? "true" : "false",
2330 : ALGOOPTBATPAR(bn), algo,
2331 : GDKusec() - t0);
2332 :
2333 : return bn;
2334 : }
2335 :
2336 : /* theta select
2337 : *
2338 : * Returns a BAT with the OID values of b for qualifying tuples. The
2339 : * return BAT is sorted (i.e. in the same order as the input BAT).
2340 : *
2341 : * If s is not NULL, it is a list of candidates. s must be sorted.
2342 : *
2343 : * Theta select returns all values from b which are less/greater than
2344 : * or (not) equal to the provided value depending on the value of op.
2345 : * Op is a string with one of the values: "=", "==", "<", "<=", ">",
2346 : * ">=", "<>", "!=" (the first two are equivalent and the last two are
2347 : * equivalent). Theta select never returns nils.
2348 : *
2349 : * If value is nil, the result is empty.
2350 : */
2351 : BAT *
2352 300577 : BATthetaselect(BAT *b, BAT *s, const void *val, const char *op)
2353 : {
2354 300577 : const void *nil;
2355 :
2356 300577 : BATcheck(b, NULL);
2357 300577 : BATcheck(val, NULL);
2358 300577 : BATcheck(op, NULL);
2359 :
2360 300577 : nil = ATOMnilptr(b->ttype);
2361 300577 : if (ATOMcmp(b->ttype, val, nil) == 0)
2362 25 : return BATdense(0, 0, 0);
2363 300547 : if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
2364 : /* "=" or "==" */
2365 227947 : return BATselect(b, s, val, NULL, true, true, false);
2366 : }
2367 72600 : if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
2368 : /* "!=" (equivalent to "<>") */
2369 68266 : return BATselect(b, s, val, NULL, true, true, true);
2370 : }
2371 4334 : if (op[0] == '<') {
2372 516 : if (op[1] == 0) {
2373 : /* "<" */
2374 430 : return BATselect(b, s, nil, val, false, false, false);
2375 : }
2376 86 : if (op[1] == '=' && op[2] == 0) {
2377 : /* "<=" */
2378 80 : return BATselect(b, s, nil, val, false, true, false);
2379 : }
2380 6 : if (op[1] == '>' && op[2] == 0) {
2381 : /* "<>" (equivalent to "!=") */
2382 6 : return BATselect(b, s, val, NULL, true, true, true);
2383 : }
2384 : }
2385 3818 : if (op[0] == '>') {
2386 3818 : if (op[1] == 0) {
2387 : /* ">" */
2388 3474 : return BATselect(b, s, val, nil, false, false, false);
2389 : }
2390 344 : if (op[1] == '=' && op[2] == 0) {
2391 : /* ">=" */
2392 344 : return BATselect(b, s, val, nil, true, false, false);
2393 : }
2394 : }
2395 0 : GDKerror("unknown operator.\n");
2396 0 : return NULL;
2397 : }
2398 :
2399 : #define VALUE(s, x) (s##vars ? \
2400 : s##vars + VarHeapVal(s##vals, (x), s##width) : \
2401 : s##vals + ((x) * s##width))
2402 : #define FVALUE(s, x) (s##vals + ((x) * s##width))
2403 :
2404 : #define LTany(a,b) ((*cmp)(a, b) < 0)
2405 : #define EQany(a,b) ((*cmp)(a, b) == 0)
2406 : #define is_any_nil(v) ((v) == NULL || (*cmp)((v), nil) == 0)
2407 :
2408 : #define less3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(a, b) || (i && EQ##t(a, b)))
2409 : #define grtr3(a,b,i,t) (is_##t##_nil(a) || is_##t##_nil(b) ? bit_nil : LT##t(b, a) || (i && EQ##t(a, b)))
2410 : #define or3(a,b) ((a) == 1 || (b) == 1 ? 1 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 0)
2411 : #define and3(a,b) ((a) == 0 || (b) == 0 ? 0 : is_bit_nil(a) || is_bit_nil(b) ? bit_nil : 1)
2412 : #define not3(a) (is_bit_nil(a) ? bit_nil : !(a))
2413 :
2414 : #define between3(v, lo, linc, hi, hinc, TYPE) \
2415 : and3(grtr3(v, lo, linc, TYPE), less3(v, hi, hinc, TYPE))
2416 :
2417 : #define BETWEEN(v, lo, linc, hi, hinc, TYPE) \
2418 : (is_##TYPE##_nil(v) \
2419 : ? bit_nil \
2420 : : (bit) (anti \
2421 : ? (symmetric \
2422 : ? not3(or3(between3(v, lo, linc, hi, hinc, TYPE), \
2423 : between3(v, hi, hinc, lo, linc, TYPE))) \
2424 : : not3(between3(v, lo, linc, hi, hinc, TYPE))) \
2425 : : (symmetric \
2426 : ? or3(between3(v, lo, linc, hi, hinc, TYPE), \
2427 : between3(v, hi, hinc, lo, linc, TYPE)) \
2428 : : between3(v, lo, linc, hi, hinc, TYPE))))
2429 :
2430 : gdk_return
2431 114 : rangejoin(BAT *r1, BAT *r2, BAT *l, BAT *rl, BAT *rh,
2432 : struct canditer *lci, struct canditer *rci,
2433 : bool linc, bool hinc, bool anti, bool symmetric, BUN maxsize)
2434 : {
2435 114 : if (!anti && !symmetric) {
2436 : /* we'll need these */
2437 103 : (void) BATordered(l);
2438 103 : (void) BATordered_rev(l);
2439 : }
2440 114 : BATiter li = bat_iterator(l);
2441 114 : BATiter rli = bat_iterator(rl);
2442 114 : BATiter rhi = bat_iterator(rh);
2443 114 : const char *rlvals, *rhvals;
2444 114 : const char *lvars, *rlvars, *rhvars;
2445 114 : int rlwidth, rhwidth;
2446 114 : int lwidth;
2447 114 : const void *nil = ATOMnilptr(li.type);
2448 114 : int (*cmp)(const void *, const void *) = ATOMcompare(li.type);
2449 114 : int t;
2450 114 : BUN cnt, ncnt, lncand = lci->ncand, rncand = rci->ncand;
2451 114 : oid *restrict dst1, *restrict dst2;
2452 114 : const void *vrl, *vrh;
2453 114 : oid ro;
2454 114 : oid rlval = oid_nil, rhval = oid_nil;
2455 114 : int sorted = 0; /* which output column is sorted */
2456 114 : BAT *tmp = NULL;
2457 114 : const char *algo = NULL;
2458 114 : Heap *oidxh = NULL;
2459 :
2460 114 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
2461 :
2462 342 : assert(ATOMtype(li.type) == ATOMtype(rli.type));
2463 228 : assert(ATOMtype(li.type) == ATOMtype(rhi.type));
2464 114 : assert(BATcount(rl) == BATcount(rh));
2465 114 : assert(rl->hseqbase == rh->hseqbase);
2466 114 : assert(r1->ttype == TYPE_oid);
2467 114 : assert(r2 == NULL || r2->ttype == TYPE_oid);
2468 97 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2469 114 : assert(li.type != TYPE_void || !is_oid_nil(l->tseqbase));
2470 114 : assert(rli.type != TYPE_void || !is_oid_nil(rl->tseqbase));
2471 114 : assert(rhi.type != TYPE_void || !is_oid_nil(rh->tseqbase));
2472 :
2473 114 : TRC_DEBUG(ALGO, "l=" ALGOBATFMT ","
2474 : "rl=" ALGOBATFMT ",rh=" ALGOBATFMT ","
2475 : "sl=" ALGOOPTBATFMT ",sr=" ALGOOPTBATFMT ","
2476 : "anti=%s,symmetric=%s\n",
2477 : ALGOBATPAR(l),
2478 : ALGOBATPAR(rl),
2479 : ALGOBATPAR(rh),
2480 : ALGOOPTBATPAR(lci->s),
2481 : ALGOOPTBATPAR(rci->s),
2482 : anti ? "true" : "false",
2483 : symmetric ? "true" : "false");
2484 :
2485 114 : rlvals = rli.type == TYPE_void ? NULL : (const char *) rli.base;
2486 114 : rhvals = rhi.type == TYPE_void ? NULL : (const char *) rhi.base;
2487 114 : lwidth = li.width;
2488 114 : rlwidth = rli.width;
2489 114 : rhwidth = rhi.width;
2490 114 : dst1 = (oid *) Tloc(r1, 0);
2491 114 : dst2 = r2 ? (oid *) Tloc(r2, 0) : NULL;
2492 :
2493 114 : t = ATOMtype(li.type);
2494 114 : t = ATOMbasetype(t);
2495 :
2496 114 : if (li.vh && li.type) {
2497 15 : assert(rli.vh && rli.type);
2498 15 : assert(rhi.vh && rhi.type);
2499 15 : lvars = li.vh->base;
2500 15 : rlvars = rli.vh->base;
2501 15 : rhvars = rhi.vh->base;
2502 : } else {
2503 99 : assert(rli.vh == NULL);
2504 99 : assert(rhi.vh == NULL);
2505 : lvars = rlvars = rhvars = NULL;
2506 : }
2507 :
2508 114 : if (!anti && !symmetric && !li.sorted && !li.revsorted) {
2509 10 : (void) BATcheckorderidx(l);
2510 10 : MT_lock_set(&l->batIdxLock);
2511 10 : if ((oidxh = l->torderidx) != NULL)
2512 0 : HEAPincref(oidxh);
2513 10 : MT_lock_unset(&l->batIdxLock);
2514 : #if 0 /* needs checking */
2515 : if (oidxh == NULL && VIEWtparent(l)) {
2516 : /* if enabled, need to fix/unfix parent bat */
2517 : BAT *pb = BBP_desc(VIEWtparent(l));
2518 : (void) BATcheckorderidx(pb);
2519 : MT_lock_set(&pb->batIdxLock);
2520 : if ((oidxh = pb->torderidx) != NULL) {
2521 : HEAPincref(oidxh);
2522 : l = pb;
2523 : }
2524 : MT_lock_unset(&pb->batIdxLock);
2525 : }
2526 : #endif
2527 : }
2528 :
2529 114 : vrl = &rlval;
2530 114 : vrh = &rhval;
2531 114 : if (!anti && !symmetric && (li.sorted || li.revsorted || oidxh)) {
2532 : /* left column is sorted, use binary search */
2533 93 : sorted = 2;
2534 425 : TIMEOUT_LOOP(rncand, qry_ctx) {
2535 239 : BUN low, high;
2536 :
2537 239 : ro = canditer_next(rci);
2538 239 : if (rlvals) {
2539 239 : vrl = VALUE(rl, ro - rl->hseqbase);
2540 : } else {
2541 : /* TYPE_void */
2542 0 : rlval = ro - rl->hseqbase + rl->tseqbase;
2543 : }
2544 239 : if (rhvals) {
2545 239 : vrh = VALUE(rh, ro - rh->hseqbase);
2546 : } else {
2547 : /* TYPE_void */
2548 0 : rhval = ro - rh->hseqbase + rh->tseqbase;
2549 : }
2550 239 : if (cmp(vrl, nil) == 0 || cmp(vrh, nil) == 0)
2551 9 : continue;
2552 230 : if (li.sorted) {
2553 211 : if (linc)
2554 148 : low = SORTfndfirst(l, vrl);
2555 : else
2556 63 : low = SORTfndlast(l, vrl);
2557 211 : if (hinc)
2558 148 : high = SORTfndlast(l, vrh);
2559 : else
2560 63 : high = SORTfndfirst(l, vrh);
2561 19 : } else if (li.revsorted) {
2562 19 : if (hinc)
2563 19 : low = SORTfndfirst(l, vrh);
2564 : else
2565 0 : low = SORTfndlast(l, vrh);
2566 19 : if (linc)
2567 3 : high = SORTfndlast(l, vrl);
2568 : else
2569 16 : high = SORTfndfirst(l, vrl);
2570 : } else {
2571 0 : assert(oidxh);
2572 0 : if (linc)
2573 0 : low = ORDERfndfirst(l, oidxh, vrl);
2574 : else
2575 0 : low = ORDERfndlast(l, oidxh, vrl);
2576 0 : if (hinc)
2577 0 : high = ORDERfndlast(l, oidxh, vrh);
2578 : else
2579 0 : high = ORDERfndfirst(l, oidxh, vrh);
2580 : }
2581 230 : if (high <= low)
2582 124 : continue;
2583 106 : if (li.sorted || li.revsorted) {
2584 106 : low = canditer_search(lci, low + l->hseqbase, true);
2585 106 : high = canditer_search(lci, high + l->hseqbase, true);
2586 106 : assert(high >= low);
2587 :
2588 106 : if (BATcapacity(r1) < BATcount(r1) + high - low) {
2589 0 : cnt = BATcount(r1) + high - low + 1024;
2590 0 : if (cnt > maxsize)
2591 : cnt = maxsize;
2592 0 : BATsetcount(r1, BATcount(r1));
2593 0 : if (BATextend(r1, cnt) != GDK_SUCCEED)
2594 0 : goto bailout;
2595 0 : dst1 = (oid *) Tloc(r1, 0);
2596 0 : if (r2) {
2597 0 : BATsetcount(r2, BATcount(r2));
2598 0 : if (BATextend(r2, cnt) != GDK_SUCCEED)
2599 0 : goto bailout;
2600 0 : assert(BATcapacity(r1) == BATcapacity(r2));
2601 0 : dst2 = (oid *) Tloc(r2, 0);
2602 : }
2603 : }
2604 106 : canditer_setidx(lci, low);
2605 587 : while (low < high) {
2606 481 : dst1[r1->batCount++] = canditer_next(lci);
2607 481 : if (r2) {
2608 469 : dst2[r2->batCount++] = ro;
2609 : }
2610 481 : low++;
2611 : }
2612 : } else {
2613 0 : const oid *ord;
2614 :
2615 0 : assert(oidxh);
2616 0 : ord = (const oid *) oidxh->base + ORDERIDXOFF;
2617 :
2618 0 : if (BATcapacity(r1) < BATcount(r1) + high - low) {
2619 0 : cnt = BATcount(r1) + high - low + 1024;
2620 0 : if (cnt > maxsize)
2621 : cnt = maxsize;
2622 0 : BATsetcount(r1, BATcount(r1));
2623 0 : if (BATextend(r1, cnt) != GDK_SUCCEED)
2624 0 : goto bailout;
2625 0 : dst1 = (oid *) Tloc(r1, 0);
2626 0 : if (r2) {
2627 0 : BATsetcount(r2, BATcount(r2));
2628 0 : if (BATextend(r2, cnt) != GDK_SUCCEED)
2629 0 : goto bailout;
2630 0 : assert(BATcapacity(r1) == BATcapacity(r2));
2631 0 : dst2 = (oid *) Tloc(r2, 0);
2632 : }
2633 : }
2634 :
2635 0 : while (low < high) {
2636 0 : if (canditer_contains(lci, ord[low])) {
2637 0 : dst1[r1->batCount++] = ord[low];
2638 0 : if (r2) {
2639 0 : dst2[r2->batCount++] = ro;
2640 : }
2641 : }
2642 0 : low++;
2643 : }
2644 : }
2645 : }
2646 93 : if (oidxh)
2647 0 : HEAPdecref(oidxh, false);
2648 93 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
2649 93 : cnt = BATcount(r1);
2650 93 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
2651 21 : } else if (/* DISABLES CODE */ (0) &&
2652 : !anti && !symmetric &&
2653 : imprintable(li.type) &&
2654 : (BATcount(rl) > 2 ||
2655 : !li.transient ||
2656 : (VIEWtparent(l) != 0 &&
2657 : /* if enabled, need to fix/unfix parent bat */
2658 : /* batTransient access needs to be protected */
2659 : !(tmp = BBP_desc(VIEWtparent(l)))->batTransient) ||
2660 : BATcheckimprints(l)) &&
2661 : BATimprints(l) == GDK_SUCCEED) {
2662 : /* implementation using imprints on left column
2663 : *
2664 : * we use imprints if we can (the type is right for
2665 : * imprints) and either the left bat is persistent or
2666 : * already has imprints, or the right bats are long
2667 : * enough (for creating imprints being worth it) */
2668 : Imprints *imprints = NULL;
2669 :
2670 : if (tmp) {
2671 : MT_lock_set(&tmp->batIdxLock);
2672 : imprints = tmp->timprints;
2673 : if (imprints != NULL)
2674 : IMPSincref(imprints);
2675 : MT_lock_unset(&tmp->batIdxLock);
2676 : } else {
2677 : MT_lock_set(&l->batIdxLock);
2678 : imprints = l->timprints;
2679 : if (imprints != NULL)
2680 : IMPSincref(imprints);
2681 : MT_lock_unset(&l->batIdxLock);
2682 : }
2683 : /* in the unlikely case that the imprints were removed
2684 : * before we could get at them, take the slow, nested
2685 : * loop implementation */
2686 : if (imprints == NULL)
2687 : goto nestedloop;
2688 :
2689 : sorted = 2;
2690 : cnt = 0;
2691 : TIMEOUT_LOOP_IDX_DECL(i, rncand, qry_ctx) {
2692 : maxsize = cnt + (rncand - i) * lncand;
2693 : ro = canditer_next(rci);
2694 : if (rlvals) {
2695 : vrl = FVALUE(rl, ro - rl->hseqbase);
2696 : } else {
2697 : /* TYPE_void */
2698 : rlval = ro - rl->hseqbase + rl->tseqbase;
2699 : }
2700 : if (rhvals) {
2701 : vrh = FVALUE(rh, ro - rh->hseqbase);
2702 : } else {
2703 : /* TYPE_void */
2704 : rhval = ro - rl->hseqbase + rl->tseqbase;
2705 : }
2706 : dst1 = (oid *) Tloc(r1, 0);
2707 : canditer_reset(lci);
2708 : switch (t) {
2709 : case TYPE_bte: {
2710 : bte vl, vh;
2711 : if (is_bte_nil((vl = *(bte *) vrl)))
2712 : continue;
2713 : if (is_bte_nil((vh = *(bte *) vrh)))
2714 : continue;
2715 : if (!linc) {
2716 : if (vl == MAXVALUEbte)
2717 : continue;
2718 : vl = NEXTVALUEbte(vl);
2719 : }
2720 : if (!hinc) {
2721 : if (vh == MINVALUEbte)
2722 : continue;
2723 : vh = PREVVALUEbte(vh);
2724 : }
2725 : if (vl > vh)
2726 : continue;
2727 : ncnt = fullscan_bte(&li, lci, r1, &vl, &vh,
2728 : true, true, false,
2729 : false, true, true,
2730 : false, cnt,
2731 : l->hseqbase, dst1,
2732 : maxsize,
2733 : imprints, &algo);
2734 : break;
2735 : }
2736 : case TYPE_sht: {
2737 : sht vl, vh;
2738 : if (is_sht_nil((vl = *(sht *) vrl)))
2739 : continue;
2740 : if (is_sht_nil((vh = *(sht *) vrh)))
2741 : continue;
2742 : if (!linc) {
2743 : if (vl == MAXVALUEsht)
2744 : continue;
2745 : vl = NEXTVALUEsht(vl);
2746 : }
2747 : if (!hinc) {
2748 : if (vh == MINVALUEsht)
2749 : continue;
2750 : vh = PREVVALUEsht(vh);
2751 : }
2752 : if (vl > vh)
2753 : continue;
2754 : ncnt = fullscan_sht(&li, lci, r1, &vl, &vh,
2755 : true, true, false,
2756 : false, true, true,
2757 : false, cnt,
2758 : l->hseqbase, dst1,
2759 : maxsize,
2760 : imprints, &algo);
2761 : break;
2762 : }
2763 : case TYPE_int:
2764 : #if SIZEOF_OID == SIZEOF_INT
2765 : case TYPE_oid:
2766 : #endif
2767 : {
2768 : int vl, vh;
2769 : if (is_int_nil((vl = *(int *) vrl)))
2770 : continue;
2771 : if (is_int_nil((vh = *(int *) vrh)))
2772 : continue;
2773 : if (!linc) {
2774 : if (vl == MAXVALUEint)
2775 : continue;
2776 : vl = NEXTVALUEint(vl);
2777 : }
2778 : if (!hinc) {
2779 : #if SIZEOF_OID == SIZEOF_INT
2780 : if (t == TYPE_oid) {
2781 : if (vh == MINVALUEoid)
2782 : continue;
2783 : vh = PREVVALUEoid(vh);
2784 : } else
2785 : #endif
2786 : {
2787 : if (vh == MINVALUEint)
2788 : continue;
2789 : vh = PREVVALUEint(vh);
2790 : }
2791 : }
2792 : if (vl > vh)
2793 : continue;
2794 : ncnt = fullscan_int(&li, lci, r1, &vl, &vh,
2795 : true, true, false,
2796 : false, true, true,
2797 : false, cnt,
2798 : l->hseqbase, dst1,
2799 : maxsize,
2800 : imprints, &algo);
2801 : break;
2802 : }
2803 : case TYPE_lng:
2804 : #if SIZEOF_OID == SIZEOF_LNG
2805 : case TYPE_oid:
2806 : #endif
2807 : {
2808 : lng vl, vh;
2809 : if (is_lng_nil((vl = *(lng *) vrl)))
2810 : continue;
2811 : if (is_lng_nil((vh = *(lng *) vrh)))
2812 : continue;
2813 : if (!linc) {
2814 : if (vl == MAXVALUElng)
2815 : continue;
2816 : vl = NEXTVALUElng(vl);
2817 : }
2818 : if (!hinc) {
2819 : #if SIZEOF_OID == SIZEOF_LNG
2820 : if (t == TYPE_oid) {
2821 : if (vh == MINVALUEoid)
2822 : continue;
2823 : vh = PREVVALUEoid(vh);
2824 : } else
2825 : #endif
2826 : {
2827 : if (vh == MINVALUElng)
2828 : continue;
2829 : vh = PREVVALUElng(vh);
2830 : }
2831 : }
2832 : if (vl > vh)
2833 : continue;
2834 : ncnt = fullscan_lng(&li, lci, r1, &vl, &vh,
2835 : true, true, false,
2836 : false, true, true,
2837 : false, cnt,
2838 : l->hseqbase, dst1,
2839 : maxsize,
2840 : imprints, &algo);
2841 : break;
2842 : }
2843 : #ifdef HAVE_HGE
2844 : case TYPE_hge: {
2845 : hge vl, vh;
2846 : if (is_hge_nil((vl = *(hge *) vrl)))
2847 : continue;
2848 : if (is_hge_nil((vh = *(hge *) vrh)))
2849 : continue;
2850 : if (!linc) {
2851 : if (vl == MAXVALUEhge)
2852 : continue;
2853 : vl = NEXTVALUEhge(vl);
2854 : }
2855 : if (!hinc) {
2856 : if (vh == MINVALUEhge)
2857 : continue;
2858 : vh = PREVVALUEhge(vh);
2859 : }
2860 : if (vl > vh)
2861 : continue;
2862 : ncnt = fullscan_hge(&li, lci, r1, &vl, &vh,
2863 : true, true, false,
2864 : false, true, true,
2865 : false, cnt,
2866 : l->hseqbase, dst1,
2867 : maxsize,
2868 : imprints, &algo);
2869 : break;
2870 : }
2871 : #endif
2872 : case TYPE_flt: {
2873 : flt vl, vh;
2874 : vl = *(flt *) vrl;
2875 : if (is_flt_nil(vl))
2876 : continue;
2877 : vh = *(flt *) vrh;
2878 : if (is_flt_nil(vh))
2879 : continue;
2880 : if (!linc) {
2881 : if (vl == MAXVALUEflt)
2882 : continue;
2883 : vl = NEXTVALUEflt(vl);
2884 : }
2885 : if (!hinc) {
2886 : if (vh == MINVALUEflt)
2887 : continue;
2888 : vh = PREVVALUEflt(vh);
2889 : }
2890 : if (vl > vh)
2891 : continue;
2892 : ncnt = fullscan_flt(&li, lci, r1, &vl, &vh,
2893 : true, true, false,
2894 : false, true, true,
2895 : false, cnt,
2896 : l->hseqbase, dst1,
2897 : maxsize,
2898 : imprints, &algo);
2899 : break;
2900 : }
2901 : case TYPE_dbl: {
2902 : dbl vl, vh;
2903 : vl = *(dbl *) vrl;
2904 : if (is_dbl_nil(vl))
2905 : continue;
2906 : vh = *(dbl *) vrh;
2907 : if (is_dbl_nil(vh))
2908 : continue;
2909 : if (!linc) {
2910 : if (vl == MAXVALUEdbl)
2911 : continue;
2912 : vl = NEXTVALUEdbl(vl);
2913 : }
2914 : if (!hinc) {
2915 : if (vh == MINVALUEdbl)
2916 : continue;
2917 : vh = PREVVALUEdbl(vh);
2918 : }
2919 : if (vl > vh)
2920 : continue;
2921 : ncnt = fullscan_dbl(&li, lci, r1, &vl, &vh,
2922 : true, true, false,
2923 : false, true, true,
2924 : false, cnt,
2925 : l->hseqbase, dst1,
2926 : maxsize,
2927 : imprints, &algo);
2928 : break;
2929 : }
2930 : default:
2931 : ncnt = BUN_NONE;
2932 : GDKerror("unsupported type\n");
2933 : MT_UNREACHABLE();
2934 : }
2935 : if (ncnt == BUN_NONE) {
2936 : IMPSdecref(imprints, false);
2937 : goto bailout;
2938 : }
2939 : assert(ncnt >= cnt || ncnt == 0);
2940 : if (ncnt == cnt || ncnt == 0)
2941 : continue;
2942 : if (r2) {
2943 : if (BATcapacity(r2) < ncnt) {
2944 : BATsetcount(r2, cnt);
2945 : if (BATextend(r2, BATcapacity(r1)) != GDK_SUCCEED) {
2946 : IMPSdecref(imprints, false);
2947 : goto bailout;
2948 : }
2949 : dst2 = (oid *) Tloc(r2, 0);
2950 : }
2951 : while (cnt < ncnt)
2952 : dst2[cnt++] = ro;
2953 : } else {
2954 : cnt = ncnt;
2955 : }
2956 : }
2957 : IMPSdecref(imprints, false);
2958 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
2959 : } else {
2960 21 : nestedloop:;
2961 : /* nested loop implementation */
2962 21 : const void *vl;
2963 21 : const char *lvals;
2964 21 : oid lval;
2965 :
2966 21 : GDKclrerr(); /* not interested in BATimprints errors */
2967 21 : sorted = 1;
2968 21 : lvals = li.type == TYPE_void ? NULL : (const char *) li.base;
2969 21 : vl = &lval;
2970 289 : TIMEOUT_LOOP(lncand, qry_ctx) {
2971 247 : oid lo;
2972 :
2973 247 : lo = canditer_next(lci);
2974 247 : if (lvals) {
2975 247 : vl = VALUE(l, lo - l->hseqbase);
2976 247 : if (cmp(vl, nil) == 0)
2977 8 : continue;
2978 : } else {
2979 0 : lval = lo - l->hseqbase + l->tseqbase;
2980 : }
2981 239 : canditer_reset(rci);
2982 37791 : for (BUN j = 0; j < rncand; j++) {
2983 37552 : ro = canditer_next(rci);
2984 34297 : if (rlvals) {
2985 34297 : vrl = VALUE(rl, ro - rl->hseqbase);
2986 : } else {
2987 : /* TYPE_void */
2988 0 : rlval = ro - rl->hseqbase + rl->tseqbase;
2989 : }
2990 34297 : if (rhvals) {
2991 34297 : vrh = VALUE(rh, ro - rh->hseqbase);
2992 : } else {
2993 : /* TYPE_void */
2994 0 : rhval = ro - rh->hseqbase + rh->tseqbase;
2995 : }
2996 34311 : if (BETWEEN(vl, vrl, linc, vrh, hinc, any) != 1)
2997 24171 : continue;
2998 13381 : if (BATcount(r1) == BATcapacity(r1)) {
2999 2 : BUN newcap = BATgrows(r1);
3000 2 : if (newcap > maxsize)
3001 : newcap = maxsize;
3002 2 : BATsetcount(r1, BATcount(r1));
3003 2 : if (BATextend(r1, newcap) != GDK_SUCCEED)
3004 0 : goto bailout;
3005 2 : dst1 = (oid *) Tloc(r1, 0);
3006 2 : if (r2) {
3007 2 : BATsetcount(r2, BATcount(r2));
3008 2 : if (BATextend(r2, newcap) != GDK_SUCCEED)
3009 0 : goto bailout;
3010 2 : assert(BATcapacity(r1) == BATcapacity(r2));
3011 2 : dst2 = (oid *) Tloc(r2, 0);
3012 : }
3013 : }
3014 13381 : dst1[r1->batCount++] = lo;
3015 13381 : if (r2) {
3016 13372 : dst2[r2->batCount++] = ro;
3017 : }
3018 : }
3019 : }
3020 21 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
3021 21 : cnt = BATcount(r1);
3022 21 : assert(r2 == NULL || BATcount(r1) == BATcount(r2));
3023 : }
3024 :
3025 : /* also set other bits of heap to correct value to indicate size */
3026 114 : BATsetcount(r1, cnt);
3027 :
3028 : /* set properties using an extra scan (usually not complete) */
3029 113 : dst1 = (oid *) Tloc(r1, 0);
3030 113 : r1->tkey = true;
3031 113 : r1->tsorted = true;
3032 113 : r1->trevsorted = true;
3033 113 : r1->tseqbase = 0;
3034 113 : r1->tnil = false;
3035 113 : r1->tnonil = true;
3036 419 : for (ncnt = 1; ncnt < cnt; ncnt++) {
3037 312 : if (dst1[ncnt - 1] == dst1[ncnt]) {
3038 248 : r1->tseqbase = oid_nil;
3039 248 : r1->tkey = false;
3040 64 : } else if (dst1[ncnt - 1] < dst1[ncnt]) {
3041 61 : r1->trevsorted = false;
3042 61 : if (dst1[ncnt - 1] + 1 != dst1[ncnt])
3043 3 : r1->tseqbase = oid_nil;
3044 : } else {
3045 3 : assert(sorted != 1);
3046 3 : r1->tsorted = false;
3047 3 : r1->tseqbase = oid_nil;
3048 3 : r1->tkey = false;
3049 : }
3050 366 : if (!(r1->trevsorted | BATtdense(r1) | r1->tkey | ((sorted != 1) & r1->tsorted)))
3051 : break;
3052 : }
3053 113 : if (BATtdense(r1))
3054 94 : r1->tseqbase = cnt > 0 ? dst1[0] : 0;
3055 113 : if (r2) {
3056 97 : BATsetcount(r2, cnt);
3057 97 : dst2 = (oid *) Tloc(r2, 0);
3058 97 : r2->tkey = true;
3059 97 : r2->tsorted = true;
3060 97 : r2->trevsorted = true;
3061 97 : r2->tseqbase = 0;
3062 97 : r2->tnil = false;
3063 97 : r2->tnonil = true;
3064 404 : for (ncnt = 1; ncnt < cnt; ncnt++) {
3065 315 : if (dst2[ncnt - 1] == dst2[ncnt]) {
3066 45 : r2->tseqbase = oid_nil;
3067 45 : r2->tkey = false;
3068 270 : } else if (dst2[ncnt - 1] < dst2[ncnt]) {
3069 263 : r2->trevsorted = false;
3070 263 : if (dst2[ncnt - 1] + 1 != dst2[ncnt])
3071 80 : r2->tseqbase = oid_nil;
3072 : } else {
3073 7 : assert(sorted != 2);
3074 7 : r2->tsorted = false;
3075 7 : r2->tseqbase = oid_nil;
3076 7 : r2->tkey = false;
3077 : }
3078 410 : if (!(r2->trevsorted | BATtdense(r2) | r2->tkey | ((sorted != 2) & r2->tsorted)))
3079 : break;
3080 : }
3081 97 : if (BATtdense(r2))
3082 66 : r2->tseqbase = cnt > 0 ? dst2[0] : 0;
3083 : }
3084 113 : TRC_DEBUG(ALGO, "l=%s,rl=%s,rh=%s -> "
3085 : "(" ALGOBATFMT "," ALGOOPTBATFMT ")\n",
3086 : BATgetId(l), BATgetId(rl), BATgetId(rh),
3087 : ALGOBATPAR(r1), ALGOOPTBATPAR(r2));
3088 113 : bat_iterator_end(&li);
3089 114 : bat_iterator_end(&rli);
3090 114 : bat_iterator_end(&rhi);
3091 114 : return GDK_SUCCEED;
3092 :
3093 0 : bailout:
3094 0 : bat_iterator_end(&li);
3095 0 : bat_iterator_end(&rli);
3096 0 : bat_iterator_end(&rhi);
3097 0 : BBPreclaim(r1);
3098 0 : BBPreclaim(r2);
3099 : return GDK_FAIL;
3100 : }
|