Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : static inline oid *
18 105214573 : buninsfix(BAT *bn, oid *a, BUN i, oid v, BUN g, BUN m)
19 : {
20 105214573 : if (i == BATcapacity(bn)) {
21 32 : BATsetcount(bn, i);
22 32 : if (BATextend(bn, MIN(BATcapacity(bn) + g, m)) != GDK_SUCCEED)
23 : return NULL;
24 32 : a = (oid *) Tloc(bn, 0);
25 : }
26 105214573 : a[i] = v;
27 105214573 : return a;
28 : }
29 :
30 : BAT *
31 2090798 : virtualize(BAT *bn)
32 : {
33 : /* input must be a valid candidate list or NULL */
34 2090798 : if (bn == NULL)
35 : return NULL;
36 2090798 : if ((bn->ttype != TYPE_void && bn->ttype != TYPE_oid) || !bn->tkey || !bn->tsorted) {
37 0 : fprintf(stderr, "#bn type %d nil %d key %d sorted %d\n",
38 0 : bn->ttype, is_oid_nil(bn->tseqbase),
39 0 : bn->tkey, bn->tsorted);
40 0 : fflush(stderr);
41 : }
42 2091433 : assert(((bn->ttype == TYPE_void && !is_oid_nil(bn->tseqbase)) ||
43 : bn->ttype == TYPE_oid) &&
44 : bn->tkey && bn->tsorted);
45 2091433 : assert(BBP_refs(bn->batCacheid) == 1);
46 2091433 : assert(BBP_lrefs(bn->batCacheid) == 0);
47 : /* since bn has unique and strictly ascending values, we can
48 : * easily check whether the column is dense */
49 2091433 : if (bn->ttype == TYPE_oid &&
50 1483429 : (BATcount(bn) <= 1 ||
51 406440 : * (const oid *) Tloc(bn, 0) + BATcount(bn) - 1 ==
52 406440 : * (const oid *) Tloc(bn, BATcount(bn) - 1))) {
53 : /* column is dense, replace by virtual oid */
54 1119145 : oid tseq; /* work around bug in Intel compiler */
55 1119145 : if (BATcount(bn) == 0)
56 : tseq = 0;
57 : else
58 432705 : tseq = * (const oid *) Tloc(bn, 0);
59 1119145 : TRC_DEBUG(ALGO, ALGOBATFMT ",seq=" OIDFMT "\n",
60 : ALGOBATPAR(bn), tseq);
61 1118980 : bn->tseqbase = tseq;
62 1118980 : if (VIEWtparent(bn)) {
63 16 : Heap *h = GDKmalloc(sizeof(Heap));
64 16 : if (h == NULL) {
65 0 : BBPunfix(bn->batCacheid);
66 0 : return NULL;
67 : }
68 16 : *h = *bn->theap;
69 16 : settailname(h, BBP_physical(bn->batCacheid), TYPE_oid, 0);
70 16 : h->parentid = bn->batCacheid;
71 16 : h->base = NULL;
72 16 : h->hasfile = false;
73 16 : ATOMIC_INIT(&h->refs, 1);
74 16 : if (bn->theap->parentid != bn->batCacheid)
75 16 : BBPrelease(bn->theap->parentid);
76 16 : HEAPdecref(bn->theap, false);
77 16 : bn->theap = h;
78 : } else {
79 1118964 : HEAPfree(bn->theap, true);
80 : }
81 1119209 : bn->theap->storage = bn->theap->newstorage = STORE_MEM;
82 1119209 : bn->theap->size = 0;
83 1119209 : bn->ttype = TYPE_void;
84 1119209 : bn->twidth = 0;
85 1119209 : bn->tshift = 0;
86 : }
87 :
88 : return bn;
89 : }
90 :
91 : #define HASHloop_bound(bi, h, hb, v, lo, hi) \
92 : for (hb = HASHget(h, HASHprobe((h), v)); \
93 : hb != BUN_NONE; \
94 : hb = HASHgetlink(h,hb)) \
95 : if (hb >= (lo) && hb < (hi) && \
96 : (cmp == NULL || \
97 : (*cmp)(v, BUNtail(bi, hb)) == 0))
98 :
99 : static BAT *
100 65175 : hashselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
101 : const void *tl, BUN maximum, bool havehash, bool phash,
102 : const char **algo)
103 : {
104 65175 : BUN i, cnt;
105 65175 : oid o, *restrict dst;
106 65175 : BUN l, h, d = 0;
107 65175 : oid seq;
108 65175 : int (*cmp)(const void *, const void *);
109 65175 : BAT *b2 = NULL;
110 65175 : BATiter pbi = {0};
111 :
112 65175 : size_t counter = 0;
113 65175 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
114 :
115 65293 : assert(bn->ttype == TYPE_oid);
116 65293 : seq = bi->b->hseqbase;
117 65293 : l = ci->seq - seq;
118 65293 : h = canditer_last(ci) + 1 - seq;
119 :
120 65161 : *algo = "hashselect";
121 65161 : if (phash && (b2 = BATdescriptor(VIEWtparent(bi->b))) != NULL) {
122 64536 : *algo = "hashselect on parent";
123 64536 : TRC_DEBUG(ALGO, ALGOBATFMT
124 : " using parent(" ALGOBATFMT ") "
125 : "for hash\n",
126 : ALGOBATPAR(bi->b),
127 : ALGOBATPAR(b2));
128 64536 : d = bi->baseoff - b2->tbaseoff;
129 64536 : l += d;
130 64536 : h += d;
131 64536 : pbi = bat_iterator(b2);
132 64536 : bi = &pbi;
133 : } else {
134 : phash = false;
135 : }
136 :
137 65372 : if (!havehash) {
138 3 : if (BAThash(bi->b) != GDK_SUCCEED) {
139 0 : BBPreclaim(bn);
140 0 : BBPreclaim(b2);
141 0 : if (phash)
142 0 : bat_iterator_end(&pbi);
143 0 : return NULL;
144 : }
145 3 : MT_rwlock_rdlock(&bi->b->thashlock);
146 3 : if (bi->b->thash == NULL) {
147 0 : GDKerror("Hash destroyed before we could use it\n");
148 0 : goto bailout;
149 : }
150 : }
151 130741 : switch (ATOMbasetype(bi->type)) {
152 : case TYPE_bte:
153 : case TYPE_sht:
154 : cmp = NULL; /* no need to compare: "hash" is perfect */
155 : break;
156 65370 : default:
157 65370 : cmp = ATOMcompare(bi->type);
158 65370 : break;
159 : }
160 65372 : dst = (oid *) Tloc(bn, 0);
161 65372 : cnt = 0;
162 65372 : if (ci->tpe != cand_dense) {
163 40424 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
164 15860 : GDK_CHECK_TIMEOUT(qry_ctx, counter,
165 : GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
166 15860 : o = (oid) (i + seq - d);
167 15860 : if (canditer_contains(ci, o)) {
168 30927 : dst = buninsfix(bn, dst, cnt, o,
169 15464 : maximum - BATcapacity(bn),
170 : maximum);
171 15463 : if (dst == NULL)
172 0 : goto bailout;
173 15463 : cnt++;
174 : }
175 : }
176 : } else {
177 173242 : HASHloop_bound(*bi, bi->b->thash, i, tl, l, h) {
178 69186 : GDK_CHECK_TIMEOUT(qry_ctx, counter,
179 : GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
180 69186 : o = (oid) (i + seq - d);
181 138296 : dst = buninsfix(bn, dst, cnt, o,
182 69186 : maximum - BATcapacity(bn),
183 : maximum);
184 69110 : if (dst == NULL)
185 0 : goto bailout;
186 69110 : cnt++;
187 : }
188 : }
189 65373 : MT_rwlock_rdunlock(&bi->b->thashlock);
190 65405 : BBPreclaim(b2);
191 65391 : BATsetcount(bn, cnt);
192 65382 : bn->tkey = true;
193 65382 : if (cnt > 1) {
194 : /* hash chains produce results in the order high to
195 : * low, so we just need to reverse */
196 34904 : for (l = 0, h = BATcount(bn) - 1; l < h; l++, h--) {
197 31971 : o = dst[l];
198 31971 : dst[l] = dst[h];
199 31971 : dst[h] = o;
200 : }
201 : }
202 65382 : bn->tsorted = true;
203 65382 : bn->trevsorted = bn->batCount <= 1;
204 65382 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? *dst : oid_nil;
205 65382 : if (phash)
206 64499 : bat_iterator_end(&pbi);
207 : return bn;
208 :
209 0 : bailout:
210 0 : MT_rwlock_rdunlock(&bi->b->thashlock);
211 0 : if (phash)
212 0 : bat_iterator_end(&pbi);
213 0 : BBPreclaim(b2);
214 0 : BBPreclaim(bn);
215 0 : return NULL;
216 : }
217 :
218 : /* core scan select loop with & without candidates */
219 : #define scanloop(NAME,canditer_next,TEST) \
220 : do { \
221 : BUN ncand = ci->ncand; \
222 : *algo = "select: " #NAME " " #TEST " (" #canditer_next ")"; \
223 : if (BATcapacity(bn) < maximum) { \
224 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) { \
225 : o = canditer_next(ci); \
226 : v = src[o-hseq]; \
227 : if (TEST) { \
228 : dst = buninsfix(bn, dst, cnt, o, \
229 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p) \
230 : * (dbl) (ncand-p) * 1.1 + 1024), \
231 : maximum); \
232 : if (dst == NULL) { \
233 : goto bailout; \
234 : } \
235 : cnt++; \
236 : } \
237 : } \
238 : } else { \
239 : TIMEOUT_LOOP(ncand, qry_ctx) { \
240 : o = canditer_next(ci); \
241 : v = src[o-hseq]; \
242 : assert(cnt < BATcapacity(bn)); \
243 : dst[cnt] = o; \
244 : cnt += (TEST) != 0; \
245 : } \
246 : } \
247 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx)); \
248 : } while (false)
249 :
250 : /* argument list for type-specific core scan select function call */
251 : #define scanargs \
252 : bi, ci, bn, tl, th, li, hi, equi, anti, nil_matches, lval, hval, \
253 : lnil, cnt, bi->b->hseqbase, dst, maximum, algo
254 :
255 : #define PREVVALUEbte(x) ((x) - 1)
256 : #define PREVVALUEsht(x) ((x) - 1)
257 : #define PREVVALUEint(x) ((x) - 1)
258 : #define PREVVALUElng(x) ((x) - 1)
259 : #ifdef HAVE_HGE
260 : #define PREVVALUEhge(x) ((x) - 1)
261 : #endif
262 : #define PREVVALUEoid(x) ((x) - 1)
263 : #define PREVVALUEflt(x) nextafterf((x), -GDK_flt_max)
264 : #define PREVVALUEdbl(x) nextafter((x), -GDK_dbl_max)
265 :
266 : #define NEXTVALUEbte(x) ((x) + 1)
267 : #define NEXTVALUEsht(x) ((x) + 1)
268 : #define NEXTVALUEint(x) ((x) + 1)
269 : #define NEXTVALUElng(x) ((x) + 1)
270 : #ifdef HAVE_HGE
271 : #define NEXTVALUEhge(x) ((x) + 1)
272 : #endif
273 : #define NEXTVALUEoid(x) ((x) + 1)
274 : #define NEXTVALUEflt(x) nextafterf((x), GDK_flt_max)
275 : #define NEXTVALUEdbl(x) nextafter((x), GDK_dbl_max)
276 :
277 : #define MINVALUEbte GDK_bte_min
278 : #define MINVALUEsht GDK_sht_min
279 : #define MINVALUEint GDK_int_min
280 : #define MINVALUElng GDK_lng_min
281 : #ifdef HAVE_HGE
282 : #define MINVALUEhge GDK_hge_min
283 : #endif
284 : #define MINVALUEoid GDK_oid_min
285 : #define MINVALUEflt GDK_flt_min
286 : #define MINVALUEdbl GDK_dbl_min
287 :
288 : #define MAXVALUEbte GDK_bte_max
289 : #define MAXVALUEsht GDK_sht_max
290 : #define MAXVALUEint GDK_int_max
291 : #define MAXVALUElng GDK_lng_max
292 : #ifdef HAVE_HGE
293 : #define MAXVALUEhge GDK_hge_max
294 : #endif
295 : #define MAXVALUEoid GDK_oid_max
296 : #define MAXVALUEflt GDK_flt_max
297 : #define MAXVALUEdbl GDK_dbl_max
298 :
299 : /* definition of type-specific core scan select function */
300 : #define scanfunc(NAME, TYPE, ISDENSE) \
301 : static BUN \
302 : NAME##_##TYPE(BATiter *bi, struct canditer *restrict ci, BAT *bn, \
303 : const TYPE *tl, const TYPE *th, bool li, bool hi, \
304 : bool equi, bool anti, bool nil_matches, bool lval, \
305 : bool hval, bool lnil, BUN cnt, const oid hseq, \
306 : oid *restrict dst, BUN maximum, const char **algo) \
307 : { \
308 : TYPE vl = *tl; \
309 : TYPE vh = *th; \
310 : TYPE v; \
311 : const TYPE minval = MINVALUE##TYPE; \
312 : const TYPE maxval = MAXVALUE##TYPE; \
313 : const TYPE *src = (const TYPE *) bi->base; \
314 : oid o; \
315 : BUN p; \
316 : (void) li; \
317 : (void) hi; \
318 : (void) lval; \
319 : (void) hval; \
320 : QryCtx *qry_ctx = MT_thread_get_qry_ctx(); \
321 : /* Normalize the variables li, hi, lval, hval, possibly */ \
322 : /* changing anti in the process. This works for all */ \
323 : /* (and only) numeric types. */ \
324 : \
325 : /* Note that the expression x < v is equivalent to x <= */ \
326 : /* v' where v' is the next smaller value in the domain */ \
327 : /* of v (similarly for x > v). Also note that for */ \
328 : /* floating point numbers there actually is such a */ \
329 : /* value. In fact, there is a function in standard C */ \
330 : /* that calculates that value. */ \
331 : \
332 : /* The result is: */ \
333 : /* li == !anti, hi == !anti, lval == true, hval == true */ \
334 : /* This means that all ranges that we check for are */ \
335 : /* closed ranges. If a range is one-sided, we fill in */ \
336 : /* the minimum resp. maximum value in the domain so that */ \
337 : /* we create a closed range. */ \
338 : if (anti && li) { \
339 : /* -inf < x < vl === -inf < x <= vl-1 */ \
340 : if (vl == MINVALUE##TYPE) { \
341 : /* -inf < x < MIN || *th <[=] x < +inf */ \
342 : /* degenerates into half range */ \
343 : /* *th <[=] x < +inf */ \
344 : anti = false; \
345 : vl = vh; \
346 : li = !hi; \
347 : hval = false; \
348 : /* further dealt with below */ \
349 : } else { \
350 : vl = PREVVALUE##TYPE(vl); \
351 : li = false; \
352 : } \
353 : } \
354 : if (anti && hi) { \
355 : /* vl < x < +inf === vl+1 <= x < +inf */ \
356 : if (vh == MAXVALUE##TYPE) { \
357 : /* -inf < x <[=] *tl || MAX > x > +inf */ \
358 : /* degenerates into half range */ \
359 : /* -inf < x <[=] *tl */ \
360 : anti = false; \
361 : vh = vl; \
362 : hi = !li; \
363 : lval = false; \
364 : /* further dealt with below */ \
365 : } else { \
366 : vh = NEXTVALUE##TYPE(vh); \
367 : hi = false; \
368 : } \
369 : } \
370 : if (!anti) { \
371 : if (lval) { \
372 : /* range bounded on left */ \
373 : if (!li) { \
374 : /* open range on left */ \
375 : if (vl == MAXVALUE##TYPE) { \
376 : *algo = "select: empty range"; \
377 : return 0; \
378 : } \
379 : /* vl < x === vl+1 <= x */ \
380 : vl = NEXTVALUE##TYPE(vl); \
381 : li = true; \
382 : } \
383 : } else { \
384 : /* -inf, i.e. smallest value */ \
385 : vl = MINVALUE##TYPE; \
386 : li = true; \
387 : lval = true; \
388 : } \
389 : if (hval) { \
390 : /* range bounded on right */ \
391 : if (!hi) { \
392 : /* open range on right */ \
393 : if (vh == MINVALUE##TYPE) { \
394 : *algo = "select: empty range"; \
395 : return 0; \
396 : } \
397 : /* x < vh === x <= vh-1 */ \
398 : vh = PREVVALUE##TYPE(vh); \
399 : hi = true; \
400 : } \
401 : } else { \
402 : /* +inf, i.e. largest value */ \
403 : vh = MAXVALUE##TYPE; \
404 : hi = true; \
405 : hval = true; \
406 : } \
407 : if (vl > vh) { \
408 : *algo = "select: empty range"; \
409 : return 0; \
410 : } \
411 : } \
412 : /* if anti is set, we can now check */ \
413 : /* (x <= vl || x >= vh) && x != nil */ \
414 : /* if anti is not set, we can check just */ \
415 : /* vl <= x && x <= vh */ \
416 : /* if equi==true, the check is x == vl */ \
417 : /* note that this includes the check for != nil */ \
418 : assert(li == !anti); \
419 : assert(hi == !anti); \
420 : assert(lval); \
421 : assert(hval); \
422 : if (equi) { \
423 : if (lnil) \
424 : scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v)); \
425 : else \
426 : scanloop(NAME, canditer_next##ISDENSE, v == vl); \
427 : } else if (anti) { \
428 : if (bi->nonil) { \
429 : scanloop(NAME, canditer_next##ISDENSE, (v <= vl || v >= vh)); \
430 : } else if (nil_matches) { \
431 : scanloop(NAME, canditer_next##ISDENSE, is_##TYPE##_nil(v) || v <= vl || v >= vh); \
432 : } else { \
433 : scanloop(NAME, canditer_next##ISDENSE, !is_##TYPE##_nil(v) && (v <= vl || v >= vh)); \
434 : } \
435 : } else if (bi->nonil && vl == minval) { \
436 : scanloop(NAME, canditer_next##ISDENSE, v <= vh); \
437 : } else if (vh == maxval) { \
438 : scanloop(NAME, canditer_next##ISDENSE, v >= vl); \
439 : } else { \
440 : scanloop(NAME, canditer_next##ISDENSE, v >= vl && v <= vh); \
441 : } \
442 : return cnt; \
443 : bailout: \
444 : BBPreclaim(bn); \
445 : return BUN_NONE; \
446 : }
447 :
448 : static BUN
449 551 : fullscan_any(BATiter *bi, struct canditer *restrict ci, BAT *bn,
450 : const void *tl, const void *th,
451 : bool li, bool hi, bool equi, bool anti, bool nil_matches,
452 : bool lval, bool hval, bool lnil, BUN cnt, const oid hseq,
453 : oid *restrict dst, BUN maximum, const char **algo)
454 : {
455 551 : const void *v;
456 551 : const void *restrict nil = ATOMnilptr(bi->type);
457 551 : int (*cmp)(const void *, const void *) = ATOMcompare(bi->type);
458 551 : oid o;
459 551 : BUN p, ncand = ci->ncand;
460 551 : int c;
461 :
462 551 : (void) maximum;
463 551 : (void) lnil;
464 551 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
465 :
466 551 : if (equi) {
467 540 : *algo = "select: fullscan equi";
468 540 : if (ci->tpe == cand_dense) {
469 9815569 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
470 9813911 : o = canditer_next_dense(ci);
471 9813911 : v = BUNtail(*bi, o-hseq);
472 9822378 : if ((*cmp)(tl, v) == 0) {
473 6514447 : dst = buninsfix(bn, dst, cnt, o,
474 2165491 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
475 2165491 : * (dbl) (ncand-p) * 1.1 + 1024),
476 : maximum);
477 2183465 : if (dst == NULL) {
478 0 : BBPreclaim(bn);
479 0 : return BUN_NONE;
480 : }
481 2183465 : cnt++;
482 : }
483 : }
484 : } else {
485 1545253 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
486 1544449 : o = canditer_next(ci);
487 1538118 : v = BUNtail(*bi, o-hseq);
488 1546516 : if ((*cmp)(tl, v) == 0) {
489 61315 : dst = buninsfix(bn, dst, cnt, o,
490 20437 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
491 20437 : * (dbl) (ncand-p) * 1.1 + 1024),
492 : maximum);
493 20441 : if (dst == NULL) {
494 0 : BBPreclaim(bn);
495 0 : return BUN_NONE;
496 : }
497 20441 : cnt++;
498 : }
499 : }
500 : }
501 11 : } else if (anti) {
502 2 : *algo = "select: fullscan anti";
503 2 : if (ci->tpe == cand_dense) {
504 14 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
505 8 : o = canditer_next_dense(ci);
506 8 : v = BUNtail(*bi, o-hseq);
507 8 : bool isnil = nil != NULL && (*cmp)(v, nil) == 0;
508 8 : if ((nil_matches && isnil) ||
509 8 : (!isnil &&
510 8 : ((lval &&
511 8 : ((c = (*cmp)(tl, v)) > 0 ||
512 6 : (!li && c == 0))) ||
513 6 : (hval &&
514 6 : ((c = (*cmp)(th, v)) < 0 ||
515 4 : (!hi && c == 0)))))) {
516 12 : dst = buninsfix(bn, dst, cnt, o,
517 4 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
518 4 : * (dbl) (ncand-p) * 1.1 + 1024),
519 : maximum);
520 4 : if (dst == NULL) {
521 0 : BBPreclaim(bn);
522 0 : return BUN_NONE;
523 : }
524 4 : cnt++;
525 : }
526 : }
527 : } else {
528 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
529 0 : o = canditer_next(ci);
530 0 : v = BUNtail(*bi, o-hseq);
531 0 : bool isnil = nil != NULL && (*cmp)(v, nil) == 0;
532 0 : if ((nil_matches && isnil) ||
533 0 : (!isnil &&
534 0 : ((lval &&
535 0 : ((c = (*cmp)(tl, v)) > 0 ||
536 0 : (!li && c == 0))) ||
537 0 : (hval &&
538 0 : ((c = (*cmp)(th, v)) < 0 ||
539 0 : (!hi && c == 0)))))) {
540 0 : dst = buninsfix(bn, dst, cnt, o,
541 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
542 0 : * (dbl) (ncand-p) * 1.1 + 1024),
543 : maximum);
544 0 : if (dst == NULL) {
545 0 : BBPreclaim(bn);
546 0 : return BUN_NONE;
547 : }
548 0 : cnt++;
549 : }
550 : }
551 : }
552 : } else {
553 9 : *algo = "select: fullscan range";
554 9 : if (ci->tpe == cand_dense) {
555 4910 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
556 4889 : o = canditer_next_dense(ci);
557 4889 : v = BUNtail(*bi, o-hseq);
558 4889 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
559 4027 : ((!lval ||
560 4027 : (c = cmp(tl, v)) < 0 ||
561 4889 : (li && c == 0)) &&
562 3906 : (!hval ||
563 3906 : (c = cmp(th, v)) > 0 ||
564 3662 : (hi && c == 0)))) {
565 798 : dst = buninsfix(bn, dst, cnt, o,
566 266 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
567 266 : * (dbl) (ncand-p) * 1.1 + 1024),
568 : maximum);
569 266 : if (dst == NULL) {
570 0 : BBPreclaim(bn);
571 0 : return BUN_NONE;
572 : }
573 266 : cnt++;
574 : }
575 : }
576 : } else {
577 232 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
578 226 : o = canditer_next(ci);
579 226 : v = BUNtail(*bi, o-hseq);
580 226 : if ((nil == NULL || (*cmp)(v, nil) != 0) &&
581 226 : ((!lval ||
582 226 : (c = cmp(tl, v)) < 0 ||
583 226 : (li && c == 0)) &&
584 0 : (!hval ||
585 0 : (c = cmp(th, v)) > 0 ||
586 0 : (hi && c == 0)))) {
587 474 : dst = buninsfix(bn, dst, cnt, o,
588 158 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
589 158 : * (dbl) (ncand-p) * 1.1 + 1024),
590 : maximum);
591 158 : if (dst == NULL) {
592 0 : BBPreclaim(bn);
593 0 : return BUN_NONE;
594 : }
595 158 : cnt++;
596 : }
597 : }
598 : }
599 : }
600 551 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
601 : return cnt;
602 0 : bailout:
603 0 : BBPreclaim(bn);
604 : return BUN_NONE;
605 : }
606 :
607 : static BUN
608 199850 : fullscan_str(BATiter *bi, struct canditer *restrict ci, BAT *bn,
609 : const char *tl, const char *th,
610 : bool li, bool hi, bool equi, bool anti, bool nil_matches,
611 : bool lval, bool hval, bool lnil, BUN cnt, const oid hseq,
612 : oid *restrict dst, BUN maximum, const char **algo)
613 : {
614 199850 : var_t pos;
615 199850 : BUN p, ncand = ci->ncand;
616 199850 : oid o;
617 199850 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
618 :
619 199875 : if (anti && tl == th && !bi->nonil && GDK_ELIMDOUBLES(bi->vh) &&
620 258 : strcmp(tl, str_nil) != 0 &&
621 129 : strLocate(bi->vh, str_nil) == (var_t) -2) {
622 : /* anti-equi select for non-nil value, and there are no
623 : * nils, so we can use fast path; trigger by setting
624 : * nonil */
625 129 : bi->nonil = true;
626 : }
627 199875 : if (!((equi ||
628 533 : (anti && tl == th && (bi->nonil || strcmp(tl, str_nil) == 0))) &&
629 199864 : GDK_ELIMDOUBLES(bi->vh)))
630 449 : return fullscan_any(bi, ci, bn, tl, th, li, hi, equi, anti,
631 : nil_matches, lval, hval, lnil, cnt, hseq,
632 : dst, maximum, algo);
633 199426 : if ((pos = strLocate(bi->vh, tl)) == (var_t) -2) {
634 889 : if (anti) {
635 : /* return the whole shebang */
636 20 : *algo = "select: fullscan anti-equi strelim (all)";
637 20 : if (BATextend(bn, ncand) != GDK_SUCCEED) {
638 0 : BBPreclaim(bn);
639 0 : return BUN_NONE;
640 : }
641 20 : dst = Tloc(bn, 0);
642 1262 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
643 1222 : dst[p] = canditer_next(ci);
644 : }
645 20 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
646 : return ncand;
647 : }
648 869 : *algo = "select: fullscan equi strelim (nomatch)";
649 869 : return 0;
650 : }
651 198526 : if (pos == (var_t) -1) {
652 0 : *algo = NULL;
653 0 : BBPreclaim(bn);
654 0 : return BUN_NONE;
655 : }
656 198526 : *algo = anti ? "select: fullscan anti-equi strelim" : "select: fullscan equi strelim";
657 198526 : assert(pos >= GDK_VAROFFSET);
658 198526 : switch (bi->width) {
659 163540 : case 1: {
660 163540 : const unsigned char *ptr = (const unsigned char *) bi->base;
661 163540 : pos -= GDK_VAROFFSET;
662 163540 : if (ci->tpe == cand_dense) {
663 162650 : if (anti) {
664 2676 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
665 2262 : o = canditer_next_dense(ci);
666 2262 : if (ptr[o - hseq] != pos) {
667 6174 : dst = buninsfix(bn, dst, cnt, o,
668 2058 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
669 2058 : * (dbl) (ncand-p) * 1.1 + 1024),
670 : maximum);
671 2058 : if (dst == NULL) {
672 0 : BBPreclaim(bn);
673 0 : return BUN_NONE;
674 : }
675 2058 : cnt++;
676 : }
677 : }
678 : } else {
679 22299243 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
680 21809538 : o = canditer_next_dense(ci);
681 21809538 : if (ptr[o - hseq] == pos) {
682 22774978 : dst = buninsfix(bn, dst, cnt, o,
683 7591649 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
684 7591649 : * (dbl) (ncand-p) * 1.1 + 1024),
685 : maximum);
686 7591680 : if (dst == NULL) {
687 0 : BBPreclaim(bn);
688 0 : return BUN_NONE;
689 : }
690 7591680 : cnt++;
691 : }
692 : }
693 : }
694 : } else {
695 890 : if (anti) {
696 1256 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
697 1196 : o = canditer_next(ci);
698 1196 : if (ptr[o - hseq] != pos) {
699 303 : dst = buninsfix(bn, dst, cnt, o,
700 101 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
701 101 : * (dbl) (ncand-p) * 1.1 + 1024),
702 : maximum);
703 101 : if (dst == NULL) {
704 0 : BBPreclaim(bn);
705 0 : return BUN_NONE;
706 : }
707 101 : cnt++;
708 : }
709 : }
710 : } else {
711 11669090 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
712 11665327 : o = canditer_next(ci);
713 11668467 : if (ptr[o - hseq] == pos) {
714 11830469 : dst = buninsfix(bn, dst, cnt, o,
715 3944536 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
716 3944536 : * (dbl) (ncand-p) * 1.1 + 1024),
717 : maximum);
718 3941397 : if (dst == NULL) {
719 0 : BBPreclaim(bn);
720 0 : return BUN_NONE;
721 : }
722 3941397 : cnt++;
723 : }
724 : }
725 : }
726 : }
727 : break;
728 : }
729 34983 : case 2: {
730 34983 : const unsigned short *ptr = (const unsigned short *) bi->base;
731 34983 : pos -= GDK_VAROFFSET;
732 34983 : if (ci->tpe == cand_dense) {
733 28152 : if (anti) {
734 1789 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
735 874 : o = canditer_next_dense(ci);
736 874 : if (ptr[o - hseq] != pos) {
737 1842 : dst = buninsfix(bn, dst, cnt, o,
738 614 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
739 614 : * (dbl) (ncand-p) * 1.1 + 1024),
740 : maximum);
741 614 : if (dst == NULL) {
742 0 : BBPreclaim(bn);
743 0 : return BUN_NONE;
744 : }
745 614 : cnt++;
746 : }
747 : }
748 : } else {
749 2260874 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
750 2177045 : o = canditer_next_dense(ci);
751 2177045 : if (ptr[o - hseq] == pos) {
752 155784 : dst = buninsfix(bn, dst, cnt, o,
753 51895 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
754 51895 : * (dbl) (ncand-p) * 1.1 + 1024),
755 : maximum);
756 51994 : if (dst == NULL) {
757 0 : BBPreclaim(bn);
758 0 : return BUN_NONE;
759 : }
760 51994 : cnt++;
761 : }
762 : }
763 : }
764 : } else {
765 6831 : if (anti) {
766 1994 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
767 1877 : o = canditer_next(ci);
768 1877 : if (ptr[o - hseq] != pos) {
769 5142 : dst = buninsfix(bn, dst, cnt, o,
770 1714 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
771 1714 : * (dbl) (ncand-p) * 1.1 + 1024),
772 : maximum);
773 1714 : if (dst == NULL) {
774 0 : BBPreclaim(bn);
775 0 : return BUN_NONE;
776 : }
777 1714 : cnt++;
778 : }
779 : }
780 : } else {
781 2322256 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
782 2301788 : o = canditer_next(ci);
783 2301584 : if (ptr[o - hseq] == pos) {
784 233638 : dst = buninsfix(bn, dst, cnt, o,
785 77803 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
786 77803 : * (dbl) (ncand-p) * 1.1 + 1024),
787 : maximum);
788 78032 : if (dst == NULL) {
789 0 : BBPreclaim(bn);
790 0 : return BUN_NONE;
791 : }
792 78032 : cnt++;
793 : }
794 : }
795 : }
796 : }
797 : break;
798 : }
799 : #if SIZEOF_VAR_T == 8
800 1 : case 4: {
801 1 : const unsigned int *ptr = (const unsigned int *) bi->base;
802 1 : if (ci->tpe == cand_dense) {
803 1 : if (anti) {
804 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
805 0 : o = canditer_next_dense(ci);
806 0 : if (ptr[o - hseq] != pos) {
807 0 : dst = buninsfix(bn, dst, cnt, o,
808 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
809 0 : * (dbl) (ncand-p) * 1.1 + 1024),
810 : maximum);
811 0 : if (dst == NULL) {
812 0 : BBPreclaim(bn);
813 0 : return BUN_NONE;
814 : }
815 0 : cnt++;
816 : }
817 : }
818 : } else {
819 973 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
820 970 : o = canditer_next_dense(ci);
821 970 : if (ptr[o - hseq] == pos) {
822 744 : dst = buninsfix(bn, dst, cnt, o,
823 248 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
824 248 : * (dbl) (ncand-p) * 1.1 + 1024),
825 : maximum);
826 248 : if (dst == NULL) {
827 0 : BBPreclaim(bn);
828 0 : return BUN_NONE;
829 : }
830 248 : cnt++;
831 : }
832 : }
833 : }
834 : } else {
835 0 : if (anti) {
836 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
837 0 : o = canditer_next(ci);
838 0 : if (ptr[o - hseq] != pos) {
839 0 : dst = buninsfix(bn, dst, cnt, o,
840 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
841 0 : * (dbl) (ncand-p) * 1.1 + 1024),
842 : maximum);
843 0 : if (dst == NULL) {
844 0 : BBPreclaim(bn);
845 0 : return BUN_NONE;
846 : }
847 0 : cnt++;
848 : }
849 : }
850 : } else {
851 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
852 0 : o = canditer_next(ci);
853 0 : if (ptr[o - hseq] == pos) {
854 0 : dst = buninsfix(bn, dst, cnt, o,
855 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
856 0 : * (dbl) (ncand-p) * 1.1 + 1024),
857 : maximum);
858 0 : if (dst == NULL) {
859 0 : BBPreclaim(bn);
860 0 : return BUN_NONE;
861 : }
862 0 : cnt++;
863 : }
864 : }
865 : }
866 : }
867 : break;
868 : }
869 : #endif
870 2 : default: {
871 2 : const var_t *ptr = (const var_t *) bi->base;
872 2 : if (ci->tpe == cand_dense) {
873 2 : if (anti) {
874 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
875 0 : o = canditer_next_dense(ci);
876 0 : if (ptr[o - hseq] != pos) {
877 0 : dst = buninsfix(bn, dst, cnt, o,
878 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
879 0 : * (dbl) (ncand-p) * 1.1 + 1024),
880 : maximum);
881 0 : if (dst == NULL) {
882 0 : BBPreclaim(bn);
883 0 : return BUN_NONE;
884 : }
885 0 : cnt++;
886 : }
887 : }
888 : } else {
889 26 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
890 20 : o = canditer_next_dense(ci);
891 20 : if (ptr[o - hseq] == pos) {
892 6 : dst = buninsfix(bn, dst, cnt, o,
893 2 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
894 2 : * (dbl) (ncand-p) * 1.1 + 1024),
895 : maximum);
896 2 : if (dst == NULL) {
897 0 : BBPreclaim(bn);
898 0 : return BUN_NONE;
899 : }
900 2 : cnt++;
901 : }
902 : }
903 : }
904 : } else {
905 0 : if (anti) {
906 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
907 0 : o = canditer_next(ci);
908 0 : if (ptr[o - hseq] != pos) {
909 0 : dst = buninsfix(bn, dst, cnt, o,
910 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
911 0 : * (dbl) (ncand-p) * 1.1 + 1024),
912 : maximum);
913 0 : if (dst == NULL) {
914 0 : BBPreclaim(bn);
915 0 : return BUN_NONE;
916 : }
917 0 : cnt++;
918 : }
919 : }
920 : } else {
921 0 : TIMEOUT_LOOP_IDX(p, ncand, qry_ctx) {
922 0 : o = canditer_next(ci);
923 0 : if (ptr[o - hseq] == pos) {
924 0 : dst = buninsfix(bn, dst, cnt, o,
925 0 : (BUN) ((dbl) cnt / (dbl) (p == 0 ? 1 : p)
926 0 : * (dbl) (ncand-p) * 1.1 + 1024),
927 : maximum);
928 0 : if (dst == NULL) {
929 0 : BBPreclaim(bn);
930 0 : return BUN_NONE;
931 : }
932 0 : cnt++;
933 : }
934 : }
935 : }
936 : }
937 : break;
938 : }
939 : }
940 198680 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
941 : return cnt;
942 0 : bailout:
943 0 : BBPreclaim(bn);
944 : return BUN_NONE;
945 : }
946 :
947 : /* scan select type switch */
948 : #ifdef HAVE_HGE
949 : #define scanfunc_hge(NAME, ISDENSE) \
950 : scanfunc(NAME, hge, ISDENSE)
951 : #else
952 : #define scanfunc_hge(NAME, ISDENSE)
953 : #endif
954 : #define scan_sel(NAME, ISDENSE) \
955 : scanfunc(NAME, bte, ISDENSE) \
956 : scanfunc(NAME, sht, ISDENSE) \
957 : scanfunc(NAME, int, ISDENSE) \
958 : scanfunc(NAME, flt, ISDENSE) \
959 : scanfunc(NAME, dbl, ISDENSE) \
960 : scanfunc(NAME, lng, ISDENSE) \
961 : scanfunc_hge(NAME, ISDENSE)
962 :
963 : /* scan select */
964 186296453 : scan_sel(fullscan, )
965 1274656305 : scan_sel(densescan, _dense)
966 :
967 : #if 0
968 : /* some programs that produce editor tags files don't recognize the
969 : * scanselect function because before it are the scan_del macro
970 : * calls that don't look like function definitions or variable
971 : * declarations, hence we have this hidden away function to realign the
972 : * tags program */
973 : void
974 : realign_tags(void)
975 : {
976 : }
977 : #endif
978 :
979 : static BAT *
980 1283369 : scanselect(BATiter *bi, struct canditer *restrict ci, BAT *bn,
981 : const void *tl, const void *th,
982 : bool li, bool hi, bool equi, bool anti, bool nil_matches,
983 : bool lval, bool hval, bool lnil,
984 : BUN maximum, const char **algo)
985 : {
986 : #ifndef NDEBUG
987 1283369 : int (*cmp)(const void *, const void *);
988 : #endif
989 1283369 : int t;
990 1283369 : BUN cnt = 0;
991 1283369 : oid *restrict dst;
992 :
993 1283369 : assert(bi->b != NULL);
994 1283369 : assert(bn != NULL);
995 1283369 : assert(bn->ttype == TYPE_oid);
996 1283369 : assert(!lval || tl != NULL);
997 1283369 : assert(!hval || th != NULL);
998 1283369 : assert(!equi || (li && hi && !anti));
999 1283369 : assert(!anti || lval || hval);
1000 1283369 : assert(bi->type != TYPE_void || equi || bi->nonil);
1001 :
1002 : #ifndef NDEBUG
1003 1283369 : cmp = ATOMcompare(bi->type);
1004 : #endif
1005 :
1006 1283369 : assert(!lval || !hval || tl == th || (*cmp)(tl, th) <= 0);
1007 :
1008 1283369 : dst = (oid *) Tloc(bn, 0);
1009 :
1010 1283369 : t = ATOMbasetype(bi->type);
1011 :
1012 : /* call type-specific core scan select function */
1013 1283369 : switch (t) {
1014 33544 : case TYPE_bte:
1015 33544 : if (ci->tpe == cand_dense)
1016 32042 : cnt = densescan_bte(scanargs);
1017 : else
1018 1502 : cnt = fullscan_bte(scanargs);
1019 : break;
1020 6873 : case TYPE_sht:
1021 6873 : if (ci->tpe == cand_dense)
1022 6178 : cnt = densescan_sht(scanargs);
1023 : else
1024 695 : cnt = fullscan_sht(scanargs);
1025 : break;
1026 1039761 : case TYPE_int:
1027 1039761 : if (ci->tpe == cand_dense)
1028 743756 : cnt = densescan_int(scanargs);
1029 : else
1030 296005 : cnt = fullscan_int(scanargs);
1031 : break;
1032 18 : case TYPE_flt:
1033 18 : if (ci->tpe == cand_dense)
1034 17 : cnt = densescan_flt(scanargs);
1035 : else
1036 1 : cnt = fullscan_flt(scanargs);
1037 : break;
1038 130 : case TYPE_dbl:
1039 130 : if (ci->tpe == cand_dense)
1040 121 : cnt = densescan_dbl(scanargs);
1041 : else
1042 9 : cnt = fullscan_dbl(scanargs);
1043 : break;
1044 2970 : case TYPE_lng:
1045 2970 : if (ci->tpe == cand_dense)
1046 2958 : cnt = densescan_lng(scanargs);
1047 : else
1048 12 : cnt = fullscan_lng(scanargs);
1049 : break;
1050 : #ifdef HAVE_HGE
1051 105 : case TYPE_hge:
1052 105 : if (ci->tpe == cand_dense)
1053 96 : cnt = densescan_hge(scanargs);
1054 : else
1055 9 : cnt = fullscan_hge(scanargs);
1056 : break;
1057 : #endif
1058 199866 : case TYPE_str:
1059 199866 : cnt = fullscan_str(scanargs);
1060 199866 : break;
1061 102 : default:
1062 102 : cnt = fullscan_any(scanargs);
1063 102 : break;
1064 : }
1065 1284188 : if (cnt == BUN_NONE) {
1066 : return NULL;
1067 : }
1068 1284188 : assert(bn->batCapacity >= cnt);
1069 :
1070 1284188 : BATsetcount(bn, cnt);
1071 1283851 : bn->tsorted = true;
1072 1283851 : bn->trevsorted = bn->batCount <= 1;
1073 1283851 : bn->tkey = true;
1074 1283851 : bn->tseqbase = cnt == 0 ? 0 : cnt == 1 || cnt == bi->count ? bi->b->hseqbase : oid_nil;
1075 :
1076 1283851 : return bn;
1077 : }
1078 :
1079 : #if SIZEOF_BUN == SIZEOF_INT
1080 : #define CALC_ESTIMATE(TPE) \
1081 : do { \
1082 : if (*(TPE*)tl < 1) { \
1083 : if ((int) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1084 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1085 : } else { \
1086 : if (-(int) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1087 : estimate = (BUN) ((int) *(TPE*)th - *(TPE*)tl); \
1088 : } \
1089 : } while (0)
1090 : #else
1091 : #define CALC_ESTIMATE(TPE) \
1092 : do { \
1093 : if (*(TPE*)tl < 1) { \
1094 : if ((lng) BUN_MAX + *(TPE*)tl >= *(TPE*)th) \
1095 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1096 : } else { \
1097 : if (-(lng) BUN_MAX + *(TPE*)tl <= *(TPE*)th) \
1098 : estimate = (BUN) ((lng) *(TPE*)th - *(TPE*)tl); \
1099 : } \
1100 : } while (0)
1101 : #endif
1102 :
1103 : static enum range_comp_t
1104 1862860 : BATrange(BATiter *bi, const void *tl, const void *th, bool li, bool hi)
1105 : {
1106 1862860 : enum range_comp_t range;
1107 1862860 : const ValRecord *minprop = NULL, *maxprop = NULL;
1108 1862860 : const void *minval = NULL, *maxval = NULL;
1109 1862860 : bool maxincl = true;
1110 1862860 : BAT *pb = NULL;
1111 1862860 : int c;
1112 1862860 : int (*atomcmp) (const void *, const void *) = ATOMcompare(bi->type);
1113 1862860 : BATiter bi2 = *bi;
1114 :
1115 1862860 : if (tl && (*atomcmp)(tl, ATOMnilptr(bi->type)) == 0)
1116 13272 : tl = NULL;
1117 1863229 : if (th && (*atomcmp)(th, ATOMnilptr(bi->type)) == 0)
1118 10607 : th = NULL;
1119 1864168 : if (tl == NULL && th == NULL)
1120 : return range_contains; /* looking for everything */
1121 :
1122 1857154 : if (VIEWtparent(bi->b))
1123 1671029 : pb = BATdescriptor(VIEWtparent(bi->b));
1124 :
1125 : /* keep locked while we look at the property values */
1126 1857464 : MT_lock_set(&bi->b->theaplock);
1127 1857069 : if (bi->sorted && (bi->nonil || atomcmp(BUNtail(*bi, 0), ATOMnilptr(bi->type)) != 0))
1128 314677 : minval = BUNtail(*bi, 0);
1129 1542394 : else if (bi->revsorted && (bi->nonil || atomcmp(BUNtail(*bi, bi->count - 1), ATOMnilptr(bi->type)) != 0))
1130 120716 : minval = BUNtail(*bi, bi->count - 1);
1131 1421678 : else if (bi->minpos != BUN_NONE)
1132 26677 : minval = BUNtail(*bi, bi->minpos);
1133 1395001 : else if ((minprop = BATgetprop_nolock(bi->b, GDK_MIN_BOUND)) != NULL)
1134 0 : minval = VALptr(minprop);
1135 1856950 : if (bi->sorted && (bi->nonil || atomcmp(BUNtail(bi2, bi->count - 1), ATOMnilptr(bi->type)) != 0)) {
1136 315082 : maxval = BUNtail(bi2, bi->count - 1);
1137 : maxincl = true;
1138 1541535 : } else if (bi->revsorted && (bi->nonil || atomcmp(BUNtail(bi2, 0), ATOMnilptr(bi->type)) != 0)) {
1139 121379 : maxval = BUNtail(bi2, 0);
1140 : maxincl = true;
1141 1420155 : } else if (bi->maxpos != BUN_NONE) {
1142 26621 : maxval = BUNtail(bi2, bi->maxpos);
1143 : maxincl = true;
1144 1393534 : } else if ((maxprop = BATgetprop_nolock(bi->b, GDK_MAX_BOUND)) != NULL) {
1145 0 : maxval = VALptr(maxprop);
1146 0 : maxincl = false;
1147 : }
1148 1856612 : bool keep = false; /* keep lock on parent bat? */
1149 1856612 : if (minval == NULL || maxval == NULL) {
1150 1394651 : if (pb != NULL) {
1151 1302741 : MT_lock_set(&pb->theaplock);
1152 1303066 : if (minval == NULL && (minprop = BATgetprop_nolock(pb, GDK_MIN_BOUND)) != NULL) {
1153 6 : keep = true;
1154 6 : minval = VALptr(minprop);
1155 : }
1156 1303076 : if (maxval == NULL && (maxprop = BATgetprop_nolock(pb, GDK_MAX_BOUND)) != NULL) {
1157 2 : keep = true;
1158 2 : maxval = VALptr(maxprop);
1159 2 : maxincl = true;
1160 : }
1161 1303036 : if (!keep) {
1162 1302998 : MT_lock_unset(&pb->theaplock);
1163 : }
1164 : }
1165 : }
1166 :
1167 1857069 : if (minval == NULL && maxval == NULL) {
1168 : range = range_inside; /* strictly: unknown */
1169 462923 : } else if (maxval &&
1170 456983 : tl &&
1171 456860 : ((c = atomcmp(tl, maxval)) > 0 ||
1172 830 : ((!maxincl || !li) && c == 0))) {
1173 : range = range_after;
1174 393357 : } else if (minval &&
1175 392073 : th &&
1176 392061 : ((c = atomcmp(th, minval)) < 0 ||
1177 365335 : (!hi && c == 0))) {
1178 : range = range_before;
1179 366562 : } else if (tl == NULL) {
1180 5859 : if (minval == NULL) {
1181 16 : c = atomcmp(th, maxval);
1182 16 : if (c < 0 || ((maxincl || !hi) && c == 0))
1183 : range = range_atstart;
1184 : else
1185 6439 : range = range_contains;
1186 : } else {
1187 5843 : c = atomcmp(th, minval);
1188 5843 : if (c < 0 || (!hi && c == 0))
1189 : range = range_before;
1190 5843 : else if (maxval == NULL)
1191 : range = range_atstart;
1192 : else {
1193 5841 : c = atomcmp(th, maxval);
1194 5841 : if (c < 0 || ((maxincl || !hi) && c == 0))
1195 : range = range_atstart;
1196 : else
1197 6439 : range = range_contains;
1198 : }
1199 : }
1200 360703 : } else if (th == NULL) {
1201 849 : if (maxval == NULL) {
1202 2 : c = atomcmp(tl, minval);
1203 2 : if (c >= 0)
1204 : range = range_atend;
1205 : else
1206 6439 : range = range_contains;
1207 : } else {
1208 847 : c = atomcmp(tl, maxval);
1209 847 : if (c > 0 || ((!maxincl || !li) && c == 0))
1210 : range = range_after;
1211 847 : else if (minval == NULL)
1212 : range = range_atend;
1213 : else {
1214 787 : c = atomcmp(tl, minval);
1215 787 : if (c >= 0)
1216 : range = range_atend;
1217 : else
1218 6439 : range = range_contains;
1219 : }
1220 : }
1221 359854 : } else if (minval == NULL) {
1222 468 : c = atomcmp(th, maxval);
1223 468 : if (c < 0 || ((maxincl || !hi) && c == 0))
1224 : range = range_inside;
1225 : else
1226 20650 : range = range_atend;
1227 359386 : } else if (maxval == NULL) {
1228 6 : c = atomcmp(tl, minval);
1229 6 : if (c >= 0)
1230 : range = range_inside;
1231 : else
1232 248 : range = range_atstart;
1233 : } else {
1234 359380 : c = atomcmp(tl, minval);
1235 359376 : if (c >= 0) {
1236 358818 : c = atomcmp(th, maxval);
1237 358904 : if (c < 0 || ((maxincl || !hi) && c == 0))
1238 : range = range_inside;
1239 : else
1240 20650 : range = range_atend;
1241 : } else {
1242 558 : c = atomcmp(th, maxval);
1243 558 : if (c < 0 || ((maxincl || !hi) && c == 0))
1244 : range = range_atstart;
1245 : else
1246 6439 : range = range_contains;
1247 : }
1248 : }
1249 :
1250 1857286 : MT_lock_unset(&bi->b->theaplock);
1251 1857664 : if (pb) {
1252 1671335 : if (keep)
1253 6 : MT_lock_unset(&pb->theaplock);
1254 1671335 : BBPreclaim(pb);
1255 : }
1256 :
1257 : return range;
1258 : }
1259 :
1260 : /* generic range select
1261 : *
1262 : * Return a BAT with the OID values of b for qualifying tuples. The
1263 : * return BAT is sorted (i.e. in the same order as the input BAT).
1264 : *
1265 : * If s is non-NULL, it is a list of candidates. s must be sorted.
1266 : *
1267 : * tl may not be NULL, li, hi, and anti must be either false or true.
1268 : *
1269 : * If th is NULL, hi is ignored.
1270 : *
1271 : * If anti is false, qualifying tuples are those whose value is between
1272 : * tl and th (as in x >[=] tl && x <[=] th, where equality depends on li
1273 : * and hi--so if tl > th, nothing will be returned). If li or hi is
1274 : * true, the respective boundary is inclusive, otherwise exclusive. If
1275 : * th is NULL it is taken to be equal to tl, turning this into an equi-
1276 : * or point-select. Note that for a point select to return anything, li
1277 : * (and hi if th was not NULL) must be true. There is a special case if
1278 : * tl is nil and th is NULL. This is the only way to select for nil
1279 : * values.
1280 : *
1281 : * If anti is true, the result is the complement of what the result
1282 : * would be if anti were 0, except that nils are filtered out if
1283 : * nil_matches is false.
1284 : *
1285 : * If nil_matches is true, NIL is considered an ordinary value that
1286 : * can match, else NIL must be considered to never match.
1287 : *
1288 : * In brief:
1289 : * - if tl==nil and th==NULL and anti==false, return all nils (only way
1290 : * to get nils);
1291 : * - it tl==nil and th==nil, return all but nils;
1292 : * - if tl==nil and th!=NULL, no lower bound;
1293 : * - if th==NULL or tl==th, point (equi) select;
1294 : * - if th==nil, no upper bound
1295 : *
1296 : * A complete breakdown of the various arguments follows. Here, v, v1
1297 : * and v2 are values from the appropriate domain, and
1298 : * v != nil, v1 != nil, v2 != nil, v1 < v2.
1299 : * Note that if nil_matches is true, all the "x != nil" conditions fall
1300 : * away and for the "equi" or "point" selects, i.e. when tl is nil and
1301 : * th is either NULL or nil, there is no special handling of nil (so
1302 : * look at the rows with tl == v and th == v or NULL).
1303 : * tl th li hi anti result list of OIDs for values
1304 : * -----------------------------------------------------------------
1305 : * nil NULL true ignored false x == nil (only way to get nil)
1306 : * nil NULL false ignored false NOTHING
1307 : * nil NULL ignored ignored true x != nil
1308 : * nil nil ignored ignored false x != nil
1309 : * nil nil ignored ignored true NOTHING
1310 : * nil v ignored false false x < v
1311 : * nil v ignored true false x <= v
1312 : * nil v ignored false true x >= v
1313 : * nil v ignored true true x > v
1314 : * v NULL true ignored false x == v
1315 : * v NULL false ignored false NOTHING
1316 : * v NULL true ignored true x != v && x != nil
1317 : * v NULL false ignored true x != nil
1318 : * v nil true ignored false x >= v
1319 : * v nil false ignored false x > v
1320 : * v nil true ignored true x < v
1321 : * v nil false ignored true x <= v
1322 : * v v true true false x == v
1323 : * v v false true false NOTHING
1324 : * v v true false false NOTHING
1325 : * v v false false false NOTHING
1326 : * v v true true true x != v && x != nil
1327 : * v v false true true x != nil
1328 : * v v true false true x != nil
1329 : * v v false false true x != nil
1330 : * v1 v2 true true false v1 <= x <= v2
1331 : * v1 v2 false true false v1 < x <= v2
1332 : * v1 v2 true false false v1 <= x < v2
1333 : * v1 v2 false false false v1 < x < v2
1334 : * v1 v2 true true true x < v1 or x > v2
1335 : * v1 v2 false true true x <= v1 or x > v2
1336 : * v1 v2 true false true x < v1 or x >= v2
1337 : * v1 v2 false false true x <= v1 or x >= v2
1338 : * v2 v1 ignored ignored false NOTHING
1339 : * v2 v1 ignored ignored true x != nil
1340 : */
1341 : BAT *
1342 3269497 : BATselect(BAT *b, BAT *s, const void *tl, const void *th,
1343 : bool li, bool hi, bool anti, bool nil_matches)
1344 : {
1345 3269497 : bool lval; /* low value used for comparison */
1346 3269497 : bool lnil; /* low value is nil */
1347 3269497 : bool hval; /* high value used for comparison */
1348 3269497 : bool equi; /* select for single value (not range) */
1349 3269497 : bool antiequi = false; /* select for all but single value */
1350 3269497 : bool wanthash = false; /* use hash (equi must be true) */
1351 3269497 : bool havehash = false; /* we have a hash (and the hashlock) */
1352 3269497 : bool phash = false; /* use hash on parent BAT (if view) */
1353 3269497 : int t; /* data type */
1354 3269497 : bat parent; /* b's parent bat (if b is a view) */
1355 3269497 : const void *nil;
1356 3269497 : BAT *bn;
1357 3269497 : struct canditer ci;
1358 3269497 : BUN estimate = BUN_NONE, maximum = BUN_NONE;
1359 3269497 : oid vwl = 0, vwh = 0;
1360 3269497 : lng vwo = 0;
1361 3269497 : Heap *oidxh = NULL;
1362 3269497 : const char *algo;
1363 3269497 : enum range_comp_t range;
1364 3269497 : const bool notnull = BATgetprop(b, GDK_NOT_NULL) != NULL;
1365 3272930 : lng t0 = GDKusec();
1366 :
1367 3271364 : BATcheck(b, NULL);
1368 3271364 : if (tl == NULL) {
1369 0 : GDKerror("tl value required");
1370 0 : return NULL;
1371 : }
1372 :
1373 3271364 : if (s && s->ttype != TYPE_msk && !s->tsorted) {
1374 0 : GDKerror("invalid argument: s must be sorted.\n");
1375 0 : return NULL;
1376 : }
1377 :
1378 3271364 : canditer_init(&ci, b, s);
1379 3271666 : if (ci.ncand == 0) {
1380 : /* trivially empty result */
1381 1385540 : MT_thread_setalgorithm("select: trivially empty");
1382 1385382 : bn = BATdense(0, 0, 0);
1383 1383618 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1384 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1385 : " (" LLFMT " usec): "
1386 : "trivially empty\n",
1387 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1388 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1389 1383618 : return bn;
1390 : }
1391 :
1392 1886126 : BATiter bi = bat_iterator(b);
1393 :
1394 1886346 : t = bi.type;
1395 1886346 : nil = ATOMnilptr(t);
1396 1886346 : if (nil == NULL)
1397 0 : nil_matches = false;
1398 : /* can we use the base type? */
1399 1886346 : t = ATOMbasetype(t);
1400 1886346 : lnil = nil && ATOMcmp(t, tl, nil) == 0; /* low value == nil? */
1401 1885890 : lval = !lnil || th == NULL; /* low value used for comparison */
1402 1885890 : equi = th == NULL || (lval && ATOMcmp(t, tl, th) == 0); /* point select? */
1403 1886002 : if (lnil && nil_matches && (th == NULL || ATOMcmp(t, th, nil) == 0)) {
1404 : /* if nil_matches is set, tl==th==nil is just an equi select */
1405 : equi = true;
1406 : lval = true;
1407 : }
1408 :
1409 1864336 : if (equi) {
1410 1843990 : assert(lval);
1411 1843990 : if (th == NULL)
1412 1657887 : hi = li;
1413 1843990 : th = tl;
1414 1843990 : hval = true;
1415 1843990 : if (!anti && (!li || !hi)) {
1416 : /* upper and lower bound of range are equal (or
1417 : * upper is NULL) and we want an interval that's
1418 : * open on at least one side (v <= x < v or v <
1419 : * x <= v or v < x < v all result in nothing) */
1420 59 : MT_thread_setalgorithm("select: empty interval");
1421 59 : bn = BATdense(0, 0, 0);
1422 59 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1423 : ",s=" ALGOOPTBATFMT ",li=%d,hi=%d,anti=%d -> "
1424 : ALGOOPTBATFMT " (" LLFMT " usec): "
1425 : "empty interval\n",
1426 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1427 : li, hi, anti, ALGOOPTBATPAR(bn),
1428 : GDKusec() - t0);
1429 59 : bat_iterator_end(&bi);
1430 59 : return bn;
1431 : }
1432 : } else {
1433 : /* range select: we only care about nil_matches in
1434 : * (anti-)equi-select */
1435 42012 : nil_matches = false;
1436 42012 : if (nil == NULL) {
1437 0 : assert(th != NULL);
1438 : hval = true;
1439 : } else {
1440 42012 : hval = ATOMcmp(t, th, nil) != 0;
1441 : }
1442 : }
1443 1885947 : if (anti) {
1444 75267 : if (lval != hval) {
1445 : /* one of the end points is nil and the other
1446 : * isn't: swap sub-ranges */
1447 47 : const void *tv;
1448 47 : bool ti;
1449 47 : assert(!equi);
1450 47 : ti = li;
1451 47 : li = !hi;
1452 47 : hi = !ti;
1453 47 : tv = tl;
1454 47 : tl = th;
1455 47 : th = tv;
1456 47 : ti = lval;
1457 47 : lval = hval;
1458 47 : hval = ti;
1459 47 : lnil = ATOMcmp(t, tl, nil) == 0;
1460 47 : anti = false;
1461 47 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1462 : ",s=" ALGOOPTBATFMT ",anti=%d "
1463 : "anti: switch ranges...\n",
1464 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1465 : anti);
1466 75220 : } else if (!lval && !hval) {
1467 : /* antiselect for nil-nil range: all non-nil
1468 : * values are in range; we must return all
1469 : * other non-nil values, i.e. nothing */
1470 26 : MT_thread_setalgorithm("select: anti: nil-nil range, nonil");
1471 26 : bn = BATdense(0, 0, 0);
1472 26 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1473 : ",s=" ALGOOPTBATFMT ",anti=%d -> "
1474 : ALGOOPTBATFMT " (" LLFMT " usec): "
1475 : "anti: nil-nil range, nonil\n",
1476 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1477 : anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1478 26 : bat_iterator_end(&bi);
1479 26 : return bn;
1480 75194 : } else if ((equi && (lnil || !(li && hi))) || ATOMcmp(t, tl, th) > 0) {
1481 : /* various ways to select for everything except nil */
1482 7295 : if (equi && !lnil && nil_matches && !(li && hi)) {
1483 : /* nil is not special, so return
1484 : * everything */
1485 0 : bn = canditer_slice(&ci, 0, ci.ncand);
1486 0 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1487 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1488 : " (" LLFMT " usec): "
1489 : "anti, equi, open, nil_matches\n",
1490 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1491 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1492 0 : bat_iterator_end(&bi);
1493 0 : return bn;
1494 : }
1495 7295 : bn = BATselect(b, s, nil, NULL, true, true, false, false);
1496 7294 : if (bn == NULL) {
1497 0 : bat_iterator_end(&bi);
1498 0 : return NULL;
1499 : }
1500 7294 : BAT *bn2;
1501 7294 : if (s) {
1502 4152 : bn2 = BATdiffcand(s, bn);
1503 : } else {
1504 3142 : bn2 = BATnegcands(ci.seq, bi.count, bn);
1505 : }
1506 7293 : bat_iterator_end(&bi);
1507 7296 : BBPreclaim(bn);
1508 7292 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1509 : ",s=" ALGOOPTBATFMT ",anti=1,equi=%d -> "
1510 : ALGOOPTBATFMT " (" LLFMT " usec): "
1511 : "everything except nil\n",
1512 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
1513 : equi, ALGOOPTBATPAR(bn2), GDKusec() - t0);
1514 7292 : return bn2; /* also if NULL */
1515 : } else {
1516 : antiequi = equi;
1517 : equi = false;
1518 : }
1519 : }
1520 :
1521 : /* if equi set, then so are both lval and hval */
1522 1878658 : assert(!equi || (lval && hval));
1523 :
1524 1878658 : if (hval && (equi ? !li || !hi : ATOMcmp(t, tl, th) > 0)) {
1525 : /* empty range */
1526 31 : MT_thread_setalgorithm("select: empty range");
1527 31 : bn = BATdense(0, 0, 0);
1528 31 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1529 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1530 : " (" LLFMT " usec) "
1531 : "empty range\n",
1532 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1533 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1534 31 : bat_iterator_end(&bi);
1535 31 : return bn;
1536 : }
1537 1878627 : if (equi && lnil && (notnull || bi.nonil)) {
1538 : /* return all nils, but there aren't any */
1539 14921 : MT_thread_setalgorithm("select: equi-nil, nonil");
1540 14921 : bn = BATdense(0, 0, 0);
1541 14920 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1542 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1543 : " (" LLFMT " usec): "
1544 : "equi-nil, nonil\n",
1545 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1546 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1547 14920 : bat_iterator_end(&bi);
1548 14920 : return bn;
1549 : }
1550 :
1551 1863706 : if (!equi && !lval && !hval && lnil && (notnull || bi.nonil)) {
1552 : /* return all non-nils from a BAT that doesn't have
1553 : * any: i.e. return everything */
1554 0 : MT_thread_setalgorithm("select: everything, nonil");
1555 0 : bn = canditer_slice(&ci, 0, ci.ncand);
1556 0 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1557 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1558 : " (" LLFMT " usec): "
1559 : "everything, nonil\n",
1560 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1561 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1562 0 : bat_iterator_end(&bi);
1563 0 : return bn;
1564 : }
1565 :
1566 : /* figure out how the searched for range compares with the known
1567 : * minimum and maximum values */
1568 1873577 : range = BATrange(&bi, lval ? tl : NULL, hval ? th : NULL, li, hi);
1569 1863626 : if (anti) {
1570 67910 : switch (range) {
1571 218 : case range_contains:
1572 : /* MIN..MAX range of values in BAT fully inside
1573 : * tl..th range, so nothing left over for
1574 : * anti */
1575 218 : MT_thread_setalgorithm("select: nothing, out of range");
1576 218 : bn = BATdense(0, 0, 0);
1577 218 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1578 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1579 : " (" LLFMT " usec): "
1580 : "nothing, out of range\n",
1581 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti, ALGOOPTBATPAR(bn), GDKusec() - t0);
1582 218 : bat_iterator_end(&bi);
1583 218 : return bn;
1584 5119 : case range_before:
1585 : case range_after:
1586 5119 : if (notnull || b->tnonil || nil_matches) {
1587 : /* search range does not overlap with
1588 : * BAT range, and there are no nils (or
1589 : * we want to include nils), so we can
1590 : * return everything */
1591 4916 : MT_thread_setalgorithm("select: everything, anti, nonil");
1592 4918 : bn = canditer_slice(&ci, 0, ci.ncand);
1593 4914 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1594 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1595 : " (" LLFMT " usec): "
1596 : "everything, nonil\n",
1597 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1598 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1599 4914 : bat_iterator_end(&bi);
1600 4914 : return bn;
1601 : }
1602 : break;
1603 : default:
1604 : break;
1605 : }
1606 1795716 : } else if (!equi || !lnil) {
1607 1788797 : switch (range) {
1608 91387 : case range_before:
1609 : case range_after:
1610 : /* range we're looking for either completely
1611 : * before or complete after the range of values
1612 : * in the BAT */
1613 91387 : MT_thread_setalgorithm("select: nothing, out of range");
1614 91404 : bn = BATdense(0, 0, 0);
1615 91361 : TRC_DEBUG(ALGO, "b="
1616 : ALGOBATFMT ",s="
1617 : ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1618 : " (" LLFMT " usec): "
1619 : "nothing, out of range\n",
1620 : ALGOBATPAR(b),
1621 : ALGOOPTBATPAR(s), anti,
1622 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1623 91361 : bat_iterator_end(&bi);
1624 91361 : return bn;
1625 6233 : case range_contains:
1626 6233 : if (notnull || b->tnonil) {
1627 : /* search range contains BAT range, and
1628 : * there are no nils, so we can return
1629 : * everything */
1630 6147 : MT_thread_setalgorithm("select: everything, nonil");
1631 6147 : bn = canditer_slice(&ci, 0, ci.ncand);
1632 6145 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT
1633 : ",s=" ALGOOPTBATFMT ",anti=%d -> " ALGOOPTBATFMT
1634 : " (" LLFMT " usec): "
1635 : "everything, nonil\n",
1636 : ALGOBATPAR(b), ALGOOPTBATPAR(s), anti,
1637 : ALGOOPTBATPAR(bn), GDKusec() - t0);
1638 6145 : bat_iterator_end(&bi);
1639 6145 : return bn;
1640 : }
1641 : break;
1642 : default:
1643 : break;
1644 : }
1645 : }
1646 :
1647 1760958 : parent = VIEWtparent(b);
1648 1599209 : assert(parent >= 0);
1649 1760958 : BAT *pb;
1650 1760958 : BATiter pbi;
1651 1760958 : if (parent > 0)
1652 1599234 : pb = BATdescriptor(parent);
1653 : else
1654 : pb = NULL;
1655 1761441 : pbi = bat_iterator(pb);
1656 : /* use hash only for equi-join if the bat is not sorted, but
1657 : * only if b or its parent already has a hash, or if b or its
1658 : * parent is persistent and the total size wouldn't be too
1659 : * large; check for existence of hash last since that may
1660 : * involve I/O */
1661 1760953 : if ((equi || antiequi) && !bi.sorted && !bi.revsorted) {
1662 1409650 : double cost = joincost(b, 1, &ci, &havehash, &phash, NULL);
1663 1408906 : if (cost > 0 && cost < ci.ncand) {
1664 90093 : wanthash = true;
1665 90093 : if (havehash) {
1666 65384 : if (phash) {
1667 64532 : MT_rwlock_rdlock(&pb->thashlock);
1668 64552 : if (pb->thash == NULL) {
1669 0 : MT_rwlock_rdunlock(&pb->thashlock);
1670 0 : havehash = false;
1671 : }
1672 : } else {
1673 852 : MT_rwlock_rdlock(&b->thashlock);
1674 853 : if (b->thash == NULL) {
1675 0 : MT_rwlock_rdunlock(&b->thashlock);
1676 0 : havehash = false;
1677 : }
1678 : }
1679 : }
1680 90114 : if (wanthash && !havehash && b->batRole != PERSISTENT) {
1681 0 : MT_lock_set(&b->theaplock);
1682 0 : if (++b->selcnt > 1000)
1683 0 : b->selcnt = 1000; /* limit value */
1684 : else
1685 : wanthash = false;
1686 0 : MT_lock_unset(&b->theaplock);
1687 : }
1688 : } else {
1689 1318813 : wanthash = havehash = false;
1690 : }
1691 : }
1692 :
1693 : /* at this point, if havehash is set, we have the hash lock
1694 : * the lock is on the parent if phash is set, on b itself if not
1695 : * also, if havehash is set, then so is wanthash (but not v.v.) */
1696 :
1697 1760230 : if (!havehash) {
1698 : /* make sure tsorted and trevsorted flags are set, but
1699 : * we only need to know if we're not yet sure that we're
1700 : * going for the hash (i.e. it already exists) */
1701 1695643 : (void) BATordered(b);
1702 1696551 : (void) BATordered_rev(b);
1703 : /* reinitialize iterator since properties may have changed */
1704 1696631 : bat_iterator_end(&bi);
1705 1696478 : bi = bat_iterator(b);
1706 : }
1707 :
1708 : /* If there is an order index or it is a view and the parent
1709 : * has an ordered index, and the bat is not tsorted or
1710 : * trevstorted then use the order index. And there is no cand
1711 : * list or if there is one, it is dense.
1712 : * TODO: we do not support anti-select with order index */
1713 1761006 : bool poidx = false;
1714 1761006 : if (!anti &&
1715 1698648 : !havehash &&
1716 1636854 : !bi.sorted &&
1717 1389328 : !bi.revsorted &&
1718 1263951 : ci.tpe == cand_dense) {
1719 971480 : BAT *view = NULL;
1720 971480 : (void) BATcheckorderidx(b);
1721 971582 : MT_lock_set(&b->batIdxLock);
1722 971503 : if ((oidxh = b->torderidx) != NULL)
1723 39 : HEAPincref(oidxh);
1724 971503 : MT_lock_unset(&b->batIdxLock);
1725 971511 : if (oidxh == NULL && pb != NULL) {
1726 908919 : (void) BATcheckorderidx(pb);
1727 908924 : MT_lock_set(&pb->batIdxLock);
1728 908887 : if ((oidxh = pb->torderidx) != NULL) {
1729 9 : HEAPincref(oidxh);
1730 9 : view = b;
1731 9 : b = pb;
1732 : }
1733 908887 : MT_lock_unset(&pb->batIdxLock);
1734 : }
1735 971506 : if (oidxh) {
1736 : /* Is query selective enough to use the ordered
1737 : * index? Finding the boundaries is 2*log(n)
1738 : * where n is the size of the bat, sorting is
1739 : * N*log(N) where N is the number of results.
1740 : * If the sum is less than n (cost of scan),
1741 : * it's cheaper. However, to find out how large
1742 : * N is, we'd have to do the two boundary
1743 : * searches. If we do that, we might as well do
1744 : * it all. */
1745 48 : if (view) {
1746 9 : bat_iterator_end(&bi);
1747 9 : bi = bat_iterator(b);
1748 9 : poidx = true; /* using parent oidx */
1749 9 : vwo = (lng) (view->tbaseoff - bi.baseoff);
1750 9 : vwl = b->hseqbase + (oid) vwo + ci.seq - view->hseqbase;
1751 9 : vwh = vwl + canditer_last(&ci) - ci.seq;
1752 9 : vwo = (lng) view->hseqbase - (lng) b->hseqbase - vwo;
1753 9 : TRC_DEBUG(ALGO, "Switch from " ALGOBATFMT " to " ALGOBATFMT " " OIDFMT "-" OIDFMT " hseq " LLFMT "\n", ALGOBATPAR(view), ALGOBATPAR(b), vwl, vwh, vwo);
1754 : } else {
1755 39 : vwl = ci.seq;
1756 39 : vwh = canditer_last(&ci);
1757 : }
1758 : }
1759 : }
1760 :
1761 1761032 : if (bi.sorted || bi.revsorted || (!havehash && oidxh != NULL)) {
1762 411285 : BUN low = 0;
1763 411285 : BUN high = bi.count;
1764 :
1765 411285 : if (BATtdensebi(&bi)) {
1766 : /* positional */
1767 : /* we expect nonil to be set, in which case we
1768 : * already know that we're not dealing with a
1769 : * nil equiselect (dealt with above) */
1770 23557 : assert(bi.nonil);
1771 23557 : assert(bi.sorted);
1772 23557 : assert(oidxh == NULL);
1773 23557 : algo = "select: dense";
1774 23557 : if (hval) {
1775 23798 : oid h = * (oid *) th + hi;
1776 23798 : if (h > bi.tseq)
1777 23801 : h -= bi.tseq;
1778 : else
1779 : h = 0;
1780 23798 : if ((BUN) h < high)
1781 23557 : high = (BUN) h;
1782 : }
1783 :
1784 23557 : if (lval) {
1785 23806 : oid l = *(oid *) tl + !li;
1786 23806 : if (l > bi.tseq)
1787 4540 : l -= bi.tseq;
1788 : else
1789 : l = 0;
1790 4540 : if ((BUN) l > low)
1791 4540 : low = (BUN) l;
1792 4540 : if (low > high)
1793 : low = high;
1794 : }
1795 387728 : } else if (bi.sorted) {
1796 260032 : assert(oidxh == NULL);
1797 260032 : algo = "select: sorted";
1798 260032 : if (lval) {
1799 259916 : if (li)
1800 259100 : low = SORTfndfirst(b, tl);
1801 : else
1802 816 : low = SORTfndlast(b, tl);
1803 : } else {
1804 : /* skip over nils at start of column */
1805 116 : low = SORTfndlast(b, nil);
1806 : }
1807 260354 : if (hval) {
1808 258918 : if (hi)
1809 258345 : high = SORTfndlast(b, th);
1810 : else
1811 573 : high = SORTfndfirst(b, th);
1812 : }
1813 127696 : } else if (bi.revsorted) {
1814 127648 : assert(oidxh == NULL);
1815 127648 : algo = "select: reverse sorted";
1816 127648 : if (lval) {
1817 127627 : if (li)
1818 127598 : high = SORTfndlast(b, tl);
1819 : else
1820 29 : high = SORTfndfirst(b, tl);
1821 : } else {
1822 : /* skip over nils at end of column */
1823 21 : high = SORTfndfirst(b, nil);
1824 : }
1825 127672 : if (hval) {
1826 127567 : if (hi)
1827 127519 : low = SORTfndfirst(b, th);
1828 : else
1829 48 : low = SORTfndlast(b, th);
1830 : }
1831 : } else {
1832 48 : assert(oidxh != NULL);
1833 48 : algo = poidx ? "select: parent orderidx" : "select: orderidx";
1834 48 : if (lval) {
1835 30 : if (li)
1836 24 : low = ORDERfndfirst(b, oidxh, tl);
1837 : else
1838 6 : low = ORDERfndlast(b, oidxh, tl);
1839 : } else {
1840 : /* skip over nils at start of column */
1841 18 : low = ORDERfndlast(b, oidxh, nil);
1842 : }
1843 48 : if (hval) {
1844 34 : if (hi)
1845 22 : high = ORDERfndlast(b, oidxh, th);
1846 : else
1847 12 : high = ORDERfndfirst(b, oidxh, th);
1848 : }
1849 : }
1850 411707 : if (anti) {
1851 38688 : assert(oidxh == NULL);
1852 38688 : if (bi.sorted) {
1853 36433 : BUN first = nil_matches ? 0 : SORTfndlast(b, nil);
1854 : /* match: [first..low) + [high..last) */
1855 36455 : bn = canditer_slice2val(&ci,
1856 : first + b->hseqbase,
1857 : low + b->hseqbase,
1858 36455 : high + b->hseqbase,
1859 : oid_nil);
1860 : } else {
1861 2255 : BUN last = nil_matches ? bi.count : SORTfndfirst(b, nil);
1862 : /* match: [first..low) + [high..last) */
1863 2255 : bn = canditer_slice2val(&ci,
1864 : oid_nil,
1865 : low + b->hseqbase,
1866 : high + b->hseqbase,
1867 2255 : last + b->hseqbase);
1868 : }
1869 : } else {
1870 373019 : if (bi.sorted || bi.revsorted) {
1871 372971 : assert(oidxh == NULL);
1872 : /* match: [low..high) */
1873 372971 : bn = canditer_sliceval(&ci,
1874 : low + b->hseqbase,
1875 372971 : high + b->hseqbase);
1876 : } else {
1877 48 : BUN i;
1878 48 : BUN cnt = 0;
1879 48 : const oid *rs;
1880 48 : oid *rbn;
1881 :
1882 48 : rs = (const oid *) oidxh->base + ORDERIDXOFF;
1883 48 : rs += low;
1884 48 : bn = COLnew(0, TYPE_oid, high-low, TRANSIENT);
1885 48 : if (bn == NULL) {
1886 0 : HEAPdecref(oidxh, false);
1887 0 : bat_iterator_end(&bi);
1888 0 : bat_iterator_end(&pbi);
1889 0 : BBPreclaim(pb);
1890 0 : return NULL;
1891 : }
1892 :
1893 48 : rbn = (oid *) Tloc((bn), 0);
1894 :
1895 251 : for (i = low; i < high; i++) {
1896 203 : if (vwl <= *rs && *rs <= vwh) {
1897 176 : *rbn++ = (oid) ((lng) *rs + vwo);
1898 176 : cnt++;
1899 : }
1900 203 : rs++;
1901 : }
1902 48 : HEAPdecref(oidxh, false);
1903 48 : BATsetcount(bn, cnt);
1904 :
1905 : /* output must be sorted */
1906 48 : GDKqsort(Tloc(bn, 0), NULL, NULL, (size_t) bn->batCount, sizeof(oid), 0, TYPE_oid, false, false);
1907 48 : bn->tsorted = true;
1908 48 : bn->trevsorted = bn->batCount <= 1;
1909 48 : bn->tkey = true;
1910 48 : bn->tseqbase = bn->batCount == 0 ? 0 : bn->batCount == 1 ? * (oid *) Tloc(bn, 0) : oid_nil;
1911 48 : bn->tnil = false;
1912 48 : bn->tnonil = true;
1913 48 : if (s) {
1914 9 : s = BATintersectcand(bn, s);
1915 9 : BBPunfix(bn->batCacheid);
1916 9 : bn = s;
1917 : }
1918 : }
1919 : }
1920 :
1921 411860 : bn = virtualize(bn);
1922 411073 : MT_thread_setalgorithm(algo);
1923 411493 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",anti=%s -> "
1924 : ALGOOPTBATFMT " %s (" LLFMT " usec)\n",
1925 : ALGOBATPAR(b), anti ? "true" : "false",
1926 : ALGOOPTBATPAR(bn), algo,
1927 : GDKusec() - t0);
1928 :
1929 411493 : bat_iterator_end(&bi);
1930 411830 : bat_iterator_end(&pbi);
1931 411868 : BBPreclaim(pb);
1932 411920 : return bn;
1933 : }
1934 :
1935 65380 : assert(oidxh == NULL);
1936 : /* upper limit for result size */
1937 1349747 : maximum = ci.ncand;
1938 1349747 : if ((equi || antiequi) && havehash) {
1939 : /* we can look in the hash struct to see whether all
1940 : * values are distinct and set estimate accordingly */
1941 65392 : if (phash) {
1942 64540 : if (pb->thash->nunique == pbi.count)
1943 : estimate = 1;
1944 852 : } else if (b->thash->nunique == bi.count)
1945 : estimate = 1;
1946 : }
1947 1344431 : if (estimate == BUN_NONE && (bi.key || (pb != NULL && pbi.key))) {
1948 : /* exact result size in special cases */
1949 641 : if (equi || (antiequi && wanthash)) {
1950 : estimate = 1;
1951 8 : } else if (!anti && lval && hval) {
1952 0 : switch (t) {
1953 0 : case TYPE_bte:
1954 0 : estimate = (BUN) (*(bte *) th - *(bte *) tl);
1955 0 : break;
1956 0 : case TYPE_sht:
1957 0 : estimate = (BUN) (*(sht *) th - *(sht *) tl);
1958 0 : break;
1959 0 : case TYPE_int:
1960 0 : CALC_ESTIMATE(int);
1961 : break;
1962 0 : case TYPE_lng:
1963 0 : CALC_ESTIMATE(lng);
1964 : break;
1965 : #ifdef HAVE_HGE
1966 0 : case TYPE_hge:
1967 0 : CALC_ESTIMATE(hge);
1968 : break;
1969 : #endif
1970 : }
1971 0 : if (estimate == BUN_NONE)
1972 0 : estimate += li + hi - 1;
1973 : }
1974 : }
1975 : /* refine upper limit by exact size (if known) */
1976 1349747 : maximum = MIN(maximum, estimate);
1977 1349747 : if (wanthash && !havehash && estimate == BUN_NONE) {
1978 : /* no exact result size, but we need estimate to
1979 : * choose between hash- & scan-select (if we already
1980 : * have a hash, it's a no-brainer: we use it) */
1981 24685 : if (ci.ncand <= 10000) {
1982 : /* "small" input: don't bother about more accurate
1983 : * estimate */
1984 : estimate = maximum;
1985 : } else {
1986 : /* layman's quick "pseudo-sample" of 1000 tuples,
1987 : * i.e., 333 from begin, middle & end of BAT */
1988 2 : BUN smpl_cnt = 0, slct_cnt = 0, pos, skip, delta;
1989 2 : BAT *smpl, *slct;
1990 :
1991 2 : delta = 1000 / 3 / 2;
1992 2 : skip = (bi.count - (2 * delta)) / 2;
1993 8 : for (pos = delta; pos < bi.count; pos += skip) {
1994 6 : smpl = BATslice(b, pos - delta, pos + delta);
1995 6 : if (smpl) {
1996 6 : slct = BATselect(smpl, NULL, tl,
1997 : th, li, hi, anti, nil_matches);
1998 6 : if (slct) {
1999 6 : smpl_cnt += BATcount(smpl);
2000 6 : slct_cnt += BATcount(slct);
2001 6 : BBPreclaim(slct);
2002 : }
2003 6 : BBPreclaim(smpl);
2004 : }
2005 : }
2006 2 : if (smpl_cnt > 0 && slct_cnt > 0) {
2007 : /* linear extrapolation plus 10% margin */
2008 1 : estimate = (BUN) ((dbl) slct_cnt / (dbl) smpl_cnt
2009 1 : * (dbl) bi.count * 1.1);
2010 1 : } else if (smpl_cnt > 0 && slct_cnt == 0) {
2011 : /* estimate low enough to trigger hash select */
2012 1 : estimate = (ci.ncand / 100) - 1;
2013 : }
2014 : }
2015 24685 : wanthash = estimate < ci.ncand / 100;
2016 : }
2017 1349746 : if (estimate == BUN_NONE) {
2018 : /* no better estimate possible/required:
2019 : * (pre-)allocate 1M tuples, i.e., avoid/delay extend
2020 : * without too much overallocation */
2021 1319125 : estimate = 1000000;
2022 : }
2023 : /* limit estimation by upper limit */
2024 1349747 : estimate = MIN(estimate, maximum);
2025 :
2026 1349747 : bn = COLnew(0, TYPE_oid, estimate, TRANSIENT);
2027 1349730 : if (bn == NULL) {
2028 0 : if (havehash) {
2029 0 : if (phash)
2030 0 : MT_rwlock_rdunlock(&pb->thashlock);
2031 : else
2032 0 : MT_rwlock_rdunlock(&b->thashlock);
2033 : }
2034 0 : bat_iterator_end(&bi);
2035 0 : bat_iterator_end(&pbi);
2036 0 : BBPreclaim(pb);
2037 0 : return NULL;
2038 : }
2039 :
2040 1349730 : if (wanthash) {
2041 : /* hashselect unlocks the hash lock */
2042 65360 : bn = hashselect(&bi, &ci, bn, tl, maximum, havehash, phash, &algo);
2043 65377 : if (bn && antiequi) {
2044 3513 : BAT *bn2;
2045 3513 : if (s) {
2046 3513 : bn2 = BATdiffcand(s, bn);
2047 : } else {
2048 0 : bn2 = BATnegcands(ci.seq, bi.count, bn);
2049 : }
2050 3513 : BBPreclaim(bn);
2051 3516 : bn = bn2;
2052 3516 : if (!bi.nonil) {
2053 16 : bn2 = BATselect(b, s, nil, NULL, true, true, false, false);
2054 16 : if (bn2 == NULL) {
2055 0 : BBPreclaim(bn);
2056 0 : return NULL;
2057 : }
2058 16 : BAT *bn3 = BATdiffcand(bn, bn2);
2059 16 : BBPreclaim(bn2);
2060 16 : BBPreclaim(bn);
2061 : bn = bn3;
2062 : }
2063 : }
2064 : } else {
2065 1284370 : assert(!havehash);
2066 1284370 : bn = scanselect(&bi, &ci, bn, tl, th, li, hi, equi, anti,
2067 : nil_matches, lval, hval, lnil, maximum,
2068 : &algo);
2069 : }
2070 1349584 : bat_iterator_end(&bi);
2071 1349844 : bat_iterator_end(&pbi);
2072 1349691 : BBPreclaim(pb);
2073 :
2074 1349815 : bn = virtualize(bn);
2075 1349469 : MT_thread_setalgorithm(algo);
2076 1349385 : TRC_DEBUG(ALGO, "b=" ALGOBATFMT ",s=" ALGOOPTBATFMT",anti=%s -> " ALGOOPTBATFMT
2077 : " %s (" LLFMT " usec)\n",
2078 : ALGOBATPAR(b), ALGOOPTBATPAR(s),
2079 : anti ? "true" : "false",
2080 : ALGOOPTBATPAR(bn), algo,
2081 : GDKusec() - t0);
2082 :
2083 : return bn;
2084 : }
2085 :
2086 : /* theta select
2087 : *
2088 : * Returns a BAT with the OID values of b for qualifying tuples. The
2089 : * return BAT is sorted (i.e. in the same order as the input BAT).
2090 : *
2091 : * If s is not NULL, it is a list of candidates. s must be sorted.
2092 : *
2093 : * Theta select returns all values from b which are less/greater than
2094 : * or (not) equal to the provided value depending on the value of op.
2095 : * Op is a string with one of the values: "=", "==", "<", "<=", ">",
2096 : * ">=", "<>", "!=" (the first two are equivalent and the last two are
2097 : * equivalent). Theta select never returns nils.
2098 : *
2099 : * If value is nil, the result is empty, except when using eq/ne as
2100 : * operator.
2101 : */
2102 : BAT *
2103 433310 : BATthetaselect(BAT *b, BAT *s, const void *val, const char *op)
2104 : {
2105 433310 : const void *nil;
2106 :
2107 433310 : BATcheck(b, NULL);
2108 433310 : BATcheck(val, NULL);
2109 433310 : BATcheck(op, NULL);
2110 :
2111 : /* eq/ne are can be used for "is" nil-handling */
2112 433310 : if (strcmp(op, "eq") == 0)
2113 16580 : return BATselect(b, s, val, NULL, true, true, false, true);
2114 416730 : if (strcmp(op, "ne") == 0)
2115 10974 : return BATselect(b, s, val, NULL, true, true, true, true);
2116 :
2117 405756 : nil = ATOMnilptr(b->ttype);
2118 405756 : if (ATOMcmp(b->ttype, val, nil) == 0)
2119 29 : return BATdense(0, 0, 0);
2120 405767 : if (op[0] == '=' && ((op[1] == '=' && op[2] == 0) || op[1] == 0)) {
2121 : /* "=" or "==" */
2122 326184 : return BATselect(b, s, val, NULL, true, true, false, false);
2123 : }
2124 79583 : if (op[0] == '!' && op[1] == '=' && op[2] == 0) {
2125 : /* "!=" (equivalent to "<>") */
2126 73916 : return BATselect(b, s, val, NULL, true, true, true, false);
2127 : }
2128 5667 : if (op[0] == '<') {
2129 679 : if (op[1] == 0) {
2130 : /* "<" */
2131 577 : return BATselect(b, s, nil, val, false, false, false, false);
2132 : }
2133 102 : if (op[1] == '=' && op[2] == 0) {
2134 : /* "<=" */
2135 96 : return BATselect(b, s, nil, val, false, true, false, false);
2136 : }
2137 6 : if (op[1] == '>' && op[2] == 0) {
2138 : /* "<>" (equivalent to "!=") */
2139 6 : return BATselect(b, s, val, NULL, true, true, true, false);
2140 : }
2141 : }
2142 4988 : if (op[0] == '>') {
2143 4988 : if (op[1] == 0) {
2144 : /* ">" */
2145 4452 : return BATselect(b, s, val, nil, false, false, false, false);
2146 : }
2147 536 : if (op[1] == '=' && op[2] == 0) {
2148 : /* ">=" */
2149 535 : return BATselect(b, s, val, nil, true, false, false, false);
2150 : }
2151 : }
2152 1 : GDKerror("unknown operator.\n");
2153 1 : return NULL;
2154 : }
|