Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : /*
18 : * BATproject returns a BAT aligned with the left input whose values
19 : * are the values from the right input that were referred to by the
20 : * OIDs in the left input.
21 : *
22 : * BATproject2 is similar, except instead of a single right input
23 : * there are two of which the second's hseqbase is equal to the first
24 : * hseqbase + its batCount.
25 : */
26 :
27 : #define project1_loop(TYPE) \
28 : static gdk_return \
29 : project1_##TYPE(BAT *restrict bn, BATiter *restrict li, \
30 : BATiter *restrict r1i, QryCtx *qry_ctx) \
31 : { \
32 : BUN lo; \
33 : const TYPE *restrict r1t; \
34 : TYPE *restrict bt; \
35 : oid r1seq, r1end; \
36 : \
37 : MT_thread_setalgorithm(__func__); \
38 : r1t = (const TYPE *) r1i->base; \
39 : bt = (TYPE *) Tloc(bn, 0); \
40 : r1seq = r1i->b->hseqbase; \
41 : r1end = r1seq + r1i->count; \
42 : if (BATtdensebi(li)) { \
43 : if (li->tseq < r1seq || \
44 : (li->tseq + li->count) >= r1end) { \
45 : GDKerror("does not match always\n"); \
46 : return GDK_FAIL; \
47 : } \
48 : oid off = li->tseq - r1seq; \
49 : r1t += off; \
50 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) \
51 : bt[lo] = r1t[lo]; \
52 : } else { \
53 : assert(li->type); \
54 : const oid *restrict ot = (const oid *) li->base; \
55 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) { \
56 : oid o = ot[lo]; \
57 : if (o < r1seq || o >= r1end) { \
58 : GDKerror("does not match always\n"); \
59 : return GDK_FAIL; \
60 : } \
61 : bt[lo] = r1t[o - r1seq]; \
62 : } \
63 : } \
64 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx)); \
65 : BATsetcount(bn, lo); \
66 : return GDK_SUCCEED; \
67 : }
68 :
69 : /* project type switch */
70 64514852 : project1_loop(bte)
71 31791734 : project1_loop(sht)
72 642947111 : project1_loop(int)
73 1564 : project1_loop(flt)
74 879802 : project1_loop(dbl)
75 195013888 : project1_loop(lng)
76 : #ifdef HAVE_HGE
77 77183479 : project1_loop(hge)
78 : #endif
79 150 : project1_loop(uuid)
80 :
81 : #define project_loop(TYPE) \
82 : static gdk_return \
83 : project_##TYPE(BAT *restrict bn, BATiter *restrict li, \
84 : struct canditer *restrict ci, \
85 : BATiter *restrict r1i, BATiter *restrict r2i, \
86 : QryCtx *qry_ctx) \
87 : { \
88 : BUN lo; \
89 : const TYPE *restrict r1t; \
90 : const TYPE *restrict r2t; \
91 : TYPE *restrict bt; \
92 : TYPE v; \
93 : oid r1seq, r1end; \
94 : oid r2seq, r2end; \
95 : \
96 : if (r2i == NULL && \
97 : (ci == NULL || (ci->tpe == cand_dense && BATtdensebi(li))) && \
98 : li->nonil && r1i->type && !BATtdensebi(r1i)) \
99 : return project1_##TYPE(bn, li, r1i, qry_ctx); \
100 : MT_thread_setalgorithm(__func__); \
101 : r1t = (const TYPE *) r1i->base; \
102 : bt = (TYPE *) Tloc(bn, 0); \
103 : r1seq = r1i->b->hseqbase; \
104 : r1end = r1seq + r1i->count; \
105 : if (r2i) { \
106 : r2t = (const TYPE *) r2i->base; \
107 : r2seq = r2i->b->hseqbase; \
108 : r2end = r2seq + r2i->count; \
109 : } else { \
110 : r2t = NULL; \
111 : r2seq = r2end = r1end; \
112 : } \
113 : if (ci) { \
114 : TIMEOUT_LOOP_IDX(lo, ci->ncand, qry_ctx) { \
115 : oid o = canditer_next(ci); \
116 : if (o < r1seq || o >= r2end) { \
117 : GDKerror("does not match always\n"); \
118 : return GDK_FAIL; \
119 : } \
120 : if (o < r1end) \
121 : v = r1t[o - r1seq]; \
122 : else \
123 : v = r2t[o - r2seq]; \
124 : bt[lo] = v; \
125 : } \
126 : } else if (BATtdensebi(li)) { \
127 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) { \
128 : oid o = li->tseq + lo; \
129 : if (o < r1seq || o >= r2end) { \
130 : GDKerror("does not match always\n"); \
131 : return GDK_FAIL; \
132 : } \
133 : if (o < r1end) \
134 : v = r1t[o - r1seq]; \
135 : else \
136 : v = r2t[o - r2seq]; \
137 : bt[lo] = v; \
138 : } \
139 : } else { \
140 : const oid *restrict ot = (const oid *) li->base; \
141 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) { \
142 : oid o = ot[lo]; \
143 : if (is_oid_nil(o)) { \
144 : bt[lo] = v = TYPE##_nil; \
145 : bn->tnil = true; \
146 : } else if (o < r1seq || o >= r2end) { \
147 : GDKerror("does not match always\n"); \
148 : return GDK_FAIL; \
149 : } else if (o < r1end) { \
150 : v = r1t[o - r1seq]; \
151 : bt[lo] = v; \
152 : } else { \
153 : v = r2t[o - r2seq]; \
154 : bt[lo] = v; \
155 : } \
156 : } \
157 : } \
158 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx)); \
159 : BATsetcount(bn, lo); \
160 : return GDK_SUCCEED; \
161 : }
162 :
163 :
164 : /* project type switch */
165 386051 : project_loop(bte)
166 938005 : project_loop(sht)
167 64951955 : project_loop(int)
168 112 : project_loop(flt)
169 5709 : project_loop(dbl)
170 24642 : project_loop(lng)
171 : #ifdef HAVE_HGE
172 1224146 : project_loop(hge)
173 : #endif
174 28 : project_loop(uuid)
175 :
176 : static gdk_return
177 17106 : project_oid(BAT *restrict bn, BATiter *restrict li,
178 : struct canditer *restrict lci,
179 : BATiter *restrict r1i, BATiter *restrict r2i, QryCtx *qry_ctx)
180 : {
181 17106 : BUN lo;
182 17106 : oid *restrict bt;
183 17106 : oid r1seq, r1end;
184 17106 : oid r2seq, r2end;
185 17106 : const oid *restrict r1t = NULL;
186 17106 : const oid *restrict r2t = NULL;
187 17106 : struct canditer r1ci = {0}, r2ci = {0};
188 :
189 17106 : if ((!lci || (lci->tpe == cand_dense && BATtdensebi(li))) && r1i->type && !BATtdensebi(r1i) && !r2i && li->nonil) {
190 7953 : if (sizeof(oid) == sizeof(lng))
191 7953 : return project1_lng(bn, li, r1i, qry_ctx);
192 : else
193 : return project1_int(bn, li, r1i, qry_ctx);
194 : }
195 9153 : MT_thread_setalgorithm(__func__);
196 9172 : if (complex_cand(r1i->b))
197 35 : canditer_init(&r1ci, NULL, r1i->b);
198 9137 : else if (!BATtdensebi(r1i))
199 15 : r1t = (const oid *) r1i->base;
200 9172 : r1seq = r1i->b->hseqbase;
201 9172 : r1end = r1seq + r1i->count;
202 9172 : if (r2i) {
203 0 : if (complex_cand(r2i->b))
204 0 : canditer_init(&r2ci, NULL, r2i->b);
205 0 : else if (!BATtdensebi(r2i))
206 0 : r2t = (const oid *) r2i->base;
207 0 : r2seq = r2i->b->hseqbase;
208 0 : r2end = r2seq + r2i->count;
209 : } else {
210 : r2seq = r2end = r1end;
211 : }
212 9172 : bt = (oid *) Tloc(bn, 0);
213 9172 : if (lci) {
214 61756159 : TIMEOUT_LOOP_IDX(lo, lci->ncand, qry_ctx) {
215 61752217 : oid o = canditer_next(lci);
216 61752217 : if (o < r1seq || o >= r2end) {
217 0 : goto nomatch;
218 : }
219 61752217 : if (o < r1end) {
220 61752217 : if (r1ci.s)
221 0 : bt[lo] = canditer_idx(&r1ci, o - r1seq);
222 61752217 : else if (r1t)
223 691 : bt[lo] = r1t[o - r1seq];
224 : else
225 61751526 : bt[lo] = o - r1seq + r1i->tseq;
226 : } else {
227 0 : if (r2ci.s)
228 0 : bt[lo] = canditer_idx(&r2ci, o - r2seq);
229 0 : else if (r2t)
230 0 : bt[lo] = r2t[o - r2seq];
231 : else
232 0 : bt[lo] = o - r2seq + r2i->tseq;
233 : }
234 : }
235 9053 : } else if (BATtdensebi(li)) {
236 0 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
237 0 : oid o = li->tseq + lo;
238 0 : if (o < r1seq || o >= r2end) {
239 0 : goto nomatch;
240 : }
241 0 : if (o < r1end) {
242 0 : if (r1ci.s)
243 0 : bt[lo] = canditer_idx(&r1ci, o - r1seq);
244 0 : else if (r1t)
245 0 : bt[lo] = r1t[o - r1seq];
246 : else
247 0 : bt[lo] = o - r1seq + r1i->tseq;
248 : } else {
249 0 : if (r2ci.s)
250 0 : bt[lo] = canditer_idx(&r2ci, o - r2seq);
251 0 : else if (r2t)
252 0 : bt[lo] = r2t[o - r2seq];
253 : else
254 0 : bt[lo] = o - r2seq + r2i->tseq;
255 : }
256 : }
257 : } else {
258 9053 : const oid *ot = (const oid *) li->base;
259 178326112 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
260 178296519 : oid o = ot[lo];
261 178296519 : if (is_oid_nil(o)) {
262 0 : bt[lo] = oid_nil;
263 0 : bn->tnonil = false;
264 0 : bn->tnil = true;
265 178296519 : } else if (o < r1seq || o >= r2end) {
266 0 : goto nomatch;
267 178296519 : } else if (o < r1end) {
268 178296519 : if (r1ci.s)
269 6432 : bt[lo] = canditer_idx(&r1ci, o - r1seq);
270 178290087 : else if (r1t)
271 0 : bt[lo] = r1t[o - r1seq];
272 : else
273 178290087 : bt[lo] = o - r1seq + r1i->tseq;
274 : } else {
275 0 : if (r2ci.s)
276 0 : bt[lo] = canditer_idx(&r2ci, o - r2seq);
277 0 : else if (r2t)
278 0 : bt[lo] = r2t[o - r2seq];
279 : else
280 0 : bt[lo] = o - r2seq + r2i->tseq;
281 : }
282 : }
283 : }
284 9193 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
285 9181 : BATsetcount(bn, lo);
286 9181 : return GDK_SUCCEED;
287 0 : nomatch:
288 0 : GDKerror("does not match always\n");
289 0 : return GDK_FAIL;
290 : }
291 :
292 : static gdk_return
293 2788 : project_any(BAT *restrict bn, BATiter *restrict li,
294 : struct canditer *restrict ci,
295 : BATiter *restrict r1i, BATiter *restrict r2i, QryCtx *qry_ctx)
296 : {
297 2788 : BUN lo;
298 2788 : const void *nil = ATOMnilptr(r1i->type);
299 2788 : const void *v;
300 2788 : oid r1seq, r1end;
301 2788 : oid r2seq, r2end;
302 :
303 2788 : MT_thread_setalgorithm(__func__);
304 2790 : r1seq = r1i->b->hseqbase;
305 2790 : r1end = r1seq + r1i->count;
306 2790 : if (r2i) {
307 0 : r2seq = r2i->b->hseqbase;
308 0 : r2end = r2seq + r2i->count;
309 : } else {
310 : r2seq = r2end = r1end;
311 : }
312 2790 : if (ci) {
313 5 : TIMEOUT_LOOP_IDX(lo, ci->ncand, qry_ctx) {
314 0 : oid o = canditer_next(ci);
315 0 : if (o < r1seq || o >= r2end) {
316 0 : GDKerror("does not match always\n");
317 0 : return GDK_FAIL;
318 : }
319 0 : if (o < r1end)
320 0 : v = BUNtail(*r1i, o - r1seq);
321 : else
322 0 : v = BUNtail(*r2i, o - r2seq);
323 0 : if (tfastins_nocheck(bn, lo, v) != GDK_SUCCEED) {
324 : return GDK_FAIL;
325 : }
326 : }
327 2785 : } else if (BATtdensebi(li)) {
328 0 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
329 0 : oid o = li->tseq + lo;
330 0 : if (o < r1seq || o >= r2end) {
331 0 : GDKerror("does not match always\n");
332 0 : return GDK_FAIL;
333 : }
334 0 : if (o < r1end)
335 0 : v = BUNtail(*r1i, o - r1seq);
336 : else
337 0 : v = BUNtail(*r2i, o - r2seq);
338 0 : if (tfastins_nocheck(bn, lo, v) != GDK_SUCCEED) {
339 : return GDK_FAIL;
340 : }
341 : }
342 : } else {
343 2785 : const oid *restrict ot = (const oid *) li->base;
344 :
345 61437 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
346 55857 : oid o = ot[lo];
347 55857 : if (is_oid_nil(o)) {
348 0 : v = nil;
349 0 : bn->tnil = true;
350 55857 : } else if (o < r1seq || o >= r2end) {
351 0 : GDKerror("does not match always\n");
352 0 : return GDK_FAIL;
353 55857 : } else if (o < r1end) {
354 55857 : v = BUNtail(*r1i, o - r1seq);
355 : } else {
356 0 : v = BUNtail(*r2i, o - r2seq);
357 : }
358 55857 : if (tfastins_nocheck(bn, lo, v) != GDK_SUCCEED) {
359 : return GDK_FAIL;
360 : }
361 : }
362 : }
363 2800 : TIMEOUT_CHECK(qry_ctx, TIMEOUT_HANDLER(GDK_FAIL, qry_ctx));
364 2795 : BATsetcount(bn, lo);
365 2795 : bn->theap->dirty = true;
366 2795 : return GDK_SUCCEED;
367 : }
368 :
369 : static BAT *
370 0 : project_str(BATiter *restrict li, struct canditer *restrict ci, int tpe,
371 : BATiter *restrict r1i, BATiter *restrict r2i,
372 : QryCtx *qry_ctx, lng t0)
373 : {
374 0 : BAT *bn;
375 0 : BUN lo;
376 0 : oid r1seq, r1end;
377 0 : oid r2seq, r2end;
378 0 : BUN h1off;
379 0 : BUN off;
380 0 : oid seq;
381 0 : var_t v;
382 0 : BATiter *ri;
383 :
384 0 : if ((bn = COLnew(li->b->hseqbase, tpe, ci ? ci->ncand : li->count,
385 : TRANSIENT)) == NULL)
386 : return NULL;
387 :
388 0 : v = (var_t) r1i->vhfree;
389 0 : if (r1i->vh == r2i->vh) {
390 0 : h1off = 0;
391 0 : assert(bn->tvheap->parentid == bn->batCacheid);
392 0 : HEAPdecref(bn->tvheap, true);
393 0 : HEAPincref(r1i->vh);
394 0 : bn->tvheap = r1i->vh;
395 0 : assert(bn->tvheap->parentid != bn->batCacheid);
396 0 : BBPretain(bn->tvheap->parentid);
397 : } else {
398 0 : v = (v + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
399 0 : h1off = (BUN) v;
400 0 : v += ((var_t) r2i->vhfree + GDK_VARALIGN - 1) & ~(GDK_VARALIGN - 1);
401 0 : if (HEAPextend(bn->tvheap, v, false) != GDK_SUCCEED) {
402 0 : BBPreclaim(bn);
403 0 : return NULL;
404 : }
405 0 : memcpy(bn->tvheap->base, r1i->vh->base, r1i->vhfree);
406 : #ifndef NDEBUG
407 0 : if (h1off > r1i->vhfree)
408 0 : memset(bn->tvheap->base + r1i->vhfree, 0, h1off - r1i->vhfree);
409 : #endif
410 0 : memcpy(bn->tvheap->base + h1off, r2i->vh->base, r2i->vhfree);
411 0 : bn->tvheap->free = h1off + r2i->vhfree;
412 0 : bn->tvheap->dirty = true;
413 : }
414 :
415 0 : if (v >= ((var_t) 1 << (8 << bn->tshift)) &&
416 0 : GDKupgradevarheap(bn, v, false, 0) != GDK_SUCCEED) {
417 0 : BBPreclaim(bn);
418 0 : return NULL;
419 : }
420 :
421 0 : r1seq = r1i->b->hseqbase;
422 0 : r1end = r1seq + r1i->count;
423 0 : r2seq = r2i->b->hseqbase;
424 0 : r2end = r2seq + r2i->count;
425 0 : if (ci) {
426 0 : TIMEOUT_LOOP_IDX(lo, ci->ncand, qry_ctx) {
427 0 : oid o = canditer_next(ci);
428 0 : if (o < r1seq || o >= r2end) {
429 0 : GDKerror("does not match always\n");
430 0 : BBPreclaim(bn);
431 0 : return NULL;
432 : }
433 0 : if (o < r1end) {
434 : ri = r1i;
435 : off = 0;
436 : seq = r1seq;
437 : } else {
438 0 : ri = r2i;
439 0 : off = h1off;
440 0 : seq = r2seq;
441 : }
442 0 : switch (ri->width) {
443 0 : case 1:
444 0 : v = (var_t) ((uint8_t *) ri->base)[o - seq] + GDK_VAROFFSET;
445 0 : break;
446 0 : case 2:
447 0 : v = (var_t) ((uint16_t *) ri->base)[o - seq] + GDK_VAROFFSET;
448 0 : break;
449 0 : case 4:
450 0 : v = (var_t) ((uint32_t *) ri->base)[o - seq];
451 0 : break;
452 0 : case 8:
453 0 : v = (var_t) ((uint64_t *) ri->base)[o - seq];
454 0 : break;
455 : }
456 0 : v += off;
457 0 : switch (bn->twidth) {
458 0 : case 1:
459 0 : ((uint8_t *) bn->theap->base)[lo] = (uint8_t) (v - GDK_VAROFFSET);
460 0 : break;
461 0 : case 2:
462 0 : ((uint16_t *) bn->theap->base)[lo] = (uint16_t) (v - GDK_VAROFFSET);
463 0 : break;
464 0 : case 4:
465 0 : ((uint32_t *) bn->theap->base)[lo] = (uint32_t) v;
466 0 : break;
467 0 : case 8:
468 0 : ((uint64_t *) bn->theap->base)[lo] = (uint64_t) v;
469 0 : break;
470 : }
471 : }
472 0 : } else if (BATtdensebi(li)) {
473 0 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
474 0 : oid o = li->tseq + lo;
475 0 : if (o < r1seq || o >= r2end) {
476 0 : GDKerror("does not match always\n");
477 0 : BBPreclaim(bn);
478 0 : return NULL;
479 : }
480 0 : if (o < r1end) {
481 : ri = r1i;
482 : off = 0;
483 : seq = r1seq;
484 : } else {
485 0 : ri = r2i;
486 0 : off = h1off;
487 0 : seq = r2seq;
488 : }
489 0 : switch (ri->width) {
490 0 : case 1:
491 0 : v = (var_t) ((uint8_t *) ri->base)[o - seq] + GDK_VAROFFSET;
492 0 : break;
493 0 : case 2:
494 0 : v = (var_t) ((uint16_t *) ri->base)[o - seq] + GDK_VAROFFSET;
495 0 : break;
496 0 : case 4:
497 0 : v = (var_t) ((uint32_t *) ri->base)[o - seq];
498 0 : break;
499 0 : case 8:
500 0 : v = (var_t) ((uint64_t *) ri->base)[o - seq];
501 0 : break;
502 : }
503 0 : v += off;
504 0 : switch (bn->twidth) {
505 0 : case 1:
506 0 : ((uint8_t *) bn->theap->base)[lo] = (uint8_t) (v - GDK_VAROFFSET);
507 0 : break;
508 0 : case 2:
509 0 : ((uint16_t *) bn->theap->base)[lo] = (uint16_t) (v - GDK_VAROFFSET);
510 0 : break;
511 0 : case 4:
512 0 : ((uint32_t *) bn->theap->base)[lo] = (uint32_t) v;
513 0 : break;
514 0 : case 8:
515 0 : ((uint64_t *) bn->theap->base)[lo] = (uint64_t) v;
516 0 : break;
517 : }
518 : }
519 : } else {
520 0 : const oid *restrict ot = (const oid *) li->base;
521 0 : TIMEOUT_LOOP_IDX(lo, li->count, qry_ctx) {
522 0 : oid o = ot[lo];
523 0 : if (o < r1seq || o >= r2end) {
524 0 : GDKerror("does not match always\n");
525 0 : BBPreclaim(bn);
526 0 : return NULL;
527 : }
528 0 : if (o < r1end) {
529 : ri = r1i;
530 : off = 0;
531 : seq = r1seq;
532 : } else {
533 0 : ri = r2i;
534 0 : off = h1off;
535 0 : seq = r2seq;
536 : }
537 0 : switch (ri->width) {
538 0 : case 1:
539 0 : v = (var_t) ((uint8_t *) ri->base)[o - seq] + GDK_VAROFFSET;
540 0 : break;
541 0 : case 2:
542 0 : v = (var_t) ((uint16_t *) ri->base)[o - seq] + GDK_VAROFFSET;
543 0 : break;
544 0 : case 4:
545 0 : v = (var_t) ((uint32_t *) ri->base)[o - seq];
546 0 : break;
547 0 : case 8:
548 0 : v = (var_t) ((uint64_t *) ri->base)[o - seq];
549 0 : break;
550 : }
551 0 : v += off;
552 0 : switch (bn->twidth) {
553 0 : case 1:
554 0 : ((uint8_t *) bn->theap->base)[lo] = (uint8_t) (v - GDK_VAROFFSET);
555 0 : break;
556 0 : case 2:
557 0 : ((uint16_t *) bn->theap->base)[lo] = (uint16_t) (v - GDK_VAROFFSET);
558 0 : break;
559 0 : case 4:
560 0 : ((uint32_t *) bn->theap->base)[lo] = (uint32_t) v;
561 0 : break;
562 0 : case 8:
563 0 : ((uint64_t *) bn->theap->base)[lo] = (uint64_t) v;
564 0 : break;
565 : }
566 : }
567 : }
568 0 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bailout, qry_ctx));
569 0 : BATsetcount(bn, lo);
570 0 : bn->tsorted = bn->trevsorted = false;
571 0 : bn->tnil = false;
572 0 : bn->tnonil = r1i->nonil & r2i->nonil;
573 0 : bn->tkey = false;
574 0 : bn->tunique_est =
575 0 : MIN(li->b->tunique_est?li->b->tunique_est:BATcount(li->b),
576 : r1i->b->tunique_est?r1i->b->tunique_est:BATcount(r1i->b));
577 0 : TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOBATFMT
578 : " -> " ALGOBATFMT "%s " LLFMT "us\n",
579 : ALGOBATPAR(li->b), ALGOBATPAR(r1i->b), ALGOBATPAR(r2i->b),
580 : ALGOBATPAR(bn),
581 : bn && bn->ttype == TYPE_str && bn->tvheap == r1i->vh ? " sharing string heap" : "",
582 : GDKusec() - t0);
583 : return bn;
584 0 : bailout:
585 0 : BBPreclaim(bn);
586 0 : return NULL;
587 : }
588 :
589 : BAT *
590 2761280 : BATproject2(BAT *restrict l, BAT *restrict r1, BAT *restrict r2)
591 : {
592 2761280 : BAT *bn = NULL;
593 2761280 : BAT *or1 = r1, *or2 = r2, *ol = l;
594 2761280 : oid lo, hi;
595 2761280 : gdk_return res;
596 2761280 : int tpe = ATOMtype(r1->ttype), otpe = tpe;
597 2761280 : bool stringtrick = false;
598 2761280 : struct canditer ci, *lci = NULL;
599 2761280 : const char *msg = "";
600 2761280 : lng t0 = 0;
601 2761280 : BATiter li = bat_iterator(l);
602 2771617 : BATiter r1i = bat_iterator(r1);
603 2772601 : BATiter r2i = bat_iterator(r2);
604 2770217 : BUN lcount = li.count;
605 :
606 2770217 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
607 :
608 2770217 : assert(ATOMtype(li.type) == TYPE_oid || li.type == TYPE_msk);
609 2770217 : assert(r2 == NULL || tpe == ATOMtype(r2i.type));
610 0 : assert(r2 == NULL || r1->hseqbase + r1i.count == r2->hseqbase);
611 :
612 2770217 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
613 :
614 2765496 : if (r2 && r1i.count == 0) {
615 : /* unlikely special case: r1 is empty, so we just have r2 */
616 0 : r1 = r2;
617 0 : r2 = NULL;
618 0 : bat_iterator_end(&r1i);
619 0 : r1i = r2i;
620 0 : r2i = bat_iterator(NULL);
621 : }
622 :
623 2765496 : if (BATtdensebi(&li) && lcount > 0) {
624 812480 : lo = l->tseqbase;
625 812480 : hi = l->tseqbase + lcount;
626 812480 : if (lo >= r1->hseqbase && hi <= r1->hseqbase + r1i.count) {
627 812480 : bn = BATslice(r1, lo - r1->hseqbase, hi - r1->hseqbase);
628 807139 : BAThseqbase(bn, l->hseqbase);
629 805075 : msg = " (slice)";
630 805075 : goto doreturn;
631 : }
632 0 : if (lo < r1->hseqbase || r2 == NULL || hi > r2->hseqbase + r2i.count) {
633 0 : GDKerror("does not match always\n");
634 0 : bat_iterator_end(&li);
635 0 : bat_iterator_end(&r1i);
636 0 : bat_iterator_end(&r2i);
637 0 : return NULL;
638 : }
639 0 : if (lo >= r2->hseqbase) {
640 0 : bn = BATslice(r2, lo - r2->hseqbase, hi - r2->hseqbase);
641 0 : BAThseqbase(bn, l->hseqbase);
642 0 : msg = " (slice2)";
643 0 : goto doreturn;
644 : }
645 : }
646 1953016 : if (complex_cand(l)) {
647 : /* l is candidate list with exceptions or is a bitmask */
648 25387 : assert(li.type == TYPE_msk || !is_oid_nil(l->tseqbase));
649 25387 : canditer_init(&ci, NULL, l);
650 25389 : lcount = ci.ncand;
651 25389 : lci = &ci;
652 1927629 : } else if (li.type == TYPE_msk) {
653 0 : l = BATunmask(l);
654 0 : if (l == NULL)
655 0 : goto doreturn;
656 0 : if (complex_cand(l)) {
657 0 : canditer_init(&ci, NULL, l);
658 0 : lcount = ci.ncand;
659 0 : lci = &ci;
660 : }
661 : }
662 1953018 : if (lcount == 0 ||
663 25368 : (li.type == TYPE_void && is_oid_nil(l->tseqbase)) ||
664 226775 : (r1i.type == TYPE_void && is_oid_nil(r1->tseqbase) &&
665 0 : (r2 == NULL ||
666 0 : (r2i.type == TYPE_void && is_oid_nil(r2->tseqbase))))) {
667 : /* trivial: all values are nil (includes no entries at all) */
668 1726243 : const void *nil = r1i.type == TYPE_msk ? &oid_nil : ATOMnilptr(r1i.type);
669 :
670 3443001 : bn = BATconstant(l->hseqbase, r1i.type == TYPE_oid || r1i.type == TYPE_msk ? TYPE_void : r1i.type,
671 : nil, lcount, TRANSIENT);
672 1727733 : if (bn != NULL &&
673 1727733 : ATOMtype(bn->ttype) == TYPE_oid &&
674 253757 : BATcount(bn) == 0) {
675 253991 : BATtseqbase(bn, 0);
676 : }
677 1727171 : msg = " (constant)";
678 1727171 : goto doreturn;
679 : }
680 :
681 226775 : if (ATOMstorage(tpe) == TYPE_str) {
682 33721 : if (li.nonil &&
683 33719 : r2 == NULL &&
684 33719 : (r1i.count == 0 ||
685 33721 : lcount > (r1i.count >> 3) ||
686 6442 : r1i.restricted == BAT_READ)) {
687 : /* insert strings as ints, we need to copy the
688 : * string heap whole sale; we can't do this if
689 : * there are nils in the left column, and we
690 : * won't do it if the left is much smaller than
691 : * the right and the right is writable (meaning
692 : * we have to actually copy the right string
693 : * heap) */
694 30942 : tpe = r1i.width == 1 ? TYPE_bte : (r1i.width == 2 ? TYPE_sht : (r1i.width == 4 ? TYPE_int : TYPE_lng));
695 : stringtrick = true;
696 2779 : } else if (li.nonil &&
697 0 : r2 != NULL &&
698 0 : (r1i.vh == r2i.vh ||
699 0 : (!GDK_ELIMDOUBLES(r1i.vh) /* && size tests */))) {
700 : /* r1 and r2 may explicitly share their vheap,
701 : * if they do, the result will also share the
702 : * vheap; this also means that for this case we
703 : * don't care about duplicate elimination: it
704 : * will remain the same */
705 0 : bn = project_str(&li, lci, tpe, &r1i, &r2i, qry_ctx, t0);
706 0 : bat_iterator_end(&li);
707 0 : bat_iterator_end(&r1i);
708 0 : bat_iterator_end(&r2i);
709 0 : return bn;
710 : }
711 193054 : } else if (ATOMvarsized(tpe) &&
712 152 : li.nonil &&
713 152 : r2 == NULL &&
714 152 : (r1i.count == 0 ||
715 152 : lcount > (r1i.count >> 3) ||
716 0 : r1i.restricted == BAT_READ)) {
717 152 : tpe = r1i.width == 4 ? TYPE_int : TYPE_lng;
718 : stringtrick = true;
719 192902 : } else if (tpe == TYPE_msk || mask_cand(r1)) {
720 14 : r1 = BATunmask(r1);
721 14 : if (r1 == NULL)
722 0 : goto doreturn;
723 14 : if (r2) {
724 0 : r2 = BATunmask(r2);
725 0 : if (r2 == NULL)
726 0 : goto doreturn;
727 : }
728 14 : tpe = TYPE_oid;
729 14 : bat_iterator_end(&r1i);
730 14 : bat_iterator_end(&r2i);
731 14 : r1i = bat_iterator(r1);
732 14 : r2i = bat_iterator(r2);
733 : }
734 444733 : bn = COLnew2(l->hseqbase, ATOMtype(r1i.type), lcount, TRANSIENT, stringtrick ? r1i.width : 0);
735 225455 : if (bn == NULL) {
736 0 : goto doreturn;
737 : }
738 225455 : bn->tnil = false;
739 225455 : if (r2) {
740 0 : bn->tnonil = li.nonil & r1i.nonil & r2i.nonil;
741 0 : bn->tsorted = li.count <= 1;
742 0 : bn->trevsorted = li.count <= 1;
743 0 : bn->tkey = li.count <= 1;
744 : } else {
745 226580 : bn->tnonil = li.nonil & r1i.nonil;
746 453160 : bn->tsorted = li.count <= 1
747 225776 : || (li.sorted & r1i.sorted)
748 165333 : || (li.revsorted & r1i.revsorted & li.nonil)
749 390942 : || (r1i.count <= 1 && li.nonil);
750 453160 : bn->trevsorted = li.count <= 1
751 226041 : || (li.sorted & r1i.revsorted & li.nonil)
752 206317 : || (li.revsorted & r1i.sorted)
753 431417 : || (r1i.count <= 1 && li.nonil);
754 254006 : bn->tkey = li.count <= 1 || (li.key & r1i.key);
755 : }
756 :
757 225455 : if (!stringtrick && tpe != TYPE_oid)
758 178065 : tpe = ATOMbasetype(tpe);
759 225455 : switch (tpe) {
760 23249 : case TYPE_bte:
761 23249 : res = project_bte(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
762 23249 : break;
763 24945 : case TYPE_sht:
764 24945 : res = project_sht(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
765 24945 : break;
766 142872 : case TYPE_int:
767 142872 : res = project_int(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
768 142872 : break;
769 97 : case TYPE_flt:
770 97 : res = project_flt(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
771 97 : break;
772 5270 : case TYPE_dbl:
773 5270 : res = project_dbl(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
774 5270 : break;
775 8560 : case TYPE_lng:
776 8560 : res = project_lng(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
777 8560 : break;
778 : #ifdef HAVE_HGE
779 530 : case TYPE_hge:
780 530 : res = project_hge(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
781 530 : break;
782 : #endif
783 17116 : case TYPE_oid:
784 17116 : res = project_oid(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
785 17116 : break;
786 25 : case TYPE_uuid:
787 25 : res = project_uuid(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
788 25 : break;
789 2791 : default:
790 2791 : res = project_any(bn, &li, lci, &r1i, r2 ? &r2i : NULL, qry_ctx);
791 2791 : break;
792 : }
793 :
794 226519 : if (res != GDK_SUCCEED)
795 828 : goto bailout;
796 :
797 : /* handle string trick */
798 225691 : if (stringtrick) {
799 30980 : assert(r1i.vh);
800 30980 : if (r1i.restricted == BAT_READ || VIEWvtparent(r1)) {
801 : /* really share string heap */
802 27144 : assert(r1i.vh->parentid > 0);
803 : /* there is no file, so we don't need to remove it */
804 27144 : HEAPdecref(bn->tvheap, false);
805 27263 : bn->tvheap = r1i.vh;
806 27263 : HEAPincref(r1i.vh);
807 27258 : assert(bn->tvheap->parentid != bn->batCacheid);
808 27258 : BBPretain(bn->tvheap->parentid);
809 : } else {
810 : /* make copy of string heap */
811 3836 : bn->tvheap->parentid = bn->batCacheid;
812 3836 : bn->tvheap->farmid = BBPselectfarm(bn->batRole, otpe, varheap);
813 3836 : strconcat_len(bn->tvheap->filename,
814 : sizeof(bn->tvheap->filename),
815 3836 : BBP_physical(bn->batCacheid), ".theap",
816 : NULL);
817 3847 : if (HEAPcopy(bn->tvheap, r1i.vh, 0) != GDK_SUCCEED)
818 0 : goto bailout;
819 : }
820 31094 : bn->ttype = r1i.type;
821 31094 : bn->twidth = r1i.width;
822 31094 : bn->tshift = r1i.shift;
823 31094 : bn->tascii = r1i.ascii;
824 : }
825 :
826 451610 : bn->tunique_est =
827 225805 : MIN(li.b->tunique_est?li.b->tunique_est:BATcount(li.b),
828 : r1i.b->tunique_est?r1i.b->tunique_est:BATcount(r1i.b));
829 225805 : if (!BATtdensebi(&r1i) || (r2 && !BATtdensebi(&r2i)))
830 216691 : BATtseqbase(bn, oid_nil);
831 :
832 9114 : doreturn:
833 2756548 : TRC_DEBUG(ALGO, "l=" ALGOBATFMT " r1=" ALGOBATFMT " r2=" ALGOOPTBATFMT
834 : " -> " ALGOOPTBATFMT "%s%s " LLFMT "us\n",
835 : ALGOBATPAR(l), ALGOBATPAR(or1), ALGOOPTBATPAR(or2),
836 : ALGOOPTBATPAR(bn),
837 : bn && bn->ttype == TYPE_str && bn->tvheap == r1i.vh ? " sharing string heap" : "",
838 : msg, GDKusec() - t0);
839 2756548 : bat_iterator_end(&li);
840 2765966 : bat_iterator_end(&r1i);
841 2771107 : bat_iterator_end(&r2i);
842 2746269 : if (l != ol)
843 0 : BBPreclaim(l);
844 2756475 : if (r1 != or1)
845 14 : BBPreclaim(r1);
846 2764397 : if (r2 != or2)
847 0 : BBPreclaim(r2);
848 : return bn;
849 :
850 828 : bailout:
851 828 : BBPreclaim(bn);
852 0 : bn = NULL;
853 0 : goto doreturn;
854 : }
855 :
856 : BAT *
857 761471 : BATproject(BAT *restrict l, BAT *restrict r)
858 : {
859 761471 : return BATproject2(l, r, NULL);
860 : }
861 :
862 : /* Calculate a chain of BATproject calls.
863 : * The argument is a NULL-terminated array of BAT pointers.
864 : * This function is equivalent (apart from reference counting) to a
865 : * sequence of calls
866 : * bn = BATproject(bats[0], bats[1]);
867 : * bn = BATproject(bn, bats[2]);
868 : * ...
869 : * bn = BATproject(bn, bats[n-1]);
870 : * return bn;
871 : * where none of the intermediates are actually produced (and bats[n]==NULL).
872 : * Note that all BATs except the last must have type oid/void or msk.
873 : *
874 : * We assume that all but the last BAT in the chain is temporary and
875 : * therefore there is no chance that another thread will modify it while
876 : * we're busy. This is not necessarily the case for that last BAT, so
877 : * it uses a BAT iterator.
878 : */
879 : BAT *
880 629685 : BATprojectchain(BAT **bats)
881 : {
882 629685 : struct ba {
883 : BAT *b;
884 : oid hlo;
885 : oid hhi;
886 : BUN cnt;
887 : oid *t;
888 : struct canditer ci; /* used if .ci.s != NULL */
889 : } *ba;
890 629685 : BAT **tobedeleted = NULL;
891 629685 : int ndelete = 0;
892 629685 : int n, i;
893 629685 : BAT *b = NULL, *bn = NULL;
894 629685 : BATiter bi;
895 629685 : bool allnil = false;
896 629685 : bool issorted = true;
897 629685 : bool nonil = true;
898 629685 : bool stringtrick = false;
899 629685 : const void *nil;
900 629685 : int tpe;
901 629685 : lng t0 = 0;
902 :
903 629685 : TRC_DEBUG_IF(ALGO) t0 = GDKusec();
904 :
905 629685 : QryCtx *qry_ctx = MT_thread_get_qry_ctx();
906 :
907 : /* count number of participating BATs and allocate some
908 : * temporary work space */
909 7249194 : for (n = 0; bats[n]; n++) {
910 5991662 : b = bats[n];
911 5991662 : ndelete += (b->ttype == TYPE_msk || mask_cand(b));
912 5991662 : TRC_DEBUG(ALGO, "arg %d: " ALGOBATFMT "\n",
913 : n + 1, ALGOBATPAR(b));
914 : }
915 627847 : if (n == 0) {
916 0 : GDKerror("must have BAT arguments\n");
917 0 : return NULL;
918 : }
919 627847 : if (n == 1) {
920 0 : bn = COLcopy(b, b->ttype, true, TRANSIENT);
921 0 : TRC_DEBUG(ALGO, "single bat: copy -> " ALGOOPTBATFMT
922 : " " LLFMT " usec\n",
923 : ALGOOPTBATPAR(bn), GDKusec() - t0);
924 0 : return bn;
925 : }
926 :
927 627847 : if (ndelete > 0 &&
928 9890 : (tobedeleted = GDKmalloc(sizeof(BAT *) * ndelete)) == NULL)
929 : return NULL;
930 627850 : ba = GDKmalloc(sizeof(*ba) * n);
931 629452 : if (ba == NULL) {
932 0 : GDKfree(tobedeleted);
933 0 : return NULL;
934 : }
935 :
936 : ndelete = 0;
937 6531205 : for (n = 0, i = 0; bats[n]; n++) {
938 5902094 : b = bats[n];
939 5902094 : if (b->ttype == TYPE_msk || mask_cand(b)) {
940 6257 : if ((b = BATunmask(b)) == NULL) {
941 0 : goto bunins_failed;
942 : }
943 9809 : tobedeleted[ndelete++] = b;
944 : }
945 5905646 : if (bats[n+1] && BATtdense(b) && b->hseqbase == b->tseqbase && b->tseqbase == bats[n+1]->hseqbase && BATcount(b) == BATcount(bats[n+1]))
946 4687526 : continue; /* skip dense bat */
947 1218120 : ba[i] = (struct ba) {
948 : .b = b,
949 1218120 : .hlo = b->hseqbase,
950 1218120 : .hhi = b->hseqbase + b->batCount,
951 : .cnt = b->batCount,
952 1218120 : .t = (oid *) Tloc(b, 0),
953 : };
954 1218120 : allnil |= b->ttype == TYPE_void && is_oid_nil(b->tseqbase);
955 1218120 : issorted &= b->tsorted;
956 1218120 : if (bats[n + 1])
957 588136 : nonil &= b->tnonil;
958 1218120 : if (b->tnonil && b->tkey && b->tsorted &&
959 689833 : ATOMtype(b->ttype) == TYPE_oid) {
960 554411 : canditer_init(&ba[i].ci, NULL, b);
961 : }
962 1214227 : i++;
963 : }
964 629111 : n = i;
965 629111 : if (i<=2) {
966 569746 : if (i == 1) {
967 102891 : bn = ba[0].b;
968 102891 : BBPfix(bn->batCacheid);
969 : } else {
970 466855 : bn = BATproject(ba[0].b, ba[1].b);
971 : }
972 569246 : while (ndelete-- > 0)
973 292 : BBPunfix(tobedeleted[ndelete]->batCacheid);
974 568954 : GDKfree(tobedeleted);
975 565772 : GDKfree(ba);
976 565772 : return bn;
977 : }
978 : /* b is last BAT in bats array */
979 59365 : tpe = ATOMtype(b->ttype);
980 59365 : nil = ATOMnilptr(tpe);
981 59365 : if (allnil || ba[0].cnt == 0) {
982 19505 : bn = BATconstant(ba[0].hlo, tpe == TYPE_oid ? TYPE_void : tpe,
983 : nil, ba[0].cnt, TRANSIENT);
984 19505 : while (ndelete-- > 0)
985 27290 : BBPreclaim(tobedeleted[ndelete]);
986 19658 : GDKfree(tobedeleted);
987 19609 : GDKfree(ba);
988 19704 : TRC_DEBUG(ALGO, "with %d bats: nil/empty -> " ALGOOPTBATFMT
989 : " " LLFMT " usec\n",
990 : n, ALGOOPTBATPAR(bn), GDKusec() - t0);
991 19704 : return bn;
992 : }
993 :
994 39860 : bi = bat_iterator(b);
995 39826 : if (nonil && ATOMstorage(tpe) == TYPE_str && bi.restricted == BAT_READ) {
996 8730 : stringtrick = true;
997 8730 : bn = COLnew2(ba[0].hlo, tpe, ba[0].cnt, TRANSIENT, bi.width);
998 8726 : if (bn && bn->tvheap) {
999 : /* no need to remove any files since they were
1000 : * never created for this bat */
1001 8727 : HEAPdecref(bn->tvheap, false);
1002 8738 : bn->tvheap = NULL;
1003 : }
1004 8737 : tpe = bi.width == 1 ? TYPE_bte : (bi.width == 2 ? TYPE_sht : (bi.width == 4 ? TYPE_int : TYPE_lng));
1005 : } else {
1006 31096 : bn = COLnew(ba[0].hlo, tpe, ba[0].cnt, TRANSIENT);
1007 : }
1008 39747 : if (bn == NULL) {
1009 0 : bat_iterator_end(&bi);
1010 0 : goto bunins_failed;
1011 : }
1012 :
1013 39747 : assert(ba[n - 1].b == b);
1014 39747 : ba[n - 1].t = bi.base;
1015 39747 : if (ATOMtype(b->ttype) == TYPE_oid) {
1016 : /* oid all the way */
1017 671 : oid *d = (oid *) Tloc(bn, 0);
1018 671 : assert(!stringtrick);
1019 19407403 : TIMEOUT_LOOP_IDX_DECL(p, ba[0].cnt, qry_ctx) {
1020 19403572 : oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p];
1021 80389676 : for (int i = 1; i < n; i++) {
1022 60986100 : if (is_oid_nil(o)) {
1023 0 : bn->tnil = true;
1024 0 : break;
1025 : }
1026 60986100 : if (o < ba[i].hlo || o >= ba[i].hhi) {
1027 0 : GDKerror("does not match always\n");
1028 0 : bat_iterator_end(&bi);
1029 0 : goto bunins_failed;
1030 : }
1031 60986100 : o -= ba[i].hlo;
1032 60986100 : o = ba[i].ci.s ?
1033 32180781 : (ba[i].ci.tpe == cand_dense) ?
1034 32180781 : canditer_idx_dense(&ba[i].ci, o) :
1035 92254282 : canditer_idx(&ba[i].ci, o) : ba[i].t[o];
1036 : }
1037 19403576 : *d++ = o;
1038 : }
1039 39076 : } else if (!ATOMvarsized(tpe)) {
1040 38826 : const void *v;
1041 38826 : char *d = Tloc(bn, 0);
1042 :
1043 38826 : bn->tnil = false;
1044 38826 : n--; /* stop one before the end, also ba[n] is last */
1045 144033139 : TIMEOUT_LOOP_IDX_DECL(p, ba[0].cnt, qry_ctx) {
1046 143902079 : oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p];
1047 :
1048 335204734 : for (int i = 1; i < n; i++) {
1049 191280486 : if (is_oid_nil(o)) {
1050 15 : bn->tnil = true;
1051 15 : break;
1052 : }
1053 191280471 : if (o < ba[i].hlo || o >= ba[i].hhi) {
1054 0 : GDKerror("does not match always\n");
1055 0 : bat_iterator_end(&bi);
1056 0 : goto bunins_failed;
1057 : }
1058 191280471 : o -= ba[i].hlo;
1059 191280471 : o = ba[i].ci.s ?
1060 67790677 : (ba[i].ci.tpe == cand_dense) ?
1061 67790677 : canditer_idx_dense(&ba[i].ci, o) :
1062 248464738 : canditer_idx(&ba[i].ci, o) : ba[i].t[o];
1063 : }
1064 143924263 : if (is_oid_nil(o)) {
1065 15 : assert(!stringtrick);
1066 15 : bn->tnil = true;
1067 15 : v = nil;
1068 143924248 : } else if (o < ba[n].hlo || o >= ba[n].hhi) {
1069 0 : GDKerror("does not match always\n");
1070 0 : bat_iterator_end(&bi);
1071 0 : goto bunins_failed;
1072 : } else {
1073 143924248 : o -= ba[n].hlo;
1074 143924248 : v = (const char *) bi.base + (o << bi.shift);
1075 : }
1076 143924263 : if (ATOMputFIX(tpe, d, v) != GDK_SUCCEED) {
1077 0 : bat_iterator_end(&bi);
1078 0 : goto bunins_failed;
1079 : }
1080 143902369 : d += bi.width;
1081 : }
1082 38945 : if (stringtrick) {
1083 8740 : bn->tnil = false;
1084 8740 : bn->tnonil = bi.nonil;
1085 8740 : bn->tkey = false;
1086 8740 : bn->tascii = bi.ascii;
1087 8740 : assert(bn->tvheap == NULL);
1088 8740 : bn->tvheap = bi.vh;
1089 8740 : HEAPincref(bi.vh);
1090 8736 : assert(bn->tvheap->parentid != bn->batCacheid);
1091 8736 : BBPretain(bn->tvheap->parentid);
1092 8738 : assert(bn->ttype == b->ttype);
1093 8739 : assert(bn->twidth == bi.width);
1094 8739 : assert(bn->tshift == bi.shift);
1095 : }
1096 : n++; /* undo for debug print */
1097 : } else {
1098 250 : const void *v;
1099 :
1100 250 : assert(!stringtrick);
1101 250 : bn->tnil = false;
1102 250 : n--; /* stop one before the end, also ba[n] is last */
1103 690914 : TIMEOUT_LOOP_IDX_DECL(p, ba[0].cnt, qry_ctx) {
1104 690374 : oid o = ba[0].ci.s ? canditer_next(&ba[0].ci) : ba[0].t[p];
1105 1392790 : for (int i = 1; i < n; i++) {
1106 702199 : if (is_oid_nil(o)) {
1107 0 : bn->tnil = true;
1108 0 : break;
1109 : }
1110 702199 : if (o < ba[i].hlo || o >= ba[i].hhi) {
1111 0 : GDKerror("does not match always\n");
1112 0 : bat_iterator_end(&bi);
1113 0 : goto bunins_failed;
1114 : }
1115 702199 : o -= ba[i].hlo;
1116 702199 : o = ba[i].ci.s ?
1117 26282 : (ba[i].ci.tpe == cand_dense) ?
1118 26282 : canditer_idx_dense(&ba[i].ci, o) :
1119 728244 : canditer_idx(&ba[i].ci, o) : ba[i].t[o];
1120 : }
1121 690591 : if (is_oid_nil(o)) {
1122 0 : bn->tnil = true;
1123 0 : v = nil;
1124 690591 : } else if (o < ba[n].hlo || o >= ba[n].hhi) {
1125 0 : GDKerror("does not match always\n");
1126 0 : bat_iterator_end(&bi);
1127 0 : goto bunins_failed;
1128 : } else {
1129 690591 : o -= ba[n].hlo;
1130 690591 : v = BUNtail(bi, o);
1131 : }
1132 690591 : if (bunfastapp(bn, v) != GDK_SUCCEED) {
1133 0 : bat_iterator_end(&bi);
1134 0 : goto bunins_failed;
1135 : }
1136 : }
1137 : n++; /* undo for debug print */
1138 : }
1139 39867 : bat_iterator_end(&bi);
1140 39842 : TIMEOUT_CHECK(qry_ctx, GOTO_LABEL_TIMEOUT_HANDLER(bunins_failed, qry_ctx));
1141 39798 : BATsetcount(bn, ba[0].cnt);
1142 39853 : bn->tsorted = (ba[0].cnt <= 1) | issorted;
1143 39853 : bn->trevsorted = ba[0].cnt <= 1;
1144 39853 : bn->tnonil = nonil & b->tnonil;
1145 39853 : bn->tseqbase = oid_nil;
1146 39853 : bn->tkey = (ba[0].cnt <= 1);
1147 39853 : double est = 0;
1148 176039 : for (int i = 0; i < n; i++) {
1149 136186 : double nest = ba[i].b->tunique_est?ba[i].b->tunique_est:BATcount(ba[i].b);
1150 136186 : if (est)
1151 96597 : est = MIN(est, nest);
1152 : else
1153 : est = nest;
1154 : }
1155 39853 : bn->tunique_est = est;
1156 : /* note, b may point to one of the bats in tobedeleted, so
1157 : * reclaim after the last use of b */
1158 39853 : while (ndelete-- > 0)
1159 41811 : BBPreclaim(tobedeleted[ndelete]);
1160 39851 : GDKfree(tobedeleted);
1161 39722 : GDKfree(ba);
1162 39926 : TRC_DEBUG(ALGO, "with %d bats: " ALGOOPTBATFMT " " LLFMT " usec\n",
1163 : n, ALGOOPTBATPAR(bn), GDKusec() - t0);
1164 : return bn;
1165 :
1166 0 : bunins_failed:
1167 0 : while (ndelete-- > 0)
1168 0 : BBPreclaim(tobedeleted[ndelete]);
1169 0 : GDKfree(tobedeleted);
1170 0 : GDKfree(ba);
1171 0 : BBPreclaim(bn);
1172 0 : TRC_DEBUG(ALGO, "failed " LLFMT "usec\n", GDKusec() - t0);
1173 : return NULL;
1174 : }
|