Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * Implementation for the column imprints index.
15 : * See paper:
16 : * Column Imprints: A Secondary Index Structure,
17 : * L.Sidirourgos and M.Kersten.
18 : */
19 :
20 : #include "monetdb_config.h"
21 : #include "gdk.h"
22 : #include "gdk_private.h"
23 : #include "gdk_imprints.h"
24 :
25 : /*
26 : * The imprints heap consists of five parts:
27 : * - header
28 : * - bins
29 : * - stats
30 : * - imps
31 : * - dict
32 : *
33 : * The header consists of four size_t values `size_t hdata[4]':
34 : * - hdata[0] = (1 << 16) | (IMPRINTS_VERSION << 8) | (uint8_t) imprints->bits
35 : * - hdata[1] = imprints->impcnt
36 : * - hdata[2] = imprints->dictcnt
37 : * - hdata[3] = BATcount(b)
38 : * The first word of the header includes a version number in case the
39 : * format changes, and a bit to indicate that the data was synced to disk.
40 : * This bit is the last thing written, so if it is set when reading back
41 : * the imprints heap, the data is complete. The fourth word is the size of
42 : * the BAT for which the imprints were created. If this size differs from
43 : * the size of the BAT at the time of reading the imprints back from disk,
44 : * the imprints are not usable.
45 : *
46 : * The bins area starts immediately after the header. It consists of 64
47 : * values in the domain of the BAT `TYPE bins[64]'.
48 : *
49 : * The stats area starts immediately after the bins area. It consists of
50 : * three times an array of 64 64 (32) bit integers `BUN stats[3][64]'. The
51 : * three arrays represent respectively min, max, and cnt for each of the 64
52 : * bins, so stats can be seen as `BUN min_bins[64]; BUN max_bins[64]; BUN
53 : * cnt_bins[64];'. The min and max values are positions of the smallest
54 : * and largest non-nil value in the corresponding bin.
55 : *
56 : * The imps area starts immediately after the stats area. It consists of
57 : * one mask per "page" of the input BAT indicating in which bins the values
58 : * in that page fall. The size of the mask is given by imprints->bits.
59 : * The list of masks may be run-length compressed, see the dict area. A
60 : * "page" is 64 bytes worth of values, so the number of values depends on
61 : * the type of the value.
62 : *
63 : * The dict area starts immediately after the imps area. It consists of
64 : * one cchdc_t value per "page" of the input. The precise use is described
65 : * below.
66 : *
67 : * There are up to 64 bins into which values are sorted. The number of
68 : * bins depends on the number of unique values in the input BAT (actually
69 : * on the number of unique values in a random sample of 2048 values of the
70 : * input BAT) and is 8, 16, 32, or 64. The number of bits in the mask is
71 : * stored in imprints->bits. The boundaries of the bins are dynamically
72 : * determined when the imprints are created and stored in the bins array.
73 : * In fact, bins[n] contains the lower boundary of the n-th bin (0 <= n <
74 : * N, N the number of bins). The value of bins[0] is not actually used:
75 : * all values smaller than bins[1] are sorted into this bin, including NIL.
76 : * The boundaries are simply computed by stepping with large steps through
77 : * the sorted sample and taking 63 (or 31, 15, 7) equally spaced values
78 : * from there.
79 : *
80 : * Once the appropriate bin n is determined for a particular value v
81 : * (bins[n] <= v < bins[n+1]), a bitmask can be constructed for the value
82 : * as ((uintN_t)1 << n) where N is the number of bits that are used for the
83 : * bitmasks and n is the number of the bin (0 <= n < N).
84 : *
85 : * The input BAT is divided into "pages" where a page is 64 bytes. This
86 : * means the number of rows in a page depends on the size of the values: 64
87 : * for byte-sized values down to 4 for hugeint (128 bit) values. For each
88 : * page, a bitmask is created which is the imprint for that page. The
89 : * bitmask has a bit set for each bin into which a value inside the page
90 : * falls. These bitmasks (imprints) are stored in the imprints->imps
91 : * array, but with a twist, see below.
92 : *
93 : * The imprints->dict array is an array of cchdc_t values. A cchdc_t value
94 : * consists of a bit .repeat and a 24-bit value .cnt. The sum of the .cnt
95 : * values is equal to the total number of pages in the input BAT. If the
96 : * .repeat value is 0, there are .cnt consecutive imprint bitmasks in the
97 : * imprints->imps array, each for one page. If the .repeat value is 1,
98 : * there is a single imprint bitmask in the imprints->imps array which is
99 : * valid for the next .cnt pages. In this way a run-length encoding
100 : * compression scheme is implemented for imprints.
101 : *
102 : * Imprints are used for range selects, i.e. finding all rows in a BAT
103 : * whose value is inside some given range, or alternatively, all rows in a
104 : * BAT whose value is outside some given range (anti select).
105 : *
106 : * A range necessarily covers one or more consecutive bins. A bit mask is
107 : * created for all bins that fall fully inside the range being selected (in
108 : * gdk_select.c called "innermask"), and a bit mask is created for all bins
109 : * that fall fully or partially inside the range (called "mask" in
110 : * gdk_select.c). Note that for an "anti" select, i.e. a select which
111 : * matches everything except a given range, the bits in the bit masks are
112 : * not consecutive.
113 : *
114 : * We then go through the imps table. All pages where the only set bits
115 : * are also set in "innermask" can be blindly added to the result: all
116 : * values fall inside the range. All pages where none of the set bits are
117 : * also set in "mask" can be blindly skipped: no value falls inside the
118 : * range. For the remaining pages, we scan the page and check each
119 : * individual value to see whether it is selected.
120 : *
121 : * Extra speed up is achieved by the run-length encoding of the imps table.
122 : * If a mask is in the category of fully inside the range or fully outside,
123 : * the complete set of pages can be added/skipped in one go.
124 : */
125 :
126 : #define IMPRINTS_VERSION 2
127 : #define IMPRINTS_HEADER_SIZE 4 /* nr of size_t fields in header */
128 :
129 : #define BINSIZE(B, FUNC, T) \
130 : do { \
131 : switch (B) { \
132 : case 8: FUNC(T,8); break; \
133 : case 16: FUNC(T,16); break; \
134 : case 32: FUNC(T,32); break; \
135 : case 64: FUNC(T,64); break; \
136 : default: MT_UNREACHABLE(); break; \
137 : } \
138 : } while (0)
139 :
140 :
141 : #define GETBIN(Z,X,B) \
142 : do { \
143 : int _i; \
144 : Z = 0; \
145 : for (_i = 1; _i < B; _i++) \
146 : Z += ((X) >= bins[_i]); \
147 : } while (0)
148 :
149 :
150 : #define IMPS_CREATE(TYPE,B) \
151 : do { \
152 : uint##B##_t mask, prvmask; \
153 : uint##B##_t *restrict im = (uint##B##_t *) imps; \
154 : const TYPE *restrict col = (TYPE *) bi->base; \
155 : const TYPE *restrict bins = (TYPE *) inbins; \
156 : const BUN page = IMPS_PAGE / sizeof(TYPE); \
157 : prvmask = 0; \
158 : for (i = 0; i < bi->count; ) { \
159 : const BUN lim = MIN(i + page, bi->count); \
160 : /* new mask */ \
161 : mask = 0; \
162 : /* build mask for all BUNs in one PAGE */ \
163 : for ( ; i < lim; i++) { \
164 : const TYPE val = col[i]; \
165 : GETBIN(bin,val,B); \
166 : mask = IMPSsetBit(B,mask,bin); \
167 : if (!is_##TYPE##_nil(val)) { /* do not count nils */ \
168 : if (!cnt_bins[bin]++) { \
169 : /* first in the bin */ \
170 : min_bins[bin] = max_bins[bin] = i; \
171 : } else { \
172 : if (val < col[min_bins[bin]]) \
173 : min_bins[bin] = i; \
174 : if (val > col[max_bins[bin]]) \
175 : max_bins[bin] = i; \
176 : } \
177 : } \
178 : } \
179 : /* same mask as previous and enough count to add */ \
180 : if ((prvmask == mask) && (dcnt > 0) && \
181 : (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) { \
182 : /* not a repeat header */ \
183 : if (!dict[dcnt-1].repeat) { \
184 : /* if compressed */ \
185 : if (dict[dcnt-1].cnt > 1) { \
186 : /* uncompress last */ \
187 : dict[dcnt-1].cnt--; \
188 : /* new header */ \
189 : dict[dcnt].cnt = 1; \
190 : dict[dcnt].flags = 0; \
191 : dcnt++; \
192 : } \
193 : /* set repeat */ \
194 : dict[dcnt-1].repeat = 1; \
195 : } \
196 : /* increase cnt */ \
197 : dict[dcnt-1].cnt++; \
198 : } else { /* new mask (or run out of header count) */ \
199 : prvmask=mask; \
200 : im[icnt] = mask; \
201 : icnt++; \
202 : if ((dcnt > 0) && !(dict[dcnt-1].repeat) && \
203 : (dict[dcnt-1].cnt < (IMPS_MAX_CNT-1))) { \
204 : dict[dcnt-1].cnt++; \
205 : } else { \
206 : dict[dcnt].cnt = 1; \
207 : dict[dcnt].repeat = 0; \
208 : dict[dcnt].flags = 0; \
209 : dcnt++; \
210 : } \
211 : } \
212 : } \
213 : } while (0)
214 :
215 : static void
216 52 : imprints_create(BAT *b, BATiter *bi, void *inbins, BUN *stats, bte bits,
217 : void *imps, BUN *impcnt, cchdc_t *dict, BUN *dictcnt)
218 : {
219 52 : BUN i;
220 52 : BUN dcnt, icnt;
221 52 : BUN *restrict min_bins = stats;
222 52 : BUN *restrict max_bins = min_bins + 64;
223 52 : BUN *restrict cnt_bins = max_bins + 64;
224 52 : int bin = 0;
225 52 : dcnt = icnt = 0;
226 : #ifndef NDEBUG
227 52 : memset(min_bins, 0, 64 * SIZEOF_BUN);
228 52 : memset(max_bins, 0, 64 * SIZEOF_BUN);
229 : #endif
230 52 : memset(cnt_bins, 0, 64 * SIZEOF_BUN);
231 :
232 93 : switch (ATOMbasetype(b->ttype)) {
233 3 : case TYPE_bte:
234 69 : BINSIZE(bits, IMPS_CREATE, bte);
235 : break;
236 1 : case TYPE_sht:
237 34 : BINSIZE(bits, IMPS_CREATE, sht);
238 : break;
239 13 : case TYPE_int:
240 367 : BINSIZE(bits, IMPS_CREATE, int);
241 : break;
242 25 : case TYPE_lng:
243 64125979 : BINSIZE(bits, IMPS_CREATE, lng);
244 : break;
245 : #ifdef HAVE_HGE
246 5 : case TYPE_hge:
247 750 : BINSIZE(bits, IMPS_CREATE, hge);
248 : break;
249 : #endif
250 3 : case TYPE_flt:
251 102 : BINSIZE(bits, IMPS_CREATE, flt);
252 : break;
253 2 : case TYPE_dbl:
254 68 : BINSIZE(bits, IMPS_CREATE, dbl);
255 : break;
256 : default:
257 : /* should never reach here */
258 0 : MT_UNREACHABLE();
259 : }
260 :
261 52 : *dictcnt = dcnt;
262 52 : *impcnt = icnt;
263 52 : }
264 :
265 : #ifdef NDEBUG
266 : #define CLRMEM() ((void) 0)
267 : #else
268 : #define CLRMEM() while (k < 64) h[k++] = 0
269 : #endif
270 :
271 : #define FILL_HISTOGRAM(TYPE) \
272 : do { \
273 : BUN k; \
274 : TYPE *restrict s = (TYPE *) Tloc(s4, 0); \
275 : TYPE *restrict h = imprints->bins; \
276 : if (cnt < 64-1) { \
277 : TYPE max = GDK_##TYPE##_max; \
278 : for (k = 0; k < cnt; k++) \
279 : h[k] = s[k]; \
280 : while (k < (BUN) imprints->bits) \
281 : h[k++] = max; \
282 : CLRMEM(); \
283 : } else { \
284 : double y, ystep = (double) cnt / (64 - 1); \
285 : for (k = 0, y = 0; (BUN) y < cnt; y += ystep, k++) \
286 : h[k] = s[(BUN) y]; \
287 : if (k == 64 - 1) /* there is one left */ \
288 : h[k] = s[cnt - 1]; \
289 : } \
290 : } while (0)
291 :
292 : /* Check whether we have imprints on b (and return true if we do). It
293 : * may be that the imprints were made persistent, but we hadn't seen
294 : * that yet, so check the file system. This also returns true if b is
295 : * a view and there are imprints on b's parent.
296 : *
297 : * Note that the b->timprints pointer can be NULL, meaning there are
298 : * no imprints; (Imprints *) 1, meaning there are no imprints loaded,
299 : * but they may exist on disk; or a valid pointer to loaded imprints.
300 : * These values are maintained here, in the IMPSdestroy and IMPSfree
301 : * functions, and in BBPdiskscan during initialization. */
302 : bool
303 181 : BATcheckimprints(BAT *b)
304 : {
305 181 : bool ret;
306 181 : BATiter bi = bat_iterator(b);
307 :
308 180 : if (VIEWtparent(b)) {
309 29 : assert(b->timprints == NULL);
310 29 : b = BATdescriptor(VIEWtparent(b));
311 30 : if (b == NULL) {
312 0 : bat_iterator_end(&bi);
313 0 : return false;
314 : }
315 : }
316 :
317 181 : if (b->timprints == (Imprints *) 1) {
318 0 : MT_lock_set(&b->batIdxLock);
319 0 : if (b->timprints == (Imprints *) 1) {
320 0 : Imprints *imprints;
321 0 : const char *nme = BBP_physical(b->batCacheid);
322 :
323 0 : assert(!GDKinmemory(bi.h->farmid));
324 0 : b->timprints = NULL;
325 0 : if ((imprints = GDKzalloc(sizeof(Imprints))) != NULL &&
326 0 : (imprints->imprints.farmid = BBPselectfarm(b->batRole, bi.type, imprintsheap)) >= 0) {
327 0 : int fd;
328 :
329 0 : strconcat_len(imprints->imprints.filename,
330 : sizeof(imprints->imprints.filename),
331 : nme, ".timprints", NULL);
332 0 : imprints->imprints.storage = imprints->imprints.newstorage = STORE_INVALID;
333 : /* check whether a persisted imprints index
334 : * can be found */
335 0 : if ((fd = GDKfdlocate(imprints->imprints.farmid, nme, "rb", "timprints")) >= 0) {
336 0 : size_t hdata[4];
337 0 : struct stat st;
338 0 : size_t pages;
339 :
340 0 : pages = (((size_t) bi.count * bi.width) + IMPS_PAGE - 1) / IMPS_PAGE;
341 0 : if (read(fd, hdata, sizeof(hdata)) == sizeof(hdata) &&
342 0 : hdata[0] & ((size_t) 1 << 16) &&
343 0 : ((hdata[0] & 0xFF00) >> 8) == IMPRINTS_VERSION &&
344 0 : hdata[3] == (size_t) bi.count &&
345 0 : fstat(fd, &st) == 0 &&
346 0 : st.st_size >= (off_t) (imprints->imprints.size =
347 0 : imprints->imprints.free =
348 0 : 64 * bi.width +
349 : 64 * 3 * SIZEOF_BUN +
350 0 : pages * ((bte) hdata[0] / 8) +
351 0 : hdata[2] * sizeof(cchdc_t) +
352 : sizeof(uint64_t) /* padding for alignment */
353 0 : + 4 * SIZEOF_SIZE_T) &&
354 0 : HEAPload(&imprints->imprints, nme, "timprints", false) == GDK_SUCCEED) {
355 : /* usable */
356 0 : imprints->bits = (bte) (hdata[0] & 0xFF);
357 0 : imprints->impcnt = (BUN) hdata[1];
358 0 : imprints->dictcnt = (BUN) hdata[2];
359 0 : imprints->bins = imprints->imprints.base + 4 * SIZEOF_SIZE_T;
360 0 : imprints->stats = (BUN *) ((char *) imprints->bins + 64 * bi.width);
361 0 : imprints->imps = (void *) (imprints->stats + 64 * 3);
362 0 : imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
363 0 : close(fd);
364 0 : imprints->imprints.parentid = b->batCacheid;
365 0 : imprints->imprints.hasfile = true;
366 0 : ATOMIC_INIT(&imprints->imprints.refs, 1);
367 0 : b->timprints = imprints;
368 0 : TRC_DEBUG(ACCELERATOR, ALGOBATFMT " reusing persisted imprints\n", ALGOBATPAR(b));
369 0 : MT_lock_unset(&b->batIdxLock);
370 0 : if (bi.b != b)
371 0 : BBPunfix(b->batCacheid);
372 0 : bat_iterator_end(&bi);
373 0 : return true;
374 : }
375 0 : close(fd);
376 : /* unlink unusable file */
377 0 : GDKunlink(imprints->imprints.farmid, BATDIR, nme, "timprints");
378 0 : imprints->imprints.hasfile = false;
379 : }
380 : }
381 0 : GDKfree(imprints);
382 0 : GDKclrerr(); /* we're not currently interested in errors */
383 : }
384 0 : MT_lock_unset(&b->batIdxLock);
385 : }
386 181 : if (bi.b != b)
387 30 : BBPunfix(b->batCacheid);
388 181 : bat_iterator_end(&bi);
389 180 : ret = b->timprints != NULL;
390 180 : if( ret)
391 0 : TRC_DEBUG(ACCELERATOR, ALGOBATFMT " already has imprints\n", ALGOBATPAR(b));
392 : return ret;
393 : }
394 :
395 : static void
396 41 : BATimpsync(void *arg)
397 : {
398 41 : BAT *b = arg;
399 41 : Imprints *imprints;
400 41 : int fd;
401 41 : lng t0 = GDKusec();
402 41 : const char *failed = " failed";
403 :
404 41 : MT_lock_set(&b->batIdxLock);
405 41 : if ((imprints = b->timprints) != NULL) {
406 41 : Heap *hp = &imprints->imprints;
407 41 : if (HEAPsave(hp, hp->filename, NULL, true, hp->free, NULL) == GDK_SUCCEED) {
408 41 : if (hp->storage == STORE_MEM) {
409 41 : if ((fd = GDKfdlocate(hp->farmid, hp->filename, "rb+", NULL)) >= 0) {
410 : /* add version number */
411 41 : ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8;
412 : /* sync-on-disk checked bit */
413 41 : ((size_t *) hp->base)[0] |= (size_t) 1 << 16;
414 41 : if (write(fd, hp->base, SIZEOF_SIZE_T) >= 0) {
415 41 : failed = ""; /* not failed */
416 41 : if (!(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)) {
417 : #if defined(NATIVE_WIN32)
418 : _commit(fd);
419 : #elif defined(HAVE_FDATASYNC)
420 0 : fdatasync(fd);
421 : #elif defined(HAVE_FSYNC)
422 : fsync(fd);
423 : #endif
424 : }
425 41 : hp->dirty = false;
426 : } else {
427 0 : failed = " write failed";
428 0 : perror("write hash");
429 : }
430 41 : close(fd);
431 : }
432 : } else {
433 : /* add version number */
434 0 : ((size_t *) hp->base)[0] |= (size_t) IMPRINTS_VERSION << 8;
435 : /* sync-on-disk checked bit */
436 0 : ((size_t *) hp->base)[0] |= (size_t) 1 << 16;
437 0 : if (!(ATOMIC_GET(&GDKdebug) & NOSYNCMASK) &&
438 0 : MT_msync(hp->base, SIZEOF_SIZE_T) < 0) {
439 0 : failed = " sync failed";
440 0 : ((size_t *) hp->base)[0] &= ~((size_t) IMPRINTS_VERSION << 8);
441 : } else {
442 0 : hp->dirty = false;
443 0 : failed = ""; /* not failed */
444 : }
445 : }
446 41 : TRC_DEBUG(ACCELERATOR, ALGOBATFMT " imprints persisted "
447 : "(" LLFMT " usec)%s\n", ALGOBATPAR(b),
448 : GDKusec() - t0, failed);
449 : }
450 : }
451 41 : MT_lock_unset(&b->batIdxLock);
452 41 : BBPunfix(b->batCacheid);
453 41 : }
454 :
455 : gdk_return
456 61 : BATimprints(BAT *b)
457 : {
458 61 : BAT *s1 = NULL, *s2 = NULL, *s3 = NULL, *s4 = NULL;
459 61 : bat unfix = 0;
460 61 : Imprints *imprints;
461 61 : BATiter bi;
462 61 : lng t0 = GDKusec();
463 :
464 61 : BATcheck(b, GDK_FAIL);
465 :
466 : /* we only create imprints for types that look like types we know */
467 61 : if (!imprintable(b->ttype)) {
468 : /* doesn't look enough like base type: do nothing */
469 9 : GDKerror("unsupported type\n");
470 9 : return GDK_FAIL;
471 : }
472 :
473 52 : if (BATcheckimprints(b))
474 : return GDK_SUCCEED;
475 :
476 52 : if (VIEWtparent(b)) {
477 : /* views always keep null pointer and need to obtain
478 : * the latest imprint from the parent at query time */
479 0 : s2 = b; /* remember for ACCELDEBUG print */
480 0 : b = BATdescriptor(VIEWtparent(b));
481 0 : if (b == NULL)
482 : return GDK_FAIL;
483 0 : unfix = b->batCacheid; /* bat to be unfixed */
484 0 : if (BATcheckimprints(b)) {
485 0 : BBPunfix(unfix);
486 0 : return GDK_SUCCEED;
487 : }
488 : }
489 52 : bi = bat_iterator(b);
490 52 : MT_lock_set(&b->batIdxLock);
491 :
492 52 : if (b->timprints == NULL) {
493 52 : BUN cnt;
494 52 : const char *nme = GDKinmemory(bi.h->farmid) ? ":memory:" : BBP_physical(b->batCacheid);
495 52 : size_t pages;
496 :
497 52 : MT_lock_unset(&b->batIdxLock);
498 52 : MT_thread_setalgorithm("create imprints");
499 :
500 52 : if (s2)
501 0 : TRC_DEBUG(ACCELERATOR, ALGOBATFMT
502 : " creating imprints on parent "
503 : ALGOBATFMT "\n",
504 : ALGOBATPAR(s2), ALGOBATPAR(b));
505 : else
506 52 : TRC_DEBUG(ACCELERATOR, ALGOBATFMT
507 : " creating imprints\n",
508 : ALGOBATPAR(b));
509 :
510 52 : s2 = NULL;
511 :
512 52 : imprints = GDKzalloc(sizeof(Imprints));
513 52 : if (imprints == NULL) {
514 0 : bat_iterator_end(&bi);
515 0 : if (unfix)
516 0 : BBPunfix(unfix);
517 0 : return GDK_FAIL;
518 : }
519 52 : strconcat_len(imprints->imprints.filename,
520 : sizeof(imprints->imprints.filename),
521 : nme, ".timprints", NULL);
522 52 : pages = (((size_t) bi.count * bi.width) + IMPS_PAGE - 1) / IMPS_PAGE;
523 52 : imprints->imprints.farmid = BBPselectfarm(b->batRole, bi.type,
524 : imprintsheap);
525 52 : imprints->imprints.parentid = b->batCacheid;
526 :
527 : #define SMP_SIZE 2048
528 52 : s1 = BATsample(b, SMP_SIZE);
529 52 : if (s1 == NULL) {
530 0 : GDKfree(imprints);
531 0 : bat_iterator_end(&bi);
532 0 : if (unfix)
533 0 : BBPunfix(unfix);
534 0 : return GDK_FAIL;
535 : }
536 52 : s2 = BATunique(b, s1);
537 52 : if (s2 == NULL) {
538 0 : BBPunfix(s1->batCacheid);
539 0 : GDKfree(imprints);
540 0 : bat_iterator_end(&bi);
541 0 : if (unfix)
542 0 : BBPunfix(unfix);
543 0 : return GDK_FAIL;
544 : }
545 52 : s3 = BATproject(s2, b);
546 52 : if (s3 == NULL) {
547 0 : BBPunfix(s1->batCacheid);
548 0 : BBPunfix(s2->batCacheid);
549 0 : GDKfree(imprints);
550 0 : bat_iterator_end(&bi);
551 0 : if (unfix)
552 0 : BBPunfix(unfix);
553 0 : return GDK_FAIL;
554 : }
555 52 : s3->tkey = true; /* we know is unique on tail now */
556 52 : if (BATsort(&s4, NULL, NULL, s3, NULL, NULL, false, false, false) != GDK_SUCCEED) {
557 0 : BBPunfix(s1->batCacheid);
558 0 : BBPunfix(s2->batCacheid);
559 0 : BBPunfix(s3->batCacheid);
560 0 : GDKfree(imprints);
561 0 : bat_iterator_end(&bi);
562 0 : if (unfix)
563 0 : BBPunfix(unfix);
564 0 : return GDK_FAIL;
565 : }
566 : /* s4 now is ordered and unique on tail */
567 52 : assert(s4->tkey && s4->tsorted);
568 52 : cnt = BATcount(s4);
569 52 : imprints->bits = 64;
570 52 : if (cnt <= 32)
571 51 : imprints->bits = 32;
572 51 : if (cnt <= 16)
573 51 : imprints->bits = 16;
574 51 : if (cnt <= 8)
575 51 : imprints->bits = 8;
576 :
577 : /* The heap we create here consists of four parts:
578 : * bins, max 64 entries with bin boundaries, domain of b;
579 : * stats, min/max/count for each bin, min/max are oid, and count BUN;
580 : * imps, max one entry per "page", entry is "bits" wide;
581 : * dict, max two entries per three "pages".
582 : * In addition, we add some housekeeping entries at
583 : * the start so that we can determine whether we can
584 : * trust the imprints when encountered on startup (including
585 : * a version number -- CURRENT VERSION is 2). */
586 52 : MT_lock_set(&b->batIdxLock);
587 104 : if (b->timprints != NULL ||
588 52 : HEAPalloc(&imprints->imprints,
589 : IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T + /* extra info */
590 52 : 64 * bi.width + /* bins */
591 : 64 * 3 * SIZEOF_BUN + /* {min,max,cnt}_bins */
592 52 : pages * (imprints->bits / 8) + /* imps */
593 52 : sizeof(uint64_t) + /* padding for alignment */
594 : pages * sizeof(cchdc_t), /* dict */
595 : 1) != GDK_SUCCEED) {
596 0 : MT_lock_unset(&b->batIdxLock);
597 0 : bat_iterator_end(&bi);
598 0 : GDKfree(imprints);
599 0 : BBPunfix(s1->batCacheid);
600 0 : BBPunfix(s2->batCacheid);
601 0 : BBPunfix(s3->batCacheid);
602 0 : BBPunfix(s4->batCacheid);
603 0 : if (b->timprints != NULL) {
604 0 : if (unfix)
605 0 : BBPunfix(unfix);
606 0 : return GDK_SUCCEED; /* we were beaten to it */
607 : }
608 0 : if (unfix)
609 0 : BBPunfix(unfix);
610 0 : return GDK_FAIL;
611 : }
612 52 : imprints->bins = imprints->imprints.base + IMPRINTS_HEADER_SIZE * SIZEOF_SIZE_T;
613 52 : imprints->stats = (BUN *) ((char *) imprints->bins + 64 * bi.width);
614 52 : imprints->imps = (void *) (imprints->stats + 64 * 3);
615 52 : imprints->dict = (void *) ((uintptr_t) ((char *) imprints->imps + pages * (imprints->bits / 8) + sizeof(uint64_t)) & ~(sizeof(uint64_t) - 1));
616 :
617 93 : switch (ATOMbasetype(bi.type)) {
618 3 : case TYPE_bte:
619 195 : FILL_HISTOGRAM(bte);
620 : break;
621 1 : case TYPE_sht:
622 65 : FILL_HISTOGRAM(sht);
623 : break;
624 13 : case TYPE_int:
625 845 : FILL_HISTOGRAM(int);
626 : break;
627 25 : case TYPE_lng:
628 1625 : FILL_HISTOGRAM(lng);
629 : break;
630 : #ifdef HAVE_HGE
631 5 : case TYPE_hge:
632 325 : FILL_HISTOGRAM(hge);
633 : break;
634 : #endif
635 3 : case TYPE_flt:
636 195 : FILL_HISTOGRAM(flt);
637 : break;
638 2 : case TYPE_dbl:
639 130 : FILL_HISTOGRAM(dbl);
640 : break;
641 : default:
642 : /* should never reach here */
643 0 : MT_UNREACHABLE();
644 : }
645 :
646 52 : imprints_create(b, &bi,
647 : imprints->bins,
648 : imprints->stats,
649 52 : imprints->bits,
650 : imprints->imps,
651 : &imprints->impcnt,
652 52 : imprints->dict,
653 : &imprints->dictcnt);
654 52 : assert(imprints->impcnt <= pages);
655 52 : assert(imprints->dictcnt <= pages);
656 : #ifndef NDEBUG
657 52 : memset((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8), 0, (char *) imprints->dict - ((char *) imprints->imps + imprints->impcnt * (imprints->bits / 8)));
658 : #endif
659 52 : imprints->imprints.free = (size_t) ((char *) ((cchdc_t *) imprints->dict + imprints->dictcnt) - imprints->imprints.base);
660 : /* add info to heap for when they become persistent */
661 52 : ((size_t *) imprints->imprints.base)[0] = (size_t) (imprints->bits);
662 52 : ((size_t *) imprints->imprints.base)[1] = (size_t) imprints->impcnt;
663 52 : ((size_t *) imprints->imprints.base)[2] = (size_t) imprints->dictcnt;
664 52 : ((size_t *) imprints->imprints.base)[3] = (size_t) bi.count;
665 52 : imprints->imprints.dirty = true;
666 52 : MT_lock_set(&b->theaplock);
667 52 : if (b->batCount != bi.count) {
668 : /* bat changed under our feet, can't use imprints */
669 0 : MT_lock_unset(&b->theaplock);
670 0 : MT_lock_unset(&b->batIdxLock);
671 0 : bat_iterator_end(&bi);
672 0 : HEAPfree(&imprints->imprints, true);
673 0 : GDKfree(imprints);
674 0 : BBPunfix(s1->batCacheid);
675 0 : BBPunfix(s2->batCacheid);
676 0 : BBPunfix(s3->batCacheid);
677 0 : BBPunfix(s4->batCacheid);
678 0 : GDKerror("Imprints creation aborted due to concurrent change to bat\n");
679 0 : TRC_DEBUG(ACCELERATOR, "failed imprints construction: bat %s changed, " LLFMT " usec\n", BATgetId(b), GDKusec() - t0);
680 0 : if (unfix)
681 0 : BBPunfix(unfix);
682 0 : return GDK_FAIL;
683 : }
684 52 : ATOMIC_INIT(&imprints->imprints.refs, 1);
685 52 : b->timprints = imprints;
686 52 : MT_lock_unset(&b->theaplock);
687 52 : if (BBP_status(b->batCacheid) & BBPEXISTING &&
688 83 : !b->theap->dirty &&
689 82 : !GDKinmemory(bi.h->farmid) &&
690 41 : !GDKinmemory(imprints->imprints.farmid)) {
691 41 : MT_Id tid;
692 41 : BBPfix(b->batCacheid);
693 41 : char name[MT_NAME_LEN];
694 41 : snprintf(name, sizeof(name), "impssync%d", b->batCacheid);
695 41 : if (MT_create_thread(&tid, BATimpsync, b,
696 : MT_THR_DETACHED, name) < 0)
697 0 : BBPunfix(b->batCacheid);
698 : }
699 : }
700 :
701 52 : TRC_DEBUG(ACCELERATOR, "%s: imprints construction " LLFMT " usec\n",
702 : BATgetId(b), GDKusec() - t0);
703 52 : MT_lock_unset(&b->batIdxLock);
704 52 : bat_iterator_end(&bi);
705 :
706 : /* BBPUnfix tries to get the imprints lock which might lead to
707 : * a deadlock if those were unfixed earlier */
708 52 : if (s1) {
709 52 : BBPunfix(s1->batCacheid);
710 52 : BBPunfix(s2->batCacheid);
711 52 : BBPunfix(s3->batCacheid);
712 52 : BBPunfix(s4->batCacheid);
713 : }
714 52 : if (unfix)
715 0 : BBPunfix(unfix);
716 : return GDK_SUCCEED;
717 : }
718 :
719 : #define getbin(TYPE,B) \
720 : do { \
721 : const TYPE val = * (TYPE *) v; \
722 : GETBIN(ret,val,B); \
723 : } while (0)
724 :
725 : int
726 0 : IMPSgetbin(int tpe, bte bits, const char *restrict inbins, const void *restrict v)
727 : {
728 0 : int ret = -1;
729 :
730 0 : switch (tpe) {
731 0 : case TYPE_bte: {
732 0 : const bte *restrict bins = (bte *) inbins;
733 0 : BINSIZE(bits, getbin, bte);
734 : break;
735 : }
736 0 : case TYPE_sht: {
737 0 : const sht *restrict bins = (sht *) inbins;
738 0 : BINSIZE(bits, getbin, sht);
739 : break;
740 : }
741 0 : case TYPE_int: {
742 0 : const int *restrict bins = (int *) inbins;
743 0 : BINSIZE(bits, getbin, int);
744 : break;
745 : }
746 0 : case TYPE_lng: {
747 0 : const lng *restrict bins = (lng *) inbins;
748 0 : BINSIZE(bits, getbin, lng);
749 : break;
750 : }
751 : #ifdef HAVE_HGE
752 0 : case TYPE_hge: {
753 0 : const hge *restrict bins = (hge *) inbins;
754 0 : BINSIZE(bits, getbin, hge);
755 : break;
756 : }
757 : #endif
758 0 : case TYPE_flt: {
759 0 : const flt *restrict bins = (flt *) inbins;
760 0 : BINSIZE(bits, getbin, flt);
761 : break;
762 : }
763 0 : case TYPE_dbl: {
764 0 : const dbl *restrict bins = (dbl *) inbins;
765 0 : BINSIZE(bits, getbin, dbl);
766 : break;
767 : }
768 : default:
769 0 : MT_UNREACHABLE();
770 : }
771 0 : return ret;
772 : }
773 :
774 : lng
775 9893039 : IMPSimprintsize(BAT *b)
776 : {
777 9893039 : lng sz = 0;
778 9893039 : MT_lock_set(&b->batIdxLock);
779 9912302 : if (b->timprints && b->timprints != (Imprints *) 1) {
780 0 : sz = (lng) b->timprints->imprints.free;
781 : }
782 9912302 : MT_lock_unset(&b->batIdxLock);
783 9917192 : return sz;
784 : }
785 :
786 : void
787 85382069 : IMPSdestroy(BAT *b)
788 : {
789 85382069 : if (b && b->timprints) {
790 39 : MT_lock_set(&b->batIdxLock);
791 45 : if (b->timprints == (Imprints *) 1) {
792 0 : b->timprints = NULL;
793 0 : GDKunlink(BBPselectfarm(b->batRole, b->ttype, imprintsheap),
794 : BATDIR,
795 0 : BBP_physical(b->batCacheid),
796 : "timprints");
797 45 : } else if (b->timprints != NULL) {
798 45 : IMPSdecref(b->timprints, b->timprints->imprints.parentid == b->batCacheid);
799 48 : b->timprints = NULL;
800 : }
801 48 : MT_lock_unset(&b->batIdxLock);
802 : }
803 85382077 : }
804 :
805 : /* free the memory associated with the imprints, do not remove the
806 : * heap files; indicate that imprints are available on disk by setting
807 : * the imprints pointer to 1 */
808 : void
809 521397 : IMPSfree(BAT *b)
810 : {
811 521397 : Imprints *imprints;
812 :
813 521397 : if (b && b->timprints) {
814 4 : MT_lock_set(&b->batIdxLock);
815 4 : imprints = b->timprints;
816 4 : if (imprints != NULL && imprints != (Imprints *) 1) {
817 4 : if (GDKinmemory(imprints->imprints.farmid)) {
818 0 : b->timprints = NULL;
819 0 : IMPSdecref(imprints, imprints->imprints.parentid == b->batCacheid);
820 : } else {
821 4 : if (imprints->imprints.parentid == b->batCacheid)
822 4 : b->timprints = (Imprints *) 1;
823 : else
824 0 : b->timprints = NULL;
825 4 : IMPSdecref(imprints, false);
826 : }
827 : }
828 4 : MT_lock_unset(&b->batIdxLock);
829 : }
830 521397 : }
831 :
832 : void
833 48 : IMPSdecref(Imprints *imprints, bool remove)
834 : {
835 48 : TRC_DEBUG(ACCELERATOR, "Decrement ref count of %s\n", imprints->imprints.filename);
836 48 : if (remove)
837 46 : ATOMIC_OR(&imprints->imprints.refs, HEAPREMOVE);
838 48 : ATOMIC_BASE_TYPE refs = ATOMIC_DEC(&imprints->imprints.refs);
839 48 : if ((refs & HEAPREFS) == 0) {
840 52 : HEAPfree(&imprints->imprints, (bool) (refs & HEAPREMOVE));
841 52 : GDKfree(imprints);
842 : }
843 48 : }
844 :
845 : void
846 0 : IMPSincref(Imprints *imprints)
847 : {
848 0 : TRC_DEBUG(ACCELERATOR, "Increment ref count of %s\n", imprints->imprints.filename);
849 0 : ATOMIC_INC(&imprints->imprints.refs);
850 0 : }
851 :
852 : #ifndef NDEBUG
853 : /* never called, useful for debugging */
854 :
855 : #define IMPSPRNTMASK(T, B) \
856 : do { \
857 : uint##B##_t *restrict im = (uint##B##_t *) imprints->imps; \
858 : for (j = 0; j < imprints->bits; j++) \
859 : s[j] = IMPSisSet(B, im[icnt], j) ? 'x' : '.'; \
860 : s[j] = '\0'; \
861 : } while (0)
862 :
863 : /* function used for debugging */
864 : void
865 0 : IMPSprint(BAT *b)
866 : {
867 0 : Imprints *imprints;
868 0 : cchdc_t *restrict d;
869 0 : char s[65]; /* max number of bits + 1 */
870 0 : BUN icnt, dcnt, l, pages;
871 0 : BUN *restrict min_bins, *restrict max_bins;
872 0 : BUN *restrict cnt_bins;
873 0 : bte j;
874 0 : int i;
875 :
876 0 : if (!BATcheckimprints(b)) {
877 0 : printf("No imprint\n");
878 0 : return;
879 : }
880 0 : imprints = b->timprints;
881 0 : d = (cchdc_t *) imprints->dict;
882 0 : min_bins = imprints->stats;
883 0 : max_bins = min_bins + 64;
884 0 : cnt_bins = max_bins + 64;
885 :
886 0 : printf("bits = %d, impcnt = " BUNFMT ", dictcnt = " BUNFMT "\n",
887 0 : imprints->bits, imprints->impcnt, imprints->dictcnt);
888 0 : printf("MIN\n");
889 0 : for (i = 0; i < imprints->bits; i++) {
890 0 : printf("[ " BUNFMT " ]\n", min_bins[i]);
891 : }
892 :
893 0 : printf("MAX\n");
894 0 : for (i = 0; i < imprints->bits; i++) {
895 0 : printf("[ " BUNFMT " ]\n", max_bins[i]);
896 : }
897 0 : printf("COUNT\n");
898 0 : for (i = 0; i < imprints->bits; i++) {
899 0 : printf("[ " BUNFMT " ]\n", cnt_bins[i]);
900 : }
901 0 : for (dcnt = 0, icnt = 0, pages = 1; dcnt < imprints->dictcnt; dcnt++) {
902 0 : if (d[dcnt].repeat) {
903 0 : BINSIZE(imprints->bits, IMPSPRNTMASK, " ");
904 0 : pages += d[dcnt].cnt;
905 0 : printf("[ " BUNFMT " ]r %s\n", pages, s);
906 0 : icnt++;
907 : } else {
908 0 : l = icnt + d[dcnt].cnt;
909 0 : for (; icnt < l; icnt++) {
910 0 : BINSIZE(imprints->bits, IMPSPRNTMASK, " ");
911 0 : printf("[ " BUNFMT " ] %s\n", pages++, s);
912 : }
913 : }
914 : }
915 0 : fflush(stdout);
916 : }
917 : #endif
|