Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : /* The maximum number of entries in a MergeState's pending-runs
18 : * stack. This is enough to sort arrays of size up to about
19 : *
20 : * 32 * phi ** MAX_MERGE_PENDING
21 : *
22 : * where phi ~= 1.618. 85 is ridiculously large enough, good for an
23 : * array with 2**64 elements. */
24 : #define MAX_MERGE_PENDING 85
25 :
26 : /* When we get into galloping mode, we stay there until both runs win
27 : * less often than MIN_GALLOP consecutive times. See listsort.txt for
28 : * more info. */
29 : #define MIN_GALLOP 7
30 :
31 : /* Avoid malloc for small temp arrays. */
32 : #define MERGESTATE_TEMP_SIZE (256 * sizeof(void *))
33 :
34 : /* One MergeState exists on the stack per invocation of mergesort.
35 : * It's just a convenient way to pass state around among the helper
36 : * functions. */
37 : struct slice {
38 : size_t base;
39 : ssize_t len;
40 : };
41 :
42 : typedef struct {
43 : /* The comparison function. */
44 : int (*compare) (const void *, const void *);
45 : const char *heap;
46 : int hs;
47 : int ts;
48 : void *restrict bh;
49 : void *restrict bt;
50 : /* Temporary storage for a single entry. If an entry is at
51 : * most 2 lng's, we don't need to allocate anything. */
52 : void *th;
53 : void *tt;
54 : #ifdef HAVE_HGE
55 : hge tempstorageh[1]; /* 16 bytes should be wide enough ... */
56 : hge tempstoraget[1]; /* ... for all our fixed-sized data */
57 : #else
58 : lng tempstorageh[2]; /* 16 bytes should be wide enough ... */
59 : lng tempstoraget[2]; /* ... for all our fixed-sized data */
60 : #endif
61 :
62 : /* This controls when we get *into* galloping mode. It's
63 : * initialized to MIN_GALLOP. merge_lo and merge_hi tend to
64 : * nudge it higher for random data, and lower for highly
65 : * structured data. */
66 : ssize_t min_gallop;
67 :
68 : /* 'ah' and 'at' are temp storage to help with merges. They
69 : * contain room for alloced[ht] entries. */
70 : void *ah;
71 : ssize_t allocedh;
72 : void *at;
73 : ssize_t allocedt;
74 :
75 : /* A stack of n pending runs yet to be merged. Run #i starts
76 : * at address base[i] and extends for len[i] elements. It's
77 : * always true (so long as the indices are in bounds) that
78 : *
79 : * pending[i].base + pending[i].len == pending[i+1].base
80 : *
81 : * so we could cut the storage for this, but it's a minor
82 : * amount, and keeping all the info explicit simplifies the
83 : * code. */
84 : int n;
85 : struct slice pending[MAX_MERGE_PENDING];
86 :
87 : /* 'ah' and 'at' point to this when possible, rather than muck
88 : * with malloc. */
89 : char temparrayh[MERGESTATE_TEMP_SIZE];
90 : char temparrayt[MERGESTATE_TEMP_SIZE];
91 : } MergeState;
92 :
93 : /* Free all the temp memory owned by the MergeState. This must be
94 : * called when you're done with a MergeState, and may be called before
95 : * then if you want to free the temp memory early. */
96 : static void
97 184 : merge_freemem(MergeState *ms)
98 : {
99 184 : assert(ms != NULL);
100 184 : if (ms->ah != (void *) ms->temparrayh)
101 1 : GDKfree(ms->ah);
102 184 : ms->ah = (void *) ms->temparrayh;
103 184 : ms->allocedh = MERGESTATE_TEMP_SIZE;
104 184 : if (ms->at != (void *) ms->temparrayt)
105 1 : GDKfree(ms->at);
106 184 : ms->at = (void *) ms->temparrayt;
107 184 : ms->allocedt = MERGESTATE_TEMP_SIZE;
108 184 : }
109 :
110 : /* Ensure enough temp memory for 'need' array slots is available.
111 : * Returns 0 on success and -1 if the memory can't be gotten. */
112 : static int
113 2 : merge_getmem(MergeState *ms, ssize_t need, void **ap,
114 : ssize_t *allocedp, int s, char *temparray)
115 : {
116 2 : assert(ms != NULL);
117 2 : need *= s;
118 2 : if (need <= *allocedp)
119 : return 0;
120 : /* Don't realloc! That can cost cycles to copy the old data,
121 : * but we don't care what's in the block. */
122 2 : if (*ap != (void *) temparray)
123 0 : GDKfree(*ap);
124 2 : *ap = GDKmalloc(need);
125 2 : if (*ap) {
126 2 : *allocedp = need;
127 2 : return 0;
128 : }
129 0 : merge_freemem(ms); /* reset to sane state */
130 0 : return -1;
131 : }
132 :
133 : #define MERGE_GETMEMH(MS, NEED) \
134 : ((NEED) * (MS)->hs <= (MS)->allocedh ? 0 : \
135 : merge_getmem(MS, NEED, &(MS)->ah, &(MS)->allocedh, (MS)->hs, \
136 : (MS)->temparrayh))
137 : #define MERGE_GETMEMT(MS, NEED) \
138 : ((NEED) * (MS)->ts <= (MS)->allocedt ? 0 : \
139 : merge_getmem(MS, NEED, &(MS)->at, &(MS)->allocedt, (MS)->ts, \
140 : (MS)->temparrayt))
141 :
142 : #define PTRADD(p, n, w) ((void *) ((char *) (p) + (n) * (w)))
143 :
144 : #define COPY_bte(d,s,w) (* (bte *) (d) = * (bte *) (s))
145 : #define COPY_sht(d,s,w) (* (sht *) (d) = * (sht *) (s))
146 : #define COPY_int(d,s,w) (* (int *) (d) = * (int *) (s))
147 : #define COPY_lng(d,s,w) (* (lng *) (d) = * (lng *) (s))
148 : #ifdef HAVE_HGE
149 : #define COPY_hge(d,s,w) (* (hge *) (d) = * (hge *) (s))
150 : #endif
151 : #define COPY_flt(d,s,w) (* (flt *) (d) = * (flt *) (s))
152 : #define COPY_dbl(d,s,w) (* (dbl *) (d) = * (dbl *) (s))
153 : #define COPY_oid(d,s,w) (* (oid *) (d) = * (oid *) (s))
154 :
155 : #define COPY_any(d,s,w) \
156 : do { \
157 : switch (w) { \
158 : case 0: \
159 : break; \
160 : case sizeof(bte): \
161 : * (bte *) (d) = * (bte *) (s); \
162 : break; \
163 : case sizeof(sht): \
164 : * (sht *) (d) = * (sht *) (s); \
165 : break; \
166 : case sizeof(int): \
167 : * (int *) (d) = * (int *) (s); \
168 : break; \
169 : case sizeof(lng): \
170 : * (lng *) (d) = * (lng *) (s); \
171 : break; \
172 : case 2 * sizeof(lng): \
173 : * (lng *) (d) = * (lng *) (s); \
174 : * ((lng *) (d) + 1) = * ((lng *) (s) + 1); \
175 : break; \
176 : default: \
177 : memcpy((d), (s), (size_t) (w)); \
178 : break; \
179 : } \
180 : } while (0)
181 :
182 : #define COPY_anyN(d,s,w,N) \
183 : do { \
184 : int i; \
185 : switch (w) { \
186 : case 0: \
187 : break; \
188 : case sizeof(bte): \
189 : for (i = 0; i < N; i++) \
190 : ((bte*)(d))[i] = ((bte*)(s))[i]; \
191 : break; \
192 : case sizeof(sht): \
193 : for (i = 0; i < N; i++) \
194 : ((sht*)(d))[i] = ((sht*)(s))[i]; \
195 : break; \
196 : case sizeof(int): \
197 : for (i = 0; i < N; i++) \
198 : ((int*)(d))[i] = ((int*)(s))[i]; \
199 : break; \
200 : case sizeof(lng): \
201 : for (i = 0; i < N; i++) \
202 : ((lng*)(d))[i] = ((lng*)(s))[i]; \
203 : break; \
204 : case 2 * sizeof(lng): \
205 : for (i = 0; i < N*2; i++) \
206 : ((lng*)(d))[i] = ((lng*)(s))[i]; \
207 : break; \
208 : default: \
209 : memcpy((d), (s), (size_t) (w)*N); \
210 : break; \
211 : } \
212 : } while (0)
213 :
214 : #define ISLT_any(X, Y, ms) (((ms)->heap ? (*(ms)->compare)((ms)->heap + VarHeapVal(X,0,(ms)->hs), (ms)->heap + VarHeapVal(Y,0,(ms)->hs)) : (*(ms)->compare)((X), (Y))) < 0)
215 : #define ISLT_bte(X, Y, ms) (* (bte *) (X) < * (bte *) (Y))
216 : #define ISLT_sht(X, Y, ms) (* (sht *) (X) < * (sht *) (Y))
217 : #define ISLT_int(X, Y, ms) (* (int *) (X) < * (int *) (Y))
218 : #define ISLT_lng(X, Y, ms) (* (lng *) (X) < * (lng *) (Y))
219 : #ifdef HAVE_HGE
220 : #define ISLT_hge(X, Y, ms) (* (hge *) (X) < * (hge *) (Y))
221 : #endif
222 : #define ISLT_flt(X, Y, ms) (!is_flt_nil(*(flt*)(Y)) && (is_flt_nil(*(flt*)(X)) || *(flt*)(X) < *(flt*)(Y)))
223 : #define ISLT_dbl(X, Y, ms) (!is_dbl_nil(*(dbl*)(Y)) && (is_dbl_nil(*(dbl*)(X)) || *(dbl*)(X) < *(dbl*)(Y)))
224 : #if SIZEOF_OID == SIZEOF_LNG
225 : #define ISLT_oid(X, Y, ms) ISLT_lng(X, Y, ms)
226 : #else
227 : #define ISLT_oid(X, Y, ms) ISLT_int(X, Y, ms)
228 : #endif
229 :
230 : /* for reverse order, just reverse arguments */
231 : #define ISLT_any_rev(X, Y, ms) ISLT_any(Y, X, ms)
232 : #define ISLT_bte_rev(X, Y, ms) ISLT_bte(Y, X, ms)
233 : #define ISLT_sht_rev(X, Y, ms) ISLT_sht(Y, X, ms)
234 : #define ISLT_int_rev(X, Y, ms) ISLT_int(Y, X, ms)
235 : #define ISLT_lng_rev(X, Y, ms) ISLT_lng(Y, X, ms)
236 : #ifdef HAVE_HGE
237 : #define ISLT_hge_rev(X, Y, ms) ISLT_hge(Y, X, ms)
238 : #endif
239 : #define ISLT_flt_rev(X, Y, ms) ISLT_flt(Y, X, ms)
240 : #define ISLT_dbl_rev(X, Y, ms) ISLT_dbl(Y, X, ms)
241 : #define ISLT_oid_rev(X, Y, ms) ISLT_oid(Y, X, ms)
242 :
243 : /* Reverse a slice of a list in place, from lo up to (exclusive) hi. */
244 : static void
245 142 : reverse_slice(size_t lo, size_t hi, MergeState *ms)
246 : {
247 142 : void *th, *tt;
248 142 : int hs, ts;
249 :
250 142 : assert(ms);
251 :
252 142 : th = ms->th;
253 142 : tt = ms->tt;
254 142 : hs = ms->hs;
255 142 : ts = ms->ts;
256 :
257 142 : hi--;
258 365 : while (lo < hi) {
259 223 : COPY_any(th, PTRADD(ms->bh, lo, hs), hs);
260 223 : COPY_any(PTRADD(ms->bh, lo, hs), PTRADD(ms->bh, hi, hs), hs);
261 223 : COPY_any(PTRADD(ms->bh, hi, hs), th, hs);
262 223 : COPY_any(tt, PTRADD(ms->bt, lo, ts), ts);
263 57 : COPY_any(PTRADD(ms->bt, lo, ts), PTRADD(ms->bt, hi, ts), ts);
264 57 : COPY_any(PTRADD(ms->bt, hi, ts), tt, ts);
265 223 : lo++;
266 223 : hi--;
267 : }
268 142 : }
269 :
270 : static ssize_t
271 184 : merge_compute_minrun(ssize_t n)
272 : {
273 184 : ssize_t r = 0; /* becomes 1 if any 1 bits are shifted off */
274 :
275 184 : assert(n >= 0);
276 199 : while (n >= 16) {
277 15 : r |= n & 1;
278 15 : n >>= 1;
279 : }
280 184 : return n + r;
281 : }
282 :
283 :
284 : #define COPY COPY_bte
285 :
286 : #define binarysort binarysort_bte
287 : #define do_ssort do_ssort_bte
288 : #define gallop_left gallop_left_bte
289 : #define gallop_right gallop_right_bte
290 : #define ISLT ISLT_bte
291 : #define merge_at merge_at_bte
292 : #include "gdk_ssort_impl.h"
293 : #undef binarysort
294 : #undef do_ssort
295 : #undef gallop_left
296 : #undef gallop_right
297 : #undef ISLT
298 : #undef merge_at
299 :
300 : #define binarysort binarysort_bte_rev
301 : #define do_ssort do_ssort_bte_rev
302 : #define gallop_left gallop_left_bte_rev
303 : #define gallop_right gallop_right_bte_rev
304 : #define ISLT ISLT_bte_rev
305 : #define merge_at merge_at_bte_rev
306 : #include "gdk_ssort_impl.h"
307 : #undef binarysort
308 : #undef do_ssort
309 : #undef gallop_left
310 : #undef gallop_right
311 : #undef ISLT
312 : #undef merge_at
313 :
314 : #undef COPY
315 :
316 : #define COPY COPY_sht
317 :
318 : #define binarysort binarysort_sht
319 : #define do_ssort do_ssort_sht
320 : #define gallop_left gallop_left_sht
321 : #define gallop_right gallop_right_sht
322 : #define ISLT ISLT_sht
323 : #define merge_at merge_at_sht
324 : #include "gdk_ssort_impl.h"
325 : #undef binarysort
326 : #undef do_ssort
327 : #undef gallop_left
328 : #undef gallop_right
329 : #undef ISLT
330 : #undef merge_at
331 :
332 : #define binarysort binarysort_sht_rev
333 : #define do_ssort do_ssort_sht_rev
334 : #define gallop_left gallop_left_sht_rev
335 : #define gallop_right gallop_right_sht_rev
336 : #define ISLT ISLT_sht_rev
337 : #define merge_at merge_at_sht_rev
338 : #include "gdk_ssort_impl.h"
339 : #undef binarysort
340 : #undef do_ssort
341 : #undef gallop_left
342 : #undef gallop_right
343 : #undef ISLT
344 : #undef merge_at
345 :
346 : #undef COPY
347 :
348 : #define COPY COPY_int
349 :
350 : #define binarysort binarysort_int
351 : #define do_ssort do_ssort_int
352 : #define gallop_left gallop_left_int
353 : #define gallop_right gallop_right_int
354 : #define ISLT ISLT_int
355 : #define merge_at merge_at_int
356 : #include "gdk_ssort_impl.h"
357 : #undef binarysort
358 : #undef do_ssort
359 : #undef gallop_left
360 : #undef gallop_right
361 : #undef ISLT
362 : #undef merge_at
363 :
364 : #define binarysort binarysort_int_rev
365 : #define do_ssort do_ssort_int_rev
366 : #define gallop_left gallop_left_int_rev
367 : #define gallop_right gallop_right_int_rev
368 : #define ISLT ISLT_int_rev
369 : #define merge_at merge_at_int_rev
370 : #include "gdk_ssort_impl.h"
371 : #undef binarysort
372 : #undef do_ssort
373 : #undef gallop_left
374 : #undef gallop_right
375 : #undef ISLT
376 : #undef merge_at
377 :
378 : #undef COPY
379 :
380 : #define COPY COPY_lng
381 :
382 : #define binarysort binarysort_lng
383 : #define do_ssort do_ssort_lng
384 : #define gallop_left gallop_left_lng
385 : #define gallop_right gallop_right_lng
386 : #define ISLT ISLT_lng
387 : #define merge_at merge_at_lng
388 : #include "gdk_ssort_impl.h"
389 : #undef binarysort
390 : #undef do_ssort
391 : #undef gallop_left
392 : #undef gallop_right
393 : #undef ISLT
394 : #undef merge_at
395 :
396 : #define binarysort binarysort_lng_rev
397 : #define do_ssort do_ssort_lng_rev
398 : #define gallop_left gallop_left_lng_rev
399 : #define gallop_right gallop_right_lng_rev
400 : #define ISLT ISLT_lng_rev
401 : #define merge_at merge_at_lng_rev
402 : #include "gdk_ssort_impl.h"
403 : #undef binarysort
404 : #undef do_ssort
405 : #undef gallop_left
406 : #undef gallop_right
407 : #undef ISLT
408 : #undef merge_at
409 :
410 : #undef COPY
411 :
412 : #ifdef HAVE_HGE
413 : #define COPY COPY_hge
414 :
415 : #define binarysort binarysort_hge
416 : #define do_ssort do_ssort_hge
417 : #define gallop_left gallop_left_hge
418 : #define gallop_right gallop_right_hge
419 : #define ISLT ISLT_hge
420 : #define merge_at merge_at_hge
421 : #include "gdk_ssort_impl.h"
422 : #undef binarysort
423 : #undef do_ssort
424 : #undef gallop_left
425 : #undef gallop_right
426 : #undef ISLT
427 : #undef merge_at
428 :
429 : #define binarysort binarysort_hge_rev
430 : #define do_ssort do_ssort_hge_rev
431 : #define gallop_left gallop_left_hge_rev
432 : #define gallop_right gallop_right_hge_rev
433 : #define ISLT ISLT_hge_rev
434 : #define merge_at merge_at_hge_rev
435 : #include "gdk_ssort_impl.h"
436 : #undef binarysort
437 : #undef do_ssort
438 : #undef gallop_left
439 : #undef gallop_right
440 : #undef ISLT
441 : #undef merge_at
442 :
443 : #undef COPY
444 : #endif
445 :
446 : #define COPY COPY_flt
447 :
448 : #define binarysort binarysort_flt
449 : #define do_ssort do_ssort_flt
450 : #define gallop_left gallop_left_flt
451 : #define gallop_right gallop_right_flt
452 : #define ISLT ISLT_flt
453 : #define merge_at merge_at_flt
454 : #include "gdk_ssort_impl.h"
455 : #undef binarysort
456 : #undef do_ssort
457 : #undef gallop_left
458 : #undef gallop_right
459 : #undef ISLT
460 : #undef merge_at
461 :
462 : #define binarysort binarysort_flt_rev
463 : #define do_ssort do_ssort_flt_rev
464 : #define gallop_left gallop_left_flt_rev
465 : #define gallop_right gallop_right_flt_rev
466 : #define ISLT ISLT_flt_rev
467 : #define merge_at merge_at_flt_rev
468 : #include "gdk_ssort_impl.h"
469 : #undef binarysort
470 : #undef do_ssort
471 : #undef gallop_left
472 : #undef gallop_right
473 : #undef ISLT
474 : #undef merge_at
475 :
476 : #undef COPY
477 :
478 : #define COPY COPY_dbl
479 :
480 : #define binarysort binarysort_dbl
481 : #define do_ssort do_ssort_dbl
482 : #define gallop_left gallop_left_dbl
483 : #define gallop_right gallop_right_dbl
484 : #define ISLT ISLT_dbl
485 : #define merge_at merge_at_dbl
486 : #include "gdk_ssort_impl.h"
487 : #undef binarysort
488 : #undef do_ssort
489 : #undef gallop_left
490 : #undef gallop_right
491 : #undef ISLT
492 : #undef merge_at
493 :
494 : #define binarysort binarysort_dbl_rev
495 : #define do_ssort do_ssort_dbl_rev
496 : #define gallop_left gallop_left_dbl_rev
497 : #define gallop_right gallop_right_dbl_rev
498 : #define ISLT ISLT_dbl_rev
499 : #define merge_at merge_at_dbl_rev
500 : #include "gdk_ssort_impl.h"
501 : #undef binarysort
502 : #undef do_ssort
503 : #undef gallop_left
504 : #undef gallop_right
505 : #undef ISLT
506 : #undef merge_at
507 :
508 : #undef COPY
509 :
510 : #define COPY COPY_any
511 :
512 : #define binarysort binarysort_any
513 : #define do_ssort do_ssort_any
514 : #define gallop_left gallop_left_any
515 : #define gallop_right gallop_right_any
516 : #define ISLT ISLT_any
517 : #define merge_at merge_at_any
518 :
519 : #define GDKssortimpl GDKssort
520 :
521 : #include "gdk_ssort_impl.h"
522 :
523 : #undef GDKssortimpl
524 :
525 : #undef binarysort
526 : #undef do_ssort
527 : #undef gallop_left
528 : #undef gallop_right
529 : #undef ISLT
530 : #undef merge_at
531 :
532 : #define binarysort binarysort_any_rev
533 : #define do_ssort do_ssort_any_rev
534 : #define gallop_left gallop_left_any_rev
535 : #define gallop_right gallop_right_any_rev
536 : #define ISLT ISLT_any_rev
537 : #define merge_at merge_at_any_rev
538 :
539 : #define GDKssortimpl GDKssort_rev
540 : #define do_ssort_bte do_ssort_bte_rev
541 : #define do_ssort_sht do_ssort_sht_rev
542 : #define do_ssort_int do_ssort_int_rev
543 : #define do_ssort_lng do_ssort_lng_rev
544 : #ifdef HAVE_HGE
545 : #define do_ssort_hge do_ssort_hge_rev
546 : #endif
547 : #define do_ssort_flt do_ssort_flt_rev
548 : #define do_ssort_dbl do_ssort_dbl_rev
549 : #define do_ssort_any do_ssort_any_rev
550 :
551 : #include "gdk_ssort_impl.h"
552 :
553 : #undef GDKssortimpl
554 : #undef do_ssort_bte
555 : #undef do_ssort_sht
556 : #undef do_ssort_int
557 : #undef do_ssort_lng
558 : #ifdef HAVE_HGE
559 : #undef do_ssort_hge
560 : #endif
561 : #undef do_ssort_flt
562 : #undef do_ssort_dbl
563 : #undef do_ssort_any
564 :
565 : #undef binarysort
566 : #undef do_ssort
567 : #undef gallop_left
568 : #undef gallop_right
569 : #undef ISLT
570 : #undef merge_at
571 :
572 : #undef COPY
|