Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * This file implements a stable sort algorithm known as "timsort".
15 : * The algorithm is a straight copy of the listsort function in the
16 : * Python 2.5 source code, heavily modified to fit into the MonetDB
17 : * environment.
18 : * The original author of the sort algorithm was Tim Peters, the
19 : * adaptation was done by Sjoerd Mullender.
20 : *
21 : *
22 : * This file is included multiple times. We expect a bunch of tokens
23 : * to be redefined differently each time (see gdk_ssort.c). If the
24 : * token GDKssortimpl is defined, the main interface is defined.
25 : */
26 :
27 : /* binarysort is the best method for sorting small arrays: it does few
28 : * compares, but can do data movement quadratic in the number of
29 : * elements.
30 : * [lo, hi) is a contiguous slice of a list, and is sorted via binary
31 : * insertion. This sort is stable. On entry, must have lo <= start <=
32 : * hi, and that [lo, start) is already sorted (pass start == lo if you
33 : * don't know!). */
34 : static void
35 145 : binarysort(size_t lo, size_t hi, size_t start, MergeState *ms)
36 : {
37 145 : register size_t l, p, r;
38 :
39 145 : assert(lo <= start && start <= hi);
40 : /* assert [lo, start) is sorted */
41 145 : if (lo == start)
42 0 : start++;
43 : /* [lo,start) is sorted, insert start (the pivot) into this
44 : * area, finding its position using binary search. */
45 720 : for (; start < hi; start++) {
46 : /* set l to where start belongs */
47 575 : l = lo;
48 575 : r = start;
49 : /* ms->t[ht] is the pivot */
50 575 : COPY(ms->th, PTRADD(ms->bh, r, ms->hs), ms->hs);
51 575 : COPY_any(ms->tt, PTRADD(ms->bt, r, ms->ts), ms->ts);
52 : /* Invariants:
53 : * pivot >= all in [lo, l).
54 : * pivot < all in [r, start).
55 : * The second is vacuously true at the start. */
56 575 : assert(l < r);
57 1375 : do {
58 1375 : p = l + ((r - l) >> 1);
59 1375 : if (ISLT(ms->th, PTRADD(ms->bh, p, ms->hs), ms))
60 : r = p;
61 : else
62 633 : l = p + 1;
63 1375 : } while (l < r);
64 575 : assert(l == r);
65 : /* The invariants still hold, so pivot >= all in [lo,
66 : * l) and pivot < all in [l, start), so pivot belongs
67 : * at l. Note that if there are elements equal to
68 : * pivot, l points to the first slot after them --
69 : * that's why this sort is stable. Slide over to make
70 : * room.
71 : * Caution: using memmove is much slower under MSVC 5;
72 : * we're not usually moving many slots. */
73 2021 : for (p = start, r = p - 1; p > l; p = r, r = p - 1) {
74 1446 : COPY(PTRADD(ms->bh, p, ms->hs),
75 : PTRADD(ms->bh, r, ms->hs), ms->hs);
76 1446 : COPY_any(PTRADD(ms->bt, p, ms->ts),
77 : PTRADD(ms->bt, r, ms->ts), ms->ts);
78 : }
79 575 : COPY(PTRADD(ms->bh, l, ms->hs), ms->th, ms->hs);
80 575 : COPY_any(PTRADD(ms->bt, l, ms->ts), ms->tt, ms->ts);
81 : }
82 145 : }
83 :
84 : /* Locate the proper position of key in a sorted vector; if the vector
85 : * contains an element equal to key, return the position immediately
86 : * to the left of the leftmost equal element. [gallop_right() does
87 : * the same except returns the position to the right of the rightmost
88 : * equal element (if any).]
89 : *
90 : * "a" is a sorted vector with n elements, starting at a[0]. n must
91 : * be > 0.
92 : *
93 : * "hint" is an index at which to begin the search, 0 <= hint < n.
94 : * The closer hint is to the final result, the faster this runs.
95 : *
96 : * The return value is the int k in 0..n such that
97 : *
98 : * a[k-1] < key <= a[k]
99 : *
100 : * pretending that *(a-1) is minus infinity and a[n] is plus infinity.
101 : * IOW, key belongs at index k; or, IOW, the first k elements of a
102 : * should precede key, and the last n-k should follow key.
103 : *
104 : * Returns -1 on error. See listsort.txt for info on the method. */
105 : static ssize_t
106 11 : gallop_left(void *key, void *a, ssize_t n, ssize_t hint, MergeState *ms)
107 : {
108 11 : ssize_t ofs;
109 11 : ssize_t lastofs;
110 11 : ssize_t k;
111 :
112 11 : assert(key && a && n > 0 && hint >= 0 && hint < n);
113 :
114 11 : a = PTRADD(a, hint, ms->hs);
115 11 : lastofs = 0;
116 11 : ofs = 1;
117 11 : if (ISLT(a, key, ms)) {
118 : /* a[hint] < key -- gallop right, until
119 : * a[hint + lastofs] < key <= a[hint + ofs] */
120 9 : const ssize_t maxofs = n - hint; /* &a[n-1] is highest */
121 27 : while (ofs < maxofs) {
122 18 : if (ISLT(PTRADD(a, ofs, ms->hs), key, ms)) {
123 18 : lastofs = ofs;
124 18 : ofs = (ofs << 1) + 1;
125 18 : if (ofs <= 0) /* int overflow */
126 0 : ofs = maxofs;
127 : } else /* key <= a[hint + ofs] */
128 : break;
129 : }
130 9 : if (ofs > maxofs)
131 : ofs = maxofs;
132 : /* Translate back to offsets relative to &a[0]. */
133 9 : lastofs += hint;
134 9 : ofs += hint;
135 : } else {
136 : /* key <= a[hint] -- gallop left, until
137 : * a[hint - ofs] < key <= a[hint - lastofs] */
138 2 : const ssize_t maxofs = hint + 1; /* &a[0] is lowest */
139 16 : while (ofs < maxofs) {
140 16 : if (ISLT(PTRADD(a, -ofs, ms->hs), key, ms))
141 : break;
142 : /* key <= a[hint - ofs] */
143 14 : lastofs = ofs;
144 14 : ofs = (ofs << 1) + 1;
145 14 : if (ofs <= 0) /* int overflow */
146 0 : ofs = maxofs;
147 : }
148 2 : if (ofs > maxofs)
149 : ofs = maxofs;
150 : /* Translate back to positive offsets relative to &a[0]. */
151 2 : k = lastofs;
152 2 : lastofs = hint - ofs;
153 2 : ofs = hint - k;
154 : }
155 11 : a = PTRADD(a, -hint, ms->hs);
156 :
157 11 : assert(-1 <= lastofs && lastofs < ofs && ofs <= n);
158 : /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to
159 : * the right of lastofs but no farther right than ofs. Do a
160 : * binary search, with invariant a[lastofs-1] < key <=
161 : * a[ofs]. */
162 11 : ++lastofs;
163 49 : while (lastofs < ofs) {
164 27 : ssize_t m = lastofs + ((ofs - lastofs) >> 1);
165 :
166 27 : if (ISLT(PTRADD(a, m, ms->hs), key, ms))
167 23 : lastofs = m + 1; /* a[m] < key */
168 : else
169 : ofs = m; /* key <= a[m] */
170 : }
171 11 : assert(lastofs == ofs); /* so a[ofs-1] < key <= a[ofs] */
172 11 : return ofs;
173 : }
174 :
175 : /* Exactly like gallop_left(), except that if key already exists in
176 : * a[0:n], finds the position immediately to the right of the
177 : * rightmost equal value.
178 : *
179 : * The return value is the int k in 0..n such that
180 : *
181 : * a[k-1] <= key < a[k]
182 : *
183 : * or -1 if error.
184 : *
185 : * The code duplication is massive, but this is enough different given
186 : * that we're sticking to "<" comparisons that it's much harder to
187 : * follow if written as one routine with yet another "left or right?"
188 : * flag. */
189 : static ssize_t
190 18 : gallop_right(void *key, void *a, ssize_t n, ssize_t hint, MergeState *ms)
191 : {
192 18 : ssize_t ofs;
193 18 : ssize_t lastofs;
194 18 : ssize_t k;
195 :
196 18 : assert(key && a && n > 0 && hint >= 0 && hint < n);
197 :
198 18 : a = PTRADD(a, hint, ms->hs);
199 18 : lastofs = 0;
200 18 : ofs = 1;
201 18 : if (ISLT(key, a, ms)) {
202 : /* key < a[hint] -- gallop left, until
203 : * a[hint - ofs] <= key < a[hint - lastofs] */
204 9 : const ssize_t maxofs = hint + 1; /* &a[0] is lowest */
205 9 : while (ofs < maxofs) {
206 0 : if (ISLT(key, PTRADD(a, -ofs, ms->hs), ms)) {
207 0 : lastofs = ofs;
208 0 : ofs = (ofs << 1) + 1;
209 0 : if (ofs <= 0) /* int overflow */
210 0 : ofs = maxofs;
211 : } else /* a[hint - ofs] <= key */
212 : break;
213 : }
214 9 : if (ofs > maxofs)
215 : ofs = maxofs;
216 : /* Translate back to positive offsets relative to &a[0]. */
217 9 : k = lastofs;
218 9 : lastofs = hint - ofs;
219 9 : ofs = hint - k;
220 : } else {
221 : /* a[hint] <= key -- gallop right, until
222 : * a[hint + lastofs] <= key < a[hint + ofs] */
223 9 : const ssize_t maxofs = n - hint; /* &a[n-1] is highest */
224 40 : while (ofs < maxofs) {
225 39 : if (ISLT(key, PTRADD(a, ofs, ms->hs), ms))
226 : break;
227 : /* a[hint + ofs] <= key */
228 31 : lastofs = ofs;
229 31 : ofs = (ofs << 1) + 1;
230 31 : if (ofs <= 0) /* int overflow */
231 0 : ofs = maxofs;
232 : }
233 9 : if (ofs > maxofs)
234 : ofs = maxofs;
235 : /* Translate back to offsets relative to &a[0]. */
236 9 : lastofs += hint;
237 9 : ofs += hint;
238 : }
239 18 : a = PTRADD(a, -hint, ms->hs);
240 :
241 18 : assert(-1 <= lastofs && lastofs < ofs && ofs <= n);
242 : /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to
243 : * the right of lastofs but no farther right than ofs. Do a
244 : * binary search, with invariant a[lastofs-1] <= key <
245 : * a[ofs]. */
246 18 : ++lastofs;
247 67 : while (lastofs < ofs) {
248 31 : ssize_t m = lastofs + ((ofs - lastofs) >> 1);
249 :
250 31 : if (ISLT(key, PTRADD(a, m, ms->hs), ms))
251 : ofs = m; /* key < a[m] */
252 : else
253 12 : lastofs = m+1; /* a[m] <= key */
254 : }
255 18 : assert(lastofs == ofs); /* so a[ofs-1] <= key < a[ofs] */
256 18 : return ofs;
257 : }
258 :
259 : /* Merge the two runs at stack indices i and i+1.
260 : * Returns 0 on success, -1 on error. */
261 : static ssize_t
262 9 : merge_at(MergeState *ms, ssize_t i)
263 : {
264 9 : size_t pa, pb;
265 9 : ssize_t na, nb;
266 9 : ssize_t k;
267 :
268 9 : assert(ms != NULL);
269 9 : assert(ms->n >= 2);
270 9 : assert(i >= 0);
271 9 : assert(i == ms->n - 2 || i == ms->n - 3);
272 :
273 9 : pa = ms->pending[i].base;
274 9 : na = ms->pending[i].len;
275 9 : pb = ms->pending[i + 1].base;
276 9 : nb = ms->pending[i + 1].len;
277 9 : assert(na > 0 && nb > 0);
278 9 : assert(pa + na == pb);
279 :
280 : /* Record the length of the combined runs; if i is the
281 : * 3rd-last run now, also slide over the last run (which isn't
282 : * involved in this merge). The current run i+1 goes away in
283 : * any case. */
284 9 : ms->pending[i].len = na + nb;
285 9 : if (i == ms->n - 3)
286 0 : ms->pending[i + 1] = ms->pending[i + 2];
287 9 : --ms->n;
288 :
289 : /* Where does b start in a? Elements in a before that can be
290 : * ignored (already in place). */
291 18 : k = gallop_right(PTRADD(ms->bh, pb, ms->hs),
292 9 : PTRADD(ms->bh, pa, ms->hs), na, 0, ms);
293 9 : pa += k;
294 9 : na -= k;
295 9 : if (na == 0)
296 : return 0;
297 :
298 : /* Where does a end in b? Elements in b after that can be
299 : * ignored (already in place). */
300 9 : nb = gallop_left(PTRADD(ms->bh, pa + na - 1, ms->hs),
301 0 : PTRADD(ms->bh, pb, ms->hs), nb, nb-1, ms);
302 9 : if (nb <= 0)
303 : return nb;
304 :
305 : /* Merge what remains of the runs, using a temp array with
306 : * min(na, nb) elements. */
307 9 : if (na <= nb) {
308 : /* Merge the na elements starting at pa with the nb elements starting
309 : * at pb in a stable way, in-place. na and nb must be > 0, and pa +
310 : * na == pb. Must also have that *pb < *pa, that pa[na-1] belongs at
311 : * the end of the merge, and should have na <= nb. See listsort.txt
312 : * for more info. Return 0 if successful, -1 if error. */
313 9 : size_t dest;
314 9 : ssize_t min_gallop = ms->min_gallop;
315 :
316 9 : assert(ms && na > 0 && nb > 0 && pa + na == pb);
317 9 : if (MERGE_GETMEMH(ms, na) < 0)
318 : return -1;
319 9 : if (MERGE_GETMEMT(ms, na) < 0)
320 : return -1;
321 465 : COPY_anyN(ms->ah, PTRADD(ms->bh, pa, ms->hs), ms->hs, na);
322 450 : COPY_anyN(ms->at, PTRADD(ms->bt, pa, ms->ts), ms->ts, na);
323 9 : dest = pa;
324 9 : pa = 0;
325 :
326 9 : COPY(PTRADD(ms->bh, dest, ms->hs),
327 : PTRADD(ms->bh, pb, ms->hs), ms->hs);
328 9 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
329 : PTRADD(ms->bt, pb, ms->ts), ms->ts);
330 9 : dest++;
331 9 : pb++;
332 9 : --nb;
333 9 : if (nb == 0)
334 0 : goto SucceedA;
335 9 : if (na == 1)
336 0 : goto CopyB;
337 :
338 9 : for (;;) {
339 9 : ssize_t acount = 0; /* # of times A won in a row */
340 9 : ssize_t bcount = 0; /* # of times B won in a row */
341 :
342 : /* Do the straightforward thing until (if
343 : * ever) one run appears to win
344 : * consistently. */
345 63 : for (;;) {
346 63 : assert(na > 1 && nb > 0);
347 63 : k = ISLT(PTRADD(ms->bh, pb, ms->hs),
348 : PTRADD(ms->ah, pa, ms->hs), ms);
349 63 : if (k) {
350 63 : COPY(PTRADD(ms->bh, dest, ms->hs),
351 : PTRADD(ms->bh, pb, ms->hs),
352 : ms->hs);
353 63 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
354 : PTRADD(ms->bt, pb, ms->ts),
355 : ms->ts);
356 63 : dest++;
357 63 : pb++;
358 63 : ++bcount;
359 63 : acount = 0;
360 63 : --nb;
361 63 : if (nb == 0)
362 0 : goto SucceedA;
363 63 : if (bcount >= min_gallop)
364 : break;
365 : } else {
366 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
367 : PTRADD(ms->ah, pa, ms->hs),
368 : ms->hs);
369 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
370 : PTRADD(ms->at, pa, ms->ts),
371 : ms->ts);
372 0 : dest++;
373 0 : pa++;
374 0 : ++acount;
375 0 : bcount = 0;
376 0 : --na;
377 0 : if (na == 1)
378 0 : goto CopyB;
379 0 : if (acount >= min_gallop)
380 : break;
381 : }
382 : }
383 :
384 : /* One run is winning so consistently that
385 : * galloping may be a huge win. So try that,
386 : * and continue galloping until (if ever)
387 : * neither run appears to be winning
388 : * consistently anymore. */
389 9 : ++min_gallop;
390 9 : do {
391 9 : assert(na > 1 && nb > 0);
392 9 : min_gallop -= min_gallop > 1;
393 9 : ms->min_gallop = min_gallop;
394 18 : k = gallop_right(PTRADD(ms->bh, pb, ms->hs),
395 9 : PTRADD(ms->ah, pa, ms->hs),
396 : na, 0, ms);
397 9 : acount = k;
398 9 : if (k) {
399 0 : COPY_anyN(PTRADD(ms->bh, dest, ms->hs),
400 : PTRADD(ms->ah, pa, ms->hs),
401 : ms->hs, k);
402 0 : COPY_anyN(PTRADD(ms->bt, dest, ms->ts),
403 : PTRADD(ms->at, pa, ms->ts),
404 : ms->ts, k);
405 0 : dest += k;
406 0 : pa += k;
407 0 : na -= k;
408 0 : if (na == 1)
409 0 : goto CopyB;
410 : /* na==0 is impossible now if
411 : * the comparison function is
412 : * consistent, but we can't
413 : * assume that it is. */
414 0 : if (na == 0)
415 0 : goto SucceedA;
416 : }
417 9 : COPY(PTRADD(ms->bh, dest, ms->hs),
418 : PTRADD(ms->bh, pb, ms->hs), ms->hs);
419 9 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
420 : PTRADD(ms->bt, pb, ms->ts), ms->ts);
421 9 : dest++;
422 9 : pb++;
423 9 : --nb;
424 9 : if (nb == 0)
425 7 : goto SucceedA;
426 :
427 4 : k = gallop_left(PTRADD(ms->ah, pa, ms->hs),
428 2 : PTRADD(ms->bh, pb, ms->hs),
429 : nb, 0, ms);
430 2 : bcount = k;
431 2 : if (k) {
432 2 : memmove(PTRADD(ms->bh, dest, ms->hs),
433 0 : PTRADD(ms->bh, pb, ms->hs),
434 2 : k * ms->hs);
435 2 : memmove(PTRADD(ms->bt, dest, ms->ts),
436 2 : PTRADD(ms->bt, pb, ms->ts),
437 2 : k * ms->ts);
438 2 : dest += k;
439 2 : pb += k;
440 2 : nb -= k;
441 2 : if (nb == 0)
442 2 : goto SucceedA;
443 : }
444 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
445 : PTRADD(ms->ah, pa, ms->hs), ms->hs);
446 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
447 : PTRADD(ms->at, pa, ms->ts), ms->ts);
448 0 : dest++;
449 0 : pa++;
450 0 : --na;
451 0 : if (na == 1)
452 0 : goto CopyB;
453 0 : } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
454 0 : ++min_gallop; /* penalize it for leaving galloping mode */
455 0 : ms->min_gallop = min_gallop;
456 : }
457 9 : SucceedA:
458 9 : if (na) {
459 465 : COPY_anyN(PTRADD(ms->bh, dest, ms->hs),
460 : PTRADD(ms->ah, pa, ms->hs), ms->hs, na);
461 450 : COPY_anyN(PTRADD(ms->bt, dest, ms->ts),
462 : PTRADD(ms->at, pa, ms->ts), ms->ts, na);
463 : }
464 9 : return 0;
465 0 : CopyB:
466 0 : assert(na == 1 && nb > 0);
467 : /* The last element of pa belongs at the end of the merge. */
468 0 : memmove(PTRADD(ms->bh, dest, ms->hs),
469 0 : PTRADD(ms->bh, pb, ms->hs), nb * ms->hs);
470 0 : memmove(PTRADD(ms->bt, dest, ms->ts),
471 0 : PTRADD(ms->bt, pb, ms->ts), nb * ms->ts);
472 0 : COPY(PTRADD(ms->bh, dest + nb, ms->hs),
473 : PTRADD(ms->ah, pa, ms->hs), ms->hs);
474 0 : COPY_any(PTRADD(ms->bt, dest + nb, ms->ts),
475 : PTRADD(ms->at, pa, ms->ts), ms->ts);
476 0 : return 0;
477 : } else {
478 : /* Merge the na elements starting at pa with the nb elements starting
479 : * at pb in a stable way, in-place. na and nb must be > 0, and pa +
480 : * na == pb. Must also have that *pb < *pa, that pa[na-1] belongs at
481 : * the end of the merge, and should have na >= nb. See listsort.txt
482 : * for more info. Return 0 if successful, -1 if error. */
483 0 : size_t dest;
484 0 : size_t basea;
485 0 : size_t baseb;
486 0 : ssize_t min_gallop = ms->min_gallop;
487 :
488 0 : assert(ms && na > 0 && nb > 0 && pa + na == pb);
489 0 : if (MERGE_GETMEMH(ms, nb) < 0)
490 : return -1;
491 0 : if (MERGE_GETMEMT(ms, nb) < 0)
492 : return -1;
493 0 : dest = pb + nb - 1;
494 0 : COPY_anyN(ms->ah, PTRADD(ms->bh, pb, ms->hs), ms->hs, nb);
495 0 : COPY_anyN(ms->at, PTRADD(ms->bt, pb, ms->ts), ms->ts, nb);
496 0 : basea = pa;
497 0 : baseb = 0;
498 0 : pb = nb - 1;
499 0 : pa += na - 1;
500 :
501 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
502 : PTRADD(ms->bh, pa, ms->hs), ms->hs);
503 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
504 : PTRADD(ms->bt, pa, ms->ts), ms->ts);
505 0 : dest--;
506 0 : pa--;
507 0 : --na;
508 0 : if (na == 0)
509 : goto SucceedB;
510 0 : if (nb == 1)
511 0 : goto CopyA;
512 :
513 0 : for (;;) {
514 0 : ssize_t acount = 0; /* # of times A won in a row */
515 0 : ssize_t bcount = 0; /* # of times B won in a row */
516 :
517 : /* Do the straightforward thing until (if
518 : * ever) one run appears to win
519 : * consistently. */
520 0 : for (;;) {
521 0 : assert(na > 0 && nb > 1);
522 0 : k = ISLT(PTRADD(ms->ah, pb, ms->hs),
523 : PTRADD(ms->bh, pa, ms->hs), ms);
524 0 : if (k) {
525 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
526 : PTRADD(ms->bh, pa, ms->hs),
527 : ms->hs);
528 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
529 : PTRADD(ms->bt, pa, ms->ts),
530 : ms->ts);
531 0 : dest--;
532 0 : pa--;
533 0 : ++acount;
534 0 : bcount = 0;
535 0 : --na;
536 0 : if (na == 0)
537 0 : goto SucceedB;
538 0 : if (acount >= min_gallop)
539 : break;
540 : } else {
541 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
542 : PTRADD(ms->ah, pb, ms->hs),
543 : ms->hs);
544 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
545 : PTRADD(ms->at, pb, ms->ts),
546 : ms->ts);
547 0 : dest--;
548 0 : pb--;
549 0 : ++bcount;
550 0 : acount = 0;
551 0 : --nb;
552 0 : if (nb == 1)
553 0 : goto CopyA;
554 0 : if (bcount >= min_gallop)
555 : break;
556 : }
557 : }
558 :
559 : /* One run is winning so consistently that
560 : * galloping may be a huge win. So try that,
561 : * and continue galloping until (if ever)
562 : * neither run appears to be winning
563 : * consistently anymore. */
564 0 : ++min_gallop;
565 0 : do {
566 0 : assert(na > 0 && nb > 1);
567 0 : min_gallop -= min_gallop > 1;
568 0 : ms->min_gallop = min_gallop;
569 0 : k = gallop_right(PTRADD(ms->ah, pb, ms->hs),
570 0 : PTRADD(ms->bh, basea, ms->hs),
571 : na, na - 1, ms);
572 0 : k = na - k;
573 0 : acount = k;
574 0 : if (k) {
575 0 : dest -= k;
576 0 : pa -= k;
577 0 : memmove(PTRADD(ms->bh, dest + 1,
578 : ms->hs),
579 0 : PTRADD(ms->bh, pa + 1, ms->hs),
580 0 : k * ms->hs);
581 0 : memmove(PTRADD(ms->bt, dest + 1,
582 : ms->ts),
583 0 : PTRADD(ms->bt, pa + 1, ms->ts),
584 0 : k * ms->ts);
585 0 : na -= k;
586 0 : if (na == 0)
587 0 : goto SucceedB;
588 : }
589 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
590 : PTRADD(ms->ah, pb, ms->hs), ms->hs);
591 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
592 : PTRADD(ms->at, pb, ms->ts), ms->ts);
593 0 : dest--;
594 0 : pb--;
595 0 : --nb;
596 0 : if (nb == 1)
597 0 : goto CopyA;
598 :
599 0 : k = gallop_left(PTRADD(ms->bh, pa, ms->hs),
600 0 : PTRADD(ms->ah, baseb, ms->hs),
601 : nb, nb - 1, ms);
602 0 : k = nb - k;
603 0 : bcount = k;
604 0 : if (k) {
605 0 : dest -= k;
606 0 : pb -= k;
607 0 : memmove(PTRADD(ms->bh, dest + 1,
608 : ms->hs),
609 0 : PTRADD(ms->ah, pb + 1, ms->hs),
610 0 : k * ms->hs);
611 0 : memmove(PTRADD(ms->bt, dest + 1,
612 : ms->ts),
613 0 : PTRADD(ms->at, pb + 1, ms->ts),
614 0 : k * ms->ts);
615 0 : nb -= k;
616 0 : if (nb == 1)
617 0 : goto CopyA;
618 : /* nb==0 is impossible now if
619 : * the comparison function is
620 : * consistent, but we can't
621 : * assume that it is. */
622 0 : if (nb == 0)
623 0 : goto SucceedB;
624 : }
625 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
626 : PTRADD(ms->bh, pa, ms->hs), ms->hs);
627 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
628 : PTRADD(ms->bt, pa, ms->ts), ms->ts);
629 0 : dest--;
630 0 : pa--;
631 0 : --na;
632 0 : if (na == 0)
633 0 : goto SucceedB;
634 0 : } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
635 0 : ++min_gallop; /* penalize it for leaving galloping mode */
636 0 : ms->min_gallop = min_gallop;
637 : }
638 0 : SucceedB:
639 0 : if (nb) {
640 0 : COPY_anyN(PTRADD(ms->bh, dest + 1 - nb, ms->hs),
641 : PTRADD(ms->ah, baseb, ms->hs), ms->hs, nb);
642 0 : COPY_anyN(PTRADD(ms->bt, dest + 1 - nb, ms->ts),
643 : PTRADD(ms->at, baseb, ms->ts), ms->ts, nb);
644 : }
645 0 : return 0;
646 0 : CopyA:
647 0 : assert(nb == 1 && na > 0);
648 : /* The first element of pb belongs at the front of the
649 : * merge. */
650 0 : dest -= na;
651 0 : pa -= na;
652 0 : memmove(PTRADD(ms->bh, dest + 1, ms->hs),
653 0 : PTRADD(ms->bh, pa + 1, ms->hs),
654 0 : na * ms->hs);
655 0 : memmove(PTRADD(ms->bt, dest + 1, ms->ts),
656 0 : PTRADD(ms->bt, pa + 1, ms->ts),
657 0 : na * ms->ts);
658 0 : COPY(PTRADD(ms->bh, dest, ms->hs),
659 : PTRADD(ms->ah, pb, ms->hs), ms->hs);
660 0 : COPY_any(PTRADD(ms->bt, dest, ms->ts),
661 : PTRADD(ms->at, pb, ms->ts), ms->ts);
662 0 : return 0;
663 : }
664 : }
665 :
666 : static int
667 184 : do_ssort(MergeState *ms, ssize_t nremaining, size_t lo, size_t hi, ssize_t minrun)
668 : {
669 193 : do {
670 193 : int descending;
671 193 : ssize_t n;
672 :
673 : /* Identify next run. */
674 : {
675 : /* Return the length of the run beginning at lo, in the slice [lo,
676 : * hi). lo < hi is required on entry. "A run" is the longest
677 : * ascending sequence, with
678 : *
679 : * lo[0] <= lo[1] <= lo[2] <= ...
680 : *
681 : * or the longest descending sequence, with
682 : *
683 : * lo[0] > lo[1] > lo[2] > ...
684 : *
685 : * Boolean descending is set to 0 in the former case, or to 1 in the
686 : * latter. For its intended use in a stable mergesort, the strictness
687 : * of the defn of "descending" is needed so that the caller can safely
688 : * reverse a descending sequence without violating stability (strict >
689 : * ensures there are no equal elements to get out of order). */
690 193 : size_t nlo;
691 193 : size_t olo;
692 :
693 193 : assert(lo < hi);
694 193 : descending = 0;
695 193 : olo = lo;
696 193 : nlo = lo + 1;
697 193 : if (nlo == hi) {
698 : n = 1;
699 : } else {
700 193 : n = 2;
701 193 : if (ISLT(PTRADD(ms->bh, nlo, ms->hs),
702 : PTRADD(ms->bh, olo, ms->hs), ms)) {
703 142 : descending = 1;
704 142 : for (olo = nlo++;
705 321 : nlo < hi;
706 179 : olo = nlo++, ++n) {
707 281 : if (!ISLT(PTRADD(ms->bh, nlo,
708 : ms->hs),
709 : PTRADD(ms->bh, olo,
710 : ms->hs), ms))
711 : break;
712 : }
713 : }
714 : else {
715 51 : for (olo = nlo++;
716 2191 : nlo < hi;
717 2140 : olo = nlo++, ++n) {
718 2185 : if (ISLT(PTRADD(ms->bh, nlo,
719 : ms->hs),
720 : PTRADD(ms->bh, olo,
721 : ms->hs), ms))
722 : break;
723 : }
724 : }
725 : }
726 : }
727 193 : if (descending)
728 142 : reverse_slice(lo, lo + n, ms);
729 : /* If short, extend to min(minrun, nremaining). */
730 193 : if (n < minrun) {
731 145 : ssize_t force = nremaining <= minrun ? nremaining : minrun;
732 :
733 145 : binarysort(lo, lo + force, lo + n, ms);
734 145 : n = force;
735 : }
736 : /* Push run onto pending-runs stack, and maybe merge. */
737 193 : assert(ms->n < MAX_MERGE_PENDING);
738 193 : ms->pending[ms->n].base = lo;
739 193 : ms->pending[ms->n].len = n;
740 193 : ms->n++;
741 : {
742 : /* Examine the stack of runs waiting to be merged, merging adjacent
743 : * runs until the stack invariants are re-established:
744 : *
745 : * 1. len[-3] > len[-2] + len[-1]
746 : * 2. len[-2] > len[-1]
747 : *
748 : * See listsort.txt for more info.
749 : *
750 : * Returns 0 on success, -1 on error. */
751 193 : struct slice *p = ms->pending;
752 :
753 201 : while (ms->n > 1) {
754 9 : ssize_t i = ms->n - 2;
755 :
756 9 : if ((i > 0 &&
757 0 : p[i-1].len <= p[i].len + p[i+1].len) ||
758 0 : (i > 1 &&
759 0 : p[i-2].len <= p[i-1].len + p[i].len)) {
760 0 : if (p[i - 1].len < p[i + 1].len)
761 0 : --i;
762 0 : if (merge_at(ms, i) < 0)
763 : return -1;
764 9 : } else if (p[i].len <= p[i + 1].len) {
765 8 : if (merge_at(ms, i) < 0)
766 : return -1;
767 : } else
768 : break;
769 : }
770 : }
771 : /* Advance to find next run. */
772 193 : lo += n;
773 193 : nremaining -= n;
774 193 : } while (nremaining > 0);
775 184 : assert(lo == hi);
776 :
777 : {
778 : /* Regardless of invariants, merge all runs on the stack until only
779 : * one remains. This is used at the end of the mergesort.
780 : *
781 : * Returns 0 on success, -1 on error. */
782 : struct slice *p = ms->pending;
783 :
784 185 : while (ms->n > 1) {
785 1 : ssize_t n = ms->n - 2;
786 :
787 1 : if (n > 0 && p[n - 1].len < p[n + 1].len)
788 0 : --n;
789 1 : if (merge_at(ms, n) < 0)
790 : return -1;
791 : }
792 : }
793 : return 0;
794 : }
795 :
796 : #ifdef GDKssortimpl
797 : /* Stable sort an array "h" (and move t accordingly).
798 : * "nitems" is the number of items to sort; "hs"+"ts" is the size of
799 : * the items, "tpe" is the type of the key within the items. If "heap"
800 : * is non-NULL, the key is actually an offset relative to "heap" and
801 : * the actual key is found at that offset (MonetDB var-sized
802 : * atoms). */
803 : gdk_return
804 184 : GDKssortimpl(void *restrict h, void *restrict t, const void *restrict heap,
805 : size_t nitems, int hs, int ts, int tpe)
806 : {
807 184 : char temp;
808 184 : MergeState ms;
809 184 : ssize_t nremaining;
810 184 : gdk_return result = GDK_FAIL;
811 184 : size_t lo, hi;
812 184 : ssize_t minrun;
813 :
814 184 : assert(h);
815 184 : assert(hs > 0);
816 :
817 184 : ms.ah = (void *) ms.temparrayh;
818 184 : ms.allocedh = MERGESTATE_TEMP_SIZE;
819 184 : ms.at = (void *) ms.temparrayt;
820 184 : ms.allocedt = MERGESTATE_TEMP_SIZE;
821 184 : ms.n = 0;
822 184 : ms.min_gallop = MIN_GALLOP;
823 184 : ms.compare = ATOMcompare(tpe);
824 184 : ms.heap = heap;
825 184 : ms.hs = hs;
826 184 : ms.ts = ts;
827 184 : ms.bh = h;
828 184 : if (!t)
829 110 : t = &temp;
830 184 : ms.bt = t;
831 184 : ms.th = ms.tempstorageh;
832 184 : ms.tt = ms.tempstoraget;
833 184 : assert((size_t) hs <= sizeof(ms.tempstorageh));
834 184 : assert((size_t) ts <= sizeof(ms.tempstoraget));
835 184 : nremaining = (ssize_t) nitems;
836 :
837 184 : if (nremaining < 2)
838 0 : goto succeed;
839 :
840 184 : tpe = ATOMbasetype(tpe);
841 :
842 : /* March over the array once, left to right, finding natural
843 : * runs, and extending short natural runs to minrun
844 : * elements. */
845 184 : lo = 0;
846 184 : hi = lo + nremaining;
847 184 : minrun = merge_compute_minrun(nremaining);
848 184 : switch (tpe) {
849 2 : case TYPE_bte:
850 2 : if (do_ssort_bte(&ms, nremaining, lo, hi, minrun) < 0)
851 0 : goto fail;
852 : break;
853 1 : case TYPE_sht:
854 1 : if (do_ssort_sht(&ms, nremaining, lo, hi, minrun) < 0)
855 0 : goto fail;
856 : break;
857 127 : case TYPE_int:
858 127 : if (do_ssort_int(&ms, nremaining, lo, hi, minrun) < 0)
859 0 : goto fail;
860 : break;
861 24 : case TYPE_lng:
862 24 : if (do_ssort_lng(&ms, nremaining, lo, hi, minrun) < 0)
863 0 : goto fail;
864 : break;
865 : #ifdef HAVE_HGE
866 5 : case TYPE_hge:
867 5 : if (do_ssort_hge(&ms, nremaining, lo, hi, minrun) < 0)
868 0 : goto fail;
869 : break;
870 : #endif
871 3 : case TYPE_flt:
872 3 : if (do_ssort_flt(&ms, nremaining, lo, hi, minrun) < 0)
873 0 : goto fail;
874 : break;
875 2 : case TYPE_dbl:
876 2 : if (do_ssort_dbl(&ms, nremaining, lo, hi, minrun) < 0)
877 0 : goto fail;
878 : break;
879 20 : default:
880 20 : if (do_ssort_any(&ms, nremaining, lo, hi, minrun) < 0)
881 0 : goto fail;
882 : break;
883 : }
884 184 : assert(ms.n == 1);
885 184 : assert(ms.pending[0].base == 0);
886 184 : assert(ms.pending[0].len == (ssize_t) nitems);
887 :
888 : succeed:
889 : result = GDK_SUCCEED;
890 184 : fail:
891 184 : merge_freemem(&ms);
892 184 : return result;
893 : }
894 : #endif /* GDKssortimpl */
|