LCOV - code coverage report
Current view: top level - gdk - gdk_ssort_impl.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 286 478 59.8 %
Date: 2024-12-19 20:05:57 Functions: 26 82 31.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : /*
      14             :  * This file implements a stable sort algorithm known as "timsort".
      15             :  * The algorithm is a straight copy of the listsort function in the
      16             :  * Python 2.5 source code, heavily modified to fit into the MonetDB
      17             :  * environment.
      18             :  * The original author of the sort algorithm was Tim Peters, the
      19             :  * adaptation was done by Sjoerd Mullender.
      20             :  *
      21             :  *
      22             :  * This file is included multiple times.  We expect a bunch of tokens
      23             :  * to be redefined differently each time (see gdk_ssort.c).  If the
      24             :  * token GDKssortimpl is defined, the main interface is defined.
      25             :  */
      26             : 
      27             : /* binarysort is the best method for sorting small arrays: it does few
      28             :  * compares, but can do data movement quadratic in the number of
      29             :  * elements.
      30             :  * [lo, hi) is a contiguous slice of a list, and is sorted via binary
      31             :  * insertion.  This sort is stable. On entry, must have lo <= start <=
      32             :  * hi, and that [lo, start) is already sorted (pass start == lo if you
      33             :  * don't know!). */
      34             : static void
      35         145 : binarysort(size_t lo, size_t hi, size_t start, MergeState *ms)
      36             : {
      37         145 :         register size_t l, p, r;
      38             : 
      39         145 :         assert(lo <= start && start <= hi);
      40             :         /* assert [lo, start) is sorted */
      41         145 :         if (lo == start)
      42           0 :                 start++;
      43             :         /* [lo,start) is sorted, insert start (the pivot) into this
      44             :          * area, finding its position using binary search. */
      45         720 :         for (; start < hi; start++) {
      46             :                 /* set l to where start belongs */
      47         575 :                 l = lo;
      48         575 :                 r = start;
      49             :                 /* ms->t[ht] is the pivot */
      50         575 :                 COPY(ms->th, PTRADD(ms->bh, r, ms->hs), ms->hs);
      51         575 :                 COPY_any(ms->tt, PTRADD(ms->bt, r, ms->ts), ms->ts);
      52             :                 /* Invariants:
      53             :                  * pivot >= all in [lo, l).
      54             :                  * pivot < all in [r, start).
      55             :                  * The second is vacuously true at the start. */
      56         575 :                 assert(l < r);
      57        1375 :                 do {
      58        1375 :                         p = l + ((r - l) >> 1);
      59        1375 :                         if (ISLT(ms->th, PTRADD(ms->bh, p, ms->hs), ms))
      60             :                                 r = p;
      61             :                         else
      62         633 :                                 l = p + 1;
      63        1375 :                 } while (l < r);
      64         575 :                 assert(l == r);
      65             :                 /* The invariants still hold, so pivot >= all in [lo,
      66             :                  * l) and pivot < all in [l, start), so pivot belongs
      67             :                  * at l.  Note that if there are elements equal to
      68             :                  * pivot, l points to the first slot after them --
      69             :                  * that's why this sort is stable. Slide over to make
      70             :                  * room.
      71             :                  * Caution: using memmove is much slower under MSVC 5;
      72             :                  * we're not usually moving many slots. */
      73        2021 :                 for (p = start, r = p - 1; p > l; p = r, r = p - 1) {
      74        1446 :                         COPY(PTRADD(ms->bh, p, ms->hs),
      75             :                              PTRADD(ms->bh, r, ms->hs), ms->hs);
      76        1446 :                         COPY_any(PTRADD(ms->bt, p, ms->ts),
      77             :                                  PTRADD(ms->bt, r, ms->ts), ms->ts);
      78             :                 }
      79         575 :                 COPY(PTRADD(ms->bh, l, ms->hs), ms->th, ms->hs);
      80         575 :                 COPY_any(PTRADD(ms->bt, l, ms->ts), ms->tt, ms->ts);
      81             :         }
      82         145 : }
      83             : 
      84             : /* Locate the proper position of key in a sorted vector; if the vector
      85             :  * contains an element equal to key, return the position immediately
      86             :  * to the left of the leftmost equal element.  [gallop_right() does
      87             :  * the same except returns the position to the right of the rightmost
      88             :  * equal element (if any).]
      89             :  *
      90             :  * "a" is a sorted vector with n elements, starting at a[0].  n must
      91             :  * be > 0.
      92             :  *
      93             :  * "hint" is an index at which to begin the search, 0 <= hint < n.
      94             :  * The closer hint is to the final result, the faster this runs.
      95             :  *
      96             :  * The return value is the int k in 0..n such that
      97             :  *
      98             :  * a[k-1] < key <= a[k]
      99             :  *
     100             :  * pretending that *(a-1) is minus infinity and a[n] is plus infinity.
     101             :  * IOW, key belongs at index k; or, IOW, the first k elements of a
     102             :  * should precede key, and the last n-k should follow key.
     103             :  *
     104             :  * Returns -1 on error.  See listsort.txt for info on the method. */
     105             : static ssize_t
     106          11 : gallop_left(void *key, void *a, ssize_t n, ssize_t hint, MergeState *ms)
     107             : {
     108          11 :         ssize_t ofs;
     109          11 :         ssize_t lastofs;
     110          11 :         ssize_t k;
     111             : 
     112          11 :         assert(key && a && n > 0 && hint >= 0 && hint < n);
     113             : 
     114          11 :         a = PTRADD(a, hint, ms->hs);
     115          11 :         lastofs = 0;
     116          11 :         ofs = 1;
     117          11 :         if (ISLT(a, key, ms)) {
     118             :                 /* a[hint] < key -- gallop right, until
     119             :                  * a[hint + lastofs] < key <= a[hint + ofs] */
     120           9 :                 const ssize_t maxofs = n - hint;        /* &a[n-1] is highest */
     121          27 :                 while (ofs < maxofs) {
     122          18 :                         if (ISLT(PTRADD(a, ofs, ms->hs), key, ms)) {
     123          18 :                                 lastofs = ofs;
     124          18 :                                 ofs = (ofs << 1) + 1;
     125          18 :                                 if (ofs <= 0)        /* int overflow */
     126           0 :                                         ofs = maxofs;
     127             :                         } else  /* key <= a[hint + ofs] */
     128             :                                 break;
     129             :                 }
     130           9 :                 if (ofs > maxofs)
     131             :                         ofs = maxofs;
     132             :                 /* Translate back to offsets relative to &a[0]. */
     133           9 :                 lastofs += hint;
     134           9 :                 ofs += hint;
     135             :         } else {
     136             :                 /* key <= a[hint] -- gallop left, until
     137             :                  * a[hint - ofs] < key <= a[hint - lastofs] */
     138           2 :                 const ssize_t maxofs = hint + 1;        /* &a[0] is lowest */
     139          16 :                 while (ofs < maxofs) {
     140          16 :                         if (ISLT(PTRADD(a, -ofs, ms->hs), key, ms))
     141             :                                 break;
     142             :                         /* key <= a[hint - ofs] */
     143          14 :                         lastofs = ofs;
     144          14 :                         ofs = (ofs << 1) + 1;
     145          14 :                         if (ofs <= 0)        /* int overflow */
     146           0 :                                 ofs = maxofs;
     147             :                 }
     148           2 :                 if (ofs > maxofs)
     149             :                         ofs = maxofs;
     150             :                 /* Translate back to positive offsets relative to &a[0]. */
     151           2 :                 k = lastofs;
     152           2 :                 lastofs = hint - ofs;
     153           2 :                 ofs = hint - k;
     154             :         }
     155          11 :         a = PTRADD(a, -hint, ms->hs);
     156             : 
     157          11 :         assert(-1 <= lastofs && lastofs < ofs && ofs <= n);
     158             :         /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to
     159             :          * the right of lastofs but no farther right than ofs.  Do a
     160             :          * binary search, with invariant a[lastofs-1] < key <=
     161             :          * a[ofs]. */
     162          11 :         ++lastofs;
     163          49 :         while (lastofs < ofs) {
     164          27 :                 ssize_t m = lastofs + ((ofs - lastofs) >> 1);
     165             : 
     166          27 :                 if (ISLT(PTRADD(a, m, ms->hs), key, ms))
     167          23 :                         lastofs = m + 1; /* a[m] < key */
     168             :                 else
     169             :                         ofs = m;        /* key <= a[m] */
     170             :         }
     171          11 :         assert(lastofs == ofs);         /* so a[ofs-1] < key <= a[ofs] */
     172          11 :         return ofs;
     173             : }
     174             : 
     175             : /* Exactly like gallop_left(), except that if key already exists in
     176             :  * a[0:n], finds the position immediately to the right of the
     177             :  * rightmost equal value.
     178             :  *
     179             :  * The return value is the int k in 0..n such that
     180             :  *
     181             :  * a[k-1] <= key < a[k]
     182             :  *
     183             :  * or -1 if error.
     184             :  *
     185             :  * The code duplication is massive, but this is enough different given
     186             :  * that we're sticking to "<" comparisons that it's much harder to
     187             :  * follow if written as one routine with yet another "left or right?"
     188             :  * flag. */
     189             : static ssize_t
     190          18 : gallop_right(void *key, void *a, ssize_t n, ssize_t hint, MergeState *ms)
     191             : {
     192          18 :         ssize_t ofs;
     193          18 :         ssize_t lastofs;
     194          18 :         ssize_t k;
     195             : 
     196          18 :         assert(key && a && n > 0 && hint >= 0 && hint < n);
     197             : 
     198          18 :         a = PTRADD(a, hint, ms->hs);
     199          18 :         lastofs = 0;
     200          18 :         ofs = 1;
     201          18 :         if (ISLT(key, a, ms)) {
     202             :                 /* key < a[hint] -- gallop left, until
     203             :                  * a[hint - ofs] <= key < a[hint - lastofs] */
     204           9 :                 const ssize_t maxofs = hint + 1;        /* &a[0] is lowest */
     205           9 :                 while (ofs < maxofs) {
     206           0 :                         if (ISLT(key, PTRADD(a, -ofs, ms->hs), ms)) {
     207           0 :                                 lastofs = ofs;
     208           0 :                                 ofs = (ofs << 1) + 1;
     209           0 :                                 if (ofs <= 0)        /* int overflow */
     210           0 :                                         ofs = maxofs;
     211             :                         } else  /* a[hint - ofs] <= key */
     212             :                                 break;
     213             :                 }
     214           9 :                 if (ofs > maxofs)
     215             :                         ofs = maxofs;
     216             :                 /* Translate back to positive offsets relative to &a[0]. */
     217           9 :                 k = lastofs;
     218           9 :                 lastofs = hint - ofs;
     219           9 :                 ofs = hint - k;
     220             :         } else {
     221             :                 /* a[hint] <= key -- gallop right, until
     222             :                  * a[hint + lastofs] <= key < a[hint + ofs] */
     223           9 :                 const ssize_t maxofs = n - hint;        /* &a[n-1] is highest */
     224          40 :                 while (ofs < maxofs) {
     225          39 :                         if (ISLT(key, PTRADD(a, ofs, ms->hs), ms))
     226             :                                 break;
     227             :                         /* a[hint + ofs] <= key */
     228          31 :                         lastofs = ofs;
     229          31 :                         ofs = (ofs << 1) + 1;
     230          31 :                         if (ofs <= 0)        /* int overflow */
     231           0 :                                 ofs = maxofs;
     232             :                 }
     233           9 :                 if (ofs > maxofs)
     234             :                         ofs = maxofs;
     235             :                 /* Translate back to offsets relative to &a[0]. */
     236           9 :                 lastofs += hint;
     237           9 :                 ofs += hint;
     238             :         }
     239          18 :         a = PTRADD(a, -hint, ms->hs);
     240             : 
     241          18 :         assert(-1 <= lastofs && lastofs < ofs && ofs <= n);
     242             :         /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to
     243             :          * the right of lastofs but no farther right than ofs.  Do a
     244             :          * binary search, with invariant a[lastofs-1] <= key <
     245             :          * a[ofs]. */
     246          18 :         ++lastofs;
     247          67 :         while (lastofs < ofs) {
     248          31 :                 ssize_t m = lastofs + ((ofs - lastofs) >> 1);
     249             : 
     250          31 :                 if (ISLT(key, PTRADD(a, m, ms->hs), ms))
     251             :                         ofs = m;        /* key < a[m] */
     252             :                 else
     253          12 :                         lastofs = m+1;  /* a[m] <= key */
     254             :         }
     255          18 :         assert(lastofs == ofs);         /* so a[ofs-1] <= key < a[ofs] */
     256          18 :         return ofs;
     257             : }
     258             : 
     259             : /* Merge the two runs at stack indices i and i+1.
     260             :  * Returns 0 on success, -1 on error. */
     261             : static ssize_t
     262           9 : merge_at(MergeState *ms, ssize_t i)
     263             : {
     264           9 :         size_t pa, pb;
     265           9 :         ssize_t na, nb;
     266           9 :         ssize_t k;
     267             : 
     268           9 :         assert(ms != NULL);
     269           9 :         assert(ms->n >= 2);
     270           9 :         assert(i >= 0);
     271           9 :         assert(i == ms->n - 2 || i == ms->n - 3);
     272             : 
     273           9 :         pa = ms->pending[i].base;
     274           9 :         na = ms->pending[i].len;
     275           9 :         pb = ms->pending[i + 1].base;
     276           9 :         nb = ms->pending[i + 1].len;
     277           9 :         assert(na > 0 && nb > 0);
     278           9 :         assert(pa + na == pb);
     279             : 
     280             :         /* Record the length of the combined runs; if i is the
     281             :          * 3rd-last run now, also slide over the last run (which isn't
     282             :          * involved in this merge).  The current run i+1 goes away in
     283             :          * any case. */
     284           9 :         ms->pending[i].len = na + nb;
     285           9 :         if (i == ms->n - 3)
     286           0 :                 ms->pending[i + 1] = ms->pending[i + 2];
     287           9 :         --ms->n;
     288             : 
     289             :         /* Where does b start in a?  Elements in a before that can be
     290             :          * ignored (already in place). */
     291          18 :         k = gallop_right(PTRADD(ms->bh, pb, ms->hs),
     292           9 :                          PTRADD(ms->bh, pa, ms->hs), na, 0, ms);
     293           9 :         pa += k;
     294           9 :         na -= k;
     295           9 :         if (na == 0)
     296             :                 return 0;
     297             : 
     298             :         /* Where does a end in b?  Elements in b after that can be
     299             :          * ignored (already in place). */
     300           9 :         nb = gallop_left(PTRADD(ms->bh, pa + na - 1, ms->hs),
     301           0 :                          PTRADD(ms->bh, pb, ms->hs), nb, nb-1, ms);
     302           9 :         if (nb <= 0)
     303             :                 return nb;
     304             : 
     305             :         /* Merge what remains of the runs, using a temp array with
     306             :          * min(na, nb) elements. */
     307           9 :         if (na <= nb) {
     308             : /* Merge the na elements starting at pa with the nb elements starting
     309             :  * at pb in a stable way, in-place.  na and nb must be > 0, and pa +
     310             :  * na == pb. Must also have that *pb < *pa, that pa[na-1] belongs at
     311             :  * the end of the merge, and should have na <= nb.  See listsort.txt
     312             :  * for more info. Return 0 if successful, -1 if error. */
     313           9 :                 size_t dest;
     314           9 :                 ssize_t min_gallop = ms->min_gallop;
     315             : 
     316           9 :                 assert(ms && na > 0 && nb > 0 && pa + na == pb);
     317           9 :                 if (MERGE_GETMEMH(ms, na) < 0)
     318             :                         return -1;
     319           9 :                 if (MERGE_GETMEMT(ms, na) < 0)
     320             :                         return -1;
     321         465 :                 COPY_anyN(ms->ah, PTRADD(ms->bh, pa, ms->hs), ms->hs, na);
     322         450 :                 COPY_anyN(ms->at, PTRADD(ms->bt, pa, ms->ts), ms->ts, na);
     323           9 :                 dest = pa;
     324           9 :                 pa = 0;
     325             : 
     326           9 :                 COPY(PTRADD(ms->bh, dest, ms->hs),
     327             :                      PTRADD(ms->bh, pb, ms->hs), ms->hs);
     328           9 :                 COPY_any(PTRADD(ms->bt, dest, ms->ts),
     329             :                          PTRADD(ms->bt, pb, ms->ts), ms->ts);
     330           9 :                 dest++;
     331           9 :                 pb++;
     332           9 :                 --nb;
     333           9 :                 if (nb == 0)
     334           0 :                         goto SucceedA;
     335           9 :                 if (na == 1)
     336           0 :                         goto CopyB;
     337             : 
     338           9 :                 for (;;) {
     339           9 :                         ssize_t acount = 0;     /* # of times A won in a row */
     340           9 :                         ssize_t bcount = 0;     /* # of times B won in a row */
     341             : 
     342             :                         /* Do the straightforward thing until (if
     343             :                          * ever) one run appears to win
     344             :                          * consistently. */
     345          63 :                         for (;;) {
     346          63 :                                 assert(na > 1 && nb > 0);
     347          63 :                                 k = ISLT(PTRADD(ms->bh, pb, ms->hs),
     348             :                                          PTRADD(ms->ah, pa, ms->hs), ms);
     349          63 :                                 if (k) {
     350          63 :                                         COPY(PTRADD(ms->bh, dest, ms->hs),
     351             :                                              PTRADD(ms->bh, pb, ms->hs),
     352             :                                              ms->hs);
     353          63 :                                         COPY_any(PTRADD(ms->bt, dest, ms->ts),
     354             :                                                  PTRADD(ms->bt, pb, ms->ts),
     355             :                                                  ms->ts);
     356          63 :                                         dest++;
     357          63 :                                         pb++;
     358          63 :                                         ++bcount;
     359          63 :                                         acount = 0;
     360          63 :                                         --nb;
     361          63 :                                         if (nb == 0)
     362           0 :                                                 goto SucceedA;
     363          63 :                                         if (bcount >= min_gallop)
     364             :                                                 break;
     365             :                                 } else {
     366           0 :                                         COPY(PTRADD(ms->bh, dest, ms->hs),
     367             :                                              PTRADD(ms->ah, pa, ms->hs),
     368             :                                              ms->hs);
     369           0 :                                         COPY_any(PTRADD(ms->bt, dest, ms->ts),
     370             :                                                  PTRADD(ms->at, pa, ms->ts),
     371             :                                                  ms->ts);
     372           0 :                                         dest++;
     373           0 :                                         pa++;
     374           0 :                                         ++acount;
     375           0 :                                         bcount = 0;
     376           0 :                                         --na;
     377           0 :                                         if (na == 1)
     378           0 :                                                 goto CopyB;
     379           0 :                                         if (acount >= min_gallop)
     380             :                                                 break;
     381             :                                 }
     382             :                         }
     383             : 
     384             :                         /* One run is winning so consistently that
     385             :                          * galloping may be a huge win.  So try that,
     386             :                          * and continue galloping until (if ever)
     387             :                          * neither run appears to be winning
     388             :                          * consistently anymore. */
     389           9 :                         ++min_gallop;
     390           9 :                         do {
     391           9 :                                 assert(na > 1 && nb > 0);
     392           9 :                                 min_gallop -= min_gallop > 1;
     393           9 :                                 ms->min_gallop = min_gallop;
     394          18 :                                 k = gallop_right(PTRADD(ms->bh, pb, ms->hs),
     395           9 :                                                  PTRADD(ms->ah, pa, ms->hs),
     396             :                                                  na, 0, ms);
     397           9 :                                 acount = k;
     398           9 :                                 if (k) {
     399           0 :                                         COPY_anyN(PTRADD(ms->bh, dest, ms->hs),
     400             :                                                   PTRADD(ms->ah, pa, ms->hs),
     401             :                                                   ms->hs, k);
     402           0 :                                         COPY_anyN(PTRADD(ms->bt, dest, ms->ts),
     403             :                                                   PTRADD(ms->at, pa, ms->ts),
     404             :                                                   ms->ts, k);
     405           0 :                                         dest += k;
     406           0 :                                         pa += k;
     407           0 :                                         na -= k;
     408           0 :                                         if (na == 1)
     409           0 :                                                 goto CopyB;
     410             :                                         /* na==0 is impossible now if
     411             :                                          * the comparison function is
     412             :                                          * consistent, but we can't
     413             :                                          * assume that it is. */
     414           0 :                                         if (na == 0)
     415           0 :                                                 goto SucceedA;
     416             :                                 }
     417           9 :                                 COPY(PTRADD(ms->bh, dest, ms->hs),
     418             :                                      PTRADD(ms->bh, pb, ms->hs), ms->hs);
     419           9 :                                 COPY_any(PTRADD(ms->bt, dest, ms->ts),
     420             :                                          PTRADD(ms->bt, pb, ms->ts), ms->ts);
     421           9 :                                 dest++;
     422           9 :                                 pb++;
     423           9 :                                 --nb;
     424           9 :                                 if (nb == 0)
     425           7 :                                         goto SucceedA;
     426             : 
     427           4 :                                 k = gallop_left(PTRADD(ms->ah, pa, ms->hs),
     428           2 :                                                 PTRADD(ms->bh, pb, ms->hs),
     429             :                                                 nb, 0, ms);
     430           2 :                                 bcount = k;
     431           2 :                                 if (k) {
     432           2 :                                         memmove(PTRADD(ms->bh, dest, ms->hs),
     433           0 :                                                 PTRADD(ms->bh, pb, ms->hs),
     434           2 :                                                 k * ms->hs);
     435           2 :                                         memmove(PTRADD(ms->bt, dest, ms->ts),
     436           2 :                                                 PTRADD(ms->bt, pb, ms->ts),
     437           2 :                                                 k * ms->ts);
     438           2 :                                         dest += k;
     439           2 :                                         pb += k;
     440           2 :                                         nb -= k;
     441           2 :                                         if (nb == 0)
     442           2 :                                                 goto SucceedA;
     443             :                                 }
     444           0 :                                 COPY(PTRADD(ms->bh, dest, ms->hs),
     445             :                                      PTRADD(ms->ah, pa, ms->hs), ms->hs);
     446           0 :                                 COPY_any(PTRADD(ms->bt, dest, ms->ts),
     447             :                                          PTRADD(ms->at, pa, ms->ts), ms->ts);
     448           0 :                                 dest++;
     449           0 :                                 pa++;
     450           0 :                                 --na;
     451           0 :                                 if (na == 1)
     452           0 :                                         goto CopyB;
     453           0 :                         } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
     454           0 :                         ++min_gallop;   /* penalize it for leaving galloping mode */
     455           0 :                         ms->min_gallop = min_gallop;
     456             :                 }
     457           9 :         SucceedA:
     458           9 :                 if (na) {
     459         465 :                         COPY_anyN(PTRADD(ms->bh, dest, ms->hs),
     460             :                                   PTRADD(ms->ah, pa, ms->hs), ms->hs, na);
     461         450 :                         COPY_anyN(PTRADD(ms->bt, dest, ms->ts),
     462             :                                   PTRADD(ms->at, pa, ms->ts), ms->ts, na);
     463             :                 }
     464           9 :                 return 0;
     465           0 :         CopyB:
     466           0 :                 assert(na == 1 && nb > 0);
     467             :                 /* The last element of pa belongs at the end of the merge. */
     468           0 :                 memmove(PTRADD(ms->bh, dest, ms->hs),
     469           0 :                         PTRADD(ms->bh, pb, ms->hs), nb * ms->hs);
     470           0 :                 memmove(PTRADD(ms->bt, dest, ms->ts),
     471           0 :                         PTRADD(ms->bt, pb, ms->ts), nb * ms->ts);
     472           0 :                 COPY(PTRADD(ms->bh, dest + nb, ms->hs),
     473             :                      PTRADD(ms->ah, pa, ms->hs), ms->hs);
     474           0 :                 COPY_any(PTRADD(ms->bt, dest + nb, ms->ts),
     475             :                          PTRADD(ms->at, pa, ms->ts), ms->ts);
     476           0 :                 return 0;
     477             :         } else {
     478             : /* Merge the na elements starting at pa with the nb elements starting
     479             :  * at pb in a stable way, in-place.  na and nb must be > 0, and pa +
     480             :  * na == pb. Must also have that *pb < *pa, that pa[na-1] belongs at
     481             :  * the end of the merge, and should have na >= nb.  See listsort.txt
     482             :  * for more info. Return 0 if successful, -1 if error. */
     483           0 :                 size_t dest;
     484           0 :                 size_t basea;
     485           0 :                 size_t baseb;
     486           0 :                 ssize_t min_gallop = ms->min_gallop;
     487             : 
     488           0 :                 assert(ms && na > 0 && nb > 0 && pa + na == pb);
     489           0 :                 if (MERGE_GETMEMH(ms, nb) < 0)
     490             :                         return -1;
     491           0 :                 if (MERGE_GETMEMT(ms, nb) < 0)
     492             :                         return -1;
     493           0 :                 dest = pb + nb - 1;
     494           0 :                 COPY_anyN(ms->ah, PTRADD(ms->bh, pb, ms->hs), ms->hs, nb);
     495           0 :                 COPY_anyN(ms->at, PTRADD(ms->bt, pb, ms->ts), ms->ts, nb);
     496           0 :                 basea = pa;
     497           0 :                 baseb = 0;
     498           0 :                 pb = nb - 1;
     499           0 :                 pa += na - 1;
     500             : 
     501           0 :                 COPY(PTRADD(ms->bh, dest, ms->hs),
     502             :                      PTRADD(ms->bh, pa, ms->hs), ms->hs);
     503           0 :                 COPY_any(PTRADD(ms->bt, dest, ms->ts),
     504             :                          PTRADD(ms->bt, pa, ms->ts), ms->ts);
     505           0 :                 dest--;
     506           0 :                 pa--;
     507           0 :                 --na;
     508           0 :                 if (na == 0)
     509             :                         goto SucceedB;
     510           0 :                 if (nb == 1)
     511           0 :                         goto CopyA;
     512             : 
     513           0 :                 for (;;) {
     514           0 :                         ssize_t acount = 0;     /* # of times A won in a row */
     515           0 :                         ssize_t bcount = 0;     /* # of times B won in a row */
     516             : 
     517             :                         /* Do the straightforward thing until (if
     518             :                          * ever) one run appears to win
     519             :                          * consistently. */
     520           0 :                         for (;;) {
     521           0 :                                 assert(na > 0 && nb > 1);
     522           0 :                                 k = ISLT(PTRADD(ms->ah, pb, ms->hs),
     523             :                                          PTRADD(ms->bh, pa, ms->hs), ms);
     524           0 :                                 if (k) {
     525           0 :                                         COPY(PTRADD(ms->bh, dest, ms->hs),
     526             :                                              PTRADD(ms->bh, pa, ms->hs),
     527             :                                              ms->hs);
     528           0 :                                         COPY_any(PTRADD(ms->bt, dest, ms->ts),
     529             :                                                  PTRADD(ms->bt, pa, ms->ts),
     530             :                                                  ms->ts);
     531           0 :                                         dest--;
     532           0 :                                         pa--;
     533           0 :                                         ++acount;
     534           0 :                                         bcount = 0;
     535           0 :                                         --na;
     536           0 :                                         if (na == 0)
     537           0 :                                                 goto SucceedB;
     538           0 :                                         if (acount >= min_gallop)
     539             :                                                 break;
     540             :                                 } else {
     541           0 :                                         COPY(PTRADD(ms->bh, dest, ms->hs),
     542             :                                              PTRADD(ms->ah, pb, ms->hs),
     543             :                                              ms->hs);
     544           0 :                                         COPY_any(PTRADD(ms->bt, dest, ms->ts),
     545             :                                                  PTRADD(ms->at, pb, ms->ts),
     546             :                                                  ms->ts);
     547           0 :                                         dest--;
     548           0 :                                         pb--;
     549           0 :                                         ++bcount;
     550           0 :                                         acount = 0;
     551           0 :                                         --nb;
     552           0 :                                         if (nb == 1)
     553           0 :                                                 goto CopyA;
     554           0 :                                         if (bcount >= min_gallop)
     555             :                                                 break;
     556             :                                 }
     557             :                         }
     558             : 
     559             :                         /* One run is winning so consistently that
     560             :                          * galloping may be a huge win.  So try that,
     561             :                          * and continue galloping until (if ever)
     562             :                          * neither run appears to be winning
     563             :                          * consistently anymore. */
     564           0 :                         ++min_gallop;
     565           0 :                         do {
     566           0 :                                 assert(na > 0 && nb > 1);
     567           0 :                                 min_gallop -= min_gallop > 1;
     568           0 :                                 ms->min_gallop = min_gallop;
     569           0 :                                 k = gallop_right(PTRADD(ms->ah, pb, ms->hs),
     570           0 :                                                  PTRADD(ms->bh, basea, ms->hs),
     571             :                                                  na, na - 1, ms);
     572           0 :                                 k = na - k;
     573           0 :                                 acount = k;
     574           0 :                                 if (k) {
     575           0 :                                         dest -= k;
     576           0 :                                         pa -= k;
     577           0 :                                         memmove(PTRADD(ms->bh, dest + 1,
     578             :                                                        ms->hs),
     579           0 :                                                 PTRADD(ms->bh, pa + 1, ms->hs),
     580           0 :                                                 k * ms->hs);
     581           0 :                                         memmove(PTRADD(ms->bt, dest + 1,
     582             :                                                        ms->ts),
     583           0 :                                                 PTRADD(ms->bt, pa + 1, ms->ts),
     584           0 :                                                 k * ms->ts);
     585           0 :                                         na -= k;
     586           0 :                                         if (na == 0)
     587           0 :                                                 goto SucceedB;
     588             :                                 }
     589           0 :                                 COPY(PTRADD(ms->bh, dest, ms->hs),
     590             :                                      PTRADD(ms->ah, pb, ms->hs), ms->hs);
     591           0 :                                 COPY_any(PTRADD(ms->bt, dest, ms->ts),
     592             :                                          PTRADD(ms->at, pb, ms->ts), ms->ts);
     593           0 :                                 dest--;
     594           0 :                                 pb--;
     595           0 :                                 --nb;
     596           0 :                                 if (nb == 1)
     597           0 :                                         goto CopyA;
     598             : 
     599           0 :                                 k = gallop_left(PTRADD(ms->bh, pa, ms->hs),
     600           0 :                                                 PTRADD(ms->ah, baseb, ms->hs),
     601             :                                                 nb, nb - 1, ms);
     602           0 :                                 k = nb - k;
     603           0 :                                 bcount = k;
     604           0 :                                 if (k) {
     605           0 :                                         dest -= k;
     606           0 :                                         pb -= k;
     607           0 :                                         memmove(PTRADD(ms->bh, dest + 1,
     608             :                                                        ms->hs),
     609           0 :                                                 PTRADD(ms->ah, pb + 1, ms->hs),
     610           0 :                                                 k * ms->hs);
     611           0 :                                         memmove(PTRADD(ms->bt, dest + 1,
     612             :                                                        ms->ts),
     613           0 :                                                 PTRADD(ms->at, pb + 1, ms->ts),
     614           0 :                                                 k * ms->ts);
     615           0 :                                         nb -= k;
     616           0 :                                         if (nb == 1)
     617           0 :                                                 goto CopyA;
     618             :                                         /* nb==0 is impossible now if
     619             :                                          * the comparison function is
     620             :                                          * consistent, but we can't
     621             :                                          * assume that it is. */
     622           0 :                                         if (nb == 0)
     623           0 :                                                 goto SucceedB;
     624             :                                 }
     625           0 :                                 COPY(PTRADD(ms->bh, dest, ms->hs),
     626             :                                      PTRADD(ms->bh, pa, ms->hs), ms->hs);
     627           0 :                                 COPY_any(PTRADD(ms->bt, dest, ms->ts),
     628             :                                          PTRADD(ms->bt, pa, ms->ts), ms->ts);
     629           0 :                                 dest--;
     630           0 :                                 pa--;
     631           0 :                                 --na;
     632           0 :                                 if (na == 0)
     633           0 :                                         goto SucceedB;
     634           0 :                         } while (acount >= MIN_GALLOP || bcount >= MIN_GALLOP);
     635           0 :                         ++min_gallop;   /* penalize it for leaving galloping mode */
     636           0 :                         ms->min_gallop = min_gallop;
     637             :                 }
     638           0 :         SucceedB:
     639           0 :                 if (nb) {
     640           0 :                         COPY_anyN(PTRADD(ms->bh, dest + 1 - nb, ms->hs),
     641             :                                   PTRADD(ms->ah, baseb, ms->hs), ms->hs, nb);
     642           0 :                         COPY_anyN(PTRADD(ms->bt, dest + 1 - nb, ms->ts),
     643             :                                   PTRADD(ms->at, baseb, ms->ts), ms->ts, nb);
     644             :                 }
     645           0 :                 return 0;
     646           0 :         CopyA:
     647           0 :                 assert(nb == 1 && na > 0);
     648             :                 /* The first element of pb belongs at the front of the
     649             :                  * merge. */
     650           0 :                 dest -= na;
     651           0 :                 pa -= na;
     652           0 :                 memmove(PTRADD(ms->bh, dest + 1, ms->hs),
     653           0 :                         PTRADD(ms->bh, pa + 1, ms->hs),
     654           0 :                         na * ms->hs);
     655           0 :                 memmove(PTRADD(ms->bt, dest + 1, ms->ts),
     656           0 :                         PTRADD(ms->bt, pa + 1, ms->ts),
     657           0 :                         na * ms->ts);
     658           0 :                 COPY(PTRADD(ms->bh, dest, ms->hs),
     659             :                      PTRADD(ms->ah, pb, ms->hs), ms->hs);
     660           0 :                 COPY_any(PTRADD(ms->bt, dest, ms->ts),
     661             :                          PTRADD(ms->at, pb, ms->ts), ms->ts);
     662           0 :                 return 0;
     663             :         }
     664             : }
     665             : 
     666             : static int
     667         185 : do_ssort(MergeState *ms, ssize_t nremaining, size_t lo, size_t hi, ssize_t minrun)
     668             : {
     669         194 :         do {
     670         194 :                 int descending;
     671         194 :                 ssize_t n;
     672             : 
     673             :                 /* Identify next run. */
     674             :                 {
     675             : /* Return the length of the run beginning at lo, in the slice [lo,
     676             :  * hi).  lo < hi is required on entry.  "A run" is the longest
     677             :  * ascending sequence, with
     678             :  *
     679             :  * lo[0] <= lo[1] <= lo[2] <= ...
     680             :  *
     681             :  * or the longest descending sequence, with
     682             :  *
     683             :  * lo[0] > lo[1] > lo[2] > ...
     684             :  *
     685             :  * Boolean descending is set to 0 in the former case, or to 1 in the
     686             :  * latter.  For its intended use in a stable mergesort, the strictness
     687             :  * of the defn of "descending" is needed so that the caller can safely
     688             :  * reverse a descending sequence without violating stability (strict >
     689             :  * ensures there are no equal elements to get out of order). */
     690         194 :                         size_t nlo;
     691         194 :                         size_t olo;
     692             : 
     693         194 :                         assert(lo < hi);
     694         194 :                         descending = 0;
     695         194 :                         olo = lo;
     696         194 :                         nlo = lo + 1;
     697         194 :                         if (nlo == hi) {
     698             :                                 n = 1;
     699             :                         } else {
     700         194 :                                 n = 2;
     701         194 :                                 if (ISLT(PTRADD(ms->bh, nlo, ms->hs),
     702             :                                          PTRADD(ms->bh, olo, ms->hs), ms)) {
     703         142 :                                         descending = 1;
     704         142 :                                         for (olo = nlo++;
     705         321 :                                              nlo < hi;
     706         179 :                                              olo = nlo++, ++n) {
     707         281 :                                                 if (!ISLT(PTRADD(ms->bh, nlo,
     708             :                                                                  ms->hs),
     709             :                                                           PTRADD(ms->bh, olo,
     710             :                                                                  ms->hs), ms))
     711             :                                                         break;
     712             :                                         }
     713             :                                 }
     714             :                                 else {
     715          52 :                                         for (olo = nlo++;
     716        2195 :                                              nlo < hi;
     717        2143 :                                              olo = nlo++, ++n) {
     718        2188 :                                                 if (ISLT(PTRADD(ms->bh, nlo,
     719             :                                                                 ms->hs),
     720             :                                                          PTRADD(ms->bh, olo,
     721             :                                                                 ms->hs), ms))
     722             :                                                         break;
     723             :                                         }
     724             :                                 }
     725             :                         }
     726             :                 }
     727         194 :                 if (descending)
     728         142 :                         reverse_slice(lo, lo + n, ms);
     729             :                 /* If short, extend to min(minrun, nremaining). */
     730         194 :                 if (n < minrun) {
     731         145 :                         ssize_t force = nremaining <= minrun ? nremaining : minrun;
     732             : 
     733         145 :                         binarysort(lo, lo + force, lo + n, ms);
     734         145 :                         n = force;
     735             :                 }
     736             :                 /* Push run onto pending-runs stack, and maybe merge. */
     737         194 :                 assert(ms->n < MAX_MERGE_PENDING);
     738         194 :                 ms->pending[ms->n].base = lo;
     739         194 :                 ms->pending[ms->n].len = n;
     740         194 :                 ms->n++;
     741             :                 {
     742             : /* Examine the stack of runs waiting to be merged, merging adjacent
     743             :  * runs until the stack invariants are re-established:
     744             :  *
     745             :  * 1. len[-3] > len[-2] + len[-1]
     746             :  * 2. len[-2] > len[-1]
     747             :  *
     748             :  * See listsort.txt for more info.
     749             :  *
     750             :  * Returns 0 on success, -1 on error. */
     751         194 :                         struct slice *p = ms->pending;
     752             : 
     753         202 :                         while (ms->n > 1) {
     754           9 :                                 ssize_t i = ms->n - 2;
     755             : 
     756           9 :                                 if ((i > 0 &&
     757           0 :                                      p[i-1].len <= p[i].len + p[i+1].len) ||
     758           0 :                                     (i > 1 &&
     759           0 :                                      p[i-2].len <= p[i-1].len + p[i].len)) {
     760           0 :                                         if (p[i - 1].len < p[i + 1].len)
     761           0 :                                                 --i;
     762           0 :                                         if (merge_at(ms, i) < 0)
     763             :                                                 return -1;
     764           9 :                                 } else if (p[i].len <= p[i + 1].len) {
     765           8 :                                         if (merge_at(ms, i) < 0)
     766             :                                                 return -1;
     767             :                                 } else
     768             :                                         break;
     769             :                         }
     770             :                 }
     771             :                 /* Advance to find next run. */
     772         194 :                 lo += n;
     773         194 :                 nremaining -= n;
     774         194 :         } while (nremaining > 0);
     775         185 :         assert(lo == hi);
     776             : 
     777             :         {
     778             : /* Regardless of invariants, merge all runs on the stack until only
     779             :  * one remains.  This is used at the end of the mergesort.
     780             :  *
     781             :  * Returns 0 on success, -1 on error. */
     782             :                 struct slice *p = ms->pending;
     783             : 
     784         186 :                 while (ms->n > 1) {
     785           1 :                         ssize_t n = ms->n - 2;
     786             : 
     787           1 :                         if (n > 0 && p[n - 1].len < p[n + 1].len)
     788           0 :                                 --n;
     789           1 :                         if (merge_at(ms, n) < 0)
     790             :                                 return -1;
     791             :                 }
     792             :         }
     793             :         return 0;
     794             : }
     795             : 
     796             : #ifdef GDKssortimpl
     797             : /* Stable sort an array "h" (and move t accordingly).
     798             :  * "nitems" is the number of items to sort; "hs"+"ts" is the size of
     799             :  * the items, "tpe" is the type of the key within the items. If "heap"
     800             :  * is non-NULL, the key is actually an offset relative to "heap" and
     801             :  * the actual key is found at that offset (MonetDB var-sized
     802             :  * atoms). */
     803             : gdk_return
     804         185 : GDKssortimpl(void *restrict h, void *restrict t, const void *restrict heap,
     805             :              size_t nitems, int hs, int ts, int tpe)
     806             : {
     807         185 :         char temp;
     808         185 :         MergeState ms;
     809         185 :         ssize_t nremaining;
     810         185 :         gdk_return result = GDK_FAIL;
     811         185 :         size_t lo, hi;
     812         185 :         ssize_t minrun;
     813             : 
     814         185 :         assert(h);
     815         185 :         assert(hs > 0);
     816             : 
     817         185 :         ms.ah = (void *) ms.temparrayh;
     818         185 :         ms.allocedh = MERGESTATE_TEMP_SIZE;
     819         185 :         ms.at = (void *) ms.temparrayt;
     820         185 :         ms.allocedt = MERGESTATE_TEMP_SIZE;
     821         185 :         ms.n = 0;
     822         185 :         ms.min_gallop = MIN_GALLOP;
     823         185 :         ms.compare = ATOMcompare(tpe);
     824         185 :         ms.heap = heap;
     825         185 :         ms.hs = hs;
     826         185 :         ms.ts = ts;
     827         185 :         ms.bh = h;
     828         185 :         if (!t)
     829         110 :                 t = &temp;
     830         185 :         ms.bt = t;
     831         185 :         ms.th = ms.tempstorageh;
     832         185 :         ms.tt = ms.tempstoraget;
     833         185 :         assert((size_t) hs <= sizeof(ms.tempstorageh));
     834         185 :         assert((size_t) ts <= sizeof(ms.tempstoraget));
     835         185 :         nremaining = (ssize_t) nitems;
     836             : 
     837         185 :         if (nremaining < 2)
     838           0 :                 goto succeed;
     839             : 
     840         185 :         tpe = ATOMbasetype(tpe);
     841             : 
     842             :         /* March over the array once, left to right, finding natural
     843             :          * runs, and extending short natural runs to minrun
     844             :          * elements. */
     845         185 :         lo = 0;
     846         185 :         hi = lo + nremaining;
     847         185 :         minrun = merge_compute_minrun(nremaining);
     848         185 :         switch (tpe) {
     849           2 :         case TYPE_bte:
     850           2 :                 if (do_ssort_bte(&ms, nremaining, lo, hi, minrun) < 0)
     851           0 :                         goto fail;
     852             :                 break;
     853           1 :         case TYPE_sht:
     854           1 :                 if (do_ssort_sht(&ms, nremaining, lo, hi, minrun) < 0)
     855           0 :                         goto fail;
     856             :                 break;
     857         128 :         case TYPE_int:
     858         128 :                 if (do_ssort_int(&ms, nremaining, lo, hi, minrun) < 0)
     859           0 :                         goto fail;
     860             :                 break;
     861          24 :         case TYPE_lng:
     862          24 :                 if (do_ssort_lng(&ms, nremaining, lo, hi, minrun) < 0)
     863           0 :                         goto fail;
     864             :                 break;
     865             : #ifdef HAVE_HGE
     866           5 :         case TYPE_hge:
     867           5 :                 if (do_ssort_hge(&ms, nremaining, lo, hi, minrun) < 0)
     868           0 :                         goto fail;
     869             :                 break;
     870             : #endif
     871           3 :         case TYPE_flt:
     872           3 :                 if (do_ssort_flt(&ms, nremaining, lo, hi, minrun) < 0)
     873           0 :                         goto fail;
     874             :                 break;
     875           2 :         case TYPE_dbl:
     876           2 :                 if (do_ssort_dbl(&ms, nremaining, lo, hi, minrun) < 0)
     877           0 :                         goto fail;
     878             :                 break;
     879          20 :         default:
     880          20 :                 if (do_ssort_any(&ms, nremaining, lo, hi, minrun) < 0)
     881           0 :                         goto fail;
     882             :                 break;
     883             :         }
     884         185 :         assert(ms.n == 1);
     885         185 :         assert(ms.pending[0].base == 0);
     886         185 :         assert(ms.pending[0].len == (ssize_t) nitems);
     887             : 
     888             :   succeed:
     889             :         result = GDK_SUCCEED;
     890         185 :   fail:
     891         185 :         merge_freemem(&ms);
     892         185 :         return result;
     893             : }
     894             : #endif  /* GDKssortimpl */

Generated by: LCOV version 1.14