Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : #define RADIX 8 /* one char at a time */
18 : #define NBUCKETS (1 << RADIX)
19 :
20 : gdk_return
21 20368 : GDKrsort(void *restrict h, void *restrict t, size_t n, size_t hs, size_t ts, bool reverse, bool isuuid)
22 : {
23 20368 : size_t (*counts)[NBUCKETS] = GDKzalloc(hs * sizeof(counts[0]));
24 20368 : size_t pos[NBUCKETS];
25 20368 : uint8_t *h1 = h;
26 20368 : uint8_t *h2;
27 20368 : uint8_t *t1 = NULL;
28 20368 : uint8_t *t2 = NULL;
29 20368 : Heap tmph, tmpt;
30 :
31 20368 : if (counts == NULL)
32 : return GDK_FAIL;
33 :
34 20368 : tmph = tmpt = (Heap) {
35 : .farmid = 1,
36 : };
37 :
38 40736 : snprintf(tmph.filename, sizeof(tmph.filename), "%s%crsort%zuh",
39 20368 : TEMPDIR_NAME, DIR_SEP, (size_t) MT_getpid());
40 20368 : if (HEAPalloc(&tmph, n, hs) != GDK_SUCCEED) {
41 0 : GDKfree(counts);
42 0 : return GDK_FAIL;
43 : }
44 20368 : h2 = (uint8_t *) tmph.base;
45 :
46 20368 : if (t) {
47 40286 : snprintf(tmpt.filename, sizeof(tmpt.filename), "%s%crsort%zut",
48 20143 : TEMPDIR_NAME, DIR_SEP, (size_t) MT_getpid());
49 20143 : if (HEAPalloc(&tmpt, n, ts) != GDK_SUCCEED) {
50 0 : GDKfree(counts);
51 0 : HEAPfree(&tmph, true);
52 0 : return GDK_FAIL;
53 : }
54 20143 : t1 = t;
55 20143 : t2 = (uint8_t *) tmpt.base;
56 : } else {
57 : ts = 0;
58 : }
59 :
60 : #ifndef WORDS_BIGENDIAN
61 20368 : if (isuuid /* UUID, treat like big-endian */) {
62 : #endif
63 5 : for (size_t i = 0, o = 0; i < n; i++, o += hs) {
64 68 : for (size_t j = 0, k = hs - 1; j < hs; j++, k--) {
65 64 : uint8_t v = h1[o + k];
66 64 : counts[j][v]++;
67 : }
68 : }
69 : #ifndef WORDS_BIGENDIAN
70 : } else {
71 44693936 : for (size_t i = 0, o = 0; i < n; i++, o += hs) {
72 227234582 : for (size_t j = 0; j < hs; j++) {
73 182561013 : uint8_t v = h1[o + j];
74 182561013 : counts[j][v]++;
75 : }
76 : }
77 : }
78 : #endif
79 : /* When sorting in ascending order, the negative numbers occupy
80 : * the second half of the buckets in the last iteration; when
81 : * sorting in descending order, the negative numbers occupy the
82 : * first half. In either case, at the end we need to put the
83 : * second half first and the first half after. */
84 20368 : size_t negpos = 0;
85 102293 : for (size_t j = 0, k = hs - 1; j < hs; j++, k--) {
86 81925 : size_t nb = counts[j][0] > 0;
87 81925 : if (reverse) {
88 74 : pos[NBUCKETS - 1] = 0;
89 18944 : for (size_t i = NBUCKETS - 1; i > 0; i--) {
90 18870 : pos[i - 1] = pos[i] + counts[j][i];
91 18870 : nb += counts[j][i] > 0;
92 : }
93 : } else {
94 81851 : pos[0] = 0;
95 20953601 : for (size_t i = 1; i < NBUCKETS; i++) {
96 20871750 : pos[i] = pos[i - 1] + counts[j][i - 1];
97 20871750 : nb += counts[j][i] > 0;
98 : }
99 : }
100 : /* we're only interested in the position in the last
101 : * iteration */
102 81925 : negpos = pos[NBUCKETS / 2 - reverse];
103 81925 : if (nb == 1) {
104 : /* no need to reshuffle data for this iteration:
105 : * everything is in the same bucket */
106 22168 : continue;
107 : }
108 : /* note, this loop changes the pos array */
109 : #ifndef WORDS_BIGENDIAN
110 59757 : if (isuuid /* UUID, treat like big-endian */)
111 : #endif
112 80 : for (size_t i = 0, ho = 0, to = 0; i < n; i++, ho += hs, to += ts) {
113 64 : uint8_t v = h1[ho + k];
114 64 : if (t)
115 64 : memcpy(t2 + ts * pos[v], t1 + to, ts);
116 64 : memcpy(h2 + hs * pos[v]++, h1 + ho, hs);
117 : }
118 : #ifndef WORDS_BIGENDIAN
119 : else
120 82310483 : for (size_t i = 0, ho = 0, to = 0; i < n; i++, ho += hs, to += ts) {
121 82250742 : uint8_t v = h1[ho + j];
122 82250742 : if (t)
123 78397777 : memcpy(t2 + ts * pos[v], t1 + to, ts);
124 82250742 : memcpy(h2 + hs * pos[v]++, h1 + ho, hs);
125 : }
126 : #endif
127 : uint8_t *t = h1;
128 : h1 = h2;
129 : h2 = t;
130 : t = t1;
131 : t1 = t2;
132 : t2 = t;
133 : }
134 20368 : GDKfree(counts);
135 :
136 20368 : if (h1 != (uint8_t *) h) {
137 : /* we need to copy the data back to the correct heap */
138 19120 : if (isuuid) {
139 : /* no negative values in uuid, so no shuffling */
140 0 : memcpy(h2, h1, n * hs);
141 0 : if (t)
142 0 : memcpy(t2, t1, n * ts);
143 : } else {
144 : /* copy the negative integers to the start, copy positive after */
145 19120 : if (negpos < n) {
146 25 : memcpy(h2, h1 + hs * negpos, (n - negpos) * hs);
147 25 : if (t)
148 22 : memcpy(t2, t1 + ts * negpos, (n - negpos) * ts);
149 : }
150 19120 : if (negpos > 0) {
151 19109 : memcpy(h2 + hs * (n - negpos), h1, negpos * hs);
152 19109 : if (t)
153 18961 : memcpy(t2 + ts * (n - negpos), t1, negpos * ts);
154 : }
155 : }
156 1248 : } else if (negpos > 0 && negpos < n && !isuuid) {
157 : /* copy the negative integers to the start, copy positive after */
158 26 : memcpy(h2, h1 + hs * negpos, (n - negpos) * hs);
159 26 : memcpy(h2 + hs * (n - negpos), h1, negpos * hs);
160 26 : memcpy(h, h2, n * hs);
161 26 : if (t) {
162 24 : memcpy(t2, t1 + ts * negpos, (n - negpos) * ts);
163 24 : memcpy(t2 + ts * (n - negpos), t1, negpos * ts);
164 24 : memcpy(t, t2, n * ts);
165 : }
166 : } /* else, everything is already in the correct place */
167 20368 : HEAPfree(&tmph, true);
168 20368 : if (t)
169 20143 : HEAPfree(&tmpt, true);
170 : return GDK_SUCCEED;
171 : }
|