Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "sql.h"
15 : #include "mal.h"
16 : #include "mal_client.h"
17 :
18 : #include "for.h"
19 :
20 : static sql_column *
21 5 : get_newcolumn(sql_trans *tr, sql_column *c)
22 : {
23 5 : sql_table *t = find_sql_table_id(tr, c->t->s, c->t->base.id);
24 5 : if (t)
25 5 : return find_sql_column(t, c->base.name);
26 : return NULL;
27 : }
28 :
29 : BAT *
30 12 : FORdecompress_(BAT *o, lng minval, int type, role_t role)
31 : {
32 12 : BAT *b = COLnew(o->hseqbase, type, BATcount(o), role);
33 :
34 12 : if (!b)
35 : return NULL;
36 12 : BUN cnt = BATcount(o);
37 : #ifdef HAVE_HGE
38 12 : if (type == TYPE_hge) {
39 0 : if (o->ttype == TYPE_bte) {
40 0 : hge *ov = Tloc(b, 0);
41 0 : bte *iv = Tloc(o, 0);
42 0 : for(BUN i = 0; i<cnt; i++)
43 0 : ov[i] = minval + iv[i];
44 : } else {
45 0 : hge *ov = Tloc(b, 0);
46 0 : sht *iv = Tloc(o, 0);
47 0 : for(BUN i = 0; i<cnt; i++)
48 0 : ov[i] = minval + iv[i];
49 : }
50 : } else
51 : #endif
52 12 : if (type == TYPE_lng) {
53 12 : if (o->ttype == TYPE_bte) {
54 11 : lng *ov = Tloc(b, 0);
55 11 : bte *iv = Tloc(o, 0);
56 59332 : for(BUN i = 0; i<cnt; i++)
57 59321 : ov[i] = minval + iv[i];
58 : } else {
59 1 : lng *ov = Tloc(b, 0);
60 1 : sht *iv = Tloc(o, 0);
61 4 : for(BUN i = 0; i<cnt; i++)
62 3 : ov[i] = minval + iv[i];
63 : }
64 0 : } else if (type == TYPE_int) {
65 0 : if (o->ttype == TYPE_bte) {
66 0 : int *ov = Tloc(b, 0);
67 0 : bte *iv = Tloc(o, 0);
68 0 : for(BUN i = 0; i<cnt; i++)
69 0 : ov[i] = (int) (minval + iv[i]);
70 : } else {
71 0 : int *ov = Tloc(b, 0);
72 0 : sht *iv = Tloc(o, 0);
73 0 : for(BUN i = 0; i<cnt; i++)
74 0 : ov[i] = (int) (minval + iv[i]);
75 : }
76 : }
77 12 : BATsetcount(b, cnt);
78 12 : BATnegateprops(b);
79 12 : return b;
80 : }
81 :
82 : str
83 11 : FORdecompress(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
84 : {
85 11 : (void)cntxt;
86 11 : bat *r = getArgReference_bat(stk, pci, 0);
87 11 : bat O = *getArgReference_bat(stk, pci, 1);
88 11 : int tt = getArgType(mb, pci, 2);
89 :
90 11 : if (
91 : #ifdef HAVE_HGE
92 11 : tt != TYPE_hge &&
93 : #endif
94 11 : tt != TYPE_lng && tt != TYPE_int)
95 0 : throw(SQL, "for.decompress", SQLSTATE(3F000) "for decompress: invalid offset type");
96 :
97 11 : BAT *o = BATdescriptor(O), *b = NULL;
98 11 : if (!o) {
99 0 : throw(SQL, "for.decompress", SQLSTATE(HY013) MAL_MALLOC_FAIL);
100 : }
101 11 : if (o->ttype != TYPE_bte && o->ttype != TYPE_sht) {
102 0 : bat_destroy(o);
103 0 : throw(SQL, "for.decompress", SQLSTATE(3F000) "for decompress: invalid type");
104 : }
105 :
106 11 : lng minval = *getArgReference_lng(stk, pci, 2);
107 :
108 11 : b = FORdecompress_(o, minval, tt, TRANSIENT);
109 11 : if (!b) {
110 0 : bat_destroy(o);
111 0 : throw(SQL, "for.decompress", SQLSTATE(HY013) MAL_MALLOC_FAIL);
112 : }
113 11 : bat_destroy(o);
114 11 : *r = b->batCacheid;
115 11 : BBPkeepref(b);
116 11 : return MAL_SUCCEED;
117 : }
118 :
119 : static BAT *
120 5 : FORcompress_(BAT *b, lng min_val, lng max_val, role_t role)
121 : {
122 5 : BAT *o;
123 5 : BUN cnt = BATcount(b);
124 :
125 5 : if ((max_val-min_val) < GDK_bte_max/2) {
126 4 : o = COLnew(b->hseqbase, TYPE_bte, cnt, role);
127 4 : if (!o)
128 : return NULL;
129 4 : bte *ov = Tloc(o, 0);
130 4 : lng *iv = Tloc(b, 0);
131 60184 : for(BUN i = 0; i<cnt; i++)
132 60180 : ov[i] = (bte)(iv[i] - min_val);
133 : } else {
134 1 : o = COLnew(b->hseqbase, TYPE_sht, cnt, role);
135 1 : if (!o)
136 : return NULL;
137 1 : sht *ov = Tloc(o, 0);
138 1 : lng *iv = Tloc(b, 0);
139 4 : for(BUN i = 0; i<cnt; i++)
140 3 : ov[i] = (sht)(iv[i] - min_val);
141 : }
142 5 : BATsetcount(o, cnt);
143 5 : BATnegateprops(o);
144 5 : return o;
145 : }
146 :
147 : static str
148 5 : FORcompress_intern(char **comp_min_val, BAT **r, BAT *b)
149 : {
150 5 : BAT *o = NULL;
151 5 : char buf[64];
152 5 : int tt = b->ttype;
153 5 : ptr mn = NULL, mx = NULL;
154 5 : BUN cnt = BATcount(b);
155 :
156 5 : if (
157 : #ifdef HAVE_HGE
158 5 : tt != TYPE_hge &&
159 : #endif
160 5 : tt != TYPE_lng && tt != TYPE_int)
161 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "for compress: invalid column type");
162 5 : if (cnt == 0)
163 0 : throw(SQL, "for.compress", SQLSTATE(42000) "for compress: cannot compute range of values on empty columns");
164 :
165 : /* For now we only handle hge, lng, and int -> sht and bte */
166 5 : if (!(mn = BATmin(b, NULL)))
167 0 : throw(SQL, "for.compress", GDK_EXCEPTION);
168 5 : if (!(mx = BATmax(b, NULL))) {
169 0 : GDKfree(mn);
170 0 : throw(SQL, "for.compress", GDK_EXCEPTION);
171 : }
172 :
173 5 : if (tt == TYPE_lng) {
174 5 : lng min_val = *(lng*)mn;
175 5 : lng max_val = *(lng*)mx;
176 :
177 5 : GDKfree(mn);
178 5 : GDKfree(mx);
179 : /* defensive line, if there are 'holes' on b, 'for' compression cannot be done */
180 5 : if (is_lng_nil(min_val) || is_lng_nil(max_val))
181 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "for compress: for 'for' compression column's cannot have NULL's");
182 5 : if ((max_val-min_val) > GDK_sht_max)
183 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "for compress: too large value spread for 'for' compression");
184 5 : o = FORcompress_(b, min_val, max_val, PERSISTENT);
185 5 : if (!o)
186 0 : throw(SQL, "for.compress", SQLSTATE(HY013) MAL_MALLOC_FAIL);
187 5 : snprintf(buf, 64, "FOR-" LLFMT, min_val);
188 : } else {
189 0 : GDKfree(mn);
190 0 : GDKfree(mx);
191 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "for compress: type %s not yet implemented", ATOMname(tt));
192 : }
193 5 : if (!(*comp_min_val = GDKstrdup(buf))) {
194 0 : bat_destroy(o);
195 0 : throw(SQL, "for.compress", SQLSTATE(HY013) MAL_MALLOC_FAIL);
196 : }
197 5 : *r = o;
198 5 : return NULL;
199 : }
200 :
201 : str
202 5 : FORcompress_col(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci)
203 : {
204 5 : (void)mb;
205 : /* always assume one result */
206 5 : str msg = MAL_SUCCEED;
207 5 : const char *sname = *getArgReference_str(stk, pci, 1);
208 5 : const char *tname = *getArgReference_str(stk, pci, 2);
209 5 : const char *cname = *getArgReference_str(stk, pci, 3);
210 5 : backend *be = NULL;
211 5 : sql_trans *tr = NULL;
212 :
213 5 : if (!sname || !tname || !cname)
214 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "for compress: invalid column name");
215 5 : if (strNil(sname))
216 0 : throw(SQL, "for.compress", SQLSTATE(42000) "Schema name cannot be NULL");
217 5 : if (strNil(tname))
218 0 : throw(SQL, "for.compress", SQLSTATE(42000) "Table name cannot be NULL");
219 5 : if (strNil(cname))
220 0 : throw(SQL, "for.compress", SQLSTATE(42000) "Column name cannot be NULL");
221 5 : if ((msg = getBackendContext(cntxt, &be)) != MAL_SUCCEED)
222 : return msg;
223 5 : tr = be->mvc->session->tr;
224 :
225 5 : sql_schema *s = find_sql_schema(tr, sname);
226 5 : if (!s)
227 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "schema '%s' unknown", sname);
228 5 : sql_table *t = find_sql_table(tr, s, tname);
229 5 : if (!t)
230 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "table '%s.%s' unknown", sname, tname);
231 5 : if (!isTable(t))
232 0 : throw(SQL, "for.compress", SQLSTATE(42000) "%s '%s' is not persistent",
233 0 : TABLE_TYPE_DESCRIPTION(t->type, t->properties), t->base.name);
234 5 : if (isTempTable(t))
235 0 : throw(SQL, "for.compress", SQLSTATE(42000) "columns from temporary tables cannot be compressed");
236 5 : if (t->system)
237 0 : throw(SQL, "for.compress", SQLSTATE(42000) "columns from system tables cannot be compressed");
238 5 : sql_column *c = find_sql_column(t, cname);
239 5 : if (!c)
240 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "column '%s.%s.%s' unknown", sname, tname, cname);
241 5 : if (c->null)
242 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "for compress: for 'for' compression column's cannot have NULL's");
243 5 : if (c->storage_type)
244 0 : throw(SQL, "for.compress", SQLSTATE(3F000) "column '%s.%s.%s' already compressed", sname, tname, cname);
245 :
246 5 : sqlstore *store = tr->store;
247 5 : BAT *b = store->storage_api.bind_col(tr, c, RDONLY), *o = NULL;
248 5 : if( b == NULL)
249 0 : throw(SQL,"for.compress", SQLSTATE(HY005) "Cannot access column descriptor");
250 :
251 5 : char *comp_min_val = NULL;
252 5 : msg = FORcompress_intern(&comp_min_val, &o, b);
253 5 : bat_destroy(b);
254 5 : if (msg == MAL_SUCCEED) {
255 5 : switch (sql_trans_alter_storage(tr, c, comp_min_val)) {
256 0 : case -1:
257 0 : msg = createException(SQL, "for.compress", SQLSTATE(HY013) MAL_MALLOC_FAIL);
258 0 : break;
259 0 : case -2:
260 : case -3:
261 0 : msg = createException(SQL, "for.compress", SQLSTATE(42000) "transaction conflict detected");
262 0 : break;
263 : default:
264 : break;
265 : }
266 5 : if (msg == MAL_SUCCEED && !(c = get_newcolumn(tr, c)))
267 0 : msg = createException(SQL, "for.compress", SQLSTATE(HY013) "alter_storage failed");
268 0 : if (msg == MAL_SUCCEED) {
269 5 : switch (store->storage_api.col_compress(tr, c, ST_FOR, o, NULL)) {
270 0 : case -1:
271 0 : msg = createException(SQL, "for.compress", SQLSTATE(HY013) MAL_MALLOC_FAIL);
272 0 : break;
273 0 : case -2:
274 : case -3:
275 0 : msg = createException(SQL, "for.compress", SQLSTATE(42000) "transaction conflict detected");
276 0 : break;
277 : default:
278 : break;
279 : }
280 : }
281 5 : GDKfree(comp_min_val);
282 5 : bat_destroy(o);
283 : }
284 : return msg;
285 : }
286 :
287 : int
288 0 : FORprepare4append(BAT **noffsets, BAT *b, lng minval, int tt)
289 : {
290 0 : ptr mn = NULL, mx = NULL;
291 0 : *noffsets = NULL;
292 :
293 0 : if (!(mn = BATmin(b, NULL)))
294 : return -1;
295 0 : if (!(mx = BATmax(b, NULL))) {
296 0 : GDKfree(mn);
297 0 : return -1;
298 : }
299 0 : if (b->ttype == TYPE_lng) {
300 0 : lng min_val = *(lng*)mn;
301 0 : lng max_val = *(lng*)mx;
302 0 : lng maxcnt = (tt == TYPE_bte)?GDK_bte_max/2:GDK_sht_max;
303 :
304 0 : GDKfree(mn);
305 0 : GDKfree(mx);
306 0 : if (min_val < minval || max_val < minval || (max_val - minval) > maxcnt || is_lng_nil(min_val) || is_lng_nil(max_val))
307 : return 0; /* decompress */
308 :
309 0 : *noffsets = FORcompress_(b, minval, max_val, TRANSIENT);
310 : }
311 : return 0;
312 : }
313 :
314 : int
315 1 : FORprepare4append_vals(void **noffsets, void *vals, BUN cnt, lng minval, int vtype, int tt)
316 : {
317 1 : *noffsets = NULL;
318 :
319 1 : assert(cnt);
320 1 : if (vtype == TYPE_lng) {
321 : BUN i = 0;
322 : lng min = GDK_lng_max;
323 : lng max = GDK_lng_min;
324 : lng *v = vals;
325 :
326 2 : for(i=0; i<cnt; i++) {
327 1 : if (is_lng_nil(v[i]))
328 : break;
329 1 : if (min > v[i])
330 : min = v[i];
331 1 : if (max < v[i])
332 : max = v[i];
333 : }
334 1 : if (i<cnt)
335 : return 0;
336 1 : lng maxcnt = (tt == TYPE_bte)?GDK_bte_max/2:GDK_sht_max;
337 1 : if (min < minval || max < minval || (max - min) > maxcnt)
338 : return 0; /* decompress */
339 0 : if (tt == TYPE_bte) {
340 0 : bte *n = *noffsets = GDKmalloc(sizeof(bte) * cnt);
341 0 : if (!n)
342 : return -1;
343 0 : for(BUN i=0; i<cnt; i++)
344 0 : n[i] = (bte) (v[i] - minval);
345 : } else {
346 0 : sht *n = *noffsets = GDKmalloc(sizeof(sht) * cnt);
347 0 : if (!n)
348 : return -1;
349 0 : for(BUN i=0; i<cnt; i++)
350 0 : n[i] = (sht) (v[i] - minval);
351 : }
352 : }
353 : return 0;
354 : }
|