Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "opt_commonTerms.h"
15 : #include "mal_exception.h"
16 : /*
17 : * Caveat. A lot of time was lost due to constants that are indistinguishable
18 : * at the surface level. It requires the constant optimizer to be run first.
19 : */
20 :
21 : /* The key for finding common terms is that they share variables.
22 : * Therefore we skip all constants, except for a constant only situation.
23 : */
24 :
25 : /*
26 : * Speed up simple insert operations by skipping the common terms.
27 : */
28 :
29 : __attribute__((__pure__))
30 : static inline bool
31 130579 : isProjectConst(const InstrRecord *p)
32 : {
33 130579 : return (getModuleId(p) == algebraRef && getFunctionId(p) == projectRef);
34 : }
35 :
36 : static int __attribute__((__pure__))
37 7062720 : hashInstruction(const MalBlkRecord *mb, const InstrRecord *p)
38 : {
39 7062720 : int i;
40 18917422 : for (i = p->argc - 1; i >= p->retc; i--)
41 18610265 : if (!isVarConstant(mb, getArg(p, i)))
42 6755563 : return getArg(p, i);
43 307157 : if (isVarConstant(mb, getArg(p, p->retc)))
44 307157 : return p->retc;
45 : return -1;
46 : }
47 :
48 : str
49 445935 : OPTcommonTermsImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
50 : InstrPtr pci)
51 : {
52 445935 : int i, j, k, barrier = 0, bailout = 0;
53 445935 : InstrPtr p, q;
54 445935 : int actions = 0;
55 445935 : int limit, slimit;
56 445935 : int duplicate;
57 445935 : int *alias = NULL;
58 445935 : int *hash = NULL, h;
59 445935 : int *list = NULL;
60 445935 : str msg = MAL_SUCCEED;
61 :
62 445935 : InstrPtr *old = NULL;
63 :
64 : /* catch simple insert operations */
65 445935 : if (isSimpleSQL(mb)) {
66 283633 : goto wrapup;
67 : }
68 :
69 162301 : (void) cntxt;
70 162301 : (void) stk;
71 162301 : alias = (int *) GDKzalloc(sizeof(int) * mb->vtop);
72 162302 : list = (int *) GDKzalloc(sizeof(int) * mb->stop);
73 162302 : hash = (int *) GDKzalloc(sizeof(int) * mb->vtop);
74 162303 : if (alias == NULL || list == NULL || hash == NULL) {
75 0 : msg = createException(MAL, "optimizer.commonTerms",
76 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
77 0 : goto wrapup;
78 : }
79 :
80 162303 : old = mb->stmt;
81 162303 : limit = mb->stop;
82 162303 : slimit = mb->ssize;
83 162303 : if (newMalBlkStmt(mb, mb->ssize) < 0) {
84 0 : msg = createException(MAL, "optimizer.commonTerms",
85 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
86 0 : old = NULL;
87 0 : goto wrapup;
88 : }
89 :
90 9171452 : for (i = 0; mb->errors == NULL && i < limit; i++) {
91 9171452 : p = old[i];
92 9171452 : duplicate = 0;
93 :
94 48905368 : for (k = 0; k < p->argc; k++)
95 39733916 : if (alias[getArg(p, k)])
96 159837 : getArg(p, k) = alias[getArg(p, k)];
97 :
98 9171452 : if (p->token == ENDsymbol) {
99 162297 : pushInstruction(mb, p);
100 162296 : old[i] = NULL;
101 162296 : break;
102 : }
103 : /*
104 : * Any barrier block signals the end of this optimizer,
105 : * because the impact of the block can affect the common code eliminated.
106 : */
107 18018310 : barrier |= (p->barrier == BARRIERsymbol || p->barrier == CATCHsymbol
108 9009155 : || p->barrier == RETURNsymbol);
109 : /*
110 : * Also block further optimization when you have seen an assert().
111 : * This works particularly for SQL, because it is not easy to track
112 : * the BAT identifier aliases to look for updates. The sql.assert
113 : * at least tells us that an update is planned.
114 : * Like all optimizer decisions, it is safe to stop.
115 : */
116 9009155 : barrier |= getFunctionId(p) == assertRef;
117 9009155 : if (barrier || p->token == ASSIGNsymbol) {
118 256200 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped[%d]: %d %d\n", i, barrier,
119 : p->retc == p->argc);
120 256200 : pushInstruction(mb, p);
121 256200 : old[i] = NULL;
122 256200 : continue;
123 : }
124 :
125 : /* when we enter a barrier block, we should ditch all previous instructions from consideration */
126 8752955 : if (p->barrier == BARRIERsymbol || p->barrier == CATCHsymbol
127 8752955 : || p->barrier == RETURNsymbol) {
128 0 : memset(list, 0, sizeof(int) * mb->stop);
129 0 : memset(hash, 0, sizeof(int) * mb->vtop);
130 : }
131 : /* side-effect producing operators can never be replaced */
132 : /* the same holds for function calls without an argument, it is
133 : * unclear where the results comes from (e.g. clock()) */
134 8752955 : if (mayhaveSideEffects(cntxt, mb, p, TRUE) || p->argc == p->retc) {
135 1061011 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped[%d] side-effect: %d\n", i,
136 : p->retc == p->argc);
137 1061011 : pushInstruction(mb, p);
138 1060979 : old[i] = NULL;
139 1060979 : continue;
140 : }
141 : /* simple SQL bind operations need not be merged, they are cheap
142 : * and/or can be duplicated eliminated elsewhere cheaper */
143 7692279 : if (getModuleId(p) == sqlRef && (getFunctionId(p) != tidRef && getFunctionId(p) != bindRef)) {
144 449040 : pushInstruction(mb, p);
145 449040 : old[i] = NULL;
146 449040 : continue;
147 : }
148 7243239 : if (getModuleId(p) == matRef) { /* mat.packIncrement has requirement on number of instructions (or that needs an update */
149 180700 : pushInstruction(mb, p);
150 180700 : old[i] = NULL;
151 180700 : continue;
152 : }
153 :
154 : /* from here we have a candidate to look for a match */
155 :
156 7062539 : h = hashInstruction(mb, p);
157 :
158 7062539 : TRC_DEBUG(MAL_OPTIMIZER, "Candidate[%d] look at list[%d] => %d\n", i, h,
159 : hash[h]);
160 7062539 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
161 :
162 7062577 : if (h < 0) {
163 0 : pushInstruction(mb, p);
164 0 : old[i] = NULL;
165 0 : continue;
166 : }
167 :
168 7062577 : bailout = 1024; // don't run over long collision list
169 : /* Look into the hash structure for matching instructions */
170 41218683 : for (j = hash[h]; j > 0 && bailout-- > 0; j = list[j]) {
171 34273401 : if ((q = getInstrPtr(mb, j))
172 34273401 : && getFunctionId(q) == getFunctionId(p)
173 24202803 : && getModuleId(q) == getModuleId(p)) {
174 24202805 : TRC_DEBUG(MAL_OPTIMIZER,
175 : "Candidate[%d->%d] %d %d :%d %d %d=%d %d %d %d\n", j,
176 : list[j], hasSameSignature(mb, p, q),
177 : hasSameArguments(mb, p, q), q->token != ASSIGNsymbol,
178 : list[getArg(q, q->argc - 1)], i, !hasCommonResults(p,
179 : q),
180 : !isUnsafeFunction(q), !isUpdateInstruction(q),
181 : isLinearFlow(q));
182 24202805 : traceInstruction(MAL_OPTIMIZER, mb, 0, q, LIST_MAL_ALL);
183 :
184 : /*
185 : * Simple assignments are not replaced either. They should be
186 : * handled by the alias removal part. All arguments should
187 : * be assigned their value before instruction p.
188 : */
189 24201400 : if (hasSameArguments(mb, p, q)
190 130597 : && hasSameSignature(mb, p, q)
191 130579 : && !hasCommonResults(p, q)
192 130579 : && !isUnsafeFunction(q)
193 130579 : && !isUpdateInstruction(q)
194 130579 : && !isProjectConst(q) && /* disable project(x,val), as its used for the result of case statements */
195 117195 : isLinearFlow(q)) {
196 117195 : if (safetyBarrier(p, q)) {
197 0 : TRC_DEBUG(MAL_OPTIMIZER, "Safety barrier reached\n");
198 : break;
199 : }
200 117195 : duplicate = 1;
201 117195 : clrFunction(p);
202 117195 : p->argc = p->retc;
203 241208 : for (k = 0; k < q->retc; k++) {
204 124013 : alias[getArg(p, k)] = getArg(q, k);
205 : /* we know the arguments fit so the instruction can safely be patched */
206 124013 : p = pushArgument(mb, p, getArg(q, k));
207 : }
208 :
209 117195 : TRC_DEBUG(MAL_OPTIMIZER, "Modified expression %d -> %d ",
210 : getArg(p, 0), getArg(p, 1));
211 117195 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
212 :
213 117195 : actions++;
214 117195 : break; /* end of search */
215 : }
216 10070596 : } else if (isUpdateInstruction(p)) {
217 0 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped: %d %d\n",
218 : mayhaveSideEffects(cntxt, mb, q, TRUE),
219 : isUpdateInstruction(p));
220 0 : traceInstruction(MAL_OPTIMIZER, mb, 0, q, LIST_MAL_ALL);
221 : }
222 : }
223 :
224 7062477 : if (duplicate) {
225 117195 : pushInstruction(mb, p);
226 117195 : old[i] = NULL;
227 117195 : continue;
228 : }
229 : /* update the hash structure with another candidate for reuse */
230 6945282 : TRC_DEBUG(MAL_OPTIMIZER,
231 : "Update hash[%d] - look at arg '%d' hash '%d' list '%d'\n", i,
232 : getArg(p, p->argc - 1), h, hash[h]);
233 6945282 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
234 :
235 6945247 : if (!mayhaveSideEffects(cntxt, mb, p, TRUE) && p->argc != p->retc
236 13890357 : && isLinearFlow(p) && !isUnsafeFunction(p)
237 6945144 : && !isUpdateInstruction(p)) {
238 6945056 : list[i] = hash[h];
239 6945056 : hash[h] = i;
240 6945056 : pushInstruction(mb, p);
241 6945043 : old[i] = NULL;
242 : }
243 : }
244 37669352 : for (; i < slimit; i++)
245 37507050 : if (old[i])
246 3936652 : pushInstruction(mb, old[i]);
247 : /* Defense line against incorrect plans */
248 162302 : if (actions > 0) {
249 8284 : msg = chkTypes(cntxt->usermodule, mb, FALSE);
250 8284 : if (!msg)
251 8284 : msg = chkFlow(mb);
252 8284 : if (!msg)
253 8284 : msg = chkDeclarations(mb);
254 : }
255 154018 : wrapup:
256 : /* keep actions taken as a fake argument */
257 445935 : (void) pushInt(mb, pci, actions);
258 :
259 445932 : if (alias)
260 162299 : GDKfree(alias);
261 445936 : if (list)
262 162303 : GDKfree(list);
263 445936 : if (hash)
264 162303 : GDKfree(hash);
265 445936 : if (old)
266 162303 : GDKfree(old);
267 445935 : return msg;
268 : }
|