Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "opt_commonTerms.h"
15 : #include "mal_exception.h"
16 : /*
17 : * Caveat. A lot of time was lost due to constants that are indistinguishable
18 : * at the surface level. It requires the constant optimizer to be run first.
19 : */
20 :
21 : /* The key for finding common terms is that they share variables.
22 : * Therefore we skip all constants, except for a constant only situation.
23 : */
24 :
25 : /*
26 : * Speed up simple insert operations by skipping the common terms.
27 : */
28 :
29 : static inline bool __attribute__((__pure__))
30 240261 : isProjectConst(const InstrRecord *p)
31 : {
32 240261 : return (getModuleId(p) == algebraRef && getFunctionId(p) == projectRef);
33 : }
34 :
35 : static int __attribute__((__pure__))
36 13194900 : hashInstruction(const MalBlkRecord *mb, const InstrRecord *p)
37 : {
38 13194900 : int i;
39 35041521 : for (i = p->argc - 1; i >= p->retc; i--)
40 34699620 : if (!isVarConstant(mb, getArg(p, i)))
41 12852999 : return getArg(p, i);
42 341901 : if (isVarConstant(mb, getArg(p, p->retc)))
43 341901 : return p->retc;
44 : return -1;
45 : }
46 :
47 : str
48 483459 : OPTcommonTermsImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
49 : InstrPtr pci)
50 : {
51 483459 : int i, j, k, barrier = 0, bailout = 0;
52 483459 : InstrPtr p, q;
53 483459 : int actions = 0;
54 483459 : int limit, slimit;
55 483459 : int duplicate;
56 483459 : int *alias = NULL;
57 483459 : int *hash = NULL, h;
58 483459 : int *list = NULL;
59 483459 : str msg = MAL_SUCCEED;
60 :
61 483459 : InstrPtr *old = NULL;
62 :
63 : /* catch simple insert operations */
64 483459 : if (isSimpleSQL(mb)) {
65 283868 : goto wrapup;
66 : }
67 :
68 199594 : (void) cntxt;
69 199594 : (void) stk;
70 199594 : alias = (int *) GDKzalloc(sizeof(int) * mb->vtop);
71 199627 : list = (int *) GDKzalloc(sizeof(int) * mb->stop);
72 199630 : hash = (int *) GDKzalloc(sizeof(int) * mb->vtop);
73 199620 : if (alias == NULL || list == NULL || hash == NULL) {
74 0 : msg = createException(MAL, "optimizer.commonTerms",
75 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
76 0 : goto wrapup;
77 : }
78 :
79 199620 : old = mb->stmt;
80 199620 : limit = mb->stop;
81 199620 : slimit = mb->ssize;
82 199620 : if (newMalBlkStmt(mb, mb->ssize) < 0) {
83 0 : msg = createException(MAL, "optimizer.commonTerms",
84 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
85 0 : old = NULL;
86 0 : goto wrapup;
87 : }
88 :
89 16430434 : for (i = 0; mb->errors == NULL && i < limit; i++) {
90 16430434 : p = old[i];
91 16430434 : duplicate = 0;
92 :
93 90247819 : for (k = 0; k < p->argc; k++)
94 73817385 : if (alias[getArg(p, k)])
95 300361 : getArg(p, k) = alias[getArg(p, k)];
96 :
97 16430434 : if (p->token == ENDsymbol) {
98 199587 : pushInstruction(mb, p);
99 199606 : old[i] = NULL;
100 199606 : break;
101 : }
102 : /*
103 : * Any barrier block signals the end of this optimizer,
104 : * because the impact of the block can affect the common code eliminated.
105 : */
106 32461694 : barrier |= (p->barrier == BARRIERsymbol || p->barrier == CATCHsymbol
107 16230847 : || p->barrier == RETURNsymbol);
108 : /*
109 : * Also block further optimization when you have seen an assert().
110 : * This works particularly for SQL, because it is not easy to track
111 : * the BAT identifier aliases to look for updates. The sql.assert
112 : * at least tells us that an update is planned.
113 : * Like all optimizer decisions, it is safe to stop.
114 : */
115 16230847 : barrier |= getFunctionId(p) == assertRef;
116 16230847 : if (barrier || p->token == ASSIGNsymbol) {
117 257677 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped[%d]: %d %d\n", i, barrier,
118 : p->retc == p->argc);
119 257677 : pushInstruction(mb, p);
120 257714 : old[i] = NULL;
121 257714 : continue;
122 : }
123 :
124 : /* when we enter a barrier block, we should ditch all previous instructions from consideration */
125 15973170 : if (p->barrier == BARRIERsymbol || p->barrier == CATCHsymbol
126 15973170 : || p->barrier == RETURNsymbol) {
127 0 : memset(list, 0, sizeof(int) * mb->stop);
128 0 : memset(hash, 0, sizeof(int) * mb->vtop);
129 : }
130 : /* side-effect producing operators can never be replaced */
131 : /* the same holds for function calls without an argument, it is
132 : * unclear where the results comes from (e.g. clock()) */
133 15973170 : if (mayhaveSideEffects(cntxt, mb, p, TRUE) || p->argc == p->retc) {
134 1666908 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped[%d] side-effect: %d\n", i,
135 : p->retc == p->argc);
136 1666908 : pushInstruction(mb, p);
137 1666866 : old[i] = NULL;
138 1666866 : continue;
139 : }
140 : /* simple SQL bind operations need not be merged, they are cheap
141 : * and/or can be duplicated eliminated elsewhere cheaper */
142 14307199 : if (getModuleId(p) == sqlRef && (getFunctionId(p) != tidRef && getFunctionId(p) != bindRef)) {
143 905946 : pushInstruction(mb, p);
144 905946 : old[i] = NULL;
145 905946 : continue;
146 : }
147 13401253 : if (getModuleId(p) == matRef) { /* mat.packIncrement has requirement on number of instructions (or that needs an update */
148 206823 : pushInstruction(mb, p);
149 206823 : old[i] = NULL;
150 206823 : continue;
151 : }
152 :
153 : /* from here we have a candidate to look for a match */
154 :
155 13194430 : h = hashInstruction(mb, p);
156 :
157 13194430 : TRC_DEBUG(MAL_OPTIMIZER, "Candidate[%d] look at list[%d] => %d\n", i, h,
158 : hash[h]);
159 13194430 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
160 :
161 13194577 : if (h < 0) {
162 0 : pushInstruction(mb, p);
163 0 : old[i] = NULL;
164 0 : continue;
165 : }
166 :
167 13194577 : bailout = 1024; // don't run over long collision list
168 : /* Look into the hash structure for matching instructions */
169 112142551 : for (j = hash[h]; j > 0 && bailout-- > 0; j = list[j]) {
170 99169628 : if ((q = getInstrPtr(mb, j))
171 99169628 : && getFunctionId(q) == getFunctionId(p)
172 73727530 : && getModuleId(q) == getModuleId(p)) {
173 73731058 : TRC_DEBUG(MAL_OPTIMIZER,
174 : "Candidate[%d->%d] %d %d :%d %d %d=%d %d %d %d\n", j,
175 : list[j], hasSameSignature(mb, p, q),
176 : hasSameArguments(mb, p, q), q->token != ASSIGNsymbol,
177 : list[getArg(q, q->argc - 1)], i, !hasCommonResults(p,
178 : q),
179 : !isUnsafeFunction(q), !isUpdateInstruction(q),
180 : isLinearFlow(q));
181 73731058 : traceInstruction(MAL_OPTIMIZER, mb, 0, q, LIST_MAL_ALL);
182 :
183 : /*
184 : * Simple assignments are not replaced either. They should be
185 : * handled by the alias removal part. All arguments should
186 : * be assigned their value before instruction p.
187 : */
188 73734730 : if (hasSameArguments(mb, p, q)
189 240279 : && hasSameSignature(mb, p, q)
190 240261 : && !hasCommonResults(p, q)
191 240261 : && !isUnsafeFunction(q)
192 240261 : && !isUpdateInstruction(q)
193 240261 : && !isProjectConst(q) && /* disable project(x,val), as its used for the result of case statements */
194 219201 : isLinearFlow(q)) {
195 219201 : if (safetyBarrier(p, q)) {
196 0 : TRC_DEBUG(MAL_OPTIMIZER, "Safety barrier reached\n");
197 : break;
198 : }
199 219201 : duplicate = 1;
200 219201 : clrFunction(p);
201 219201 : p->argc = p->retc;
202 450231 : for (k = 0; k < q->retc; k++) {
203 231030 : alias[getArg(p, k)] = getArg(q, k);
204 : /* we know the arguments fit so the instruction can safely be patched */
205 231030 : p = pushArgument(mb, p, getArg(q, k));
206 : }
207 :
208 219201 : TRC_DEBUG(MAL_OPTIMIZER, "Modified expression %d -> %d ",
209 : getArg(p, 0), getArg(p, 1));
210 219201 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
211 :
212 219201 : actions++;
213 219201 : break; /* end of search */
214 : }
215 25438570 : } else if (isUpdateInstruction(p)) {
216 0 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped: %d %d\n",
217 : mayhaveSideEffects(cntxt, mb, q, TRUE),
218 : isUpdateInstruction(p));
219 0 : traceInstruction(MAL_OPTIMIZER, mb, 0, q, LIST_MAL_ALL);
220 : }
221 : }
222 :
223 13192124 : if (duplicate) {
224 219201 : pushInstruction(mb, p);
225 219201 : old[i] = NULL;
226 219201 : continue;
227 : }
228 : /* update the hash structure with another candidate for reuse */
229 12972923 : TRC_DEBUG(MAL_OPTIMIZER,
230 : "Update hash[%d] - look at arg '%d' hash '%d' list '%d'\n", i,
231 : getArg(p, p->argc - 1), h, hash[h]);
232 12972923 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
233 :
234 12973535 : if (!mayhaveSideEffects(cntxt, mb, p, TRUE) && p->argc != p->retc
235 25948265 : && isLinearFlow(p) && !isUnsafeFunction(p)
236 12974213 : && !isUpdateInstruction(p)) {
237 12974036 : list[i] = hash[h];
238 12974036 : hash[h] = i;
239 12974036 : pushInstruction(mb, p);
240 12974599 : old[i] = NULL;
241 : }
242 : }
243 46181717 : for (; i < slimit; i++)
244 45982087 : if (old[i])
245 5055471 : pushInstruction(mb, old[i]);
246 : /* Defense line against incorrect plans */
247 199630 : if (actions > 0) {
248 10058 : msg = chkTypes(cntxt->usermodule, mb, FALSE);
249 10058 : if (!msg)
250 10058 : msg = chkFlow(mb);
251 10058 : if (!msg)
252 10058 : msg = chkDeclarations(mb);
253 : }
254 189572 : wrapup:
255 : /* keep actions taken as a fake argument */
256 483498 : (void) pushInt(mb, pci, actions);
257 :
258 483496 : if (alias)
259 199628 : GDKfree(alias);
260 483496 : if (list)
261 199628 : GDKfree(list);
262 483501 : if (hash)
263 199633 : GDKfree(hash);
264 483461 : if (old)
265 199593 : GDKfree(old);
266 483501 : return msg;
267 : }
|