Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "opt_commonTerms.h"
15 : #include "mal_exception.h"
16 : /*
17 : * Caveat. A lot of time was lost due to constants that are indistinguisable
18 : * at the surface level. It requires the constant optimizer to be ran first.
19 : */
20 :
21 : /* The key for finding common terms is that they share variables.
22 : * Therefore we skip all constants, except for a constant only situation.
23 : */
24 :
25 : /*
26 : * Speed up simple insert operations by skipping the common terms.
27 : */
28 :
29 : __attribute__((__pure__))
30 : static inline bool
31 248171 : isProjectConst(const InstrRecord *p)
32 : {
33 248171 : return (getModuleId(p) == algebraRef && getFunctionId(p) == projectRef);
34 : }
35 :
36 : __attribute__((__pure__))
37 : static int
38 13319553 : hashInstruction(const MalBlkRecord *mb, const InstrRecord *p)
39 : {
40 13319553 : int i;
41 35418874 : for (i = p->argc - 1; i >= p->retc; i--)
42 35071273 : if (!isVarConstant(mb, getArg(p, i)))
43 12971952 : return getArg(p, i);
44 347601 : if (isVarConstant(mb, getArg(p, p->retc)))
45 347601 : return p->retc;
46 : return -1;
47 : }
48 :
49 : str
50 498998 : OPTcommonTermsImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
51 : InstrPtr pci)
52 : {
53 498998 : int i, j, k, barrier = 0, bailout = 0;
54 498998 : InstrPtr p, q;
55 498998 : int actions = 0;
56 498998 : int limit, slimit;
57 498998 : int duplicate;
58 498998 : int *alias = NULL;
59 498998 : int *hash = NULL, h;
60 498998 : int *list = NULL;
61 498998 : str msg = MAL_SUCCEED;
62 :
63 498998 : InstrPtr *old = NULL;
64 :
65 : /* catch simple insert operations */
66 498998 : if (isSimpleSQL(mb)) {
67 287327 : goto wrapup;
68 : }
69 :
70 211757 : (void) cntxt;
71 211757 : (void) stk;
72 211757 : alias = (int *) GDKzalloc(sizeof(int) * mb->vtop);
73 211779 : list = (int *) GDKzalloc(sizeof(int) * mb->stop);
74 211782 : hash = (int *) GDKzalloc(sizeof(int) * mb->vtop);
75 211755 : if (alias == NULL || list == NULL || hash == NULL) {
76 0 : msg = createException(MAL, "optimizer.commonTerms",
77 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
78 0 : goto wrapup;
79 : }
80 :
81 211755 : old = mb->stmt;
82 211755 : limit = mb->stop;
83 211755 : slimit = mb->ssize;
84 211755 : if (newMalBlkStmt(mb, mb->ssize) < 0) {
85 0 : msg = createException(MAL, "optimizer.commonTerms",
86 : SQLSTATE(HY013) MAL_MALLOC_FAIL);
87 0 : old = NULL;
88 0 : goto wrapup;
89 : }
90 :
91 16498726 : for (i = 0; mb->errors == NULL && i < limit; i++) {
92 16498726 : p = old[i];
93 16498726 : duplicate = 0;
94 :
95 90969654 : for (k = 0; k < p->argc; k++)
96 74470928 : if (alias[getArg(p, k)])
97 309422 : getArg(p, k) = alias[getArg(p, k)];
98 :
99 16498726 : if (p->token == ENDsymbol) {
100 200385 : pushInstruction(mb, p);
101 200409 : old[i] = NULL;
102 200409 : break;
103 : }
104 : /*
105 : * Any barrier block signals the end of this optimizer,
106 : * because the impact of the block can affect the common code eliminated.
107 : */
108 32596682 : barrier |= (p->barrier == BARRIERsymbol || p->barrier == CATCHsymbol
109 16298341 : || p->barrier == RETURNsymbol);
110 : /*
111 : * Also block further optimization when you have seen an assert().
112 : * This works particularly for SQL, because it is not easy to track
113 : * the BAT identifier aliases to look for updates. The sql.assert
114 : * at least tells us that an update is planned.
115 : * Like all optimizer decisions, it is safe to stop.
116 : */
117 16298341 : barrier |= getFunctionId(p) == assertRef;
118 16298341 : if (barrier || p->token == ASSIGNsymbol) {
119 11316 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped[%d]: %d %d\n", i, barrier,
120 : p->retc == p->argc);
121 11316 : pushInstruction(mb, p);
122 11331 : old[i] = NULL;
123 11331 : break;
124 : }
125 :
126 : /* when we enter a barrier block, we should ditch all previous instructions from consideration */
127 16287025 : if (p->barrier == BARRIERsymbol || p->barrier == CATCHsymbol
128 16287025 : || p->barrier == RETURNsymbol) {
129 0 : memset(list, 0, sizeof(int) * mb->stop);
130 0 : memset(hash, 0, sizeof(int) * mb->vtop);
131 : }
132 : /* side-effect producing operators can never be replaced */
133 : /* the same holds for function calls without an argument, it is
134 : * unclear where the results comes from (e.g. clock()) */
135 16287025 : if (mayhaveSideEffects(cntxt, mb, p, TRUE) || p->argc == p->retc) {
136 1834491 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped[%d] side-effect: %d\n", i,
137 : p->retc == p->argc);
138 1834491 : pushInstruction(mb, p);
139 1834480 : old[i] = NULL;
140 1834480 : continue;
141 : }
142 : /* simple SQL bind operations need not be merged, they are cheap
143 : * and/or can be duplicated eliminated elsewhere cheaper */
144 14453350 : if (getModuleId(p) == sqlRef && (getFunctionId(p) != tidRef && getFunctionId(p) != bindRef)) {
145 926474 : pushInstruction(mb, p);
146 926474 : old[i] = NULL;
147 926474 : continue;
148 : }
149 13526876 : if (getModuleId(p) == matRef) { /* mat.packIncrement has requirement on number of instructions (or that needs an update */
150 207873 : pushInstruction(mb, p);
151 207873 : old[i] = NULL;
152 207873 : continue;
153 : }
154 :
155 : /* from here we have a candidate to look for a match */
156 :
157 13319003 : h = hashInstruction(mb, p);
158 :
159 13319003 : TRC_DEBUG(MAL_OPTIMIZER, "Candidate[%d] look at list[%d] => %d\n", i, h,
160 : hash[h]);
161 13319003 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
162 :
163 13319118 : if (h < 0) {
164 0 : pushInstruction(mb, p);
165 0 : old[i] = NULL;
166 0 : continue;
167 : }
168 :
169 13319118 : bailout = 1024; // don't run over long collision list
170 : /* Look into the hash structure for matching instructions */
171 113816861 : for (j = hash[h]; j > 0 && bailout-- > 0; j = list[j]) {
172 100727008 : if ((q = getInstrPtr(mb, j))
173 100727008 : && getFunctionId(q) == getFunctionId(p)
174 74973229 : && getModuleId(q) == getModuleId(p)) {
175 74977219 : TRC_DEBUG(MAL_OPTIMIZER,
176 : "Candidate[%d->%d] %d %d :%d %d %d=%d %d %d %d\n", j,
177 : list[j], hasSameSignature(mb, p, q),
178 : hasSameArguments(mb, p, q), q->token != ASSIGNsymbol,
179 : list[getArg(q, q->argc - 1)], i, !hasCommonResults(p,
180 : q),
181 : !isUnsafeFunction(q), !isUpdateInstruction(q),
182 : isLinearFlow(q));
183 74977219 : traceInstruction(MAL_OPTIMIZER, mb, 0, q, LIST_MAL_ALL);
184 :
185 : /*
186 : * Simple assignments are not replaced either. They should be
187 : * handled by the alias removal part. All arguments should
188 : * be assigned their value before instruction p.
189 : */
190 74978993 : if (hasSameArguments(mb, p, q)
191 248189 : && hasSameSignature(mb, p, q)
192 248171 : && !hasCommonResults(p, q)
193 248171 : && !isUnsafeFunction(q)
194 248171 : && !isUpdateInstruction(q)
195 248171 : && !isProjectConst(q) && /* disable project(x,val), as its used for the result of case statements */
196 227135 : isLinearFlow(q)) {
197 227135 : if (safetyBarrier(p, q)) {
198 0 : TRC_DEBUG(MAL_OPTIMIZER, "Safety barrier reached\n");
199 : break;
200 : }
201 227135 : duplicate = 1;
202 227135 : clrFunction(p);
203 227135 : p->argc = p->retc;
204 466744 : for (k = 0; k < q->retc; k++) {
205 239609 : alias[getArg(p, k)] = getArg(q, k);
206 : /* we know the arguments fit so the instruction can safely be patched */
207 239609 : p = pushArgument(mb, p, getArg(q, k));
208 : }
209 :
210 227135 : TRC_DEBUG(MAL_OPTIMIZER, "Modified expression %d -> %d ",
211 : getArg(p, 0), getArg(p, 1));
212 227135 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
213 :
214 227135 : actions++;
215 227135 : break; /* end of search */
216 : }
217 25749789 : } else if (isUpdateInstruction(p)) {
218 0 : TRC_DEBUG(MAL_OPTIMIZER, "Skipped: %d %d\n",
219 : mayhaveSideEffects(cntxt, mb, q, TRUE),
220 : isUpdateInstruction(p));
221 0 : traceInstruction(MAL_OPTIMIZER, mb, 0, q, LIST_MAL_ALL);
222 : }
223 : }
224 :
225 13316988 : if (duplicate) {
226 227135 : pushInstruction(mb, p);
227 227135 : old[i] = NULL;
228 227135 : continue;
229 : }
230 : /* update the hash structure with another candidate for reuse */
231 13089853 : TRC_DEBUG(MAL_OPTIMIZER,
232 : "Update hash[%d] - look at arg '%d' hash '%d' list '%d'\n", i,
233 : getArg(p, p->argc - 1), h, hash[h]);
234 13089853 : traceInstruction(MAL_OPTIMIZER, mb, 0, p, LIST_MAL_ALL);
235 :
236 13090499 : if (!mayhaveSideEffects(cntxt, mb, p, TRUE) && p->argc != p->retc
237 26181903 : && isLinearFlow(p) && !isUnsafeFunction(p)
238 13091041 : && !isUpdateInstruction(p)) {
239 13090716 : list[i] = hash[h];
240 13090716 : hash[h] = i;
241 13090716 : pushInstruction(mb, p);
242 13091419 : old[i] = NULL;
243 : }
244 : }
245 49325615 : for (; i < slimit; i++)
246 49113848 : if (old[i])
247 5679558 : pushInstruction(mb, old[i]);
248 : /* Defense line against incorrect plans */
249 211767 : if (actions > 0) {
250 10094 : msg = chkTypes(cntxt->usermodule, mb, FALSE);
251 10094 : if (!msg)
252 10094 : msg = chkFlow(mb);
253 10094 : if (!msg)
254 10094 : msg = chkDeclarations(mb);
255 : }
256 201673 : wrapup:
257 : /* keep actions taken as a fake argument */
258 499094 : (void) pushInt(mb, pci, actions);
259 :
260 499098 : if (alias)
261 211771 : GDKfree(alias);
262 499108 : if (list)
263 211781 : GDKfree(list);
264 499110 : if (hash)
265 211783 : GDKfree(hash);
266 499099 : if (old)
267 211772 : GDKfree(old);
268 499110 : return msg;
269 : }
|