Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "opt_costModel.h"
15 :
16 : /*
17 : * The cost formula are repetitive
18 : */
19 : #define newRows(W,X,Y,Z) \
20 : do { \
21 : c1 = getRowCnt(mb, getArg(p,W)); \
22 : c2 = getRowCnt(mb, getArg(p,X)); \
23 : /* just to ensure that rowcnt was/is never set to -1 */ \
24 : if (c1 == (BUN) -1 || c2 == (BUN) -1 \
25 : || c1 == BUN_NONE || c2 == BUN_NONE) \
26 : continue; \
27 : setRowCnt(mb, getArg(p,Z), (Y)); \
28 : } while (0)
29 :
30 : /*
31 : * The cost will be used in many places to make decisions.
32 : * Access should be fast.
33 : * The SQL front-end also makes the BAT index available as the
34 : * property bid. This can be used to access the BAT and involve
35 : * more properties into the decision procedure.
36 : * [to be done]
37 : * Also make sure you don't reuse variables, because then the
38 : * row count becomes non-deterministic.
39 : */
40 : str
41 493436 : OPTcostModelImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
42 : InstrPtr pci)
43 : {
44 493436 : int i;
45 493436 : BUN c1, c2;
46 493436 : InstrPtr p;
47 493436 : str msg = MAL_SUCCEED;
48 :
49 493436 : (void) cntxt;
50 493436 : (void) stk;
51 :
52 493436 : if (mb->inlineProp)
53 : return MAL_SUCCEED;
54 :
55 26610141 : for (i = 0; i < mb->stop; i++) {
56 26116705 : p = getInstrPtr(mb, i);
57 26116705 : if (getModuleId(p) == algebraRef) {
58 3535370 : if (getFunctionId(p) == selectRef
59 3513034 : || getFunctionId(p) == thetaselectRef) {
60 215491 : newRows(1, 2, (c1 > 2 ? c2 / 2 + 1 : c1 / 2 + 1), 0);
61 3319879 : } else if (getFunctionId(p) == selectNotNilRef
62 3319879 : || getFunctionId(p) == sortRef
63 3296118 : || getFunctionId(p) == projectRef) {
64 145395 : newRows(1, 1, c1, 0);
65 3174484 : } else if (getFunctionId(p) == joinRef
66 3099655 : || getFunctionId(p) == projectionRef
67 89339 : || getFunctionId(p) == bandjoinRef
68 89339 : || getFunctionId(p) == projectionpathRef) {
69 : /* assume 1-1 joins */
70 3085145 : newRows(1, 2, (c1 < c2 ? c1 : c2), 0);
71 89339 : } else if (getFunctionId(p) == crossRef) {
72 11010 : newRows(1, 2,
73 : ((log((double) c1) + log((double) c2) >
74 : log(INT_MAX) ? INT_MAX : c1 * c2 + 1)), 0);
75 : /* log sets errno if it cannot compute the log. This will then screw with code that checks errno */
76 11010 : if (errno == ERANGE || errno == EDOM) {
77 11010 : errno = 0;
78 : }
79 : }
80 22581335 : } else if (getModuleId(p) == batcalcRef) {
81 165837 : if (getFunctionId(p) == ifthenelseRef) {
82 4437 : if (isaBatType(getArgType(mb, p, 2))) {
83 96 : newRows(2, 2, c1, 0);
84 : } else {
85 4341 : newRows(3, 3, c1, 0);
86 : }
87 161400 : } else if (isaBatType(getArgType(mb, p, 1))) {
88 159643 : newRows(1, 1, c1, 0);
89 : } else {
90 1757 : newRows(2, 2, c2, 0);
91 : }
92 22415498 : } else if (getModuleId(p) == batstrRef) {
93 2992 : newRows(1, 1, c1, 0);
94 22412506 : } else if (getModuleId(p) == batRef) {
95 773906 : if (getFunctionId(p) == appendRef) {
96 : /*
97 : * Updates are a little more complicated, because you have to
98 : * propagate changes in the expected size up the expression tree.
99 : * For example, the SQL snippet:
100 : * _49:bat[:oid,:oid]{rows=0,bid=622} := sql.bind_dbat("sys","example",3);
101 : * _54 := bat.setWriteMode(_49);
102 : * bat.append(_54,_47,true);
103 : * shows what is produced when it encounters a deletion. If a non-empty
104 : * append is not properly passed back to _49, the emptySet
105 : * optimizer might remove the complete deletion code.
106 : * The same holds for replacement operations, which add information to
107 : * an initially empty insertion BAT.
108 : */
109 173635 : if (isaBatType(getArgType(mb, p, 2))) {
110 : /* insert BAT */
111 173601 : newRows(1, 2, (c1 + c2 + 1), 1);
112 : } else {
113 : /* insert scalars */
114 34 : newRows(1, 1, (c1 + 1), 1);
115 : }
116 600271 : } else if (getFunctionId(p) == deleteRef) {
117 6 : if (isaBatType(getArgType(mb, p, 2))) {
118 : /* delete BAT */
119 2 : newRows(1, 2, (c2 >= c1 ? 1 : c1 - c2), 1);
120 : } else {
121 : /* insert scalars */
122 4 : newRows(1, 1, (c1 <= 1 ? 1 : c1 - 1), 1);
123 : }
124 : }
125 21638600 : } else if (getModuleId(p) == groupRef) {
126 27150 : if (getFunctionId(p) == subgroupRef || getFunctionId(p) == groupRef) {
127 5986 : newRows(1, 1, (c1 / 10 + 1), 0);
128 : } else {
129 21164 : newRows(1, 1, c1, 0);
130 : }
131 21611450 : } else if (getModuleId(p) == aggrRef) {
132 57308 : if (getFunctionId(p) == sumRef || getFunctionId(p) == minRef
133 51629 : || getFunctionId(p) == maxRef || getFunctionId(p) == avgRef) {
134 7086 : newRows(1, 1, (c1 != 0 ? c1 : 1), 0);
135 50222 : } else if (getFunctionId(p) == countRef) {
136 42930 : newRows(1, 1, 1, 0);
137 : }
138 21554142 : } else if (p->token == ASSIGNsymbol && p->argc == 2) {
139 : /* copy the rows property */
140 2931918 : c1 = getRowCnt(mb, getArg(p, 1));
141 : /* just to ensure that rowcnt was/is never set to -1 */
142 2931918 : if (c1 != (BUN) -1 && c1 != BUN_NONE)
143 2931961 : setRowCnt(mb, getArg(p, 0), c1);
144 : }
145 : }
146 : /* Defense line against incorrect plans */
147 : /* plan remains unaffected */
148 : // msg = chkTypes(cntxt->usermodule, mb, FALSE);
149 : // if (!msg)
150 : // msg = chkFlow(mb);
151 : // if (!msg)
152 : // msg = chkDeclarations(mb);
153 :
154 : /* keep actions taken as a fake argument */
155 493436 : (void) pushInt(mb, pci, 1);
156 493436 : return msg;
157 : }
|