Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "opt_costModel.h"
15 :
16 : /*
17 : * The cost formula are repetitive
18 : */
19 : #define newRows(W,X,Y,Z) \
20 : do { \
21 : c1 = getRowCnt(mb, getArg(p,W)); \
22 : c2 = getRowCnt(mb, getArg(p,X)); \
23 : /* just to ensure that rowcnt was/is never set to -1 */ \
24 : if (c1 == (BUN) -1 || c2 == (BUN) -1 \
25 : || c1 == BUN_NONE || c2 == BUN_NONE) \
26 : continue; \
27 : setRowCnt(mb, getArg(p,Z), (Y)); \
28 : } while (0)
29 :
30 : /*
31 : * The cost will be used in many places to make decisions.
32 : * Access should be fast.
33 : * The SQL front-end also makes the BAT index available as the
34 : * property bid. This can be used to access the BAT and involve
35 : * more properties into the decision procedure.
36 : * [to be done]
37 : * Also make sure you don't reuse variables, because then the
38 : * row count becomes non-deterministic.
39 : */
40 : str
41 483992 : OPTcostModelImplementation(Client cntxt, MalBlkPtr mb, MalStkPtr stk,
42 : InstrPtr pci)
43 : {
44 483992 : int i;
45 483992 : BUN c1, c2;
46 483992 : InstrPtr p;
47 483992 : str msg = MAL_SUCCEED;
48 :
49 483992 : (void) cntxt;
50 483992 : (void) stk;
51 :
52 483992 : if (mb->inlineProp)
53 : return MAL_SUCCEED;
54 :
55 26119282 : for (i = 0; i < mb->stop; i++) {
56 25635290 : p = getInstrPtr(mb, i);
57 25635290 : if (getModuleId(p) == algebraRef) {
58 3560815 : if (getFunctionId(p) == selectRef
59 3518535 : || getFunctionId(p) == thetaselectRef) {
60 217717 : newRows(1, 2, (c1 > 2 ? c2 / 2 + 1 : c1 / 2 + 1), 0);
61 3343098 : } else if (getFunctionId(p) == selectNotNilRef
62 3343098 : || getFunctionId(p) == sortRef
63 3318768 : || getFunctionId(p) == projectRef) {
64 147585 : newRows(1, 1, c1, 0);
65 3195513 : } else if (getFunctionId(p) == joinRef
66 3119718 : || getFunctionId(p) == projectionRef
67 90942 : || getFunctionId(p) == bandjoinRef
68 90942 : || getFunctionId(p) == projectionpathRef) {
69 : /* assume 1-1 joins */
70 3104571 : newRows(1, 2, (c1 < c2 ? c1 : c2), 0);
71 90942 : } else if (getFunctionId(p) == crossRef) {
72 11537 : newRows(1, 2,
73 : ((log((double) c1) + log((double) c2) >
74 : log(INT_MAX) ? INT_MAX : c1 * c2 + 1)), 0);
75 : /* log sets errno if it cannot compute the log. This will then screw with code that checks errno */
76 11537 : if (errno == ERANGE || errno == EDOM) {
77 11537 : errno = 0;
78 : }
79 : }
80 22074475 : } else if (getModuleId(p) == batcalcRef) {
81 164247 : if (getFunctionId(p) == ifthenelseRef) {
82 4616 : if (isaBatType(getArgType(mb, p, 2))) {
83 96 : newRows(2, 2, c1, 0);
84 : } else {
85 4520 : newRows(3, 3, c1, 0);
86 : }
87 159631 : } else if (isaBatType(getArgType(mb, p, 1))) {
88 157801 : newRows(1, 1, c1, 0);
89 : } else {
90 1830 : newRows(2, 2, c2, 0);
91 : }
92 21910228 : } else if (getModuleId(p) == batstrRef) {
93 2993 : newRows(1, 1, c1, 0);
94 21907235 : } else if (getModuleId(p) == batRef) {
95 788216 : if (getFunctionId(p) == appendRef) {
96 : /*
97 : * Updates are a little more complicated, because you have to
98 : * propagate changes in the expected size up the expression tree.
99 : * For example, the SQL snippet:
100 : * _49:bat[:oid,:oid]{rows=0,bid=622} := sql.bind_dbat("sys","example",3);
101 : * _54 := bat.setWriteMode(_49);
102 : * bat.append(_54,_47,true);
103 : * shows what is produced when it encounters a deletion. If a non-empty
104 : * append is not properly passed back to _49, the emptySet
105 : * optimizer might remove the complete deletion code.
106 : * The same holds for replacement operations, which add information to
107 : * an initially empty insertion BAT.
108 : */
109 177485 : if (isaBatType(getArgType(mb, p, 2))) {
110 : /* insert BAT */
111 177451 : newRows(1, 2, (c1 + c2 + 1), 1);
112 : } else {
113 : /* insert scalars */
114 34 : newRows(1, 1, (c1 + 1), 1);
115 : }
116 610731 : } else if (getFunctionId(p) == deleteRef) {
117 6 : if (isaBatType(getArgType(mb, p, 2))) {
118 : /* delete BAT */
119 2 : newRows(1, 2, (c2 >= c1 ? 1 : c1 - c2), 1);
120 : } else {
121 : /* insert scalars */
122 4 : newRows(1, 1, (c1 <= 1 ? 1 : c1 - 1), 1);
123 : }
124 : }
125 21119019 : } else if (getModuleId(p) == groupRef) {
126 27775 : if (getFunctionId(p) == subgroupRef || getFunctionId(p) == groupRef) {
127 6161 : newRows(1, 1, (c1 / 10 + 1), 0);
128 : } else {
129 21614 : newRows(1, 1, c1, 0);
130 : }
131 21091244 : } else if (getModuleId(p) == aggrRef) {
132 57696 : if (getFunctionId(p) == sumRef || getFunctionId(p) == minRef
133 51893 : || getFunctionId(p) == maxRef || getFunctionId(p) == avgRef) {
134 7146 : newRows(1, 1, (c1 != 0 ? c1 : 1), 0);
135 50550 : } else if (getFunctionId(p) == countRef) {
136 42892 : newRows(1, 1, 1, 0);
137 : }
138 21033548 : } else if (p->token == ASSIGNsymbol && p->argc == 2) {
139 : /* copy the rows property */
140 2831596 : c1 = getRowCnt(mb, getArg(p, 1));
141 : /* just to ensure that rowcnt was/is never set to -1 */
142 2831596 : if (c1 != (BUN) -1 && c1 != BUN_NONE)
143 2831594 : setRowCnt(mb, getArg(p, 0), c1);
144 : }
145 : }
146 : /* Defense line against incorrect plans */
147 : /* plan remains unaffected */
148 : // msg = chkTypes(cntxt->usermodule, mb, FALSE);
149 : // if (!msg)
150 : // msg = chkFlow(mb);
151 : // if (!msg)
152 : // msg = chkDeclarations(mb);
153 :
154 : /* keep actions taken as a fake argument */
155 483992 : (void) pushInt(mb, pci, 1);
156 483992 : return msg;
157 : }
|