Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. J. Nes
15 : *
16 : * @* Transaction management
17 : * The Transaction Manager maintains the buffer of (permanent) BATS
18 : * held resident. Entries from the BAT buffer are always accessed by
19 : * BAT id. A BAT becomes permanent by assigning a name with
20 : * @%BBPrename@. Access to the transaction table is regulated by a
21 : * semaphore.
22 : */
23 : #include "monetdb_config.h"
24 : #include "gdk.h"
25 : #include "gdk_private.h"
26 :
27 : /*
28 : * The physical (disk) commit protocol is handled mostly by
29 : * BBPsync. Once a commit succeeded, there is the task of removing
30 : * ex-persistent bats (those that still were persistent in the
31 : * previous commit, but were made transient in this transaction).
32 : * Notice that such ex- (i.e. non-) persistent bats are not backed up
33 : * by the BBPsync protocol, so we cannot start deleting them until we
34 : * know the commit will succeed.
35 : *
36 : * Another hairy issue is the delta statuses in BATs. These provide a
37 : * fast way to perform a transaction abort (HOT-abort, instead of
38 : * COLD-abort, which is achieved by the BBP recovery in a database
39 : * restart). Hot-abort functionality has not been important in MonetDB
40 : * for now, so it is not well-tested. The problem here is that if a
41 : * commit fails in the physical part (BBPsync), we do not have
42 : * sufficient information to roll back the delta statuses.
43 : *
44 : * So a 'feature' of the abort is that after a failed commit,
45 : * in-memory we *will* commit the transaction. Subsequent commits can
46 : * retry to achieve a physical commit. The only way to abort in such a
47 : * situation is COLD-abort: quit the server and restart, so you get
48 : * the recovered disk images.
49 : */
50 :
51 : /* in the commit epilogue, the BBP-status of the bats is changed to
52 : * reflect their presence in the succeeded checkpoint. Also bats from
53 : * the previous checkpoint that were deleted now are physically
54 : * destroyed.
55 : */
56 : static void
57 12906 : epilogue(int cnt, bat *subcommit, bool locked)
58 : {
59 12906 : int i = 0;
60 :
61 12906 : while (++i < cnt) {
62 1985219 : bat bid = subcommit ? subcommit[i] : i;
63 1985219 : BAT *b;
64 :
65 1985219 : if (BBP_status(bid) & BBPPERSISTENT) {
66 : /* first turn off BBPNEW, then turn on
67 : * BBPEXISTING so that concurrent BATassertProps
68 : * doesn't fail */
69 1791711 : BBP_status_off(bid, BBPNEW);
70 1791711 : BBP_status_on(bid, BBPEXISTING);
71 193508 : } else if ((BBP_status(bid) & (BBPDELETED|BBPLOADED)) == (BBPDELETED|BBPLOADED)) {
72 : /* check mmap modes of bats that are now
73 : * transient. this has to be done after the
74 : * commit succeeded, because the mmap modes
75 : * allowed on transient bats would be
76 : * dangerous on persistent bats. If the commit
77 : * failed, the already processed bats that
78 : * would become transient after the commit,
79 : * but didn't due to the failure, would be a
80 : * consistency risk.
81 : */
82 56712 : b = BBP_desc(bid);
83 : /* check mmap modes */
84 56712 : MT_lock_set(&b->theaplock);
85 56712 : if (BATcheckmodes(b, true) != GDK_SUCCEED)
86 0 : GDKwarning("BATcheckmodes failed\n");
87 56712 : MT_lock_unset(&b->theaplock);
88 : }
89 1985219 : b = BBP_desc(bid);
90 1985219 : if (b->batCacheid != 0 && b->ttype >= 0 && ATOMvarsized(b->ttype)) {
91 417159 : MT_lock_set(&b->theaplock);
92 417159 : ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20);
93 417159 : if (p != NULL) {
94 413180 : Heap *tail = p->val.pval;
95 413180 : assert(b->oldtail != NULL);
96 413180 : BATrmprop_nolock(b, (enum prop_t) 20);
97 413180 : if (b->oldtail != (Heap *) 1)
98 2037 : HEAPdecref(b->oldtail, true);
99 413180 : if (tail == b->theap ||
100 1 : strcmp(tail->filename,
101 1 : b->theap->filename) == 0) {
102 : /* no upgrades done since saving
103 : * started */
104 413179 : b->oldtail = NULL;
105 413179 : HEAPdecref(tail, false);
106 : } else {
107 1 : b->oldtail = tail;
108 1 : ATOMIC_OR(&tail->refs, DELAYEDREMOVE);
109 : }
110 : }
111 417159 : MT_lock_unset(&b->theaplock);
112 : }
113 1985219 : if (!locked)
114 1974160 : MT_lock_set(&GDKswapLock(bid));
115 1985219 : if ((BBP_status(bid) & BBPDELETED) && BBP_refs(bid) <= 0 && BBP_lrefs(bid) <= 0) {
116 144 : if (!locked)
117 144 : MT_lock_unset(&GDKswapLock(bid));
118 144 : b = BBPquickdesc(bid);
119 :
120 : /* the unloaded ones are deleted without
121 : * loading deleted disk images */
122 144 : if (b) {
123 144 : BATdelete(b);
124 : }
125 144 : BBPclear(bid); /* also clears BBP_status */
126 : } else {
127 1985075 : BBP_status_off(bid, BBPDELETED | BBPSWAPPED | BBPNEW);
128 1985075 : if (!locked)
129 3972141 : MT_lock_unset(&GDKswapLock(bid));
130 : }
131 : }
132 12906 : GDKclrerr();
133 12906 : }
134 :
135 : /*
136 : * @- TMcommit
137 : * global commit without any multi-threaded access assumptions, thus
138 : * taking all BBP locks. It creates a new database checkpoint.
139 : */
140 : gdk_return
141 8 : TMcommit(void)
142 : {
143 8 : gdk_return ret = GDK_FAIL;
144 :
145 : /* commit with the BBP globally locked */
146 8 : BBPlock();
147 8 : if (BBPsync(getBBPsize(), NULL, NULL, getBBPlogno()) == GDK_SUCCEED) {
148 8 : epilogue(getBBPsize(), NULL, true);
149 8 : ret = GDK_SUCCEED;
150 : }
151 8 : BBPunlock();
152 8 : return ret;
153 : }
154 :
155 : /*
156 : * @- TMsubcommit
157 : *
158 : * Create a new checkpoint that is equal to the previous, with the
159 : * exception that for the passed list of bats, the current state
160 : * will be reflected in the new checkpoint.
161 : *
162 : * On the bats in this list we assume exclusive access during the
163 : * operation.
164 : *
165 : * This operation is useful for e.g. adding a new XQuery document or
166 : * SQL table to the committed state (after bulk-load). Or for dropping
167 : * a table or doc, without forcing the total database to be clean,
168 : * which may require a lot of I/O.
169 : *
170 : * We expect the globally locked phase (BBPsync) to take little time
171 : * (<100ms) as only the BBP.dir is written out; and for the existing
172 : * bats that were modified, only some heap moves are done (moved from
173 : * BAKDIR to SUBDIR). The atomic commit for sub-commit is the rename
174 : * of SUBDIR to DELDIR.
175 : *
176 : * As it does not take the BBP-locks (thanks to the assumption that
177 : * access is exclusive), the concurrency impact of subcommit is also
178 : * much lighter to ongoing concurrent query and update facilities than
179 : * a real global TMcommit.
180 : */
181 : gdk_return
182 12901 : TMsubcommit_list(bat *restrict subcommit, BUN *restrict sizes, int cnt, lng logno)
183 : {
184 12901 : int xx;
185 12901 : gdk_return ret = GDK_FAIL;
186 :
187 12901 : assert(cnt > 0);
188 12901 : assert(subcommit[0] == 0); /* BBP artifact: slot 0 in the array will be ignored */
189 :
190 12901 : if (GDKinmemory(0))
191 : return GDK_SUCCEED;
192 :
193 : /* sort the list on BAT id */
194 13148 : GDKqsort(subcommit + 1, sizes ? sizes + 1 : NULL, NULL, cnt - 1, sizeof(bat), sizes ? sizeof(BUN) : 0, TYPE_int, false, false);
195 :
196 12898 : assert(cnt == 1 || subcommit[1] > 0); /* all values > 0 */
197 : /* de-duplication of BAT ids in subcommit list
198 : * this is needed because of legacy reasons (database
199 : * upgrade) */
200 1974160 : for (xx = 2; xx < cnt; xx++) {
201 1961262 : if (subcommit[xx-1] == subcommit[xx]) {
202 0 : int i;
203 0 : cnt--;
204 0 : for (i = xx; i < cnt; i++)
205 0 : subcommit[i] = subcommit[i+1];
206 0 : if (sizes) {
207 0 : for (i = xx; i < cnt; i++)
208 0 : sizes[i] = sizes[i+1];
209 : }
210 : }
211 : }
212 : /* lock just prevents other global (sub-)commits */
213 12898 : BBPtmlock();
214 12898 : if (logno < 0)
215 26 : logno = getBBPlogno();
216 12898 : if (BBPsync(cnt, subcommit, sizes, logno) == GDK_SUCCEED) { /* write BBP.dir (++) */
217 12898 : epilogue(cnt, subcommit, false);
218 12898 : ret = GDK_SUCCEED;
219 : }
220 12898 : BBPtmunlock();
221 12898 : return ret;
222 : }
|