Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /*
14 : * @a M. L. Kersten, P. Boncz, N. J. Nes
15 : *
16 : * @* Transaction management
17 : * The Transaction Manager maintains the buffer of (permanent) BATS
18 : * held resident. Entries from the BAT buffer are always accessed by
19 : * BAT id. A BAT becomes permanent by assigning a name with
20 : * @%BBPrename@. Access to the transaction table is regulated by a
21 : * semaphore.
22 : */
23 : #include "monetdb_config.h"
24 : #include "gdk.h"
25 : #include "gdk_private.h"
26 :
27 : /*
28 : * The physical (disk) commit protocol is handled mostly by
29 : * BBPsync. Once a commit succeeded, there is the task of removing
30 : * ex-persistent bats (those that still were persistent in the
31 : * previous commit, but were made transient in this transaction).
32 : * Notice that such ex- (i.e. non-) persistent bats are not backed up
33 : * by the BBPsync protocol, so we can only start deleting once we know
34 : * the commit has succeeded.
35 : *
36 : * Another hairy issue is the delta statuses in BATs. These provide a
37 : * fast way to perform a transaction abort (HOT-abort, instead of
38 : * COLD-abort, which is achieved by the BBP recovery in a database
39 : * restart). Hot-abort functionality has not been important in MonetDB
40 : * for now, so it is not well-tested. The problem here is that if a
41 : * commit fails in the physical part (BBPsync), we do not have sufficient
42 : * information to roll back the delta statuses.
43 : *
44 : * So a 'feature' of the abort is that after a failed commit,
45 : * in-memory we *will* commit the transaction. Subsequent commits can
46 : * retry to achieve a physical commit. The only way to abort in such a
47 : * situation is COLD-abort: quit the server and restart, so you get
48 : * the recovered disk images.
49 : */
50 :
51 : /* in the commit epilogue, the BBP-status of the bats is changed to
52 : * reflect their presence in the succeeded checkpoint. Also bats from
53 : * the previous checkpoint that were deleted now are physically
54 : * destroyed.
55 : */
56 : static void
57 11933 : epilogue(int cnt, bat *subcommit, bool locked)
58 : {
59 11933 : int i = 0;
60 :
61 11933 : while (++i < cnt) {
62 1816975 : bat bid = subcommit ? subcommit[i] : i;
63 1816975 : BAT *b;
64 :
65 1816975 : if (BBP_status(bid) & BBPPERSISTENT) {
66 : /* first turn off BBPNEW, then turn on
67 : * BBPEXISTING so that concurrent BATassertProps
68 : * doesn't fail */
69 1625486 : BBP_status_off(bid, BBPNEW);
70 1625486 : BBP_status_on(bid, BBPEXISTING);
71 191489 : } else if (BBP_status(bid) & BBPDELETED) {
72 : /* check mmap modes of bats that are now
73 : * transient. this has to be done after the
74 : * commit succeeded, because the mmap modes
75 : * allowed on transient bats would be
76 : * dangerous on persistent bats. If the commit
77 : * failed, the already processed bats that
78 : * would become transient after the commit,
79 : * but didn't due to the failure, would be a
80 : * consistency risk.
81 : */
82 60115 : b = BBP_cache(bid);
83 60115 : if (b) {
84 : /* check mmap modes */
85 55035 : MT_lock_set(&b->theaplock);
86 55035 : if (BATcheckmodes(b, true) != GDK_SUCCEED)
87 0 : GDKwarning("BATcheckmodes failed\n");
88 55035 : MT_lock_unset(&b->theaplock);
89 : }
90 : }
91 1816975 : b = BBP_desc(bid);
92 1816975 : if (b && b->ttype >= 0 && ATOMvarsized(b->ttype)) {
93 364620 : MT_lock_set(&b->theaplock);
94 364620 : ValPtr p = BATgetprop_nolock(b, (enum prop_t) 20);
95 364620 : if (p != NULL) {
96 360919 : Heap *tail = p->val.pval;
97 360919 : assert(b->oldtail != NULL);
98 360919 : BATrmprop_nolock(b, (enum prop_t) 20);
99 360919 : if (b->oldtail != (Heap *) 1)
100 2011 : HEAPdecref(b->oldtail, true);
101 360919 : if (tail == b->theap ||
102 1 : strcmp(tail->filename,
103 1 : b->theap->filename) == 0) {
104 : /* no upgrades done since saving
105 : * started */
106 360918 : b->oldtail = NULL;
107 360918 : HEAPdecref(tail, false);
108 : } else {
109 1 : b->oldtail = tail;
110 1 : ATOMIC_OR(&tail->refs, DELAYEDREMOVE);
111 : }
112 : }
113 364620 : MT_lock_unset(&b->theaplock);
114 : }
115 1816975 : if (!locked)
116 1806447 : MT_lock_set(&GDKswapLock(bid));
117 1816975 : if ((BBP_status(bid) & BBPDELETED) && BBP_refs(bid) <= 0 && BBP_lrefs(bid) <= 0) {
118 134 : if (!locked)
119 134 : MT_lock_unset(&GDKswapLock(bid));
120 134 : b = BBPquickdesc(bid);
121 :
122 : /* the unloaded ones are deleted without
123 : * loading deleted disk images */
124 134 : if (b) {
125 134 : BATdelete(b);
126 : }
127 134 : BBPclear(bid); /* also clears BBP_status */
128 : } else {
129 1816841 : BBP_status_off(bid, BBPDELETED | BBPSWAPPED | BBPNEW);
130 1816841 : if (!locked)
131 3635221 : MT_lock_unset(&GDKswapLock(bid));
132 : }
133 : }
134 11933 : GDKclrerr();
135 11933 : }
136 :
137 : /*
138 : * @- TMcommit
139 : * global commit without any multi-threaded access assumptions, thus
140 : * taking all BBP locks. It creates a new database checkpoint.
141 : */
142 : gdk_return
143 8 : TMcommit(void)
144 : {
145 8 : gdk_return ret = GDK_FAIL;
146 :
147 : /* commit with the BBP globally locked */
148 8 : BBPlock();
149 8 : if (BBPsync(getBBPsize(), NULL, NULL, getBBPlogno(), getBBPtransid()) == GDK_SUCCEED) {
150 8 : epilogue(getBBPsize(), NULL, true);
151 8 : ret = GDK_SUCCEED;
152 : }
153 8 : BBPunlock();
154 8 : return ret;
155 : }
156 :
157 : /*
158 : * @- TMsubcommit
159 : *
160 : * Create a new checkpoint that is equal to the previous, with the
161 : * exception that for the passed list of batnames, the current state
162 : * will be reflected in the new checkpoint.
163 : *
164 : * On the bats in this list we assume exclusive access during the
165 : * operation.
166 : *
167 : * This operation is useful for e.g. adding a new XQuery document or
168 : * SQL table to the committed state (after bulk-load). Or for dropping
169 : * a table or doc, without forcing the total database to be clean,
170 : * which may require a lot of I/O.
171 : *
172 : * We expect the globally locked phase (BBPsync) to take little time
173 : * (<100ms) as only the BBP.dir is written out; and for the existing
174 : * bats that were modified, only some heap moves are done (moved from
175 : * BAKDIR to SUBDIR). The atomic commit for sub-commit is the rename
176 : * of SUBDIR to DELDIR.
177 : *
178 : * As it does not take the BBP-locks (thanks to the assumption that
179 : * access is exclusive), the concurrency impact of subcommit is also
180 : * much lighter to ongoing concurrent query and update facilities than
181 : * a real global TMcommit.
182 : */
183 : gdk_return
184 11928 : TMsubcommit_list(bat *restrict subcommit, BUN *restrict sizes, int cnt, lng logno, lng transid)
185 : {
186 11928 : int xx;
187 11928 : gdk_return ret = GDK_FAIL;
188 :
189 11928 : assert(cnt > 0);
190 11928 : assert(subcommit[0] == 0); /* BBP artifact: slot 0 in the array will be ignored */
191 :
192 11928 : if (GDKinmemory(0))
193 : return GDK_SUCCEED;
194 :
195 : /* sort the list on BAT id */
196 12171 : GDKqsort(subcommit + 1, sizes ? sizes + 1 : NULL, NULL, cnt - 1, sizeof(bat), sizes ? sizeof(BUN) : 0, TYPE_bat, false, false);
197 :
198 11925 : assert(cnt == 1 || subcommit[1] > 0); /* all values > 0 */
199 : /* de-duplication of BAT ids in subcommit list
200 : * this is needed because of legacy reasons (database
201 : * upgrade) */
202 1806447 : for (xx = 2; xx < cnt; xx++) {
203 1794522 : if (subcommit[xx-1] == subcommit[xx]) {
204 0 : int i;
205 0 : cnt--;
206 0 : for (i = xx; i < cnt; i++)
207 0 : subcommit[i] = subcommit[i+1];
208 0 : if (sizes) {
209 0 : for (i = xx; i < cnt; i++)
210 0 : sizes[i] = sizes[i+1];
211 : }
212 : }
213 : }
214 : /* lock just prevents other global (sub-)commits */
215 11925 : BBPtmlock();
216 11925 : if (logno < 0)
217 24 : logno = getBBPlogno();
218 11925 : if (transid < 0)
219 24 : transid = getBBPtransid();
220 11925 : if (BBPsync(cnt, subcommit, sizes, logno, transid) == GDK_SUCCEED) { /* write BBP.dir (++) */
221 11925 : epilogue(cnt, subcommit, false);
222 11925 : ret = GDK_SUCCEED;
223 : }
224 11925 : BBPtmunlock();
225 11925 : return ret;
226 : }
227 :
228 : gdk_return
229 0 : TMsubcommit(BAT *b)
230 : {
231 0 : int cnt = 1;
232 0 : gdk_return ret = GDK_FAIL;
233 0 : bat *subcommit;
234 0 : BUN p, q;
235 :
236 0 : subcommit = GDKmalloc((BATcount(b) + 1) * sizeof(bat));
237 0 : if (subcommit == NULL)
238 : return GDK_FAIL;
239 :
240 0 : BATiter bi = bat_iterator(b);
241 0 : subcommit[0] = 0; /* BBP artifact: slot 0 in the array will be ignored */
242 : /* collect the list and save the new bats outside any
243 : * locking */
244 0 : BATloop(b, p, q) {
245 0 : bat bid = BBPindex((str) BUNtvar(bi, p));
246 :
247 0 : if (bid)
248 0 : subcommit[cnt++] = bid;
249 : }
250 0 : bat_iterator_end(&bi);
251 :
252 0 : ret = TMsubcommit_list(subcommit, NULL, cnt, -1, -1);
253 0 : GDKfree(subcommit);
254 0 : return ret;
255 : }
|