Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include "monetdb_config.h"
14 : #include "gdk.h"
15 : #include "gdk_private.h"
16 :
17 : //TODO Check if we need to input RTREEdestroy into drop_index func in sql_cat.c
18 : //TODO Should you check if the parent BAT is null?
19 :
20 : //TODO Re-check the conditions
21 : /* Conditions to persist the RTree:
22 : * - BAT has to be persistent
23 : * - No deleted rows (when does batInserted update?)
24 : * - The heap is not dirty -> no new values
25 : * - DB Farm is persistent i.e. not in memory
26 : */
27 : #ifdef HAVE_RTREE
28 :
29 : #ifndef SIZEOF_RTREE_COORD_T
30 : #define SIZEOF_RTREE_COORD_T 4
31 : #endif
32 : #include <rtree.h>
33 :
34 : struct RTree {
35 : ATOMIC_TYPE refs; /* counter for logical references to the rtree */
36 : rtree_t *rtree; /* rtree structure */
37 : bool destroy; /* destroy rtree when there are no more logical references */
38 : };
39 :
40 : /* this is a copy from the geom module */
41 : typedef struct mbr {
42 : float xmin;
43 : float ymin;
44 : float xmax;
45 : float ymax;
46 :
47 : } mbr;
48 :
49 : static bool
50 : RTREEpersistcheck(BAT *b)
51 : {
52 : return ((BBP_status(b->batCacheid) & BBPEXISTING)
53 : && b->batInserted == b->batCount
54 : && !b->theap->dirty
55 : && !GDKinmemory(b->theap->farmid));
56 : }
57 :
58 : static void
59 : RTREEdecref(BAT *b)
60 : {
61 : ATOMIC_BASE_TYPE refs = ATOMIC_DEC(&b->trtree->refs);
62 : //If RTree is marked for destruction and there are no refs, destroy the RTree
63 : if (b->trtree->destroy && refs == 0) {
64 : rtree_destroy(b->trtree->rtree);
65 : b->trtree->rtree = NULL;
66 : GDKfree(b->trtree);
67 : b->trtree = NULL;
68 : }
69 :
70 : }
71 :
72 : static void
73 : RTREEincref(BAT *b)
74 : {
75 : (void) ATOMIC_INC(&b->trtree->refs);
76 : }
77 :
78 : // Persist rtree to disk if the conditions are right
79 : static gdk_return
80 : persistRtree(BAT *b)
81 : {
82 : if (RTREEpersistcheck(b)) {
83 : //TODO Necessary?
84 : BBPfix(b->batCacheid);
85 : rtree_t *rtree = b->trtree->rtree;
86 :
87 : if (rtree) {
88 : const char * filename = BBP_physical(b->batCacheid);
89 : int farmid = b->theap->farmid;
90 :
91 : FILE *file_stream = GDKfilelocate(farmid, filename, "w", "bsrt");
92 :
93 : if (file_stream != NULL) {
94 : int err;
95 : if ((err = rtree_bsrt_write(rtree,file_stream)) != 0) {
96 : GDKerror("%s", rtree_strerror(err));
97 : fclose(file_stream);
98 : BBPunfix(b->batCacheid);
99 : return GDK_FAIL;
100 : }
101 :
102 : if (fflush(file_stream) == EOF ||
103 : (!(ATOMIC_GET(&GDKdebug) & NOSYNCMASK)
104 : #if defined(NATIVE_WIN32)
105 : && _commit(_fileno(file_stream)) < 0
106 : #elif defined(HAVE_FDATASYNC)
107 : && fdatasync(fileno(file_stream)) < 0
108 : #elif defined(HAVE_FSYNC)
109 : && fsync(fileno(file_stream)) < 0
110 : #endif
111 : )) {
112 : GDKsyserror("Syncing %s.bsrt failed\n", filename);
113 : }
114 : fclose(file_stream);
115 : }
116 : else {
117 : return GDK_FAIL;
118 : }
119 : }
120 : BBPunfix(b->batCacheid);
121 : }
122 : //TODO Should we just return success if the rtree is not persisted?
123 : return GDK_SUCCEED;
124 : }
125 :
126 : static gdk_return
127 : BATcheckrtree(BAT *b)
128 : {
129 : const char * filename = BBP_physical(b->batCacheid);
130 : int farmid = b->theap->farmid;
131 :
132 : //Do we have the rtree on file?
133 : FILE *file_stream = GDKfilelocate(farmid, filename, "r", "bsrt");
134 : if (file_stream != NULL) {
135 : rtree_t* rtree = rtree_bsrt_read(file_stream);
136 : if (!rtree) {
137 : GDKerror("%s", errno != 0 ? strerror(errno) : "Failed rtree_bsrt_read");
138 : fclose(file_stream);
139 : return GDK_FAIL;
140 : }
141 : b->trtree = GDKmalloc(sizeof(struct RTree));
142 : *b->trtree = (struct RTree) {
143 : .rtree = rtree,
144 : .destroy = false,
145 : .refs = ATOMIC_VAR_INIT(1),
146 : };
147 : fclose(file_stream);
148 : return GDK_SUCCEED;
149 : }
150 : else {
151 : return GDK_FAIL;
152 : }
153 : }
154 :
155 : //Check if RTree exists on file (previously created index)
156 : static bool
157 : RTREEexistsonfile(BAT *b)
158 : {
159 : const char * filename = BBP_physical(b->batCacheid);
160 :
161 : if (!b->theap) return false;
162 :
163 : int farmid = b->theap->farmid;
164 : int fd = GDKfdlocate(farmid, filename, "r", "bsrt");
165 :
166 : //Do we have the rtree on file?
167 : if (fd == -1) {
168 : return false;
169 : } else {
170 : close(fd);
171 : return true;
172 : }
173 : }
174 :
175 : //Check if RTree exists
176 : //We also check if it exists on file. If the index is not loaded, it will be
177 : //TODO Check for destroy -> it does not exist if destroy
178 : bool
179 : RTREEexists(BAT *b)
180 : {
181 : BAT *pb;
182 : bool ret;
183 : if (VIEWtparent(b)) {
184 : pb = BBP_desc(VIEWtparent(b));
185 : } else {
186 : pb = b;
187 : }
188 :
189 : MT_lock_set(&pb->batIdxLock);
190 : ret = (pb->trtree != NULL || RTREEexistsonfile(pb));
191 : MT_lock_unset(&pb->batIdxLock);
192 :
193 : return ret;
194 :
195 : }
196 :
197 : bool
198 : RTREEexists_bid(bat bid)
199 : {
200 : BAT *b;
201 : bool ret;
202 : if ((b = BATdescriptor(bid)) == NULL)
203 : return false;
204 : ret = RTREEexists(b);
205 : BBPunfix(b->batCacheid);
206 : return ret;
207 : }
208 :
209 : gdk_return
210 : BATrtree(BAT *wkb, BAT *mbrb)
211 : {
212 : BAT *pb;
213 : BATiter bi;
214 : rtree_t *rtree = NULL;
215 : struct canditer ci;
216 :
217 : //Check for a parent BAT of wkb, load if exists
218 : if (VIEWtparent(wkb)) {
219 : pb = BBP_desc(VIEWtparent(wkb));
220 : } else {
221 : pb = wkb;
222 : }
223 :
224 : //Check if rtree already exists
225 : if (pb->trtree == NULL) {
226 : //If it doesn't exist, take the lock to create/get the rtree
227 : MT_lock_set(&pb->batIdxLock);
228 :
229 : //Try to load it from disk
230 : if (BATcheckrtree(pb) == GDK_SUCCEED && pb->trtree != NULL) {
231 : MT_lock_unset(&pb->batIdxLock);
232 : return GDK_SUCCEED;
233 : }
234 :
235 : //First arg are dimensions: we only allow x, y
236 : //Second arg are flags: split strategy and nodes-per-page
237 : if ((rtree = rtree_new(2, RTREE_DEFAULT)) == NULL) {
238 : GDKerror("rtree_new failed\n");
239 : return GDK_FAIL;
240 : }
241 : bi = bat_iterator(mbrb);
242 : canditer_init(&ci, mbrb,NULL);
243 :
244 : for (BUN i = 0; i < ci.ncand; i++) {
245 : oid p = canditer_next(&ci) - mbrb->hseqbase;
246 : const mbr *inMBR = (const mbr *)BUNtail(bi, p);
247 :
248 : rtree_id_t rtree_id = i;
249 : rtree_coord_t rect[4];
250 : rect[0] = inMBR->xmin;
251 : rect[1] = inMBR->ymin;
252 : rect[2] = inMBR->xmax;
253 : rect[3] = inMBR->ymax;
254 : rtree_add_rect(rtree,rtree_id,rect);
255 : }
256 : bat_iterator_end(&bi);
257 : pb->trtree = GDKmalloc(sizeof(struct RTree));
258 : *pb->trtree = (struct RTree) {
259 : .rtree = rtree,
260 : .destroy = false,
261 : .refs = ATOMIC_VAR_INIT(1),
262 : };
263 : persistRtree(pb);
264 : MT_lock_unset(&pb->batIdxLock);
265 : }
266 : //TODO What do we do when the conditions are not right for creating the index?
267 : return GDK_SUCCEED;
268 : }
269 :
270 : //Free the RTree from memory,
271 : void
272 : RTREEfree(BAT *b)
273 : {
274 : BAT *pb;
275 : if (VIEWtparent(b)) {
276 : pb = BBP_desc(VIEWtparent(b));
277 : } else {
278 : pb = b;
279 : }
280 :
281 : if (pb && pb->trtree) {
282 : MT_lock_set(&pb->batIdxLock);
283 : //Mark the RTree for destruction
284 : pb->trtree->destroy = true;
285 : RTREEdecref(pb);
286 : MT_lock_unset(&b->batIdxLock);
287 : }
288 : }
289 :
290 : //Free the RTree from memory, unlink the file associated with it
291 : void
292 : RTREEdestroy(BAT *b)
293 : {
294 : BAT *pb;
295 : if (VIEWtparent(b)) {
296 : pb = BBP_desc(VIEWtparent(b));
297 : } else {
298 : pb = b;
299 : }
300 :
301 : MT_lock_set(&pb->batIdxLock);
302 : if (pb->trtree) {
303 : //Mark the RTree for destruction
304 : pb->trtree->destroy = true;
305 : RTREEdecref(pb);
306 : //If the farm is in-memory, don't unlink the file (there is no file in that case)
307 : if (pb->theap && !GDKinmemory(pb->theap->farmid)) {
308 : GDKunlink(pb->theap->farmid,
309 : BATDIR,
310 : BBP_physical(b->batCacheid),
311 : "bsrt");
312 : }
313 : }
314 : //If the rtree is not loaded (pb->trtree is null), but there is a file with the index (from previous execution),
315 : //we should remove the file
316 : else if (pb->theap && !GDKinmemory(pb->theap->farmid) && RTREEexistsonfile(pb)) {
317 : GDKunlink(pb->theap->farmid,
318 : BATDIR,
319 : BBP_physical(b->batCacheid),
320 : "bsrt");
321 : }
322 : MT_lock_unset(&b->batIdxLock);
323 : }
324 :
325 : struct results_rtree {
326 : int results_next;
327 : int results_left;
328 : BUN* candidates;
329 : };
330 :
331 : static int
332 : f(rtree_id_t id, void *context)
333 : {
334 : struct results_rtree *results_rtree = (struct results_rtree *) context;
335 : results_rtree->candidates[results_rtree->results_next++] = (BUN) id;
336 : --results_rtree->results_left;
337 : return results_rtree->results_left <= 0;
338 : }
339 :
340 : BUN*
341 : RTREEsearch(BAT *b, const void *inMBRptr, int result_limit)
342 : {
343 : BAT *pb;
344 : const mbr *inMBR = inMBRptr;
345 : if (VIEWtparent(b)) {
346 : pb = BBP_desc(VIEWtparent(b));
347 : } else {
348 : pb = b;
349 : }
350 :
351 : //Try to load if there is an RTree index on file
352 : MT_lock_set(&pb->batIdxLock);
353 : if (pb->trtree == NULL) {
354 : if (BATcheckrtree(pb) != GDK_SUCCEED) {
355 : MT_lock_unset(&pb->batIdxLock);
356 : return NULL;
357 : }
358 : }
359 : MT_lock_unset(&pb->batIdxLock);
360 :
361 : rtree_t *rtree = pb->trtree->rtree;
362 : if (rtree != NULL) {
363 : //Increase ref, we're gonna use the index
364 : RTREEincref(pb);
365 : BUN *candidates = GDKmalloc((result_limit + 1) * SIZEOF_BUN);
366 : memset(candidates, 0, (result_limit + 1) * SIZEOF_BUN);
367 :
368 : rtree_coord_t rect[4];
369 : rect[0] = inMBR->xmin;
370 : rect[1] = inMBR->ymin;
371 : rect[2] = inMBR->xmax;
372 : rect[3] = inMBR->ymax;
373 :
374 : struct results_rtree results;
375 : results.results_next = 0;
376 : results.results_left = result_limit;
377 : results.candidates = candidates;
378 :
379 : rtree_search(rtree, (const rtree_coord_t*) rect, f, &results);
380 : candidates[result_limit - results.results_left] = BUN_NONE;
381 :
382 : //Finished using the index, decrease ref
383 : RTREEdecref(pb);
384 :
385 : return candidates;
386 : } else
387 : return NULL;
388 : }
389 : #else
390 : void
391 80253987 : RTREEdestroy(BAT *b)
392 : {
393 80253987 : (void) b;
394 80253987 : }
395 :
396 : void
397 578301 : RTREEfree(BAT *b)
398 : {
399 578301 : (void) b;
400 578301 : }
401 : #endif
|