Mercurial > hg > MonetDB-extend
annotate regexp/regexp.c @ 33:a8ffdbc388ce
Ported to Jun2020 branch.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Tue, 09 Jun 2020 10:30:35 +0200 (2020-06-09) |
parents | 25cd8af6fa82 |
children | e5d2d0c9b7b3 |
rev | line source |
---|---|
16 | 1 /* This Source Code Form is subject to the terms of the Mozilla Public |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
4 * | |
5 * Copyright 2013-2018 MonetDB B.V. | |
6 */ | |
7 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
8 /* monetdb_config.h must be included as the first include file */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
9 #include <monetdb_config.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
10 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
11 /* mal_exception.h actually contains everything we need */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
12 #include <mal_exception.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
13 |
33 | 14 /* for the candidate iterator */ |
15 #include <gdk_cand.h> | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
16 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
17 /* system include files */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
18 #include <string.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
19 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
20 /* we use the PCRE library to do regular expression matching */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
21 #include <pcre.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
22 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
23 /* __declspec() must be used on Windows, but not on other systems */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
24 #ifndef _MSC_VER |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
25 /* not Windows */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
26 #define __declspec(x) /* nothing */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
27 #endif |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
28 |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
29 /* these eight functions are the only externally visible functions |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
30 * since they are the only ones that are called from the MAL layer; on |
19 | 31 * Windows they must be exported, on other systems, declaring them as |
32 * extern is enough */ | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
33 extern __declspec(dllexport) char *regexpmatch(bit *ret, const char **val, const char **pat); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
34 extern __declspec(dllexport) char *regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags); |
19 | 35 extern __declspec(dllexport) char *regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti); |
36 extern __declspec(dllexport) char *regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti); | |
37 extern __declspec(dllexport) char *regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate); | |
38 extern __declspec(dllexport) char *regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const char **flags, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate); | |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
39 extern __declspec(dllexport) char *regexpmatchbulk(bat *ret, const bat *bid, const char **pat); |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
40 extern __declspec(dllexport) char *regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
41 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
42 static int |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
43 parseflags(const char *flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
44 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
45 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
46 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
47 if (flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
48 while (*flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
49 switch (*flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
50 case 'i': /* case insensitive */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
51 options |= PCRE_CASELESS; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
52 break; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
53 case 'x': /* extended regular expressions */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
54 options |= PCRE_EXTENDED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
55 break; |
31
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
56 case 'm': /* multiline matching */ |
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
57 options |= PCRE_MULTILINE | PCRE_DOTALL;; |
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
58 break; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
59 default: |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
60 return -1; /* indicate there was an error */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
61 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
62 flags++; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
63 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
64 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
65 return options; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
66 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
67 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
68 static char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
69 do_match(bit *ret, const char *val, const char *pat, const char *flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
70 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
71 const char *err = NULL; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
72 int options; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
73 int pos = 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
74 pcre *re; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
75 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
76 /* if any of the input values are nil, the result is no match */ |
33 | 77 if (strNil(val) || strNil(pat) || strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
78 /* special case for NIL inputs: NILs don't match anything */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
79 *ret = 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
80 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
81 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
82 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
83 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
84 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
85 throw(MAL, "regexp.rematch", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
86 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
87 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
88 if (re == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
89 throw(MAL, "regexp.rematch", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
90 "compilation of regular expression (%s) failed at %d with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
91 pat, pos, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
92 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
93 pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
94 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
95 if (pos < PCRE_ERROR_NOMATCH) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
96 throw(MAL, "regexp.rematch", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
97 "matching of regular expression (%s) failed with %d", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
98 pat, pos); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
99 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
100 *ret = pos >= 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
101 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
102 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
103 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
104 char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
105 regexpmatch(bit *ret, const char **val, const char **pat) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
106 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
107 return do_match(ret, *val, *pat, ""); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
108 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
109 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
110 char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
111 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
112 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
113 return do_match(ret, *val, *pat, *flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
114 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
115 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
116 static char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
117 do_matchbulk(bat *ret, bat bid, const char *pat, const char *flags) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
118 { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
119 BAT *b; /* input BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
120 BATiter bi; /* helper to loop through values */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
121 BAT *bn; /* result BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
122 bit *outp; /* pointer through which we add to result */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
123 BUN start, end; /* iteration variables */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
124 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
125 const char *err = NULL; /* error message from PCRE library */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
126 int pos = 0; /* error position from PCRE library */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
127 int options; /* PCRE options */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
128 pcre *re; /* compiled regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
129 pcre_extra *sd; /* studied regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
130 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
131 /* from the BAT ID we need to get the BAT descriptor, making |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
132 * sure that the data of the BAT is loaded into memory */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
133 if ((b = BATdescriptor(bid)) == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
134 throw(MAL, "batregexp.rematch", RUNTIME_OBJECT_MISSING); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
135 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
136 /* check that the BAT has the expected type: we expect str or |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
137 * something compatible with str (if we only want str, we need |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
138 * to compare b->ttype with TYPE_str and not use ATOMstorage). |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
139 * Note, the MAL interpreter will only call this function with |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
140 * a str BAT because that is the only interface that is |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
141 * defined in the MAL file, so this check is superfluous. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
142 if (ATOMstorage(b->ttype) != TYPE_str) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
143 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
144 throw(MAL, "batregexp.rematch", SEMANTIC_TYPE_MISMATCH); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
145 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
146 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
147 /* if any of the input values are nil, the result is no match */ |
33 | 148 if (strNil(pat) || strNil(flags)) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
149 /* no matches when the pattern or the flags is NIL |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
150 * we return an a BAT with all NIL values */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
151 bit f = bit_nil; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
152 if ((bn = BATconstant(b->hseqbase, TYPE_bit, &f, BATcount(b), TRANSIENT)) == NULL) |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
153 throw(MAL, "batregexp.rematch", GDK_EXCEPTION); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
154 *ret = bn->batCacheid; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
155 BBPkeepref(*ret); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
156 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
157 return MAL_SUCCEED; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
158 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
159 options = parseflags(flags); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
160 if (options == -1) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
161 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
162 throw(MAL, "batregexp.rematch", "bad flag character"); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
163 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
164 |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
165 /* allocate a result BAT; the capacity we ask for is the size |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
166 * of the input BAT since we produce a value for each input |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
167 * value */ |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
168 bn = COLnew(b->hseqbase, TYPE_bit, BATcount(b), TRANSIENT); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
169 if (bn == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
170 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
171 throw(MAL, "batregexp.rematch", GDK_EXCEPTION); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
172 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
173 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
174 /* Position outp at the start of the result array. |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
175 * We know the array is large enough even if every value were |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
176 * to match, so we don't need to check for that. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
177 outp = (bit *) Tloc(bn, 0); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
178 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
179 /* compile the regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
180 re = pcre_compile(pat, options, &err, &pos, NULL); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
181 if (re == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
182 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
183 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
184 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
185 "compilation of regular expression (%s) failed at %d with %s", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
186 pat, pos, err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
187 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
188 /* invest in study of the r.e. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
189 sd = pcre_study(re, 0, &err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
190 if (err != NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
191 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
192 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
193 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
194 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
195 "study of regular expression (%s) failed with %s", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
196 pat, err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
197 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
198 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
199 /* now, start and end are the limits in b that we need to look |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
200 * at, and if set, cand and candend are the beginning and end |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
201 * of the list of OIDs of b that we need to consider */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
202 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
203 bi = bat_iterator(b); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
204 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
205 /* we will change these if we add a NIL */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
206 bn->tnil = false; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
207 bn->tnonil = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
208 for (start = 0, end = BATcount(b); start < end; start++) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
209 const char *val = BUNtvar(bi, start); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
210 /* nil values never match */ |
33 | 211 if (strNil(val)) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
212 *outp++ = bit_nil; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
213 bn->tnil = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
214 bn->tnonil = false; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
215 } else { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
216 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); |
32
25cd8af6fa82
TEMPORARILY relax error condition
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
31
diff
changeset
|
217 if (pos < 0 && pos != PCRE_ERROR_NOMATCH && pos != PCRE_ERROR_BADUTF8) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
218 /* error during processing */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
219 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
220 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
221 pcre_free_study(sd); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
222 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
223 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
224 "matching of regular expression (%s) failed with %d", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
225 pat, pos); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
226 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
227 *outp++ = pos >= 0; /* TRUE if match, FALSE if not */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
228 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
229 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
230 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
231 /* set properties and size of result BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
232 BATsetcount(bn, BATcount(b)); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
233 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
234 if (BATcount(bn) > 1) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
235 /* if more than 1 result, it is not reverse sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
236 bn->tsorted = false; /* probably not sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
237 bn->trevsorted = false; /* probably not reverse sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
238 bn->tkey = false; /* probably not key */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
239 } else { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
240 /* if empty or a single result, it is sorted, reverse |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
241 * sorted, and key */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
242 bn->tsorted = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
243 bn->trevsorted = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
244 bn->tkey = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
245 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
246 bn->tnosorted = 0; /* we don't know for sure */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
247 bn->tnorevsorted = 0; /* we don't know for sure */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
248 bn->tnokey[0] = bn->tnokey[1] = 0; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
249 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
250 /* we're done with b and re */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
251 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
252 pcre_free_study(sd); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
253 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
254 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
255 *ret = bn->batCacheid; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
256 BBPkeepref(*ret); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
257 return MAL_SUCCEED; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
258 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
259 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
260 char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
261 regexpmatchbulk(bat *ret, const bat *bid, const char **pat) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
262 { |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
263 return do_matchbulk(ret, *bid, *pat, ""); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
264 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
265 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
266 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
267 char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
268 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
269 { |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
270 return do_matchbulk(ret, *bid, *pat, *flags); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
271 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
272 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
273 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
274 do_select(bat *ret, bat bid, bat sid, const char *pat, const char *flags, bit anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
275 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
276 BAT *b, *s = NULL; /* input BAT and optional candidate list */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
277 |
33 | 278 struct canditer ci; /* candidate iterator */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
279 BATiter bi; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
280 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
281 BAT *bn; /* result BAT */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
282 oid *outp; /* pointer through which we add to result */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
283 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
284 const char *err = NULL; /* error message from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
285 int pos = 0; /* error position from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
286 int options; /* PCRE options */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
287 pcre *re; /* compiled regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
288 pcre_extra *sd; /* studied regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
289 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
290 /* if any of the input values are nil, the result is no match */ |
33 | 291 if (strNil(pat) || strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
292 /* no matches when the pattern or the flags is NIL |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
293 * we return an empty BAT of the correct type */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
294 if ((bn = BATdense(0, 0, 0)) == NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
295 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
296 *ret = bn->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
297 BBPkeepref(*ret); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
298 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
299 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
300 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
301 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
302 throw(MAL, "regexp.rematchselect", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
303 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
304 /* from the BAT IDs we need to get the BAT descriptors, making |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
305 * sure that the data of the BATs are loaded into memory */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
306 if ((b = BATdescriptor(bid)) == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
307 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
308 } |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
309 /* check that the BAT has the expected type: we expect str or |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
310 * something compatible with str (if we only want str, we need |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
311 * to compare b->ttype with TYPE_str and not use ATOMstorage). |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
312 * Note, the MAL interpreter will only call this function with |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
313 * a str BAT because that is the only interface that is |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
314 * defined in the MAL file, so this check is superfluous. */ |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
315 if (ATOMstorage(b->ttype) != TYPE_str) { |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
316 BBPunfix(b->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
317 throw(MAL, "regexp.rematchselect", SEMANTIC_TYPE_MISMATCH); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
318 } |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
319 if (!is_bat_nil(sid) && |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
320 (s = BATdescriptor(sid)) == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
321 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
322 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
323 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
324 |
33 | 325 if (canditer_init(&ci, b, s) == 0) { |
326 /* trivially empty result */ | |
327 BBPunfix(b->batCacheid); | |
328 if (s) | |
329 BBPunfix(s->batCacheid); | |
330 bn = BATdense(0, 0, 0); | |
331 *ret = bn->batCacheid; | |
332 BBPkeepref(*ret); | |
333 return MAL_SUCCEED; | |
334 } | |
335 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
336 /* allocate a result BAT; the capacity we ask for is the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
337 * maximum potential result size (i.e. the size of the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
338 * candidate list if there is one, else the size of the input |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
339 * BAT b) */ |
33 | 340 bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
341 if (bn == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
342 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
343 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
344 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
345 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
346 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
347 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
348 /* Position outp at the start of the result array. |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
349 * We know the array is large enough even if every value were |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
350 * to match, so we don't need to check for that. */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
351 outp = (oid *) Tloc(bn, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
352 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
353 /* compile the regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
354 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
355 if (re == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
356 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
357 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
358 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
359 BBPreclaim(bn); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
360 throw(MAL, "regexp.rematchselect", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
361 "compilation of regular expression (%s) failed at %d with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
362 pat, pos, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
363 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
364 /* invest in study of the r.e. */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
365 sd = pcre_study(re, 0, &err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
366 if (err != NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
367 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
368 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
369 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
370 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
371 BBPreclaim(bn); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
372 throw(MAL, "regexp.rematchselect", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
373 "study of regular expression (%s) failed with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
374 pat, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
375 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
376 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
377 bi = bat_iterator(b); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
378 |
33 | 379 /* iterate through the candidates */ |
380 for (BUN i = 0; i < ci.ncand; i++) { | |
381 /* get the next candidate */ | |
382 oid o = canditer_next(&ci); | |
383 | |
384 /* the candidate list has a list of OIDs which are | |
385 * relative to b->hseqbase, we need to convert that to | |
386 * an index relative to the start of the array in the | |
387 * (tail) heap */ | |
388 const char *val = BUNtvar(bi, o - b->hseqbase); | |
389 | |
390 /* nil values never match */ | |
391 if (!strNil(val)) { | |
392 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); | |
393 if (pos >= 0) { | |
394 /* regular expression matched */ | |
395 if (!anti) | |
396 *outp++ = o; | |
397 } else if (pos == PCRE_ERROR_NOMATCH) { | |
398 /* regular expression didn't match */ | |
399 if (anti) | |
400 *outp++ = o; | |
401 } else { | |
402 /* error during processing */ | |
403 BBPunfix(b->batCacheid); | |
404 BBPunfix(s->batCacheid); | |
405 BBPreclaim(bn); | |
406 pcre_free_study(sd); | |
407 pcre_free(re); | |
408 throw(MAL, "regexp.rematchselect", | |
409 "matching of regular expression (%s) failed with %d", | |
410 pat, pos); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
411 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
412 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
413 } |
33 | 414 /* we're done with b, s, and re */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
415 BBPunfix(b->batCacheid); |
33 | 416 if (s) |
417 BBPunfix(s->batCacheid); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
418 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
419 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
420 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
421 /* set properties and size of result BAT */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
422 BATsetcount(bn, (BUN) (outp - (oid *) Tloc(bn, 0))); /* size is pointer difference */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
423 /* the result BAT of a select operation MUST be sorted, and |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
424 * all values MUST be distinct (i.e. it is a candidate list); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
425 * due to the way we created the result, we know this is the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
426 * case */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
427 bn->tsorted = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
428 bn->tnosorted = 0; |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
429 bn->tkey = true; |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
430 bn->tseqbase = oid_nil; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
431 if (BATcount(bn) > 1) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
432 /* if more than 1 result, it is not reverse sorted */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
433 bn->trevsorted = false; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
434 bn->tnorevsorted = 1; /* index 1 is larger than index 0 */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
435 /* the BAT is dense if the type is TYPE_oid, the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
436 * values are sorted in ascending order, they are all |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
437 * distinct, and they form a consecutive sequence (no |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
438 * missing values); we only need to check the last |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
439 * condition, which we do by checking the difference |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
440 * between the first and last values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
441 outp = (oid *) Tloc(bn, 0); /* pointer to start */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
442 if (outp[BATcount(bn) - 1] - outp[0] == BATcount(bn) - 1) |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
443 bn->tseqbase = outp[0]; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
444 } else { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
445 /* if empty or a single result, it is reverse sorted |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
446 * and dense */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
447 bn->trevsorted = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
448 bn->tnorevsorted = 0; |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
449 bn->tseqbase = BATcount(bn) == 0 ? 0 : *(oid *) Tloc(bn, 0); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
450 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
451 /* there are no NIL values in the result */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
452 bn->tnil = false; |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
453 bn->tnonil = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
454 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
455 *ret = bn->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
456 BBPkeepref(*ret); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
457 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
458 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
459 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
460 char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
461 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
462 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
463 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
464 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
465 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
466 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
467 char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
468 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
469 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
470 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
471 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
472 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
473 static char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
474 do_join(bat *lres, bat *rres, bat lid, bat rid, const char *flags, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
475 bat slid, bat srid, bit nil_matches, lng estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
476 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
477 BAT *l, *r, *sl = NULL, *sr = NULL; /* input BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
478 BAT *bn1, *bn2; /* output BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
479 |
33 | 480 struct canditer lci; /* candidate iterator for l */ |
481 struct canditer rci; /* candidate iterator for r */ | |
482 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
483 BATiter li; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
484 BATiter ri; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
485 oid ro; /* right OID being matched */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
486 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
487 const char *pat; /* the regular expression being matched */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
488 const char *err; /* error message from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
489 int pos = 0; /* error position from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
490 int options; /* PCRE options */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
491 pcre *re; /* compiled regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
492 pcre_extra *sd; /* studied regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
493 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
494 (void) nil_matches; /* only relevant for equi-join */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
495 |
33 | 496 if (strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
497 /* no matches when the flags is NIL |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
498 * we return two empty BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
499 bn1 = BATdense(0, 0, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
500 bn2 = BATdense(0, 0, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
501 if (bn1 == NULL || bn2 == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
502 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
503 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
504 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
505 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
506 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
507 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
508 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
509 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
510 throw(MAL, "regexp.rematchjoin", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
511 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
512 l = BATdescriptor(lid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
513 r = BATdescriptor(rid); |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
514 if (!is_bat_nil(slid)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
515 sl = BATdescriptor(slid); |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
516 if (!is_bat_nil(srid)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
517 sr = BATdescriptor(srid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
518 if (l == NULL || r == NULL || |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
519 (!is_bat_nil(slid) && sl == NULL) || |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
520 (!is_bat_nil(srid) && sr == NULL)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
521 /* one of the calls to BATdescriptor failed */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
522 if (l) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
523 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
524 if (r) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
525 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
526 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
527 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
528 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
529 BBPunfix(sr->batCacheid); |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
530 throw(MAL, "regexp.rematchjoin", RUNTIME_OBJECT_MISSING); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
531 } |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
532 /* check that the BATs have the expected type: we expect str |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
533 * or something compatible with str for l (the values) and str |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
534 * for r (the patterns). |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
535 * Note, the MAL interpreter will only call this function with |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
536 * a pair of str BATs because that is the only interface that |
27 | 537 * is defined in the MAL file, so this check is superfluous. |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
538 */ |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
539 if (ATOMstorage(l->ttype) != TYPE_str || r->ttype != TYPE_str) { |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
540 BBPunfix(l->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
541 BBPunfix(r->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
542 if (sl) |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
543 BBPunfix(sl->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
544 if (sr) |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
545 BBPunfix(sr->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
546 throw(MAL, "regexp.rematchjoin", SEMANTIC_TYPE_MISMATCH); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
547 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
548 |
33 | 549 if (canditer_init(&lci, l, sl) == 0 || |
550 canditer_init(&rci, r, sr) == 0) { | |
551 /* if either side is empty (or no candidates) the | |
552 * result is empty */ | |
553 BBPunfix(l->batCacheid); | |
554 BBPunfix(r->batCacheid); | |
555 if (sl) | |
556 BBPunfix(sl->batCacheid); | |
557 if (sr) | |
558 BBPunfix(sr->batCacheid); | |
559 bn1 = BATdense(0, 0, 0); | |
560 bn2 = BATdense(0, 0, 0); | |
561 if (bn1 == NULL || bn2 == NULL) { | |
562 BBPreclaim(bn1); | |
563 BBPreclaim(bn2); | |
564 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); | |
565 } | |
566 return MAL_SUCCEED; | |
567 } | |
568 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
569 /* if there is no valid estimate, use the size of the left |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
570 * input as size estimate */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
571 if (is_lng_nil(estimate) || estimate == 0) |
33 | 572 estimate = lci.ncand; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
573 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
574 /* create the output BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
575 bn1 = COLnew(0, TYPE_oid, estimate, TRANSIENT); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
576 bn2 = COLnew(0, TYPE_oid, estimate, TRANSIENT); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
577 if (bn1 == NULL || bn2 == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
578 /* one of the calls to COLnew failed |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
579 * note, BBPreclaim checks whether its argument is NULL */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
580 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
581 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
582 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
583 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
584 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
585 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
586 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
587 BBPunfix(sr->batCacheid); |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
588 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
589 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
590 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
591 li = bat_iterator(l); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
592 ri = bat_iterator(r); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
593 |
33 | 594 for (BUN i = 0; i < rci.ncand; i++) { |
595 ro = canditer_next(&rci); | |
596 pat = BUNtvar(ri, ro - r->hseqbase); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
597 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
598 /* nil regular expressions don't match (despite |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
599 * nil_matches) */ |
33 | 600 if (strNil(pat)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
601 continue; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
602 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
603 sd = NULL; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
604 if (re == NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
605 goto bailout; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
606 sd = pcre_study(re, 0, &err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
607 if (err != NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
608 goto bailout; |
33 | 609 |
610 /* inner loop: reset iterator, then iterate over it */ | |
611 canditer_reset(&lci); | |
612 for (BUN j = 0; j < lci.ncand; j++) { | |
613 oid lo = canditer_next(&lci); | |
614 const char *val = BUNtvar(li, lo - l->hseqbase); | |
615 if (strNil(val)) | |
616 continue; | |
617 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); | |
618 if (pos >= 0) { | |
619 /* regular expression matched */ | |
620 if (BUNappend(bn1, &lo, false) != GDK_SUCCEED || | |
621 BUNappend(bn2, &ro, false) != GDK_SUCCEED) | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
622 goto bailout; |
33 | 623 } else if (pos != PCRE_ERROR_NOMATCH) { |
624 /* error during processing */ | |
625 err = "matching of regular expression failed"; | |
626 goto bailout; | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
627 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
628 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
629 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
630 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
631 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
632 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
633 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
634 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
635 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
636 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
637 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
638 BBPunfix(sr->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
639 *lres = bn1->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
640 *rres = bn2->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
641 BBPkeepref(*lres); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
642 BBPkeepref(*rres); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
643 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
644 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
645 bailout: |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
646 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
647 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
648 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
649 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
650 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
651 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
652 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
653 BBPunfix(sr->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
654 if (sd) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
655 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
656 if (re) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
657 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
658 if (err) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
659 throw(MAL, "pcre.rematchjoin", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
660 "error with regular expression: %s", err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
661 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
662 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
663 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
664 char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
665 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
666 const bat *sl, const bat *sr, const bit *nil_matches, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
667 const lng *estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
668 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
669 return do_join(lres, rres, *lid, *rid, "", *sl, *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
670 *nil_matches, *estimate); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
671 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
672 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
673 char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
674 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
675 const char **flags, const bat *sl, const bat *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
676 const bit *nil_matches, const lng *estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
677 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
678 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
679 *nil_matches, *estimate); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
680 } |