Mercurial > hg > MonetDB-extend
annotate regexp/regexp.c @ 40:e70b12c15507
Updated for Oct2020 version.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Tue, 08 Jun 2021 14:55:38 +0200 (2021-06-08) |
parents | 4633ab41de55 |
children | da896864dbbd |
rev | line source |
---|---|
16 | 1 /* This Source Code Form is subject to the terms of the Mozilla Public |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
4 * | |
5 * Copyright 2013-2018 MonetDB B.V. | |
6 */ | |
7 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
8 /* monetdb_config.h must be included as the first include file */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
9 #include <monetdb_config.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
10 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
11 /* mal_exception.h actually contains everything we need */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
12 #include <mal_exception.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
13 |
33 | 14 /* for the candidate iterator */ |
15 #include <gdk_cand.h> | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
16 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
17 /* system include files */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
18 #include <string.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
19 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
20 /* we use the PCRE library to do regular expression matching */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
21 #include <pcre.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
22 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
23 static int |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
24 parseflags(const char *flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
25 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
26 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
27 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
28 if (flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
29 while (*flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
30 switch (*flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
31 case 'i': /* case insensitive */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
32 options |= PCRE_CASELESS; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
33 break; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
34 case 'x': /* extended regular expressions */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
35 options |= PCRE_EXTENDED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
36 break; |
31
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
37 case 'm': /* multiline matching */ |
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
38 options |= PCRE_MULTILINE | PCRE_DOTALL;; |
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
39 break; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
40 default: |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
41 return -1; /* indicate there was an error */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
42 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
43 flags++; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
44 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
45 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
46 return options; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
47 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
48 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
49 static char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
50 do_match(bit *ret, const char *val, const char *pat, const char *flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
51 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
52 const char *err = NULL; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
53 int options; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
54 int pos = 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
55 pcre *re; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
56 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
57 /* if any of the input values are nil, the result is no match */ |
33 | 58 if (strNil(val) || strNil(pat) || strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
59 /* special case for NIL inputs: NILs don't match anything */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
60 *ret = 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
61 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
62 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
63 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
64 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
65 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
66 throw(MAL, "regexp.rematch", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
67 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
68 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
69 if (re == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
70 throw(MAL, "regexp.rematch", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
71 "compilation of regular expression (%s) failed at %d with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
72 pat, pos, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
73 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
74 pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
75 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
76 if (pos < PCRE_ERROR_NOMATCH) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
77 throw(MAL, "regexp.rematch", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
78 "matching of regular expression (%s) failed with %d", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
79 pat, pos); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
80 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
81 *ret = pos >= 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
82 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
83 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
84 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
85 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
86 regexpmatch(bit *ret, const char **val, const char **pat) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
87 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
88 return do_match(ret, *val, *pat, ""); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
89 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
90 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
91 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
92 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
93 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
94 return do_match(ret, *val, *pat, *flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
95 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
96 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
97 static char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
98 do_matchbulk(bat *ret, bat bid, const char *pat, const char *flags) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
99 { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
100 BAT *b; /* input BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
101 BATiter bi; /* helper to loop through values */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
102 BAT *bn; /* result BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
103 bit *outp; /* pointer through which we add to result */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
104 BUN start, end; /* iteration variables */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
105 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
106 const char *err = NULL; /* error message from PCRE library */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
107 int pos = 0; /* error position from PCRE library */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
108 int options; /* PCRE options */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
109 pcre *re; /* compiled regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
110 pcre_extra *sd; /* studied regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
111 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
112 /* from the BAT ID we need to get the BAT descriptor, making |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
113 * sure that the data of the BAT is loaded into memory */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
114 if ((b = BATdescriptor(bid)) == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
115 throw(MAL, "batregexp.rematch", RUNTIME_OBJECT_MISSING); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
116 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
117 /* check that the BAT has the expected type: we expect str or |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
118 * something compatible with str (if we only want str, we need |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
119 * to compare b->ttype with TYPE_str and not use ATOMstorage). |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
120 * Note, the MAL interpreter will only call this function with |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
121 * a str BAT because that is the only interface that is |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
122 * defined in the MAL file, so this check is superfluous. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
123 if (ATOMstorage(b->ttype) != TYPE_str) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
124 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
125 throw(MAL, "batregexp.rematch", SEMANTIC_TYPE_MISMATCH); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
126 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
127 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
128 /* if any of the input values are nil, the result is no match */ |
33 | 129 if (strNil(pat) || strNil(flags)) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
130 /* no matches when the pattern or the flags is NIL |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
131 * we return a BAT with all NIL values */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
132 bit f = bit_nil; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
133 if ((bn = BATconstant(b->hseqbase, TYPE_bit, &f, BATcount(b), TRANSIENT)) == NULL) |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
134 throw(MAL, "batregexp.rematch", GDK_EXCEPTION); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
135 *ret = bn->batCacheid; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
136 BBPkeepref(*ret); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
137 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
138 return MAL_SUCCEED; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
139 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
140 options = parseflags(flags); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
141 if (options == -1) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
142 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
143 throw(MAL, "batregexp.rematch", "bad flag character"); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
144 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
145 |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
146 /* allocate a result BAT; the capacity we ask for is the size |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
147 * of the input BAT since we produce a value for each input |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
148 * value */ |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
149 bn = COLnew(b->hseqbase, TYPE_bit, BATcount(b), TRANSIENT); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
150 if (bn == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
151 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
152 throw(MAL, "batregexp.rematch", GDK_EXCEPTION); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
153 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
154 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
155 /* Position outp at the start of the result array. |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
156 * We know the array is large enough even if every value were |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
157 * to match, so we don't need to check for that. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
158 outp = (bit *) Tloc(bn, 0); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
159 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
160 /* compile the regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
161 re = pcre_compile(pat, options, &err, &pos, NULL); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
162 if (re == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
163 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
164 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
165 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
166 "compilation of regular expression (%s) failed at %d with %s", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
167 pat, pos, err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
168 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
169 /* invest in study of the r.e. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
170 sd = pcre_study(re, 0, &err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
171 if (err != NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
172 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
173 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
174 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
175 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
176 "study of regular expression (%s) failed with %s", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
177 pat, err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
178 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
179 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
180 /* now, start and end are the limits in b that we need to look |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
181 * at, and if set, cand and candend are the beginning and end |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
182 * of the list of OIDs of b that we need to consider */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
183 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
184 bi = bat_iterator(b); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
185 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
186 /* we will change these if we add a NIL */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
187 bn->tnil = false; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
188 bn->tnonil = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
189 for (start = 0, end = BATcount(b); start < end; start++) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
190 const char *val = BUNtvar(bi, start); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
191 /* nil values never match */ |
33 | 192 if (strNil(val)) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
193 *outp++ = bit_nil; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
194 bn->tnil = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
195 bn->tnonil = false; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
196 } else { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
197 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); |
32
25cd8af6fa82
TEMPORARILY relax error condition
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
31
diff
changeset
|
198 if (pos < 0 && pos != PCRE_ERROR_NOMATCH && pos != PCRE_ERROR_BADUTF8) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
199 /* error during processing */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
200 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
201 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
202 pcre_free_study(sd); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
203 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
204 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
205 "matching of regular expression (%s) failed with %d", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
206 pat, pos); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
207 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
208 *outp++ = pos >= 0; /* TRUE if match, FALSE if not */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
209 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
210 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
211 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
212 /* set properties and size of result BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
213 BATsetcount(bn, BATcount(b)); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
214 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
215 if (BATcount(bn) > 1) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
216 /* if more than 1 result, it is not reverse sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
217 bn->tsorted = false; /* probably not sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
218 bn->trevsorted = false; /* probably not reverse sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
219 bn->tkey = false; /* probably not key */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
220 } else { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
221 /* if empty or a single result, it is sorted, reverse |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
222 * sorted, and key */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
223 bn->tsorted = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
224 bn->trevsorted = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
225 bn->tkey = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
226 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
227 bn->tnosorted = 0; /* we don't know for sure */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
228 bn->tnorevsorted = 0; /* we don't know for sure */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
229 bn->tnokey[0] = bn->tnokey[1] = 0; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
230 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
231 /* we're done with b and re */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
232 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
233 pcre_free_study(sd); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
234 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
235 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
236 *ret = bn->batCacheid; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
237 BBPkeepref(*ret); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
238 return MAL_SUCCEED; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
239 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
240 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
241 static char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
242 regexpmatchbulk(bat *ret, const bat *bid, const char **pat) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
243 { |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
244 return do_matchbulk(ret, *bid, *pat, ""); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
245 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
246 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
247 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
248 static char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
249 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
250 { |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
251 return do_matchbulk(ret, *bid, *pat, *flags); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
252 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
253 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
254 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
255 do_select(bat *ret, bat bid, bat sid, const char *pat, const char *flags, bit anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
256 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
257 BAT *b, *s = NULL; /* input BAT and optional candidate list */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
258 |
33 | 259 struct canditer ci; /* candidate iterator */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
260 BATiter bi; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
261 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
262 BAT *bn; /* result BAT */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
263 oid *outp; /* pointer through which we add to result */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
264 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
265 const char *err = NULL; /* error message from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
266 int pos = 0; /* error position from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
267 int options; /* PCRE options */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
268 pcre *re; /* compiled regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
269 pcre_extra *sd; /* studied regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
270 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
271 /* if any of the input values are nil, the result is no match */ |
33 | 272 if (strNil(pat) || strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
273 /* no matches when the pattern or the flags is NIL |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
274 * we return an empty BAT of the correct type */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
275 if ((bn = BATdense(0, 0, 0)) == NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
276 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
277 *ret = bn->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
278 BBPkeepref(*ret); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
279 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
280 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
281 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
282 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
283 throw(MAL, "regexp.rematchselect", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
284 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
285 /* from the BAT IDs we need to get the BAT descriptors, making |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
286 * sure that the data of the BATs are loaded into memory */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
287 if ((b = BATdescriptor(bid)) == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
288 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
289 } |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
290 /* check that the BAT has the expected type: we expect str or |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
291 * something compatible with str (if we only want str, we need |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
292 * to compare b->ttype with TYPE_str and not use ATOMstorage). |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
293 * Note, the MAL interpreter will only call this function with |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
294 * a str BAT because that is the only interface that is |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
295 * defined in the MAL file, so this check is superfluous. */ |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
296 if (ATOMstorage(b->ttype) != TYPE_str) { |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
297 BBPunfix(b->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
298 throw(MAL, "regexp.rematchselect", SEMANTIC_TYPE_MISMATCH); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
299 } |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
300 if (!is_bat_nil(sid) && |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
301 (s = BATdescriptor(sid)) == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
302 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
303 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
304 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
305 |
33 | 306 if (canditer_init(&ci, b, s) == 0) { |
307 /* trivially empty result */ | |
308 BBPunfix(b->batCacheid); | |
309 if (s) | |
310 BBPunfix(s->batCacheid); | |
311 bn = BATdense(0, 0, 0); | |
312 *ret = bn->batCacheid; | |
313 BBPkeepref(*ret); | |
314 return MAL_SUCCEED; | |
315 } | |
316 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
317 /* allocate a result BAT; the capacity we ask for is the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
318 * maximum potential result size (i.e. the size of the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
319 * candidate list if there is one, else the size of the input |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
320 * BAT b) */ |
33 | 321 bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
322 if (bn == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
323 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
324 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
325 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
326 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
327 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
328 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
329 /* Position outp at the start of the result array. |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
330 * We know the array is large enough even if every value were |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
331 * to match, so we don't need to check for that. */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
332 outp = (oid *) Tloc(bn, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
333 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
334 /* compile the regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
335 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
336 if (re == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
337 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
338 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
339 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
340 BBPreclaim(bn); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
341 throw(MAL, "regexp.rematchselect", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
342 "compilation of regular expression (%s) failed at %d with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
343 pat, pos, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
344 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
345 /* invest in study of the r.e. */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
346 sd = pcre_study(re, 0, &err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
347 if (err != NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
348 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
349 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
350 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
351 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
352 BBPreclaim(bn); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
353 throw(MAL, "regexp.rematchselect", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
354 "study of regular expression (%s) failed with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
355 pat, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
356 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
357 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
358 bi = bat_iterator(b); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
359 |
33 | 360 /* iterate through the candidates */ |
361 for (BUN i = 0; i < ci.ncand; i++) { | |
362 /* get the next candidate */ | |
363 oid o = canditer_next(&ci); | |
364 | |
365 /* the candidate list has a list of OIDs which are | |
366 * relative to b->hseqbase, we need to convert that to | |
367 * an index relative to the start of the array in the | |
368 * (tail) heap */ | |
369 const char *val = BUNtvar(bi, o - b->hseqbase); | |
370 | |
371 /* nil values never match */ | |
372 if (!strNil(val)) { | |
373 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); | |
374 if (pos >= 0) { | |
375 /* regular expression matched */ | |
376 if (!anti) | |
377 *outp++ = o; | |
378 } else if (pos == PCRE_ERROR_NOMATCH) { | |
379 /* regular expression didn't match */ | |
380 if (anti) | |
381 *outp++ = o; | |
382 } else { | |
383 /* error during processing */ | |
384 BBPunfix(b->batCacheid); | |
385 BBPunfix(s->batCacheid); | |
386 BBPreclaim(bn); | |
387 pcre_free_study(sd); | |
388 pcre_free(re); | |
389 throw(MAL, "regexp.rematchselect", | |
390 "matching of regular expression (%s) failed with %d", | |
391 pat, pos); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
392 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
393 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
394 } |
33 | 395 /* we're done with b, s, and re */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
396 BBPunfix(b->batCacheid); |
33 | 397 if (s) |
398 BBPunfix(s->batCacheid); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
399 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
400 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
401 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
402 /* set properties and size of result BAT */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
403 BATsetcount(bn, (BUN) (outp - (oid *) Tloc(bn, 0))); /* size is pointer difference */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
404 /* the result BAT of a select operation MUST be sorted, and |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
405 * all values MUST be distinct (i.e. it is a candidate list); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
406 * due to the way we created the result, we know this is the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
407 * case */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
408 bn->tsorted = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
409 bn->tnosorted = 0; |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
410 bn->tkey = true; |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
411 bn->tseqbase = oid_nil; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
412 if (BATcount(bn) > 1) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
413 /* if more than 1 result, it is not reverse sorted */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
414 bn->trevsorted = false; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
415 bn->tnorevsorted = 1; /* index 1 is larger than index 0 */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
416 /* the BAT is dense if the type is TYPE_oid, the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
417 * values are sorted in ascending order, they are all |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
418 * distinct, and they form a consecutive sequence (no |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
419 * missing values); we only need to check the last |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
420 * condition, which we do by checking the difference |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
421 * between the first and last values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
422 outp = (oid *) Tloc(bn, 0); /* pointer to start */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
423 if (outp[BATcount(bn) - 1] - outp[0] == BATcount(bn) - 1) |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
424 bn->tseqbase = outp[0]; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
425 } else { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
426 /* if empty or a single result, it is reverse sorted |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
427 * and dense */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
428 bn->trevsorted = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
429 bn->tnorevsorted = 0; |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
430 bn->tseqbase = BATcount(bn) == 0 ? 0 : *(oid *) Tloc(bn, 0); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
431 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
432 /* there are no NIL values in the result */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
433 bn->tnil = false; |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
434 bn->tnonil = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
435 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
436 *ret = bn->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
437 BBPkeepref(*ret); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
438 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
439 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
440 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
441 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
442 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
443 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
444 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
445 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
446 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
447 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
448 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
449 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
450 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
451 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
452 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
453 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
454 static char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
455 do_join(bat *lres, bat *rres, bat lid, bat rid, const char *flags, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
456 bat slid, bat srid, bit nil_matches, lng estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
457 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
458 BAT *l, *r, *sl = NULL, *sr = NULL; /* input BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
459 BAT *bn1, *bn2; /* output BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
460 |
33 | 461 struct canditer lci; /* candidate iterator for l */ |
462 struct canditer rci; /* candidate iterator for r */ | |
463 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
464 BATiter li; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
465 BATiter ri; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
466 oid ro; /* right OID being matched */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
467 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
468 const char *pat; /* the regular expression being matched */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
469 const char *err; /* error message from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
470 int pos = 0; /* error position from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
471 int options; /* PCRE options */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
472 pcre *re; /* compiled regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
473 pcre_extra *sd; /* studied regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
474 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
475 (void) nil_matches; /* only relevant for equi-join */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
476 |
33 | 477 if (strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
478 /* no matches when the flags is NIL |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
479 * we return two empty BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
480 bn1 = BATdense(0, 0, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
481 bn2 = BATdense(0, 0, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
482 if (bn1 == NULL || bn2 == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
483 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
484 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
485 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
486 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
487 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
488 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
489 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
490 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
491 throw(MAL, "regexp.rematchjoin", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
492 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
493 l = BATdescriptor(lid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
494 r = BATdescriptor(rid); |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
495 if (!is_bat_nil(slid)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
496 sl = BATdescriptor(slid); |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
497 if (!is_bat_nil(srid)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
498 sr = BATdescriptor(srid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
499 if (l == NULL || r == NULL || |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
500 (!is_bat_nil(slid) && sl == NULL) || |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
501 (!is_bat_nil(srid) && sr == NULL)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
502 /* one of the calls to BATdescriptor failed */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
503 if (l) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
504 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
505 if (r) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
506 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
507 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
508 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
509 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
510 BBPunfix(sr->batCacheid); |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
511 throw(MAL, "regexp.rematchjoin", RUNTIME_OBJECT_MISSING); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
512 } |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
513 /* check that the BATs have the expected type: we expect str |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
514 * or something compatible with str for l (the values) and str |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
515 * for r (the patterns). |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
516 * Note, the MAL interpreter will only call this function with |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
517 * a pair of str BATs because that is the only interface that |
27 | 518 * is defined in the MAL file, so this check is superfluous. |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
519 */ |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
520 if (ATOMstorage(l->ttype) != TYPE_str || r->ttype != TYPE_str) { |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
521 BBPunfix(l->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
522 BBPunfix(r->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
523 if (sl) |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
524 BBPunfix(sl->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
525 if (sr) |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
526 BBPunfix(sr->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
527 throw(MAL, "regexp.rematchjoin", SEMANTIC_TYPE_MISMATCH); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
528 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
529 |
33 | 530 if (canditer_init(&lci, l, sl) == 0 || |
531 canditer_init(&rci, r, sr) == 0) { | |
532 /* if either side is empty (or no candidates) the | |
533 * result is empty */ | |
534 BBPunfix(l->batCacheid); | |
535 BBPunfix(r->batCacheid); | |
536 if (sl) | |
537 BBPunfix(sl->batCacheid); | |
538 if (sr) | |
539 BBPunfix(sr->batCacheid); | |
540 bn1 = BATdense(0, 0, 0); | |
541 bn2 = BATdense(0, 0, 0); | |
542 if (bn1 == NULL || bn2 == NULL) { | |
543 BBPreclaim(bn1); | |
544 BBPreclaim(bn2); | |
545 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); | |
546 } | |
547 return MAL_SUCCEED; | |
548 } | |
549 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
550 /* if there is no valid estimate, use the size of the left |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
551 * input as size estimate */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
552 if (is_lng_nil(estimate) || estimate == 0) |
33 | 553 estimate = lci.ncand; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
554 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
555 /* create the output BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
556 bn1 = COLnew(0, TYPE_oid, estimate, TRANSIENT); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
557 bn2 = COLnew(0, TYPE_oid, estimate, TRANSIENT); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
558 if (bn1 == NULL || bn2 == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
559 /* one of the calls to COLnew failed |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
560 * note, BBPreclaim checks whether its argument is NULL */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
561 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
562 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
563 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
564 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
565 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
566 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
567 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
568 BBPunfix(sr->batCacheid); |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
569 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
570 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
571 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
572 li = bat_iterator(l); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
573 ri = bat_iterator(r); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
574 |
33 | 575 for (BUN i = 0; i < rci.ncand; i++) { |
576 ro = canditer_next(&rci); | |
577 pat = BUNtvar(ri, ro - r->hseqbase); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
578 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
579 /* nil regular expressions don't match (despite |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
580 * nil_matches) */ |
33 | 581 if (strNil(pat)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
582 continue; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
583 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
584 sd = NULL; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
585 if (re == NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
586 goto bailout; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
587 sd = pcre_study(re, 0, &err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
588 if (err != NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
589 goto bailout; |
33 | 590 |
591 /* inner loop: reset iterator, then iterate over it */ | |
592 canditer_reset(&lci); | |
593 for (BUN j = 0; j < lci.ncand; j++) { | |
594 oid lo = canditer_next(&lci); | |
595 const char *val = BUNtvar(li, lo - l->hseqbase); | |
596 if (strNil(val)) | |
597 continue; | |
598 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); | |
599 if (pos >= 0) { | |
600 /* regular expression matched */ | |
601 if (BUNappend(bn1, &lo, false) != GDK_SUCCEED || | |
602 BUNappend(bn2, &ro, false) != GDK_SUCCEED) | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
603 goto bailout; |
33 | 604 } else if (pos != PCRE_ERROR_NOMATCH) { |
605 /* error during processing */ | |
606 err = "matching of regular expression failed"; | |
607 goto bailout; | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
608 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
609 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
610 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
611 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
612 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
613 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
614 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
615 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
616 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
617 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
618 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
619 BBPunfix(sr->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
620 *lres = bn1->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
621 *rres = bn2->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
622 BBPkeepref(*lres); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
623 BBPkeepref(*rres); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
624 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
625 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
626 bailout: |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
627 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
628 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
629 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
630 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
631 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
632 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
633 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
634 BBPunfix(sr->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
635 if (sd) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
636 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
637 if (re) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
638 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
639 if (err) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
640 throw(MAL, "pcre.rematchjoin", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
641 "error with regular expression: %s", err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
642 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
643 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
644 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
645 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
646 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
647 const bat *sl, const bat *sr, const bit *nil_matches, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
648 const lng *estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
649 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
650 return do_join(lres, rres, *lid, *rid, "", *sl, *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
651 *nil_matches, *estimate); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
652 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
653 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
654 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
655 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
656 const char **flags, const bat *sl, const bat *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
657 const bit *nil_matches, const lng *estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
658 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
659 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
660 *nil_matches, *estimate); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
661 } |
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
662 |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
663 #include "mel.h" |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
664 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
/* SQL statements that declare the two rematch FILTER FUNCTIONs (with
 * and without a flags argument); both name regexp.rematch as their
 * external implementation.  One statement per source line. */
static char regexp_sql[] =
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING) EXTERNAL NAME regexp.rematch; "
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING) EXTERNAL NAME regexp.rematch;";
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
669 |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
670 static mel_func regexp_init_funcs[] = { |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
671 command("regexp", "rematch", regexpmatch, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
672 "Return true when the value 'val' matches the regular expression 'pat'", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
673 args(1,3, arg("",bit),arg("val",str),arg("pat",str))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
674 command("regexp", "rematchselect", regexpmatchselect, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
675 "Return the list of matches in 'val' that match the regular expression 'pat'", |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
676 args(1,5, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("anti",bit))), |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
677 command("regexp", "rematchjoin", regexpmatchjoin, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
678 "Return the matching pairs from the 'val' and 'pat' columns", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
679 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
680 command("batregexp", "rematch", regexpmatchbulk, false, |
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
681 "Return a BAT with true for match and false for no match", |
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
682 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))), |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
683 command("regexp", "rematch", regexpmatchf, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
684 "Return true when the value 'val' matches the regular expression 'pat'", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
685 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
686 command("regexp", "rematchselect", regexpmatchfselect, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
687 "Return the list of matches in 'val' that match the regular expression 'pat'", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
688 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
689 command("regexp", "rematchjoin", regexpmatchfjoin, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
690 "Return the matching pairs from the 'val' and 'pat'\ncolumns", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
691 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
692 command("batregexp", "rematch", regexpmatchfbulk, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
693 "Return a BAT with true for match and false for no match", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
694 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
695 { .imp=NULL } /* sentinel */ |
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
696 }; |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
697 |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
698 #include "mal_import.h" |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
699 #include "sql_import.h" |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
/* MSVC-only setup preceding the library startup function.
 * `#undef read` removes any macro named `read` so that the bare word can be
 * used as the attribute argument of the pragma; the pragma then declares the
 * ".CRT$XCU" section with the `read` attribute.
 * NOTE(review): presumably LIB_STARTUP_FUNC places its initializer in this
 * section on Windows -- confirm against the macro definition, which is not
 * visible here. */
700 #ifdef _MSC_VER |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
701 #undef read |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
702 #pragma section(".CRT$XCU",read) |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
703 #endif |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
/* Startup function for the regexp extension, defined through the
 * LIB_STARTUP_FUNC macro (its expansion is not visible in this file).
 * It performs two registrations, in this order:
 *   1. mal_module("regexp", NULL, regexp_init_funcs) -- hands the
 *      regexp_init_funcs table to MAL under the module name "regexp";
 *   2. sql_register("regexp", regexp_sql) -- registers regexp_sql under the
 *      name "regexp".
 * Neither call's result is inspected here.
 * NOTE(review): presumably this runs automatically when the library is
 * loaded -- confirm against the LIB_STARTUP_FUNC definition. */
704 LIB_STARTUP_FUNC(init_regexp) |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
705 { |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
706 mal_module("regexp", NULL, regexp_init_funcs); |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
707 sql_register("regexp", regexp_sql); |
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
708 } |