Mercurial > hg > MonetDB-extend
annotate regexp/regexp.c @ 55:68263b10998e
Updated: added bat_iterator_end.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Wed, 26 Jan 2022 14:59:05 +0100 (2022-01-26) |
parents | da896864dbbd |
children | 8122094c79b1 |
rev | line source |
---|---|
16 | 1 /* This Source Code Form is subject to the terms of the Mozilla Public |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
4 * | |
41 | 5 * Copyright 2018-2021 MonetDB B.V. |
16 | 6 */ |
7 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
8 /* monetdb_config.h must be included as the first include file */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
9 #include <monetdb_config.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
10 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
11 /* mal_exception.h actually contains everything we need */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
12 #include <mal_exception.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
13 |
33 | 14 /* for the candidate iterator */ |
15 #include <gdk_cand.h> | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
16 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
17 /* system include files */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
18 #include <string.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
19 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
20 /* we use the PCRE library to do regular expression matching */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
21 #include <pcre.h> |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
22 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
23 static int |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
24 parseflags(const char *flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
25 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
26 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
27 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
28 if (flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
29 while (*flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
30 switch (*flags) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
31 case 'i': /* case insensitive */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
32 options |= PCRE_CASELESS; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
33 break; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
34 case 'x': /* extended regular expressions */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
35 options |= PCRE_EXTENDED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
36 break; |
31
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
37 case 'm': /* multiline matching */ |
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
38 options |= PCRE_MULTILINE | PCRE_DOTALL;; |
3510e17287e6
Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
30
diff
changeset
|
39 break; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
40 default: |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
41 return -1; /* indicate there was an error */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
42 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
43 flags++; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
44 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
45 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
46 return options; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
47 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
48 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
49 static char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
50 do_match(bit *ret, const char *val, const char *pat, const char *flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
51 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
52 const char *err = NULL; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
53 int options; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
54 int pos = 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
55 pcre *re; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
56 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
57 /* if any of the input values are nil, the result is no match */ |
33 | 58 if (strNil(val) || strNil(pat) || strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
59 /* special case for NIL inputs: NILs don't match anything */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
60 *ret = 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
61 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
62 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
63 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
64 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
65 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
66 throw(MAL, "regexp.rematch", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
67 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
68 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
69 if (re == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
70 throw(MAL, "regexp.rematch", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
71 "compilation of regular expression (%s) failed at %d with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
72 pat, pos, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
73 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
74 pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
75 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
76 if (pos < PCRE_ERROR_NOMATCH) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
77 throw(MAL, "regexp.rematch", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
78 "matching of regular expression (%s) failed with %d", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
79 pat, pos); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
80 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
81 *ret = pos >= 0; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
82 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
83 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
84 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
85 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
86 regexpmatch(bit *ret, const char **val, const char **pat) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
87 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
88 return do_match(ret, *val, *pat, ""); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
89 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
90 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
91 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
92 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
93 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
94 return do_match(ret, *val, *pat, *flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
95 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
96 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
97 static char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
98 do_matchbulk(bat *ret, bat bid, const char *pat, const char *flags) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
99 { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
100 BAT *b; /* input BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
101 BATiter bi; /* helper to loop through values */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
102 BAT *bn; /* result BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
103 bit *outp; /* pointer through which we add to result */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
104 BUN start, end; /* iteration variables */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
105 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
106 const char *err = NULL; /* error message from PCRE library */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
107 int pos = 0; /* error position from PCRE library */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
108 int options; /* PCRE options */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
109 pcre *re; /* compiled regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
110 pcre_extra *sd; /* studied regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
111 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
112 /* from the BAT ID we need to get the BAT descriptor, making |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
113 * sure that the data of the BAT is loaded into memory */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
114 if ((b = BATdescriptor(bid)) == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
115 throw(MAL, "batregexp.rematch", RUNTIME_OBJECT_MISSING); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
116 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
117 /* check that the BAT has the expected type: we expect str or |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
118 * something compatible with str (if we only want str, we need |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
119 * to compare b->ttype with TYPE_str and not use ATOMstorage). |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
120 * Note, the MAL interpreter will only call this function with |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
121 * a str BAT because that is the only interface that is |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
122 * defined in the MAL file, so this check is superfluous. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
123 if (ATOMstorage(b->ttype) != TYPE_str) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
124 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
125 throw(MAL, "batregexp.rematch", SEMANTIC_TYPE_MISMATCH); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
126 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
127 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
128 /* if the pattern or the flags are nil, every result is nil */ |
33 | 129 if (strNil(pat) || strNil(flags)) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
130 /* no matches when the pattern or the flags is NIL |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
131 * we return a BAT with all NIL values */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
132 bit f = bit_nil; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
133 if ((bn = BATconstant(b->hseqbase, TYPE_bit, &f, BATcount(b), TRANSIENT)) == NULL) |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
134 throw(MAL, "batregexp.rematch", GDK_EXCEPTION); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
135 *ret = bn->batCacheid; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
136 BBPkeepref(*ret); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
137 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
138 return MAL_SUCCEED; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
139 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
140 options = parseflags(flags); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
141 if (options == -1) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
142 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
143 throw(MAL, "batregexp.rematch", "bad flag character"); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
144 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
145 |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
146 /* allocate a result BAT; the capacity we ask for is the size |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
147 * of the input BAT since we produce a value for each input |
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
148 * value */ |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
149 bn = COLnew(b->hseqbase, TYPE_bit, BATcount(b), TRANSIENT); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
150 if (bn == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
151 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
152 throw(MAL, "batregexp.rematch", GDK_EXCEPTION); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
153 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
154 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
155 /* Position outp at the start of the result array. |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
156 * We know the array is large enough even if every value were |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
157 * to match, so we don't need to check for that. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
158 outp = (bit *) Tloc(bn, 0); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
159 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
160 /* compile the regular expression */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
161 re = pcre_compile(pat, options, &err, &pos, NULL); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
162 if (re == NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
163 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
164 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
165 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
166 "compilation of regular expression (%s) failed at %d with %s", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
167 pat, pos, err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
168 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
169 /* invest in study of the r.e. */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
170 sd = pcre_study(re, 0, &err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
171 if (err != NULL) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
172 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
173 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
174 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
175 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
176 "study of regular expression (%s) failed with %s", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
177 pat, err); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
178 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
179 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
180 /* start and end (set in the loop below) are the limits in b that |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
181 * we iterate over; no candidate list is used here, so every |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
182 * value of b is considered */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
183 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
184 bi = bat_iterator(b); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
185 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
186 /* we will change these if we add a NIL */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
187 bn->tnil = false; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
188 bn->tnonil = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
189 for (start = 0, end = BATcount(b); start < end; start++) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
190 const char *val = BUNtvar(bi, start); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
191 /* nil input values yield a nil result */ |
33 | 192 if (strNil(val)) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
193 *outp++ = bit_nil; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
194 bn->tnil = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
195 bn->tnonil = false; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
196 } else { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
197 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); |
32
25cd8af6fa82
TEMPORARILY relax error condition
Aris Koning <aris.koning@monetdbsolutions.com>
parents:
31
diff
changeset
|
198 if (pos < 0 && pos != PCRE_ERROR_NOMATCH && pos != PCRE_ERROR_BADUTF8) { |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
199 /* error during processing */ |
55
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
200 bat_iterator_end(&bi); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
201 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
202 BBPreclaim(bn); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
203 pcre_free_study(sd); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
204 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
205 throw(MAL, "batregexp.rematch", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
206 "matching of regular expression (%s) failed with %d", |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
207 pat, pos); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
208 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
209 *outp++ = pos >= 0; /* TRUE if match, FALSE if not */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
210 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
211 } |
55
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
212 bat_iterator_end(&bi); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
213 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
214 /* set properties and size of result BAT */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
215 BATsetcount(bn, BATcount(b)); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
216 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
217 if (BATcount(bn) > 1) { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
218 /* if more than 1 result, it is not reverse sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
219 bn->tsorted = false; /* probably not sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
220 bn->trevsorted = false; /* probably not reverse sorted */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
221 bn->tkey = false; /* probably not key */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
222 } else { |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
223 /* if empty or a single result, it is sorted, reverse |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
224 * sorted, and key */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
225 bn->tsorted = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
226 bn->trevsorted = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
227 bn->tkey = true; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
228 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
229 bn->tnosorted = 0; /* we don't know for sure */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
230 bn->tnorevsorted = 0; /* we don't know for sure */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
231 bn->tnokey[0] = bn->tnokey[1] = 0; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
232 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
233 /* we're done with b and re */ |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
234 BBPunfix(b->batCacheid); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
235 pcre_free_study(sd); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
236 pcre_free(re); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
237 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
238 *ret = bn->batCacheid; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
239 BBPkeepref(*ret); |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
240 return MAL_SUCCEED; |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
241 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
242 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
243 static char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
244 regexpmatchbulk(bat *ret, const bat *bid, const char **pat) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
245 { |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
246 return do_matchbulk(ret, *bid, *pat, ""); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
247 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
248 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
249 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
250 static char * |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
251 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags) |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
252 { |
30
543dccbc169b
Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents:
29
diff
changeset
|
253 return do_matchbulk(ret, *bid, *pat, *flags); |
29
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
254 } |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
255 |
e44cffee8312
Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents:
28
diff
changeset
|
256 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
257 do_select(bat *ret, bat bid, bat sid, const char *pat, const char *flags, bit anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
258 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
259 BAT *b, *s = NULL; /* input BAT and optional candidate list */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
260 |
33 | 261 struct canditer ci; /* candidate iterator */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
262 BATiter bi; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
263 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
264 BAT *bn; /* result BAT */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
265 oid *outp; /* pointer through which we add to result */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
266 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
267 const char *err = NULL; /* error message from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
268 int pos = 0; /* error position from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
269 int options; /* PCRE options */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
270 pcre *re; /* compiled regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
271 pcre_extra *sd; /* studied regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
272 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
273 /* if any of the input values are nil, the result is no match */ |
33 | 274 if (strNil(pat) || strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
275 /* no matches when the pattern or the flags is NIL |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
276 * we return an empty BAT of the correct type */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
277 if ((bn = BATdense(0, 0, 0)) == NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
278 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
279 *ret = bn->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
280 BBPkeepref(*ret); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
281 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
282 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
283 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
284 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
285 throw(MAL, "regexp.rematchselect", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
286 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
287 /* from the BAT IDs we need to get the BAT descriptors, making |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
288 * sure that the data of the BATs are loaded into memory */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
289 if ((b = BATdescriptor(bid)) == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
290 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
291 } |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
292 /* check that the BAT has the expected type: we expect str or |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
293 * something compatible with str (if we only want str, we need |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
294 * to compare b->ttype with TYPE_str and not use ATOMstorage). |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
295 * Note, the MAL interpreter will only call this function with |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
296 * a str BAT because that is the only interface that is |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
297 * defined in the MAL file, so this check is superfluous. */ |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
298 if (ATOMstorage(b->ttype) != TYPE_str) { |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
299 BBPunfix(b->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
300 throw(MAL, "regexp.rematchselect", SEMANTIC_TYPE_MISMATCH); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
301 } |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
302 if (!is_bat_nil(sid) && |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
303 (s = BATdescriptor(sid)) == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
304 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
305 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
306 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
307 |
33 | 308 if (canditer_init(&ci, b, s) == 0) { |
309 /* trivially empty result */ | |
310 BBPunfix(b->batCacheid); | |
311 if (s) | |
312 BBPunfix(s->batCacheid); | |
313 bn = BATdense(0, 0, 0); | |
314 *ret = bn->batCacheid; | |
315 BBPkeepref(*ret); | |
316 return MAL_SUCCEED; | |
317 } | |
318 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
319 /* allocate a result BAT; the capacity we ask for is the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
320 * maximum potential result size (i.e. the size of the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
321 * candidate list if there is one, else the size of the input |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
322 * BAT b) */ |
33 | 323 bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
324 if (bn == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
325 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
326 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
327 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
328 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
329 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
330 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
331 /* Position outp at the start of the result array. |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
332 * We know the array is large enough even if every value were |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
333 * to match, so we don't need to check for that. */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
334 outp = (oid *) Tloc(bn, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
335 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
336 /* compile the regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
337 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
338 if (re == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
339 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
340 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
341 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
342 BBPreclaim(bn); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
343 throw(MAL, "regexp.rematchselect", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
344 "compilation of regular expression (%s) failed at %d with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
345 pat, pos, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
346 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
347 /* invest in study of the r.e. */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
348 sd = pcre_study(re, 0, &err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
349 if (err != NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
350 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
351 BBPunfix(b->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
352 if (s) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
353 BBPunfix(s->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
354 BBPreclaim(bn); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
355 throw(MAL, "regexp.rematchselect", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
356 "study of regular expression (%s) failed with %s", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
357 pat, err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
358 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
359 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
360 bi = bat_iterator(b); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
361 |
33 | 362 /* iterate through the candidates */ |
363 for (BUN i = 0; i < ci.ncand; i++) { | |
364 /* get the next candidate */ | |
365 oid o = canditer_next(&ci); | |
366 | |
367 /* the candidate list has a list of OIDs which are | |
368 * relative to b->hseqbase, we need to convert that to | |
369 * an index relative to the start of the array in the | |
370 * (tail) heap */ | |
371 const char *val = BUNtvar(bi, o - b->hseqbase); | |
372 | |
373 /* nil values never match */ | |
374 if (!strNil(val)) { | |
375 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); | |
376 if (pos >= 0) { | |
377 /* regular expression matched */ | |
378 if (!anti) | |
379 *outp++ = o; | |
380 } else if (pos == PCRE_ERROR_NOMATCH) { | |
381 /* regular expression didn't match */ | |
382 if (anti) | |
383 *outp++ = o; | |
384 } else { | |
385 /* error during processing */ | |
55
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
386 bat_iterator_end(&bi); |
33 | 387 BBPunfix(b->batCacheid); |
388 BBPunfix(s->batCacheid); | |
389 BBPreclaim(bn); | |
390 pcre_free_study(sd); | |
391 pcre_free(re); | |
392 throw(MAL, "regexp.rematchselect", | |
393 "matching of regular expression (%s) failed with %d", | |
394 pat, pos); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
395 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
396 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
397 } |
55
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
398 /* we're done with the BAT iterator */ |
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
399 bat_iterator_end(&bi); |
33 | 400 /* we're done with b, s, and re */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
401 BBPunfix(b->batCacheid); |
33 | 402 if (s) |
403 BBPunfix(s->batCacheid); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
404 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
405 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
406 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
407 /* set properties and size of result BAT */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
408 BATsetcount(bn, (BUN) (outp - (oid *) Tloc(bn, 0))); /* size is pointer difference */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
409 /* the result BAT of a select operation MUST be sorted, and |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
410 * all values MUST be distinct (i.e. it is a candidate list); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
411 * due to the way we created the result, we know this is the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
412 * case */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
413 bn->tsorted = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
414 bn->tnosorted = 0; |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
415 bn->tkey = true; |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
416 bn->tseqbase = oid_nil; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
417 if (BATcount(bn) > 1) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
418 /* if more than 1 result, it is not reverse sorted */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
419 bn->trevsorted = false; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
420 bn->tnorevsorted = 1; /* index 1 is larger than index 0 */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
421 /* the BAT is dense if the type is TYPE_oid, the |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
422 * values are sorted in ascending order, they are all |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
423 * distinct, and they form a consecutive sequence (no |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
424 * missing values); we only need to check the last |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
425 * condition, which we do by checking the difference |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
426 * between the first and last values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
427 outp = (oid *) Tloc(bn, 0); /* pointer to start */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
428 if (outp[BATcount(bn) - 1] - outp[0] == BATcount(bn) - 1) |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
429 bn->tseqbase = outp[0]; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
430 } else { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
431 /* if empty or a single result, it is reverse sorted |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
432 * and dense */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
433 bn->trevsorted = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
434 bn->tnorevsorted = 0; |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
435 bn->tseqbase = BATcount(bn) == 0 ? 0 : *(oid *) Tloc(bn, 0); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
436 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
437 /* there are no NIL values in the result */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
438 bn->tnil = false; |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
439 bn->tnonil = true; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
440 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
441 *ret = bn->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
442 BBPkeepref(*ret); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
443 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
444 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
445 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
446 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
447 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
448 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
449 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
450 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
451 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
452 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
453 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
454 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
455 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
456 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
457 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
458 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
459 static char * |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
460 do_join(bat *lres, bat *rres, bat lid, bat rid, const char *flags, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
461 bat slid, bat srid, bit nil_matches, lng estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
462 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
463 BAT *l, *r, *sl = NULL, *sr = NULL; /* input BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
464 BAT *bn1, *bn2; /* output BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
465 |
33 | 466 struct canditer lci; /* candidate iterator for l */ |
467 struct canditer rci; /* candidate iterator for r */ | |
468 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
469 BATiter li; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
470 BATiter ri; /* helper to loop through values */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
471 oid ro; /* right OID being matched */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
472 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
473 const char *pat; /* the regular expression being matched */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
474 const char *err; /* error message from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
475 int pos = 0; /* error position from PCRE library */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
476 int options; /* PCRE options */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
477 pcre *re; /* compiled regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
478 pcre_extra *sd; /* studied regular expression */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
479 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
480 (void) nil_matches; /* only relevant for equi-join */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
481 |
33 | 482 if (strNil(flags)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
483 /* no matches when the flags is NIL |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
484 * we return two empty BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
485 bn1 = BATdense(0, 0, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
486 bn2 = BATdense(0, 0, 0); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
487 if (bn1 == NULL || bn2 == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
488 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
489 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
490 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
491 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
492 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
493 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
494 options = parseflags(flags); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
495 if (options == -1) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
496 throw(MAL, "regexp.rematchjoin", "bad flag character"); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
497 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
498 l = BATdescriptor(lid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
499 r = BATdescriptor(rid); |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
500 if (!is_bat_nil(slid)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
501 sl = BATdescriptor(slid); |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
502 if (!is_bat_nil(srid)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
503 sr = BATdescriptor(srid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
504 if (l == NULL || r == NULL || |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
505 (!is_bat_nil(slid) && sl == NULL) || |
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
506 (!is_bat_nil(srid) && sr == NULL)) { |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
507 /* one of the calls to BATdescriptor failed */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
508 if (l) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
509 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
510 if (r) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
511 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
512 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
513 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
514 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
515 BBPunfix(sr->batCacheid); |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
516 throw(MAL, "regexp.rematchjoin", RUNTIME_OBJECT_MISSING); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
517 } |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
518 /* check that the BATs have the expected type: we expect str |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
519 * or something compatible with str for l (the values) and str |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
520 * for r (the patterns). |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
521 * Note, the MAL interpreter will only call this function with |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
522 * a pair of str BATs because that is the only interface that |
27 | 523 * is defined in the MAL file, so this check is superfluous. |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
524 */ |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
525 if (ATOMstorage(l->ttype) != TYPE_str || r->ttype != TYPE_str) { |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
526 BBPunfix(l->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
527 BBPunfix(r->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
528 if (sl) |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
529 BBPunfix(sl->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
530 if (sr) |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
531 BBPunfix(sr->batCacheid); |
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
532 throw(MAL, "regexp.rematchjoin", SEMANTIC_TYPE_MISMATCH); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
533 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
534 |
33 | 535 if (canditer_init(&lci, l, sl) == 0 || |
536 canditer_init(&rci, r, sr) == 0) { | |
537 /* if either side is empty (or no candidates) the | |
538 * result is empty */ | |
539 BBPunfix(l->batCacheid); | |
540 BBPunfix(r->batCacheid); | |
541 if (sl) | |
542 BBPunfix(sl->batCacheid); | |
543 if (sr) | |
544 BBPunfix(sr->batCacheid); | |
545 bn1 = BATdense(0, 0, 0); | |
546 bn2 = BATdense(0, 0, 0); | |
547 if (bn1 == NULL || bn2 == NULL) { | |
548 BBPreclaim(bn1); | |
549 BBPreclaim(bn2); | |
550 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); | |
551 } | |
552 return MAL_SUCCEED; | |
553 } | |
554 | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
555 /* if there is no valid estimate, use the size of the left |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
556 * input as size estimate */ |
28
e925d55b369b
Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
27
diff
changeset
|
557 if (is_lng_nil(estimate) || estimate == 0) |
33 | 558 estimate = lci.ncand; |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
559 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
560 /* create the output BATs */ |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
561 bn1 = COLnew(0, TYPE_oid, estimate, TRANSIENT); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
562 bn2 = COLnew(0, TYPE_oid, estimate, TRANSIENT); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
563 if (bn1 == NULL || bn2 == NULL) { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
564 /* one of the calls to COLnew failed |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
565 * note, BBPreclaim checks whether its argument is NULL */ |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
566 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
567 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
568 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
569 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
570 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
571 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
572 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
573 BBPunfix(sr->batCacheid); |
25
f0739f6c1a43
A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents:
24
diff
changeset
|
574 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
575 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
576 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
577 li = bat_iterator(l); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
578 ri = bat_iterator(r); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
579 |
33 | 580 for (BUN i = 0; i < rci.ncand; i++) { |
581 ro = canditer_next(&rci); | |
582 pat = BUNtvar(ri, ro - r->hseqbase); | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
583 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
584 /* nil regular expressions don't match (despite |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
585 * nil_matches) */ |
33 | 586 if (strNil(pat)) |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
587 continue; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
588 re = pcre_compile(pat, options, &err, &pos, NULL); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
589 sd = NULL; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
590 if (re == NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
591 goto bailout; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
592 sd = pcre_study(re, 0, &err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
593 if (err != NULL) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
594 goto bailout; |
33 | 595 |
596 /* inner loop: reset iterator, then iterate over it */ | |
597 canditer_reset(&lci); | |
598 for (BUN j = 0; j < lci.ncand; j++) { | |
599 oid lo = canditer_next(&lci); | |
600 const char *val = BUNtvar(li, lo - l->hseqbase); | |
601 if (strNil(val)) | |
602 continue; | |
603 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0); | |
604 if (pos >= 0) { | |
605 /* regular expression matched */ | |
606 if (BUNappend(bn1, &lo, false) != GDK_SUCCEED || | |
607 BUNappend(bn2, &ro, false) != GDK_SUCCEED) | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
608 goto bailout; |
33 | 609 } else if (pos != PCRE_ERROR_NOMATCH) { |
610 /* error during processing */ | |
611 err = "matching of regular expression failed"; | |
612 goto bailout; | |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
613 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
614 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
615 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
616 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
617 } |
55
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
618 bat_iterator_end(&li); |
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
619 bat_iterator_end(&ri); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
620 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
621 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
622 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
623 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
624 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
625 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
626 BBPunfix(sr->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
627 *lres = bn1->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
628 *rres = bn2->batCacheid; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
629 BBPkeepref(*lres); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
630 BBPkeepref(*rres); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
631 return MAL_SUCCEED; |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
632 |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
633 bailout: |
55
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
634 bat_iterator_end(&li); |
68263b10998e
Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents:
41
diff
changeset
|
635 bat_iterator_end(&ri); |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
636 BBPreclaim(bn1); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
637 BBPreclaim(bn2); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
638 BBPunfix(l->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
639 BBPunfix(r->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
640 if (sl) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
641 BBPunfix(sl->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
642 if (sr) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
643 BBPunfix(sr->batCacheid); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
644 if (sd) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
645 pcre_free_study(sd); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
646 if (re) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
647 pcre_free(re); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
648 if (err) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
649 throw(MAL, "pcre.rematchjoin", |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
650 "error with regular expression: %s", err); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
651 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
652 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
653 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
654 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
655 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
656 const bat *sl, const bat *sr, const bit *nil_matches, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
657 const lng *estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
658 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
659 return do_join(lres, rres, *lid, *rid, "", *sl, *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
660 *nil_matches, *estimate); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
661 } |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
662 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
663 static char * |
15
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
664 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
665 const char **flags, const bat *sl, const bat *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
666 const bit *nil_matches, const lng *estimate) |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
667 { |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
668 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr, |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
669 *nil_matches, *estimate); |
59bbfa0096b3
Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff
changeset
|
670 } |
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
671 |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
672 #include "mel.h" |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
673 |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
/* SQL text declaring the two filter functions, with and without a
 * flags argument; both name regexp.rematch as their external
 * implementation. */
static char regexp_sql[] =
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING) "
	"EXTERNAL NAME regexp.rematch; "
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING) "
	"EXTERNAL NAME regexp.rematch;";
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
678 |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
679 static mel_func regexp_init_funcs[] = { |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
680 command("regexp", "rematch", regexpmatch, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
681 "Return true when the value 'val' matches the regular expression 'pat'", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
682 args(1,3, arg("",bit),arg("val",str),arg("pat",str))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
683 command("regexp", "rematchselect", regexpmatchselect, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
684 "Return the list of matches in 'val' that match the regular expression 'pat'", |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
685 args(1,5, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("anti",bit))), |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
686 command("regexp", "rematchjoin", regexpmatchjoin, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
687 "Return the matching pairs from the 'val' and 'pat' columns", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
688 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), |
40
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
689 command("batregexp", "rematch", regexpmatchbulk, false, |
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
690 "Return a BAT with true for match and false for no match", |
e70b12c15507
Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents:
39
diff
changeset
|
691 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))), |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
692 command("regexp", "rematch", regexpmatchf, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
693 "Return true when the value 'val' matches the regular expression 'pat'", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
694 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
695 command("regexp", "rematchselect", regexpmatchfselect, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
696 "Return the list of matches in 'val' that match the regular expression 'pat'", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
697 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
698 command("regexp", "rematchjoin", regexpmatchfjoin, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
699 "Return the matching pairs from the 'val' and 'pat'\ncolumns", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
700 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
701 command("batregexp", "rematch", regexpmatchfbulk, false, |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
702 "Return a BAT with true for match and false for no match", |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
703 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))), |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
704 { .imp=NULL } /* sentinel */ |
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
705 }; |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
706 |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
707 #include "mal_import.h" |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
708 #include "sql_import.h" |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
709 #ifdef _MSC_VER /* MSVC-only preparation for the startup function that follows */ |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
710 #undef read /* the pragma on the next code line uses 'read' as a bare attribute keyword, so drop any macro of that name first */ |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
711 #pragma section(".CRT$XCU",read) /* declares the .CRT$XCU section with the read attribute; presumably LIB_STARTUP_FUNC places its initializer pointer there -- TODO confirm against the macro definition */ |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
712 #endif /* _MSC_VER */ |
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
713 LIB_STARTUP_FUNC(init_regexp) /* init_regexp: registers this module's MAL functions and SQL definitions; presumably invoked automatically when the library is loaded -- confirm against the LIB_STARTUP_FUNC macro definition */ |
39
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
714 { |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
715 mal_module("regexp", NULL, regexp_init_funcs); /* pass the regexp_init_funcs command table to mal_module under the name "regexp"; NOTE(review): meaning of the NULL second argument is not visible here */ |
4633ab41de55
Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents:
37
diff
changeset
|
716 sql_register("regexp", regexp_sql); /* pass regexp_sql to sql_register under the same name; NOTE(review): regexp_sql is defined outside this chunk -- presumably the SQL wrapper definitions, verify */ |
37
e5d2d0c9b7b3
build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents:
33
diff
changeset
|
717 } |