annotate regexp/regexp.c @ 55:68263b10998e

Updated: added bat_iterator_end.
author Sjoerd Mullender <sjoerd@acm.org>
date Wed, 26 Jan 2022 14:59:05 +0100 (2022-01-26)
parents da896864dbbd
children 8122094c79b1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
1 /* This Source Code Form is subject to the terms of the Mozilla Public
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
4 *
41
da896864dbbd Update copyright year.
Sjoerd Mullender <sjoerd@acm.org>
parents: 40
diff changeset
5 * Copyright 2018-2021 MonetDB B.V.
16
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
6 */
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
7
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
8 /* monetdb_config.h must be included as the first include file */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
9 #include <monetdb_config.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
10
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
11 /* mal_exception.h actually contains everything we need */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
12 #include <mal_exception.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
13
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
14 /* for the candidate iterator */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
15 #include <gdk_cand.h>
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
16
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
17 /* system include files */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
18 #include <string.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
19
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
20 /* we use the PCRE library to do regular expression matching */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
21 #include <pcre.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
22
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
23 static int
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
24 parseflags(const char *flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
25 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
26 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
27
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
28 if (flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
29 while (*flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
30 switch (*flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
31 case 'i': /* case insensitive */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
32 options |= PCRE_CASELESS;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
33 break;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
34 case 'x': /* extended regular expressions */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
35 options |= PCRE_EXTENDED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
36 break;
31
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
37 case 'm': /* multiline matching */
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
38 options |= PCRE_MULTILINE | PCRE_DOTALL;;
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
39 break;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
40 default:
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
41 return -1; /* indicate there was an error */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
42 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
43 flags++;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
44 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
45 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
46 return options;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
47 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
48
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
49 static char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
50 do_match(bit *ret, const char *val, const char *pat, const char *flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
51 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
52 const char *err = NULL;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
53 int options;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
54 int pos = 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
55 pcre *re;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
56
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
57 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
58 if (strNil(val) || strNil(pat) || strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
59 /* special case for NIL inputs: NILs don't match anything */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
60 *ret = 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
61 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
62 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
63
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
64 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
65 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
66 throw(MAL, "regexp.rematch", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
67
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
68 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
69 if (re == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
70 throw(MAL, "regexp.rematch",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
71 "compilation of regular expression (%s) failed at %d with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
72 pat, pos, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
73 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
74 pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
75 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
76 if (pos < PCRE_ERROR_NOMATCH) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
77 throw(MAL, "regexp.rematch",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
78 "matching of regular expression (%s) failed with %d",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
79 pat, pos);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
80 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
81 *ret = pos >= 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
82 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
83 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
84
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
85 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
86 regexpmatch(bit *ret, const char **val, const char **pat)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
87 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
88 return do_match(ret, *val, *pat, "");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
89 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
90
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
91 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
92 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
93 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
94 return do_match(ret, *val, *pat, *flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
95 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
96
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
97 static char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
98 do_matchbulk(bat *ret, bat bid, const char *pat, const char *flags)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
99 {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
100 BAT *b; /* input BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
101 BATiter bi; /* helper to loop through values */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
102 BAT *bn; /* result BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
103 bit *outp; /* pointer through which we add to result */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
104 BUN start, end; /* iteration variables */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
105
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
106 const char *err = NULL; /* error message from PCRE library */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
107 int pos = 0; /* error position from PCRE library */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
108 int options; /* PCRE options */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
109 pcre *re; /* compiled regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
110 pcre_extra *sd; /* studied regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
111
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
112 /* from the BAT ID we need to get the BAT descriptor, making
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
113 * sure that the data of the BAT is loaded into memory */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
114 if ((b = BATdescriptor(bid)) == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
115 throw(MAL, "batregexp.rematch", RUNTIME_OBJECT_MISSING);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
116 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
117 /* check that the BAT has the expected type: we expect str or
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
118 * something compatible with str (if we only want str, we need
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
119 * to compare b->ttype with TYPE_str and not use ATOMstorage).
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
120 * Note, the MAL interpreter will only call this function with
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
121 * a str BAT because that is the only interface that is
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
122 * defined in the MAL file, so this check is superfluous. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
123 if (ATOMstorage(b->ttype) != TYPE_str) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
124 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
125 throw(MAL, "batregexp.rematch", SEMANTIC_TYPE_MISMATCH);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
126 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
127
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
128 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
129 if (strNil(pat) || strNil(flags)) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
130 /* no matches when the pattern or the flags is NIL
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
131 * we return an a BAT with all NIL values */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
132 bit f = bit_nil;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
133 if ((bn = BATconstant(b->hseqbase, TYPE_bit, &f, BATcount(b), TRANSIENT)) == NULL)
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
134 throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
135 *ret = bn->batCacheid;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
136 BBPkeepref(*ret);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
137 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
138 return MAL_SUCCEED;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
139 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
140 options = parseflags(flags);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
141 if (options == -1) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
142 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
143 throw(MAL, "batregexp.rematch", "bad flag character");
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
144 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
145
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
146 /* allocate a result BAT; the capacity we ask for is the size
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
147 * of the input BAT since we produce a value for each input
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
148 * value */
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
149 bn = COLnew(b->hseqbase, TYPE_bit, BATcount(b), TRANSIENT);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
150 if (bn == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
151 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
152 throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
153 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
154
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
155 /* Position outp at the start of the result array.
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
156 * We know the array is large enough even if every value were
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
157 * to match, so we don't need to check for that. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
158 outp = (bit *) Tloc(bn, 0);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
159
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
160 /* compile the regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
161 re = pcre_compile(pat, options, &err, &pos, NULL);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
162 if (re == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
163 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
164 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
165 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
166 "compilation of regular expression (%s) failed at %d with %s",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
167 pat, pos, err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
168 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
169 /* invest in study of the r.e. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
170 sd = pcre_study(re, 0, &err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
171 if (err != NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
172 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
173 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
174 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
175 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
176 "study of regular expression (%s) failed with %s",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
177 pat, err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
178 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
179
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
180 /* now, start and end are the limits in b that we need to look
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
181 * at, and if set, cand and candend are the beginning and end
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
182 * of the list of OIDs of b that we need to consider */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
183
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
184 bi = bat_iterator(b);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
185
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
186 /* we will change these if we add a NIL */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
187 bn->tnil = false;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
188 bn->tnonil = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
189 for (start = 0, end = BATcount(b); start < end; start++) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
190 const char *val = BUNtvar(bi, start);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
191 /* nil values never match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
192 if (strNil(val)) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
193 *outp++ = bit_nil;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
194 bn->tnil = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
195 bn->tnonil = false;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
196 } else {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
197 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
32
25cd8af6fa82 TEMPORARILY relax error condition
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 31
diff changeset
198 if (pos < 0 && pos != PCRE_ERROR_NOMATCH && pos != PCRE_ERROR_BADUTF8) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
199 /* error during processing */
55
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
200 bat_iterator_end(&bi);
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
201 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
202 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
203 pcre_free_study(sd);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
204 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
205 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
206 "matching of regular expression (%s) failed with %d",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
207 pat, pos);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
208 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
209 *outp++ = pos >= 0; /* TRUE if match, FALSE if not */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
210 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
211 }
55
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
212 bat_iterator_end(&bi);
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
213
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
214 /* set properties and size of result BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
215 BATsetcount(bn, BATcount(b));
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
216
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
217 if (BATcount(bn) > 1) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
218 /* if more than 1 result, we cannot claim it is sorted, reverse sorted, or key */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
219 bn->tsorted = false; /* probably not sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
220 bn->trevsorted = false; /* probably not reverse sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
221 bn->tkey = false; /* probably not key */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
222 } else {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
223 /* if empty or a single result, it is sorted, reverse
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
224 * sorted, and key */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
225 bn->tsorted = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
226 bn->trevsorted = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
227 bn->tkey = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
228 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
229 bn->tnosorted = 0; /* we don't know for sure */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
230 bn->tnorevsorted = 0; /* we don't know for sure */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
231 bn->tnokey[0] = bn->tnokey[1] = 0;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
232
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
233 /* we're done with b and re */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
234 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
235 pcre_free_study(sd);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
236 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
237
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
238 *ret = bn->batCacheid;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
239 BBPkeepref(*ret);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
240 return MAL_SUCCEED;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
241 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
242
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
243 static char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
244 regexpmatchbulk(bat *ret, const bat *bid, const char **pat)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
245 {
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
246 return do_matchbulk(ret, *bid, *pat, "");
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
247 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
248
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
249
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
250 static char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
251 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
252 {
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
253 return do_matchbulk(ret, *bid, *pat, *flags);
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
254 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
255
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
256 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
257 do_select(bat *ret, bat bid, bat sid, const char *pat, const char *flags, bit anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
258 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
259 BAT *b, *s = NULL; /* input BAT and optional candidate list */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
260
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
261 struct canditer ci; /* candidate iterator */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
262 BATiter bi; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
263
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
264 BAT *bn; /* result BAT */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
265 oid *outp; /* pointer through which we add to result */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
266
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
267 const char *err = NULL; /* error message from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
268 int pos = 0; /* error position from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
269 int options; /* PCRE options */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
270 pcre *re; /* compiled regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
271 pcre_extra *sd; /* studied regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
272
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
273 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
274 if (strNil(pat) || strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
275 /* no matches when the pattern or the flags is NIL
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
276 * we return an empty BAT of the correct type */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
277 if ((bn = BATdense(0, 0, 0)) == NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
278 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
279 *ret = bn->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
280 BBPkeepref(*ret);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
281 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
282 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
283 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
284 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
285 throw(MAL, "regexp.rematchselect", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
286
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
287 /* from the BAT IDs we need to get the BAT descriptors, making
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
288 * sure that the data of the BATs are loaded into memory */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
289 if ((b = BATdescriptor(bid)) == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
290 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
291 }
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
292 /* check that the BAT has the expected type: we expect str or
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
293 * something compatible with str (if we only want str, we need
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
294 * to compare b->ttype with TYPE_str and not use ATOMstorage).
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
295 * Note, the MAL interpreter will only call this function with
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
296 * a str BAT because that is the only interface that is
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
297 * defined in the MAL file, so this check is superfluous. */
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
298 if (ATOMstorage(b->ttype) != TYPE_str) {
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
299 BBPunfix(b->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
300 throw(MAL, "regexp.rematchselect", SEMANTIC_TYPE_MISMATCH);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
301 }
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
302 if (!is_bat_nil(sid) &&
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
303 (s = BATdescriptor(sid)) == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
304 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
305 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
306 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
307
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
308 if (canditer_init(&ci, b, s) == 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
309 /* trivially empty result */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
310 BBPunfix(b->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
311 if (s)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
312 BBPunfix(s->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
313 bn = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
314 *ret = bn->batCacheid;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
315 BBPkeepref(*ret);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
316 return MAL_SUCCEED;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
317 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
318
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
319 /* allocate a result BAT; the capacity we ask for is the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
320 * maximum potential result size (i.e. the size of the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
321 * candidate list if there is one, else the size of the input
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
322 * BAT b) */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
323 bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
324 if (bn == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
325 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
326 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
327 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
328 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
329 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
330
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
331 /* Position outp at the start of the result array.
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
332 * We know the array is large enough even if every value were
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
333 * to match, so we don't need to check for that. */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
334 outp = (oid *) Tloc(bn, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
335
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
336 /* compile the regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
337 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
338 if (re == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
339 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
340 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
341 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
342 BBPreclaim(bn);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
343 throw(MAL, "regexp.rematchselect",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
344 "compilation of regular expression (%s) failed at %d with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
345 pat, pos, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
346 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
347 /* invest in study of the r.e. */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
348 sd = pcre_study(re, 0, &err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
349 if (err != NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
350 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
351 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
352 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
353 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
354 BBPreclaim(bn);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
355 throw(MAL, "regexp.rematchselect",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
356 "study of regular expression (%s) failed with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
357 pat, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
358 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
359
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
360 bi = bat_iterator(b);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
361
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
362 /* iterate through the candidates */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
363 for (BUN i = 0; i < ci.ncand; i++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
364 /* get the next candidate */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
365 oid o = canditer_next(&ci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
366
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
367 /* the candidate list has a list of OIDs which are
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
368 * relative to b->hseqbase, we need to convert that to
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
369 * an index relative to the start of the array in the
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
370 * (tail) heap */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
371 const char *val = BUNtvar(bi, o - b->hseqbase);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
372
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
373 /* nil values never match */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
374 if (!strNil(val)) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
375 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
376 if (pos >= 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
377 /* regular expression matched */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
378 if (!anti)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
379 *outp++ = o;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
380 } else if (pos == PCRE_ERROR_NOMATCH) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
381 /* regular expression didn't match */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
382 if (anti)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
383 *outp++ = o;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
384 } else {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
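385 /* error during processing; NOTE(review): s is NULL when no candidate list was given, so the BBPunfix(s->batCacheid) below needs an "if (s)" guard like the other exit paths */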
55
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
386 bat_iterator_end(&bi);
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
387 BBPunfix(b->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
388 BBPunfix(s->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
389 BBPreclaim(bn);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
390 pcre_free_study(sd);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
391 pcre_free(re);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
392 throw(MAL, "regexp.rematchselect",
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
393 "matching of regular expression (%s) failed with %d",
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
394 pat, pos);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
395 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
396 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
397 }
55
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
398 /* we're done with the BAT iterator */
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
399 bat_iterator_end(&bi);
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
400 /* we're done with b, s, and re */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
401 BBPunfix(b->batCacheid);
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
402 if (s)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
403 BBPunfix(s->batCacheid);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
404 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
405 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
406
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
407 /* set properties and size of result BAT */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
408 BATsetcount(bn, (BUN) (outp - (oid *) Tloc(bn, 0))); /* size is pointer difference */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
409 /* the result BAT of a select operation MUST be sorted, and
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
410 * all values MUST be distinct (i.e. it is a candidate list);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
411 * due to the way we created the result, we know this is the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
412 * case */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
413 bn->tsorted = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
414 bn->tnosorted = 0;
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
415 bn->tkey = true;
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
416 bn->tseqbase = oid_nil;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
417 if (BATcount(bn) > 1) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
418 /* if more than 1 result, it is not reverse sorted */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
419 bn->trevsorted = false;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
420 bn->tnorevsorted = 1; /* index 1 is larger than index 0 */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
421 /* the BAT is dense if the type is TYPE_oid, the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
422 * values are sorted in ascending order, they are all
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
423 * distinct, and they form a consecutive sequence (no
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
424 * missing values); we only need to check the last
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
425 * condition, which we do by checking the difference
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
426 * between the first and last values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
427 outp = (oid *) Tloc(bn, 0); /* pointer to start */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
428 if (outp[BATcount(bn) - 1] - outp[0] == BATcount(bn) - 1)
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
429 bn->tseqbase = outp[0];
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
430 } else {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
431 /* if empty or a single result, it is reverse sorted
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
432 * and dense */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
433 bn->trevsorted = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
434 bn->tnorevsorted = 0;
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
435 bn->tseqbase = BATcount(bn) == 0 ? 0 : *(oid *) Tloc(bn, 0);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
436 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
437 /* there are no NIL values in the result */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
438 bn->tnil = false;
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
439 bn->tnonil = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
440
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
441 *ret = bn->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
442 BBPkeepref(*ret);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
443 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
444 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
445
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
446 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
447 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
448 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
449 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
450 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
451
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
452
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
453 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
454 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
455 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
456 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
457 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
458
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
459 static char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
460 do_join(bat *lres, bat *rres, bat lid, bat rid, const char *flags,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
461 bat slid, bat srid, bit nil_matches, lng estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
462 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
463 BAT *l, *r, *sl = NULL, *sr = NULL; /* input BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
464 BAT *bn1, *bn2; /* output BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
465
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
466 struct canditer lci; /* candidate iterator for l */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
467 struct canditer rci; /* candidate iterator for r */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
468
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
469 BATiter li; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
470 BATiter ri; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
471 oid ro; /* right OID being matched */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
472
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
473 const char *pat; /* the regular expression being matched */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
474 const char *err; /* error message from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
475 int pos = 0; /* error position from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
476 int options; /* PCRE options */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
477 pcre *re; /* compiled regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
478 pcre_extra *sd; /* studied regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
479
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
480 (void) nil_matches; /* only relevant for equi-join */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
481
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
482 if (strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
483 /* no matches when the flags is NIL
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
484 * we return two empty BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
485 bn1 = BATdense(0, 0, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
486 bn2 = BATdense(0, 0, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
487 if (bn1 == NULL || bn2 == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
488 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
489 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
490 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
491 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
492 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
493 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
494 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
495 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
496 throw(MAL, "regexp.rematchjoin", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
497
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
498 l = BATdescriptor(lid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
499 r = BATdescriptor(rid);
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
500 if (!is_bat_nil(slid))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
501 sl = BATdescriptor(slid);
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
502 if (!is_bat_nil(srid))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
503 sr = BATdescriptor(srid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
504 if (l == NULL || r == NULL ||
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
505 (!is_bat_nil(slid) && sl == NULL) ||
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
506 (!is_bat_nil(srid) && sr == NULL)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
507 /* one of the calls to BATdescriptor failed */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
508 if (l)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
509 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
510 if (r)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
511 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
512 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
513 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
514 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
515 BBPunfix(sr->batCacheid);
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
516 throw(MAL, "regexp.rematchjoin", RUNTIME_OBJECT_MISSING);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
517 }
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
518 /* check that the BATs have the expected type: we expect str
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
519 * or something compatible with str for l (the values) and str
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
520 * for r (the patterns).
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
521 * Note, the MAL interpreter will only call this function with
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
522 * a pair of str BATs because that is the only interface that
27
07a6ef1fde8e Fixed syntax err in comment
Jennie Zhang <y.zhang@cwi.nl>
parents: 26
diff changeset
523 * is defined in the MAL file, so this check is superfluous.
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
524 */
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
525 if (ATOMstorage(l->ttype) != TYPE_str || r->ttype != TYPE_str) {
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
526 BBPunfix(l->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
527 BBPunfix(r->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
528 if (sl)
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
529 BBPunfix(sl->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
530 if (sr)
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
531 BBPunfix(sr->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
532 throw(MAL, "regexp.rematchjoin", SEMANTIC_TYPE_MISMATCH);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
533 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
534
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
535 if (canditer_init(&lci, l, sl) == 0 ||
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
536 canditer_init(&rci, r, sr) == 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
537 /* if either side is empty (or no candidates) the
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
538 * result is empty */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
539 BBPunfix(l->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
540 BBPunfix(r->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
541 if (sl)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
542 BBPunfix(sl->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
543 if (sr)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
544 BBPunfix(sr->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
545 bn1 = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
546 bn2 = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
547 if (bn1 == NULL || bn2 == NULL) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
548 BBPreclaim(bn1);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
549 BBPreclaim(bn2);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
550 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
551 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
552 return MAL_SUCCEED;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
553 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
554
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
555 /* if there is no valid estimate, use the size of the left
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
556 * input as size estimate */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
557 if (is_lng_nil(estimate) || estimate == 0)
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
558 estimate = lci.ncand;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
559
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
560 /* create the output BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
561 bn1 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
562 bn2 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
563 if (bn1 == NULL || bn2 == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
564 /* one of the calls to COLnew failed
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
565 * note, BBPreclaim checks whether its argument is NULL */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
566 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
567 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
568 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
569 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
570 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
571 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
572 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
573 BBPunfix(sr->batCacheid);
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
574 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
575 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
576
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
577 li = bat_iterator(l);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
578 ri = bat_iterator(r);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
579
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
580 for (BUN i = 0; i < rci.ncand; i++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
581 ro = canditer_next(&rci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
582 pat = BUNtvar(ri, ro - r->hseqbase);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
583
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
584 /* nil regular expressions don't match (despite
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
585 * nil_matches) */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
586 if (strNil(pat))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
587 continue;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
588 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
589 sd = NULL;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
590 if (re == NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
591 goto bailout;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
592 sd = pcre_study(re, 0, &err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
593 if (err != NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
594 goto bailout;
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
595
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
596 /* inner loop: reset iterator, then iterate over it */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
597 canditer_reset(&lci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
598 for (BUN j = 0; j < lci.ncand; j++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
599 oid lo = canditer_next(&lci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
600 const char *val = BUNtvar(li, lo - l->hseqbase);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
601 if (strNil(val))
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
602 continue;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
603 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
604 if (pos >= 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
605 /* regular expression matched */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
606 if (BUNappend(bn1, &lo, false) != GDK_SUCCEED ||
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
607 BUNappend(bn2, &ro, false) != GDK_SUCCEED)
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
608 goto bailout;
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
609 } else if (pos != PCRE_ERROR_NOMATCH) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
610 /* error during processing */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
611 err = "matching of regular expression failed";
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
612 goto bailout;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
613 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
614 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
615 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
616 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
617 }
55
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
618 bat_iterator_end(&li);
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
619 bat_iterator_end(&ri);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
620
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
621 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
622 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
623 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
624 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
625 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
626 BBPunfix(sr->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
627 *lres = bn1->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
628 *rres = bn2->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
629 BBPkeepref(*lres);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
630 BBPkeepref(*rres);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
631 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
632
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
633 bailout:
55
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
634 bat_iterator_end(&li);
68263b10998e Updated: added bat_iterator_end.
Sjoerd Mullender <sjoerd@acm.org>
parents: 41
diff changeset
635 bat_iterator_end(&ri);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
636 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
637 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
638 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
639 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
640 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
641 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
642 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
643 BBPunfix(sr->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
644 if (sd)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
645 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
646 if (re)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
647 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
648 if (err)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
649 throw(MAL, "pcre.rematchjoin",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
650 "error with regular expression: %s", err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
651 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
652 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
653
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
654 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
655 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
656 const bat *sl, const bat *sr, const bit *nil_matches,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
657 const lng *estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
658 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
659 return do_join(lres, rres, *lid, *rid, "", *sl, *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
660 *nil_matches, *estimate);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
661 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
662
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
663 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
664 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
665 const char **flags, const bat *sl, const bat *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
666 const bit *nil_matches, const lng *estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
667 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
668 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
669 *nil_matches, *estimate);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
670 }
37
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
671
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
672 #include "mel.h"
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
673
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
/* SQL text handed to sql_register() by the library start-up function.
 * It declares two overloads of the filter function rematch -- one
 * without and one with a flags argument -- both with the external
 * name regexp.rematch. */
static char regexp_sql[] =
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING) "
	"EXTERNAL NAME regexp.rematch; "
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING) "
	"EXTERNAL NAME regexp.rematch;";
37
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
678
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
679 static mel_func regexp_init_funcs[] = {
39
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
680 command("regexp", "rematch", regexpmatch, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
681 "Return true when the value 'val' matches the regular expression 'pat'",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
682 args(1,3, arg("",bit),arg("val",str),arg("pat",str))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
683 command("regexp", "rematchselect", regexpmatchselect, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
684 "Return the list of matches in 'val' that match the regular expression 'pat'",
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
685 args(1,5, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("anti",bit))),
39
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
686 command("regexp", "rematchjoin", regexpmatchjoin, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
687 "Return the matching pairs from the 'val' and 'pat' columns",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
688 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
689 command("batregexp", "rematch", regexpmatchbulk, false,
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
690 "Return a BAT with true for match and false for no match",
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
691 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))),
39
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
692 command("regexp", "rematch", regexpmatchf, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
693 "Return true when the value 'val' matches the regular expression 'pat'",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
694 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
695 command("regexp", "rematchselect", regexpmatchfselect, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
696 "Return the list of matches in 'val' that match the regular expression 'pat'",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
697 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
698 command("regexp", "rematchjoin", regexpmatchfjoin, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
699 "Return the matching pairs from the 'val' and 'pat'\ncolumns",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
700 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
701 command("batregexp", "rematch", regexpmatchfbulk, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
702 "Return a BAT with true for match and false for no match",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
703 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
704 { .imp=NULL } /* sentinel */
37
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
705 };
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
706
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
707 #include "mal_import.h"
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
708 #include "sql_import.h"
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
709 #ifdef _MSC_VER
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
710 #undef read
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
711 #pragma section(".CRT$XCU",read)
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
712 #endif
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
/* Start-up function of the regexp library.
 *
 * It first registers the MAL function table regexp_init_funcs under
 * the module name "regexp" (NULL is passed as the second argument of
 * mal_module()), and then registers the SQL text in regexp_sql under
 * the same name.  How and when this function is invoked is decided
 * by the LIB_STARTUP_FUNC macro -- presumably when the library is
 * loaded; confirm against the macro's definition. */
LIB_STARTUP_FUNC(init_regexp)
{
	/* make the MAL implementations known first ... */
	mal_module("regexp", NULL, regexp_init_funcs);
	/* ... then the SQL declarations that refer to them by name */
	sql_register("regexp", regexp_sql);
}