annotate regexp/regexp.c @ 33:a8ffdbc388ce

Ported to Jun2020 branch.
author Sjoerd Mullender <sjoerd@acm.org>
date Tue, 09 Jun 2020 10:30:35 +0200 (2020-06-09)
parents 25cd8af6fa82
children e5d2d0c9b7b3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
1 /* This Source Code Form is subject to the terms of the Mozilla Public
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
4 *
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
5 * Copyright 2013-2018 MonetDB B.V.
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
6 */
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
7
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
8 /* monetdb_config.h must be included as the first include file */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
9 #include <monetdb_config.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
10
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
11 /* mal_exception.h actually contains everything we need */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
12 #include <mal_exception.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
13
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
14 /* for the candidate iterator */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
15 #include <gdk_cand.h>
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
16
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
17 /* system include files */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
18 #include <string.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
19
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
20 /* we use the PCRE library to do regular expression matching */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
21 #include <pcre.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
22
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
23 /* __declspec() must be used on Windows, but not on other systems */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
24 #ifndef _MSC_VER
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
25 /* not Windows */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
26 #define __declspec(x) /* nothing */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
27 #endif
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
28
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
29 /* these eight functions are the only externally visible functions
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
30 * since they are the only ones that are called from the MAL layer; on
19
499debebda5d Various updates.
Sjoerd Mullender <sjoerd@acm.org>
parents: 17
diff changeset
31 * Windows they must be exported, on other systems, declaring them as
499debebda5d Various updates.
Sjoerd Mullender <sjoerd@acm.org>
parents: 17
diff changeset
32 * extern is enough */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
33 extern __declspec(dllexport) char *regexpmatch(bit *ret, const char **val, const char **pat);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
34 extern __declspec(dllexport) char *regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags);
19
499debebda5d Various updates.
Sjoerd Mullender <sjoerd@acm.org>
parents: 17
diff changeset
35 extern __declspec(dllexport) char *regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti);
499debebda5d Various updates.
Sjoerd Mullender <sjoerd@acm.org>
parents: 17
diff changeset
36 extern __declspec(dllexport) char *regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti);
499debebda5d Various updates.
Sjoerd Mullender <sjoerd@acm.org>
parents: 17
diff changeset
37 extern __declspec(dllexport) char *regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate);
499debebda5d Various updates.
Sjoerd Mullender <sjoerd@acm.org>
parents: 17
diff changeset
38 extern __declspec(dllexport) char *regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const char **flags, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate);
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
39 extern __declspec(dllexport) char *regexpmatchbulk(bat *ret, const bat *bid, const char **pat);
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
40 extern __declspec(dllexport) char *regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
41
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
42 static int
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
43 parseflags(const char *flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
44 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
45 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
46
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
47 if (flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
48 while (*flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
49 switch (*flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
50 case 'i': /* case insensitive */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
51 options |= PCRE_CASELESS;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
52 break;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
53 case 'x': /* extended regular expressions */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
54 options |= PCRE_EXTENDED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
55 break;
31
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
56 case 'm': /* multiline matching */
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
57 options |= PCRE_MULTILINE | PCRE_DOTALL;;
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
58 break;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
59 default:
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
60 return -1; /* indicate there was an error */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
61 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
62 flags++;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
63 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
64 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
65 return options;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
66 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
67
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
68 static char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
69 do_match(bit *ret, const char *val, const char *pat, const char *flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
70 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
71 const char *err = NULL;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
72 int options;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
73 int pos = 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
74 pcre *re;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
75
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
76 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
77 if (strNil(val) || strNil(pat) || strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
78 /* special case for NIL inputs: NILs don't match anything */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
79 *ret = 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
80 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
81 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
82
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
83 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
84 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
85 throw(MAL, "regexp.rematch", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
86
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
87 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
88 if (re == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
89 throw(MAL, "regexp.rematch",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
90 "compilation of regular expression (%s) failed at %d with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
91 pat, pos, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
92 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
93 pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
94 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
95 if (pos < PCRE_ERROR_NOMATCH) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
96 throw(MAL, "regexp.rematch",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
97 "matching of regular expression (%s) failed with %d",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
98 pat, pos);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
99 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
100 *ret = pos >= 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
101 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
102 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
103
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
104 char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
105 regexpmatch(bit *ret, const char **val, const char **pat)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
106 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
107 return do_match(ret, *val, *pat, "");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
108 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
109
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
110 char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
111 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
112 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
113 return do_match(ret, *val, *pat, *flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
114 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
115
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
116 static char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
117 do_matchbulk(bat *ret, bat bid, const char *pat, const char *flags)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
118 {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
119 BAT *b; /* input BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
120 BATiter bi; /* helper to loop through values */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
121 BAT *bn; /* result BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
122 bit *outp; /* pointer through which we add to result */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
123 BUN start, end; /* iteration variables */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
124
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
125 const char *err = NULL; /* error message from PCRE library */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
126 int pos = 0; /* error position from PCRE library */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
127 int options; /* PCRE options */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
128 pcre *re; /* compiled regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
129 pcre_extra *sd; /* studied regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
130
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
131 /* from the BAT ID we need to get the BAT descriptor, making
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
132 * sure that the data of the BAT is loaded into memory */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
133 if ((b = BATdescriptor(bid)) == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
134 throw(MAL, "batregexp.rematch", RUNTIME_OBJECT_MISSING);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
135 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
136 /* check that the BAT has the expected type: we expect str or
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
137 * something compatible with str (if we only want str, we need
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
138 * to compare b->ttype with TYPE_str and not use ATOMstorage).
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
139 * Note, the MAL interpreter will only call this function with
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
140 * a str BAT because that is the only interface that is
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
141 * defined in the MAL file, so this check is superfluous. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
142 if (ATOMstorage(b->ttype) != TYPE_str) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
143 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
144 throw(MAL, "batregexp.rematch", SEMANTIC_TYPE_MISMATCH);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
145 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
146
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
147 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
148 if (strNil(pat) || strNil(flags)) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
149 /* no matches when the pattern or the flags is NIL
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
150 * we return a BAT with all NIL values */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
151 bit f = bit_nil;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
152 if ((bn = BATconstant(b->hseqbase, TYPE_bit, &f, BATcount(b), TRANSIENT)) == NULL)
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
153 throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
154 *ret = bn->batCacheid;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
155 BBPkeepref(*ret);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
156 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
157 return MAL_SUCCEED;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
158 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
159 options = parseflags(flags);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
160 if (options == -1) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
161 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
162 throw(MAL, "batregexp.rematch", "bad flag character");
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
163 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
164
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
165 /* allocate a result BAT; the capacity we ask for is the size
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
166 * of the input BAT since we produce a value for each input
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
167 * value */
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
168 bn = COLnew(b->hseqbase, TYPE_bit, BATcount(b), TRANSIENT);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
169 if (bn == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
170 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
171 throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
172 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
173
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
174 /* Position outp at the start of the result array.
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
175 * We know the array is large enough even if every value were
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
176 * to match, so we don't need to check for that. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
177 outp = (bit *) Tloc(bn, 0);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
178
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
179 /* compile the regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
180 re = pcre_compile(pat, options, &err, &pos, NULL);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
181 if (re == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
182 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
183 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
184 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
185 "compilation of regular expression (%s) failed at %d with %s",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
186 pat, pos, err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
187 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
188 /* invest in study of the r.e. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
189 sd = pcre_study(re, 0, &err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
190 if (err != NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
191 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
192 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
193 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
194 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
195 "study of regular expression (%s) failed with %s",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
196 pat, err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
197 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
198
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
199 /* now, start and end are the limits in b that we need to look
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
200 * at, and if set, cand and candend are the beginning and end
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
201 * of the list of OIDs of b that we need to consider */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
202
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
203 bi = bat_iterator(b);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
204
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
205 /* we will change these if we add a NIL */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
206 bn->tnil = false;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
207 bn->tnonil = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
208 for (start = 0, end = BATcount(b); start < end; start++) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
209 const char *val = BUNtvar(bi, start);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
210 /* nil values never match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
211 if (strNil(val)) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
212 *outp++ = bit_nil;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
213 bn->tnil = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
214 bn->tnonil = false;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
215 } else {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
216 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
32
25cd8af6fa82 TEMPORARILY relax error condition
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 31
diff changeset
217 if (pos < 0 && pos != PCRE_ERROR_NOMATCH && pos != PCRE_ERROR_BADUTF8) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
218 /* error during processing */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
219 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
220 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
221 pcre_free_study(sd);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
222 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
223 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
224 "matching of regular expression (%s) failed with %d",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
225 pat, pos);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
226 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
227 *outp++ = pos >= 0; /* TRUE if match, FALSE if not */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
228 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
229 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
230
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
231 /* set properties and size of result BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
232 BATsetcount(bn, BATcount(b));
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
233
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
234 if (BATcount(bn) > 1) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
235 /* if more than 1 result, it is not reverse sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
236 bn->tsorted = false; /* probably not sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
237 bn->trevsorted = false; /* probably not reverse sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
238 bn->tkey = false; /* probably not key */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
239 } else {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
240 /* if empty or a single result, it is sorted, reverse
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
241 * sorted, and key */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
242 bn->tsorted = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
243 bn->trevsorted = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
244 bn->tkey = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
245 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
246 bn->tnosorted = 0; /* we don't know for sure */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
247 bn->tnorevsorted = 0; /* we don't know for sure */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
248 bn->tnokey[0] = bn->tnokey[1] = 0;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
249
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
250 /* we're done with b and re */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
251 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
252 pcre_free_study(sd);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
253 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
254
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
255 *ret = bn->batCacheid;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
256 BBPkeepref(*ret);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
257 return MAL_SUCCEED;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
258 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
259
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
260 char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
261 regexpmatchbulk(bat *ret, const bat *bid, const char **pat)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
262 {
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
263 return do_matchbulk(ret, *bid, *pat, "");
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
264 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
265
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
266
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
267 char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
268 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
269 {
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
270 return do_matchbulk(ret, *bid, *pat, *flags);
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
271 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
272
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
273 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
274 do_select(bat *ret, bat bid, bat sid, const char *pat, const char *flags, bit anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
275 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
276 BAT *b, *s = NULL; /* input BAT and optional candidate list */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
277
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
278 struct canditer ci; /* candidate iterator */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
279 BATiter bi; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
280
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
281 BAT *bn; /* result BAT */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
282 oid *outp; /* pointer through which we add to result */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
283
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
284 const char *err = NULL; /* error message from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
285 int pos = 0; /* error position from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
286 int options; /* PCRE options */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
287 pcre *re; /* compiled regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
288 pcre_extra *sd; /* studied regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
289
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
290 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
291 if (strNil(pat) || strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
292 /* no matches when the pattern or the flags is NIL
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
293 * we return an empty BAT of the correct type */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
294 if ((bn = BATdense(0, 0, 0)) == NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
295 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
296 *ret = bn->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
297 BBPkeepref(*ret);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
298 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
299 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
300 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
301 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
302 throw(MAL, "regexp.rematchselect", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
303
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
304 /* from the BAT IDs we need to get the BAT descriptors, making
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
305 * sure that the data of the BATs are loaded into memory */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
306 if ((b = BATdescriptor(bid)) == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
307 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
308 }
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
309 /* check that the BAT has the expected type: we expect str or
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
310 * something compatible with str (if we only want str, we need
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
311 * to compare b->ttype with TYPE_str and not use ATOMstorage).
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
312 * Note, the MAL interpreter will only call this function with
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
313 * a str BAT because that is the only interface that is
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
314 * defined in the MAL file, so this check is superfluous. */
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
315 if (ATOMstorage(b->ttype) != TYPE_str) {
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
316 BBPunfix(b->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
317 throw(MAL, "regexp.rematchselect", SEMANTIC_TYPE_MISMATCH);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
318 }
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
319 if (!is_bat_nil(sid) &&
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
320 (s = BATdescriptor(sid)) == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
321 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
322 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
323 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
324
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
325 if (canditer_init(&ci, b, s) == 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
326 /* trivially empty result */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
327 BBPunfix(b->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
328 if (s)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
329 BBPunfix(s->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
330 bn = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
331 *ret = bn->batCacheid;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
332 BBPkeepref(*ret);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
333 return MAL_SUCCEED;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
334 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
335
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
336 /* allocate a result BAT; the capacity we ask for is the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
337 * maximum potential result size (i.e. the size of the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
338 * candidate list if there is one, else the size of the input
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
339 * BAT b) */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
340 bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
341 if (bn == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
342 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
343 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
344 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
345 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
346 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
347
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
348 /* Position outp at the start of the result array.
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
349 * We know the array is large enough even if every value were
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
350 * to match, so we don't need to check for that. */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
351 outp = (oid *) Tloc(bn, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
352
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
353 /* compile the regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
354 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
355 if (re == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
356 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
357 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
358 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
359 BBPreclaim(bn);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
360 throw(MAL, "regexp.rematchselect",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
361 "compilation of regular expression (%s) failed at %d with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
362 pat, pos, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
363 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
364 /* invest in study of the r.e. */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
365 sd = pcre_study(re, 0, &err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
366 if (err != NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
367 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
368 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
369 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
370 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
371 BBPreclaim(bn);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
372 throw(MAL, "regexp.rematchselect",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
373 "study of regular expression (%s) failed with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
374 pat, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
375 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
376
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
377 bi = bat_iterator(b);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
378
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
379 /* iterate through the candidates */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
380 for (BUN i = 0; i < ci.ncand; i++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
381 /* get the next candidate */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
382 oid o = canditer_next(&ci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
383
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
384 /* the candidate list has a list of OIDs which are
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
385 * relative to b->hseqbase, we need to convert that to
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
386 * an index relative to the start of the array in the
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
387 * (tail) heap */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
388 const char *val = BUNtvar(bi, o - b->hseqbase);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
389
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
390 /* nil values never match */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
391 if (!strNil(val)) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
392 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
393 if (pos >= 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
394 /* regular expression matched */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
395 if (!anti)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
396 *outp++ = o;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
397 } else if (pos == PCRE_ERROR_NOMATCH) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
398 /* regular expression didn't match */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
399 if (anti)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
400 *outp++ = o;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
401 } else {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
402 /* error during processing */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
403 BBPunfix(b->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
404 BBPunfix(s->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
405 BBPreclaim(bn);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
406 pcre_free_study(sd);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
407 pcre_free(re);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
408 throw(MAL, "regexp.rematchselect",
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
409 "matching of regular expression (%s) failed with %d",
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
410 pat, pos);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
411 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
412 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
413 }
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
414 /* we're done with b, s, and re */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
415 BBPunfix(b->batCacheid);
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
416 if (s)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
417 BBPunfix(s->batCacheid);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
418 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
419 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
420
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
421 /* set properties and size of result BAT */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
422 BATsetcount(bn, (BUN) (outp - (oid *) Tloc(bn, 0))); /* size is pointer difference */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
423 /* the result BAT of a select operation MUST be sorted, and
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
424 * all values MUST be distinct (i.e. it is a candidate list);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
425 * due to the way we created the result, we know this is the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
426 * case */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
427 bn->tsorted = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
428 bn->tnosorted = 0;
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
429 bn->tkey = true;
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
430 bn->tseqbase = oid_nil;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
431 if (BATcount(bn) > 1) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
432 /* if more than 1 result, it is not reverse sorted */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
433 bn->trevsorted = false;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
434 bn->tnorevsorted = 1; /* index 1 is larger than index 0 */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
435 /* the BAT is dense if the type is TYPE_oid, the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
436 * values are sorted in ascending order, they are all
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
437 * distinct, and they form a consecutive sequence (no
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
438 * missing values); we only need to check the last
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
439 * condition, which we do by checking the difference
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
440 * between the first and last values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
441 outp = (oid *) Tloc(bn, 0); /* pointer to start */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
442 if (outp[BATcount(bn) - 1] - outp[0] == BATcount(bn) - 1)
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
443 bn->tseqbase = outp[0];
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
444 } else {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
445 /* if empty or a single result, it is reverse sorted
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
446 * and dense */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
447 bn->trevsorted = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
448 bn->tnorevsorted = 0;
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
449 bn->tseqbase = BATcount(bn) == 0 ? 0 : *(oid *) Tloc(bn, 0);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
450 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
451 /* there are no NIL values in the result */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
452 bn->tnil = false;
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
453 bn->tnonil = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
454
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
455 *ret = bn->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
456 BBPkeepref(*ret);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
457 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
458 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
459
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
460 char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
461 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
462 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
463 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
464 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
465
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
466
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
467 char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
468 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
469 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
470 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
471 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
472
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
473 static char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
474 do_join(bat *lres, bat *rres, bat lid, bat rid, const char *flags,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
475 bat slid, bat srid, bit nil_matches, lng estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
476 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
477 BAT *l, *r, *sl = NULL, *sr = NULL; /* input BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
478 BAT *bn1, *bn2; /* output BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
479
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
480 struct canditer lci; /* candidate iterator for l */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
481 struct canditer rci; /* candidate iterator for r */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
482
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
483 BATiter li; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
484 BATiter ri; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
485 oid ro; /* right OID being matched */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
486
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
487 const char *pat; /* the regular expression being matched */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
488 const char *err; /* error message from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
489 int pos = 0; /* error position from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
490 int options; /* PCRE options */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
491 pcre *re; /* compiled regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
492 pcre_extra *sd; /* studied regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
493
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
494 (void) nil_matches; /* only relevant for equi-join */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
495
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
496 if (strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
497 /* no matches when the flags is NIL
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
498 * we return two empty BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
499 bn1 = BATdense(0, 0, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
500 bn2 = BATdense(0, 0, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
501 if (bn1 == NULL || bn2 == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
502 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
503 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
504 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
505 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
506 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
507 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
508 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
509 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
510 throw(MAL, "regexp.rematchjoin", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
511
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
512 l = BATdescriptor(lid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
513 r = BATdescriptor(rid);
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
514 if (!is_bat_nil(slid))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
515 sl = BATdescriptor(slid);
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
516 if (!is_bat_nil(srid))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
517 sr = BATdescriptor(srid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
518 if (l == NULL || r == NULL ||
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
519 (!is_bat_nil(slid) && sl == NULL) ||
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
520 (!is_bat_nil(srid) && sr == NULL)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
521 /* one of the calls to BATdescriptor failed */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
522 if (l)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
523 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
524 if (r)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
525 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
526 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
527 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
528 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
529 BBPunfix(sr->batCacheid);
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
530 throw(MAL, "regexp.rematchjoin", RUNTIME_OBJECT_MISSING);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
531 }
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
532 /* check that the BATs have the expected type: we expect str
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
533 * or something compatible with str for l (the values) and str
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
534 * for r (the patterns).
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
535 * Note, the MAL interpreter will only call this function with
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
536 * a pair of str BATs because that is the only interface that
27
07a6ef1fde8e Fixed syntax err in comment
Jennie Zhang <y.zhang@cwi.nl>
parents: 26
diff changeset
537 * is defined in the MAL file, so this check is superfluous.
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
538 */
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
539 if (ATOMstorage(l->ttype) != TYPE_str || r->ttype != TYPE_str) {
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
540 BBPunfix(l->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
541 BBPunfix(r->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
542 if (sl)
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
543 BBPunfix(sl->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
544 if (sr)
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
545 BBPunfix(sr->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
546 throw(MAL, "regexp.rematchjoin", SEMANTIC_TYPE_MISMATCH);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
547 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
548
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
549 if (canditer_init(&lci, l, sl) == 0 ||
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
550 canditer_init(&rci, r, sr) == 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
551 /* if either side is empty (or no candidates) the
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
552 * result is empty */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
553 BBPunfix(l->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
554 BBPunfix(r->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
555 if (sl)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
556 BBPunfix(sl->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
557 if (sr)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
558 BBPunfix(sr->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
559 bn1 = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
560 bn2 = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
561 if (bn1 == NULL || bn2 == NULL) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
562 BBPreclaim(bn1);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
563 BBPreclaim(bn2);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
564 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
565 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
566 return MAL_SUCCEED;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
567 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
568
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
569 /* if there is no valid estimate, use the size of the left
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
570 * input as size estimate */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
571 if (is_lng_nil(estimate) || estimate == 0)
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
572 estimate = lci.ncand;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
573
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
574 /* create the output BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
575 bn1 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
576 bn2 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
577 if (bn1 == NULL || bn2 == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
578 /* one of the calls to COLnew failed
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
579 * note, BBPreclaim checks whether its argument is NULL */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
580 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
581 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
582 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
583 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
584 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
585 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
586 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
587 BBPunfix(sr->batCacheid);
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
588 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
589 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
590
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
591 li = bat_iterator(l);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
592 ri = bat_iterator(r);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
593
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
594 for (BUN i = 0; i < rci.ncand; i++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
595 ro = canditer_next(&rci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
596 pat = BUNtvar(ri, ro - r->hseqbase);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
597
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
598 /* nil regular expressions don't match (despite
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
599 * nil_matches) */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
600 if (strNil(pat))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
601 continue;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
602 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
603 sd = NULL;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
604 if (re == NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
605 goto bailout;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
606 sd = pcre_study(re, 0, &err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
607 if (err != NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
608 goto bailout;
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
609
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
610 /* inner loop: reset iterator, then iterate over it */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
611 canditer_reset(&lci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
612 for (BUN j = 0; j < lci.ncand; j++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
613 oid lo = canditer_next(&lci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
614 const char *val = BUNtvar(li, lo - l->hseqbase);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
615 if (strNil(val))
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
616 continue;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
617 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
618 if (pos >= 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
619 /* regular expression matched */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
620 if (BUNappend(bn1, &lo, false) != GDK_SUCCEED ||
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
621 BUNappend(bn2, &ro, false) != GDK_SUCCEED)
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
622 goto bailout;
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
623 } else if (pos != PCRE_ERROR_NOMATCH) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
624 /* error during processing */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
625 err = "matching of regular expression failed";
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
626 goto bailout;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
627 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
628 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
629 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
630 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
631 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
632
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
633 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
634 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
635 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
636 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
637 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
638 BBPunfix(sr->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
639 *lres = bn1->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
640 *rres = bn2->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
641 BBPkeepref(*lres);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
642 BBPkeepref(*rres);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
643 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
644
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
645 bailout:
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
646 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
647 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
648 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
649 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
650 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
651 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
652 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
653 BBPunfix(sr->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
654 if (sd)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
655 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
656 if (re)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
657 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
658 if (err)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
659 throw(MAL, "pcre.rematchjoin",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
660 "error with regular expression: %s", err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
661 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
662 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
663
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
664 char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
665 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
666 const bat *sl, const bat *sr, const bit *nil_matches,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
667 const lng *estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
668 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
669 return do_join(lres, rres, *lid, *rid, "", *sl, *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
670 *nil_matches, *estimate);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
671 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
672
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
673 char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
674 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
675 const char **flags, const bat *sl, const bat *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
676 const bit *nil_matches, const lng *estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
677 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
678 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
679 *nil_matches, *estimate);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
680 }