annotate regexp/regexp.c @ 40:e70b12c15507

Updated for Oct2020 version.
author Sjoerd Mullender <sjoerd@acm.org>
date Tue, 08 Jun 2021 14:55:38 +0200 (2021-06-08)
parents 4633ab41de55
children da896864dbbd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
1 /* This Source Code Form is subject to the terms of the Mozilla Public
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
4 *
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
5 * Copyright 2013-2018 MonetDB B.V.
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
6 */
e799d117c5b1 Update copyright.
Sjoerd Mullender <sjoerd@acm.org>
parents: 15
diff changeset
7
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
8 /* monetdb_config.h must be included as the first include file */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
9 #include <monetdb_config.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
10
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
11 /* mal_exception.h actually contains everything we need */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
12 #include <mal_exception.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
13
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
14 /* for the candidate iterator */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
15 #include <gdk_cand.h>
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
16
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
17 /* system include files */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
18 #include <string.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
19
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
20 /* we use the PCRE library to do regular expression matching */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
21 #include <pcre.h>
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
22
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
23 static int
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
24 parseflags(const char *flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
25 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
26 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
27
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
28 if (flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
29 while (*flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
30 switch (*flags) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
31 case 'i': /* case insensitive */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
32 options |= PCRE_CASELESS;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
33 break;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
34 case 'x': /* extended regular expressions */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
35 options |= PCRE_EXTENDED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
36 break;
31
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
37 case 'm': /* multiline matching */
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
38 options |= PCRE_MULTILINE | PCRE_DOTALL;;
3510e17287e6 Add necessary options for multiline matching.
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 30
diff changeset
39 break;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
40 default:
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
41 return -1; /* indicate there was an error */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
42 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
43 flags++;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
44 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
45 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
46 return options;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
47 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
48
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
49 static char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
50 do_match(bit *ret, const char *val, const char *pat, const char *flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
51 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
52 const char *err = NULL;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
53 int options;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
54 int pos = 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
55 pcre *re;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
56
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
57 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
58 if (strNil(val) || strNil(pat) || strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
59 /* special case for NIL inputs: NILs don't match anything */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
60 *ret = 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
61 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
62 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
63
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
64 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
65 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
66 throw(MAL, "regexp.rematch", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
67
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
68 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
69 if (re == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
70 throw(MAL, "regexp.rematch",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
71 "compilation of regular expression (%s) failed at %d with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
72 pat, pos, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
73 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
74 pos = pcre_exec(re, NULL, val, (int) strlen(val), 0, 0, NULL, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
75 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
76 if (pos < PCRE_ERROR_NOMATCH) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
77 throw(MAL, "regexp.rematch",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
78 "matching of regular expression (%s) failed with %d",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
79 pat, pos);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
80 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
81 *ret = pos >= 0;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
82 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
83 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
84
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
85 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
86 regexpmatch(bit *ret, const char **val, const char **pat)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
87 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
88 return do_match(ret, *val, *pat, "");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
89 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
90
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
91 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
92 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
93 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
94 return do_match(ret, *val, *pat, *flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
95 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
96
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
97 static char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
98 do_matchbulk(bat *ret, bat bid, const char *pat, const char *flags)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
99 {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
100 BAT *b; /* input BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
101 BATiter bi; /* helper to loop through values */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
102 BAT *bn; /* result BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
103 bit *outp; /* pointer through which we add to result */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
104 BUN start, end; /* iteration variables */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
105
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
106 const char *err = NULL; /* error message from PCRE library */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
107 int pos = 0; /* error position from PCRE library */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
108 int options; /* PCRE options */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
109 pcre *re; /* compiled regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
110 pcre_extra *sd; /* studied regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
111
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
112 /* from the BAT ID we need to get the BAT descriptor, making
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
113 * sure that the data of the BAT is loaded into memory */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
114 if ((b = BATdescriptor(bid)) == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
115 throw(MAL, "batregexp.rematch", RUNTIME_OBJECT_MISSING);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
116 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
117 /* check that the BAT has the expected type: we expect str or
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
118 * something compatible with str (if we only want str, we need
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
119 * to compare b->ttype with TYPE_str and not use ATOMstorage).
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
120 * Note, the MAL interpreter will only call this function with
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
121 * a str BAT because that is the only interface that is
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
122 * defined in the MAL file, so this check is superfluous. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
123 if (ATOMstorage(b->ttype) != TYPE_str) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
124 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
125 throw(MAL, "batregexp.rematch", SEMANTIC_TYPE_MISMATCH);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
126 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
127
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
128 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
129 if (strNil(pat) || strNil(flags)) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
130 /* no matches when the pattern or the flags is NIL
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
131 * we return a BAT with all NIL values */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
132 bit f = bit_nil;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
133 if ((bn = BATconstant(b->hseqbase, TYPE_bit, &f, BATcount(b), TRANSIENT)) == NULL)
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
134 throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
135 *ret = bn->batCacheid;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
136 BBPkeepref(*ret);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
137 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
138 return MAL_SUCCEED;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
139 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
140 options = parseflags(flags);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
141 if (options == -1) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
142 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
143 throw(MAL, "batregexp.rematch", "bad flag character");
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
144 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
145
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
146 /* allocate a result BAT; the capacity we ask for is the size
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
147 * of the input BAT since we produce a value for each input
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
148 * value */
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
149 bn = COLnew(b->hseqbase, TYPE_bit, BATcount(b), TRANSIENT);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
150 if (bn == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
151 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
152 throw(MAL, "batregexp.rematch", GDK_EXCEPTION);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
153 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
154
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
155 /* Position outp at the start of the result array.
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
156 * We know the array is large enough even if every value were
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
157 * to match, so we don't need to check for that. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
158 outp = (bit *) Tloc(bn, 0);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
159
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
160 /* compile the regular expression */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
161 re = pcre_compile(pat, options, &err, &pos, NULL);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
162 if (re == NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
163 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
164 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
165 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
166 "compilation of regular expression (%s) failed at %d with %s",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
167 pat, pos, err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
168 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
169 /* invest in study of the r.e. */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
170 sd = pcre_study(re, 0, &err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
171 if (err != NULL) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
172 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
173 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
174 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
175 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
176 "study of regular expression (%s) failed with %s",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
177 pat, err);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
178 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
179
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
180 /* now, start and end are the limits in b that we need to look
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
181 * at, and if set, cand and candend are the beginning and end
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
182 * of the list of OIDs of b that we need to consider */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
183
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
184 bi = bat_iterator(b);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
185
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
186 /* we will change these if we add a NIL */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
187 bn->tnil = false;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
188 bn->tnonil = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
189 for (start = 0, end = BATcount(b); start < end; start++) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
190 const char *val = BUNtvar(bi, start);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
191 /* nil values never match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
192 if (strNil(val)) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
193 *outp++ = bit_nil;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
194 bn->tnil = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
195 bn->tnonil = false;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
196 } else {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
197 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
32
25cd8af6fa82 TEMPORARILY relax error condition
Aris Koning <aris.koning@monetdbsolutions.com>
parents: 31
diff changeset
198 if (pos < 0 && pos != PCRE_ERROR_NOMATCH && pos != PCRE_ERROR_BADUTF8) {
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
199 /* error during processing */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
200 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
201 BBPreclaim(bn);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
202 pcre_free_study(sd);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
203 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
204 throw(MAL, "batregexp.rematch",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
205 "matching of regular expression (%s) failed with %d",
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
206 pat, pos);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
207 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
208 *outp++ = pos >= 0; /* TRUE if match, FALSE if not */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
209 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
210 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
211
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
212 /* set properties and size of result BAT */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
213 BATsetcount(bn, BATcount(b));
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
214
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
215 if (BATcount(bn) > 1) {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
216 /* if more than 1 result, it is not reverse sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
217 bn->tsorted = false; /* probably not sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
218 bn->trevsorted = false; /* probably not reverse sorted */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
219 bn->tkey = false; /* probably not key */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
220 } else {
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
221 /* if empty or a single result, it is sorted, reverse
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
222 * sorted, and key */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
223 bn->tsorted = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
224 bn->trevsorted = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
225 bn->tkey = true;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
226 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
227 bn->tnosorted = 0; /* we don't know for sure */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
228 bn->tnorevsorted = 0; /* we don't know for sure */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
229 bn->tnokey[0] = bn->tnokey[1] = 0;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
230
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
231 /* we're done with b and re */
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
232 BBPunfix(b->batCacheid);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
233 pcre_free_study(sd);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
234 pcre_free(re);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
235
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
236 *ret = bn->batCacheid;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
237 BBPkeepref(*ret);
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
238 return MAL_SUCCEED;
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
239 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
240
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
241 static char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
242 regexpmatchbulk(bat *ret, const bat *bid, const char **pat)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
243 {
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
244 return do_matchbulk(ret, *bid, *pat, "");
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
245 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
246
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
247
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
248 static char *
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
249 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags)
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
250 {
30
543dccbc169b Some fixes to comments and code.
Sjoerd Mullender <sjoerd@acm.org>
parents: 29
diff changeset
251 return do_matchbulk(ret, *bid, *pat, *flags);
29
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
252 }
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
253
e44cffee8312 Implemented bulk variant of match function.
Sjoerd Mullender <sjoerd@acm.org>
parents: 28
diff changeset
254 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
255 do_select(bat *ret, bat bid, bat sid, const char *pat, const char *flags, bit anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
256 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
257 BAT *b, *s = NULL; /* input BAT and optional candidate list */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
258
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
259 struct canditer ci; /* candidate iterator */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
260 BATiter bi; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
261
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
262 BAT *bn; /* result BAT */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
263 oid *outp; /* pointer through which we add to result */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
264
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
265 const char *err = NULL; /* error message from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
266 int pos = 0; /* error position from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
267 int options; /* PCRE options */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
268 pcre *re; /* compiled regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
269 pcre_extra *sd; /* studied regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
270
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
271 /* if any of the input values are nil, the result is no match */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
272 if (strNil(pat) || strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
273 /* no matches when the pattern or the flags is NIL
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
274 * we return an empty BAT of the correct type */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
275 if ((bn = BATdense(0, 0, 0)) == NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
276 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
277 *ret = bn->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
278 BBPkeepref(*ret);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
279 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
280 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
281 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
282 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
283 throw(MAL, "regexp.rematchselect", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
284
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
285 /* from the BAT IDs we need to get the BAT descriptors, making
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
286 * sure that the data of the BATs are loaded into memory */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
287 if ((b = BATdescriptor(bid)) == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
288 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
289 }
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
290 /* check that the BAT has the expected type: we expect str or
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
291 * something compatible with str (if we only want str, we need
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
292 * to compare b->ttype with TYPE_str and not use ATOMstorage).
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
293 * Note, the MAL interpreter will only call this function with
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
294 * a str BAT because that is the only interface that is
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
295 * defined in the MAL file, so this check is superfluous. */
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
296 if (ATOMstorage(b->ttype) != TYPE_str) {
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
297 BBPunfix(b->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
298 throw(MAL, "regexp.rematchselect", SEMANTIC_TYPE_MISMATCH);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
299 }
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
300 if (!is_bat_nil(sid) &&
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
301 (s = BATdescriptor(sid)) == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
302 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
303 throw(MAL, "regexp.rematchselect", RUNTIME_OBJECT_MISSING);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
304 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
305
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
306 if (canditer_init(&ci, b, s) == 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
307 /* trivially empty result */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
308 BBPunfix(b->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
309 if (s)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
310 BBPunfix(s->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
311 bn = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
312 *ret = bn->batCacheid;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
313 BBPkeepref(*ret);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
314 return MAL_SUCCEED;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
315 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
316
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
317 /* allocate a result BAT; the capacity we ask for is the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
318 * maximum potential result size (i.e. the size of the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
319 * candidate list if there is one, else the size of the input
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
320 * BAT b) */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
321 bn = COLnew(0, TYPE_oid, ci.ncand, TRANSIENT);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
322 if (bn == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
323 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
324 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
325 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
326 throw(MAL, "regexp.rematchselect", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
327 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
328
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
329 /* Position outp at the start of the result array.
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
330 * We know the array is large enough even if every value were
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
331 * to match, so we don't need to check for that. */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
332 outp = (oid *) Tloc(bn, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
333
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
334 /* compile the regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
335 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
336 if (re == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
337 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
338 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
339 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
340 BBPreclaim(bn);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
341 throw(MAL, "regexp.rematchselect",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
342 "compilation of regular expression (%s) failed at %d with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
343 pat, pos, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
344 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
345 /* invest in study of the r.e. */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
346 sd = pcre_study(re, 0, &err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
347 if (err != NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
348 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
349 BBPunfix(b->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
350 if (s)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
351 BBPunfix(s->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
352 BBPreclaim(bn);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
353 throw(MAL, "regexp.rematchselect",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
354 "study of regular expression (%s) failed with %s",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
355 pat, err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
356 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
357
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
358 bi = bat_iterator(b);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
359
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
360 /* iterate through the candidates */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
361 for (BUN i = 0; i < ci.ncand; i++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
362 /* get the next candidate */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
363 oid o = canditer_next(&ci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
364
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
365 /* the candidate list has a list of OIDs which are
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
366 * relative to b->hseqbase, we need to convert that to
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
367 * an index relative to the start of the array in the
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
368 * (tail) heap */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
369 const char *val = BUNtvar(bi, o - b->hseqbase);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
370
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
371 /* nil values never match */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
372 if (!strNil(val)) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
373 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
374 if (pos >= 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
375 /* regular expression matched */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
376 if (!anti)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
377 *outp++ = o;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
378 } else if (pos == PCRE_ERROR_NOMATCH) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
379 /* regular expression didn't match */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
380 if (anti)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
381 *outp++ = o;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
382 } else {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
383 /* error during processing */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
384 BBPunfix(b->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
385 BBPunfix(s->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
386 BBPreclaim(bn);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
387 pcre_free_study(sd);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
388 pcre_free(re);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
389 throw(MAL, "regexp.rematchselect",
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
390 "matching of regular expression (%s) failed with %d",
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
391 pat, pos);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
392 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
393 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
394 }
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
395 /* we're done with b, s, and re */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
396 BBPunfix(b->batCacheid);
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
397 if (s)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
398 BBPunfix(s->batCacheid);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
399 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
400 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
401
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
402 /* set properties and size of result BAT */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
403 BATsetcount(bn, (BUN) (outp - (oid *) Tloc(bn, 0))); /* size is pointer difference */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
404 /* the result BAT of a select operation MUST be sorted, and
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
405 * all values MUST be distinct (i.e. it is a candidate list);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
406 * due to the way we created the result, we know this is the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
407 * case */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
408 bn->tsorted = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
409 bn->tnosorted = 0;
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
410 bn->tkey = true;
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
411 bn->tseqbase = oid_nil;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
412 if (BATcount(bn) > 1) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
413 /* if more than 1 result, it is not reverse sorted */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
414 bn->trevsorted = false;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
415 bn->tnorevsorted = 1; /* index 1 is larger than index 0 */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
416 /* the BAT is dense if the type is TYPE_oid, the
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
417 * values are sorted in ascending order, they are all
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
418 * distinct, and they form a consecutive sequence (no
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
419 * missing values); we only need to check the last
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
420 * condition, which we do by checking the difference
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
421 * between the first and last values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
422 outp = (oid *) Tloc(bn, 0); /* pointer to start */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
423 if (outp[BATcount(bn) - 1] - outp[0] == BATcount(bn) - 1)
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
424 bn->tseqbase = outp[0];
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
425 } else {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
426 /* if empty or a single result, it is reverse sorted
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
427 * and dense */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
428 bn->trevsorted = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
429 bn->tnorevsorted = 0;
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
430 bn->tseqbase = BATcount(bn) == 0 ? 0 : *(oid *) Tloc(bn, 0);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
431 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
432 /* there are no NIL values in the result */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
433 bn->tnil = false;
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
434 bn->tnonil = true;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
435
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
436 *ret = bn->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
437 BBPkeepref(*ret);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
438 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
439 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
440
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
441 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
442 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
443 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
444 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
445 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
446
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
447
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
448 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
449 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
450 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
451 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
452 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
453
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
454 static char *
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
455 do_join(bat *lres, bat *rres, bat lid, bat rid, const char *flags,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
456 bat slid, bat srid, bit nil_matches, lng estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
457 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
458 BAT *l, *r, *sl = NULL, *sr = NULL; /* input BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
459 BAT *bn1, *bn2; /* output BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
460
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
461 struct canditer lci; /* candidate iterator for l */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
462 struct canditer rci; /* candidate iterator for r */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
463
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
464 BATiter li; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
465 BATiter ri; /* helper to loop through values */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
466 oid ro; /* right OID being matched */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
467
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
468 const char *pat; /* the regular expression being matched */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
469 const char *err; /* error message from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
470 int pos = 0; /* error position from PCRE library */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
471 int options; /* PCRE options */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
472 pcre *re; /* compiled regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
473 pcre_extra *sd; /* studied regular expression */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
474
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
475 (void) nil_matches; /* only relevant for equi-join */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
476
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
477 if (strNil(flags)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
478 /* no matches when the flags is NIL
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
479 * we return two empty BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
480 bn1 = BATdense(0, 0, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
481 bn2 = BATdense(0, 0, 0);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
482 if (bn1 == NULL || bn2 == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
483 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
484 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
485 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
486 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
487 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
488 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
489 options = parseflags(flags);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
490 if (options == -1)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
491 throw(MAL, "regexp.rematchjoin", "bad flag character");
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
492
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
493 l = BATdescriptor(lid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
494 r = BATdescriptor(rid);
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
495 if (!is_bat_nil(slid))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
496 sl = BATdescriptor(slid);
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
497 if (!is_bat_nil(srid))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
498 sr = BATdescriptor(srid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
499 if (l == NULL || r == NULL ||
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
500 (!is_bat_nil(slid) && sl == NULL) ||
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
501 (!is_bat_nil(srid) && sr == NULL)) {
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
502 /* one of the calls to BATdescriptor failed */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
503 if (l)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
504 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
505 if (r)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
506 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
507 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
508 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
509 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
510 BBPunfix(sr->batCacheid);
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
511 throw(MAL, "regexp.rematchjoin", RUNTIME_OBJECT_MISSING);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
512 }
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
513 /* check that the BATs have the expected type: we expect str
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
514 * or something compatible with str for l (the values) and str
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
515 * for r (the patterns).
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
516 * Note, the MAL interpreter will only call this function with
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
517 * a pair of str BATs because that is the only interface that
27
07a6ef1fde8e Fixed syntax err in comment
Jennie Zhang <y.zhang@cwi.nl>
parents: 26
diff changeset
518 * is defined in the MAL file, so this check is superfluous.
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
519 */
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
520 if (ATOMstorage(l->ttype) != TYPE_str || r->ttype != TYPE_str) {
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
521 BBPunfix(l->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
522 BBPunfix(r->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
523 if (sl)
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
524 BBPunfix(sl->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
525 if (sr)
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
526 BBPunfix(sr->batCacheid);
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
527 throw(MAL, "regexp.rematchjoin", SEMANTIC_TYPE_MISMATCH);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
528 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
529
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
530 if (canditer_init(&lci, l, sl) == 0 ||
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
531 canditer_init(&rci, r, sr) == 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
532 /* if either side is empty (or no candidates) the
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
533 * result is empty */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
534 BBPunfix(l->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
535 BBPunfix(r->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
536 if (sl)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
537 BBPunfix(sl->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
538 if (sr)
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
539 BBPunfix(sr->batCacheid);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
540 bn1 = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
541 bn2 = BATdense(0, 0, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
542 if (bn1 == NULL || bn2 == NULL) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
543 BBPreclaim(bn1);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
544 BBPreclaim(bn2);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
545 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
546 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
547 return MAL_SUCCEED;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
548 }
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
549
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
550 /* if there is no valid estimate, use the size of the left
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
551 * input as size estimate */
28
e925d55b369b Updated to Aug2018 (11.31.X) version of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 27
diff changeset
552 if (is_lng_nil(estimate) || estimate == 0)
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
553 estimate = lci.ncand;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
554
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
555 /* create the output BATs */
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
556 bn1 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
557 bn2 = COLnew(0, TYPE_oid, estimate, TRANSIENT);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
558 if (bn1 == NULL || bn2 == NULL) {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
559 /* one of the calls to COLnew failed
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
560 * note, BBPreclaim checks whether its argument is NULL */
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
561 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
562 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
563 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
564 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
565 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
566 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
567 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
568 BBPunfix(sr->batCacheid);
25
f0739f6c1a43 A couple of small fixes; add type checking of input BAT.
Sjoerd Mullender <sjoerd@acm.org>
parents: 24
diff changeset
569 throw(MAL, "regexp.rematchjoin", GDK_EXCEPTION);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
570 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
571
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
572 li = bat_iterator(l);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
573 ri = bat_iterator(r);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
574
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
575 for (BUN i = 0; i < rci.ncand; i++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
576 ro = canditer_next(&rci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
577 pat = BUNtvar(ri, ro - r->hseqbase);
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
578
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
579 /* nil regular expressions don't match (despite
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
580 * nil_matches) */
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
581 if (strNil(pat))
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
582 continue;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
583 re = pcre_compile(pat, options, &err, &pos, NULL);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
584 sd = NULL;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
585 if (re == NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
586 goto bailout;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
587 sd = pcre_study(re, 0, &err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
588 if (err != NULL)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
589 goto bailout;
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
590
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
591 /* inner loop: reset iterator, then iterate over it */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
592 canditer_reset(&lci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
593 for (BUN j = 0; j < lci.ncand; j++) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
594 oid lo = canditer_next(&lci);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
595 const char *val = BUNtvar(li, lo - l->hseqbase);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
596 if (strNil(val))
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
597 continue;
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
598 pos = pcre_exec(re, sd, val, (int) strlen(val), 0, 0, NULL, 0);
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
599 if (pos >= 0) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
600 /* regular expression matched */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
601 if (BUNappend(bn1, &lo, false) != GDK_SUCCEED ||
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
602 BUNappend(bn2, &ro, false) != GDK_SUCCEED)
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
603 goto bailout;
33
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
604 } else if (pos != PCRE_ERROR_NOMATCH) {
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
605 /* error during processing */
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
606 err = "matching of regular expression failed";
a8ffdbc388ce Ported to Jun2020 branch.
Sjoerd Mullender <sjoerd@acm.org>
parents: 32
diff changeset
607 goto bailout;
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
608 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
609 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
610 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
611 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
612 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
613
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
614 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
615 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
616 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
617 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
618 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
619 BBPunfix(sr->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
620 *lres = bn1->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
621 *rres = bn2->batCacheid;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
622 BBPkeepref(*lres);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
623 BBPkeepref(*rres);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
624 return MAL_SUCCEED;
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
625
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
626 bailout:
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
627 BBPreclaim(bn1);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
628 BBPreclaim(bn2);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
629 BBPunfix(l->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
630 BBPunfix(r->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
631 if (sl)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
632 BBPunfix(sl->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
633 if (sr)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
634 BBPunfix(sr->batCacheid);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
635 if (sd)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
636 pcre_free_study(sd);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
637 if (re)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
638 pcre_free(re);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
639 if (err)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
640 throw(MAL, "pcre.rematchjoin",
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
641 "error with regular expression: %s", err);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
642 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
643 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
644
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
645 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
646 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
647 const bat *sl, const bat *sr, const bit *nil_matches,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
648 const lng *estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
649 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
650 return do_join(lres, rres, *lid, *rid, "", *sl, *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
651 *nil_matches, *estimate);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
652 }
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
653
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
654 static char *
15
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
655 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
656 const char **flags, const bat *sl, const bat *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
657 const bit *nil_matches, const lng *estimate)
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
658 {
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
659 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr,
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
660 *nil_matches, *estimate);
59bbfa0096b3 Added a tutorial for creating a FILTER FUNCTION.
Sjoerd Mullender <sjoerd@acm.org>
parents:
diff changeset
661 }
37
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
662
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
663 #include "mel.h"
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
664
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
/* SQL text passed to sql_register() for this module.  It holds two
 * CREATE FILTER FUNCTION statements, one per overload of rematch
 * (without and with a flags argument); both name regexp.rematch as
 * their external implementation. */
static char regexp_sql[] =
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING) "
	"EXTERNAL NAME regexp.rematch; "
	"CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING) "
	"EXTERNAL NAME regexp.rematch;";
37
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
669
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
670 static mel_func regexp_init_funcs[] = {
39
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
671 command("regexp", "rematch", regexpmatch, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
672 "Return true when the value 'val' matches the regular expression 'pat'",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
673 args(1,3, arg("",bit),arg("val",str),arg("pat",str))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
674 command("regexp", "rematchselect", regexpmatchselect, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
675 "Return the list of matches in 'val' that match the regular expression 'pat'",
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
676 args(1,5, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("anti",bit))),
39
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
677 command("regexp", "rematchjoin", regexpmatchjoin, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
678 "Return the matching pairs from the 'val' and 'pat' columns",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
679 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
40
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
680 command("batregexp", "rematch", regexpmatchbulk, false,
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
681 "Return a BAT with true for match and false for no match",
e70b12c15507 Updated for Oct2020 version.
Sjoerd Mullender <sjoerd@acm.org>
parents: 39
diff changeset
682 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))),
39
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
683 command("regexp", "rematch", regexpmatchf, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
684 "Return true when the value 'val' matches the regular expression 'pat'",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
685 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
686 command("regexp", "rematchselect", regexpmatchfselect, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
687 "Return the list of matches in 'val' that match the regular expression 'pat'",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
688 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
689 command("regexp", "rematchjoin", regexpmatchfjoin, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
690 "Return the matching pairs from the 'val' and 'pat'\ncolumns",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
691 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
692 command("batregexp", "rematch", regexpmatchfbulk, false,
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
693 "Return a BAT with true for match and false for no match",
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
694 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))),
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
695 { .imp=NULL } /* sentinel */
37
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
696 };
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
697
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
698 #include "mal_import.h"
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
699 #include "sql_import.h"
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
700 #ifdef _MSC_VER
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
701 #undef read
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
702 #pragma section(".CRT$XCU",read)
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
703 #endif
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
704 LIB_STARTUP_FUNC(init_regexp)
39
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
705 {
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
706 mal_module("regexp", NULL, regexp_init_funcs);
4633ab41de55 Layout and some small fixes for Oct2020 release of MonetDB.
Sjoerd Mullender <sjoerd@acm.org>
parents: 37
diff changeset
707 sql_register("regexp", regexp_sql);
37
e5d2d0c9b7b3 build libraries and included mal/sql in the library startup functions
Niels Nes <niels@cwi.nl>
parents: 33
diff changeset
708 }