Mercurial > hg > MonetDB-extend
comparison regexp/regexp.c @ 40:e70b12c15507
Updated for Oct2020 version.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Tue, 08 Jun 2021 14:55:38 +0200 (2021-06-08) |
parents | 4633ab41de55 |
children | da896864dbbd |
comparison
equal
deleted
inserted
replaced
39:4633ab41de55 | 40:e70b12c15507 |
---|---|
17 /* system include files */ | 17 /* system include files */ |
18 #include <string.h> | 18 #include <string.h> |
19 | 19 |
20 /* we use the PCRE library to do regular expression matching */ | 20 /* we use the PCRE library to do regular expression matching */ |
21 #include <pcre.h> | 21 #include <pcre.h> |
22 | |
23 /* __declspec() must be used on Windows, but not on other systems */ | |
24 #ifndef _MSC_VER | |
25 /* not Windows */ | |
26 #define __declspec(x) /* nothing */ | |
27 #endif | |
28 | |
29 /* these eight functions are the only externally visible functions | |
30 * since they are the only ones that are called from the MAL layer; on | |
31 * Windows they must be exported, on other systems, declaring them as | |
32 * extern is enough */ | |
33 extern __declspec(dllexport) char *regexpmatch(bit *ret, const char **val, const char **pat); | |
34 extern __declspec(dllexport) char *regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags); | |
35 extern __declspec(dllexport) char *regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti); | |
36 extern __declspec(dllexport) char *regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti); | |
37 extern __declspec(dllexport) char *regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate); | |
38 extern __declspec(dllexport) char *regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const char **flags, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate); | |
39 extern __declspec(dllexport) char *regexpmatchbulk(bat *ret, const bat *bid, const char **pat); | |
40 extern __declspec(dllexport) char *regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags); | |
41 | 22 |
42 static int | 23 static int |
43 parseflags(const char *flags) | 24 parseflags(const char *flags) |
44 { | 25 { |
45 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */ | 26 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */ |
99 } | 80 } |
100 *ret = pos >= 0; | 81 *ret = pos >= 0; |
101 return MAL_SUCCEED; | 82 return MAL_SUCCEED; |
102 } | 83 } |
103 | 84 |
104 char * | 85 static char * |
105 regexpmatch(bit *ret, const char **val, const char **pat) | 86 regexpmatch(bit *ret, const char **val, const char **pat) |
106 { | 87 { |
107 return do_match(ret, *val, *pat, ""); | 88 return do_match(ret, *val, *pat, ""); |
108 } | 89 } |
109 | 90 |
110 char * | 91 static char * |
111 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags) | 92 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags) |
112 { | 93 { |
113 return do_match(ret, *val, *pat, *flags); | 94 return do_match(ret, *val, *pat, *flags); |
114 } | 95 } |
115 | 96 |
255 *ret = bn->batCacheid; | 236 *ret = bn->batCacheid; |
256 BBPkeepref(*ret); | 237 BBPkeepref(*ret); |
257 return MAL_SUCCEED; | 238 return MAL_SUCCEED; |
258 } | 239 } |
259 | 240 |
260 char * | 241 static char * |
261 regexpmatchbulk(bat *ret, const bat *bid, const char **pat) | 242 regexpmatchbulk(bat *ret, const bat *bid, const char **pat) |
262 { | 243 { |
263 return do_matchbulk(ret, *bid, *pat, ""); | 244 return do_matchbulk(ret, *bid, *pat, ""); |
264 } | 245 } |
265 | 246 |
266 | 247 |
267 char * | 248 static char * |
268 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags) | 249 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags) |
269 { | 250 { |
270 return do_matchbulk(ret, *bid, *pat, *flags); | 251 return do_matchbulk(ret, *bid, *pat, *flags); |
271 } | 252 } |
272 | 253 |
455 *ret = bn->batCacheid; | 436 *ret = bn->batCacheid; |
456 BBPkeepref(*ret); | 437 BBPkeepref(*ret); |
457 return MAL_SUCCEED; | 438 return MAL_SUCCEED; |
458 } | 439 } |
459 | 440 |
460 char * | 441 static char * |
461 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti) | 442 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti) |
462 { | 443 { |
463 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti); | 444 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti); |
464 } | 445 } |
465 | 446 |
466 | 447 |
467 char * | 448 static char * |
468 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti) | 449 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti) |
469 { | 450 { |
470 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti); | 451 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti); |
471 } | 452 } |
472 | 453 |
659 throw(MAL, "pcre.rematchjoin", | 640 throw(MAL, "pcre.rematchjoin", |
660 "error with regular expression: %s", err); | 641 "error with regular expression: %s", err); |
661 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION); | 642 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION); |
662 } | 643 } |
663 | 644 |
664 char * | 645 static char * |
665 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, | 646 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
666 const bat *sl, const bat *sr, const bit *nil_matches, | 647 const bat *sl, const bat *sr, const bit *nil_matches, |
667 const lng *estimate) | 648 const lng *estimate) |
668 { | 649 { |
669 return do_join(lres, rres, *lid, *rid, "", *sl, *sr, | 650 return do_join(lres, rres, *lid, *rid, "", *sl, *sr, |
670 *nil_matches, *estimate); | 651 *nil_matches, *estimate); |
671 } | 652 } |
672 | 653 |
673 char * | 654 static char * |
674 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, | 655 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, |
675 const char **flags, const bat *sl, const bat *sr, | 656 const char **flags, const bat *sl, const bat *sr, |
676 const bit *nil_matches, const lng *estimate) | 657 const bit *nil_matches, const lng *estimate) |
677 { | 658 { |
678 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr, | 659 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr, |
679 *nil_matches, *estimate); | 660 *nil_matches, *estimate); |
680 } | 661 } |
681 | 662 |
682 #include "mel.h" | 663 #include "mel.h" |
683 | 664 |
684 static char regexp_sql[] = "CREATE FILTER FUNCTION rematch(val STRING, pat STRING) EXTERNAL NAME regexp.rematch; CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING) EXTERNAL NAME regexp.rematch;"; | 665 static char regexp_sql[] = "CREATE FILTER FUNCTION rematch(val STRING, pat STRING)" |
666 " EXTERNAL NAME regexp.rematch; " | |
667 "CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING)" | |
668 " EXTERNAL NAME regexp.rematch;"; | |
685 | 669 |
686 static mel_func regexp_init_funcs[] = { | 670 static mel_func regexp_init_funcs[] = { |
687 command("regexp", "rematch", regexpmatch, false, | 671 command("regexp", "rematch", regexpmatch, false, |
688 "Return true when the value 'val' matches the regular expression 'pat'", | 672 "Return true when the value 'val' matches the regular expression 'pat'", |
689 args(1,3, arg("",bit),arg("val",str),arg("pat",str))), | 673 args(1,3, arg("",bit),arg("val",str),arg("pat",str))), |
690 command("regexp", "rematchselect", regexpmatchselect, false, | 674 command("regexp", "rematchselect", regexpmatchselect, false, |
691 "Return the list of matches in 'val' that match the regular expression 'pat'", | 675 "Return the list of matches in 'val' that match the regular expression 'pat'", |
692 args(1,5, batarg("",oid),batarg("val",str),batarg("cand",oid),arg("pat",str),arg("anti",bit))), | 676 args(1,5, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("anti",bit))), |
693 command("regexp", "rematchjoin", regexpmatchjoin, false, | 677 command("regexp", "rematchjoin", regexpmatchjoin, false, |
694 "Return the matching pairs from the 'val' and 'pat' columns", | 678 "Return the matching pairs from the 'val' and 'pat' columns", |
695 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), | 679 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), |
680 command("batregexp", "rematch", regexpmatchbulk, false, | |
681 "Return a BAT with true for match and false for no match", | |
682 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))), | |
696 command("regexp", "rematch", regexpmatchf, false, | 683 command("regexp", "rematch", regexpmatchf, false, |
697 "Return true when the value 'val' matches the regular expression 'pat'", | 684 "Return true when the value 'val' matches the regular expression 'pat'", |
698 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))), | 685 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))), |
699 command("regexp", "rematchselect", regexpmatchfselect, false, | 686 command("regexp", "rematchselect", regexpmatchfselect, false, |
700 "Return the list of matches in 'val' that match the regular expression 'pat'", | 687 "Return the list of matches in 'val' that match the regular expression 'pat'", |
701 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))), | 688 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))), |
702 command("regexp", "rematchjoin", regexpmatchfjoin, false, | 689 command("regexp", "rematchjoin", regexpmatchfjoin, false, |
703 "Return the matching pairs from the 'val' and 'pat'\ncolumns", | 690 "Return the matching pairs from the 'val' and 'pat'\ncolumns", |
704 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), | 691 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), |
705 command("batregexp", "rematch", regexpmatchbulk, false, | |
706 "Return a BAT with true for match and false for no match", | |
707 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))), | |
708 command("batregexp", "rematch", regexpmatchfbulk, false, | 692 command("batregexp", "rematch", regexpmatchfbulk, false, |
709 "Return a BAT with true for match and false for no match", | 693 "Return a BAT with true for match and false for no match", |
710 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))), | 694 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))), |
711 { .imp=NULL } /* sentinel */ | 695 { .imp=NULL } /* sentinel */ |
712 }; | 696 }; |