comparison regexp/regexp.c @ 40:e70b12c15507

Updated for Oct2020 version.
author Sjoerd Mullender <sjoerd@acm.org>
date Tue, 08 Jun 2021 14:55:38 +0200 (2021-06-08)
parents 4633ab41de55
children da896864dbbd
comparison
equal deleted inserted replaced
39:4633ab41de55 40:e70b12c15507
17 /* system include files */ 17 /* system include files */
18 #include <string.h> 18 #include <string.h>
19 19
20 /* we use the PCRE library to do regular expression matching */ 20 /* we use the PCRE library to do regular expression matching */
21 #include <pcre.h> 21 #include <pcre.h>
22
23 /* __declspec() must be used on Windows, but not on other systems */
24 #ifndef _MSC_VER
25 /* not Windows */
26 #define __declspec(x) /* nothing */
27 #endif
28
29 /* these eight functions are the only externally visible functions
30 * since they are the only ones that are called from the MAL layer; on
31 * Windows they must be exported, on other systems, declaring them as
32 * extern is enough */
33 extern __declspec(dllexport) char *regexpmatch(bit *ret, const char **val, const char **pat);
34 extern __declspec(dllexport) char *regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags);
35 extern __declspec(dllexport) char *regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti);
36 extern __declspec(dllexport) char *regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti);
37 extern __declspec(dllexport) char *regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate);
38 extern __declspec(dllexport) char *regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, const char **flags, const bat *sl, const bat *sr, const bit *nil_matches, const lng *estimate);
39 extern __declspec(dllexport) char *regexpmatchbulk(bat *ret, const bat *bid, const char **pat);
40 extern __declspec(dllexport) char *regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags);
41 22
42 static int 23 static int
43 parseflags(const char *flags) 24 parseflags(const char *flags)
44 { 25 {
45 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */ 26 int options = PCRE_UTF8; /* MonetDB uses UTF-8 exclusively */
99 } 80 }
100 *ret = pos >= 0; 81 *ret = pos >= 0;
101 return MAL_SUCCEED; 82 return MAL_SUCCEED;
102 } 83 }
103 84
104 char * 85 static char *
105 regexpmatch(bit *ret, const char **val, const char **pat) 86 regexpmatch(bit *ret, const char **val, const char **pat)
106 { 87 {
107 return do_match(ret, *val, *pat, ""); 88 return do_match(ret, *val, *pat, "");
108 } 89 }
109 90
110 char * 91 static char *
111 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags) 92 regexpmatchf(bit *ret, const char **val, const char **pat, const char **flags)
112 { 93 {
113 return do_match(ret, *val, *pat, *flags); 94 return do_match(ret, *val, *pat, *flags);
114 } 95 }
115 96
255 *ret = bn->batCacheid; 236 *ret = bn->batCacheid;
256 BBPkeepref(*ret); 237 BBPkeepref(*ret);
257 return MAL_SUCCEED; 238 return MAL_SUCCEED;
258 } 239 }
259 240
260 char * 241 static char *
261 regexpmatchbulk(bat *ret, const bat *bid, const char **pat) 242 regexpmatchbulk(bat *ret, const bat *bid, const char **pat)
262 { 243 {
263 return do_matchbulk(ret, *bid, *pat, ""); 244 return do_matchbulk(ret, *bid, *pat, "");
264 } 245 }
265 246
266 247
267 char * 248 static char *
268 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags) 249 regexpmatchfbulk(bat *ret, const bat *bid, const char **pat, const char **flags)
269 { 250 {
270 return do_matchbulk(ret, *bid, *pat, *flags); 251 return do_matchbulk(ret, *bid, *pat, *flags);
271 } 252 }
272 253
455 *ret = bn->batCacheid; 436 *ret = bn->batCacheid;
456 BBPkeepref(*ret); 437 BBPkeepref(*ret);
457 return MAL_SUCCEED; 438 return MAL_SUCCEED;
458 } 439 }
459 440
460 char * 441 static char *
461 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti) 442 regexpmatchselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const bit *anti)
462 { 443 {
463 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti); 444 return do_select(ret, *bid, sid ? *sid : 0, *pat, "", *anti);
464 } 445 }
465 446
466 447
467 char * 448 static char *
468 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti) 449 regexpmatchfselect(bat *ret, const bat *bid, const bat *sid, const char **pat, const char **flags, const bit *anti)
469 { 450 {
470 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti); 451 return do_select(ret, *bid, sid ? *sid : 0, *pat, *flags, *anti);
471 } 452 }
472 453
659 throw(MAL, "pcre.rematchjoin", 640 throw(MAL, "pcre.rematchjoin",
660 "error with regular expression: %s", err); 641 "error with regular expression: %s", err);
661 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION); 642 throw(MAL, "pcre.rematchjoin", GDK_EXCEPTION);
662 } 643 }
663 644
664 char * 645 static char *
665 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, 646 regexpmatchjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
666 const bat *sl, const bat *sr, const bit *nil_matches, 647 const bat *sl, const bat *sr, const bit *nil_matches,
667 const lng *estimate) 648 const lng *estimate)
668 { 649 {
669 return do_join(lres, rres, *lid, *rid, "", *sl, *sr, 650 return do_join(lres, rres, *lid, *rid, "", *sl, *sr,
670 *nil_matches, *estimate); 651 *nil_matches, *estimate);
671 } 652 }
672 653
673 char * 654 static char *
674 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid, 655 regexpmatchfjoin(bat *lres, bat *rres, const bat *lid, const bat *rid,
675 const char **flags, const bat *sl, const bat *sr, 656 const char **flags, const bat *sl, const bat *sr,
676 const bit *nil_matches, const lng *estimate) 657 const bit *nil_matches, const lng *estimate)
677 { 658 {
678 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr, 659 return do_join(lres, rres, *lid, *rid, *flags, *sl, *sr,
679 *nil_matches, *estimate); 660 *nil_matches, *estimate);
680 } 661 }
681 662
682 #include "mel.h" 663 #include "mel.h"
683 664
684 static char regexp_sql[] = "CREATE FILTER FUNCTION rematch(val STRING, pat STRING) EXTERNAL NAME regexp.rematch; CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING) EXTERNAL NAME regexp.rematch;"; 665 static char regexp_sql[] = "CREATE FILTER FUNCTION rematch(val STRING, pat STRING)"
666 " EXTERNAL NAME regexp.rematch; "
667 "CREATE FILTER FUNCTION rematch(val STRING, pat STRING, flags STRING)"
668 " EXTERNAL NAME regexp.rematch;";
685 669
686 static mel_func regexp_init_funcs[] = { 670 static mel_func regexp_init_funcs[] = {
687 command("regexp", "rematch", regexpmatch, false, 671 command("regexp", "rematch", regexpmatch, false,
688 "Return true when the value 'val' matches the regular expression 'pat'", 672 "Return true when the value 'val' matches the regular expression 'pat'",
689 args(1,3, arg("",bit),arg("val",str),arg("pat",str))), 673 args(1,3, arg("",bit),arg("val",str),arg("pat",str))),
690 command("regexp", "rematchselect", regexpmatchselect, false, 674 command("regexp", "rematchselect", regexpmatchselect, false,
691 "Return the list of matches in 'val' that match the regular expression 'pat'", 675 "Return the list of matches in 'val' that match the regular expression 'pat'",
692 args(1,5, batarg("",oid),batarg("val",str),batarg("cand",oid),arg("pat",str),arg("anti",bit))), 676 args(1,5, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("anti",bit))),
693 command("regexp", "rematchjoin", regexpmatchjoin, false, 677 command("regexp", "rematchjoin", regexpmatchjoin, false,
694 "Return the matching pairs from the 'val' and 'pat' columns", 678 "Return the matching pairs from the 'val' and 'pat' columns",
695 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), 679 args(2,8, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
680 command("batregexp", "rematch", regexpmatchbulk, false,
681 "Return a BAT with true for match and false for no match",
682 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))),
696 command("regexp", "rematch", regexpmatchf, false, 683 command("regexp", "rematch", regexpmatchf, false,
697 "Return true when the value 'val' matches the regular expression 'pat'", 684 "Return true when the value 'val' matches the regular expression 'pat'",
698 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))), 685 args(1,4, arg("",bit),arg("val",str),arg("pat",str),arg("flags",str))),
699 command("regexp", "rematchselect", regexpmatchfselect, false, 686 command("regexp", "rematchselect", regexpmatchfselect, false,
700 "Return the list of matches in 'val' that match the regular expression 'pat'", 687 "Return the list of matches in 'val' that match the regular expression 'pat'",
701 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))), 688 args(1,6, batarg("",oid),batarg("val",str),batarg("s",oid),arg("pat",str),arg("flags",str),arg("anti",bit))),
702 command("regexp", "rematchjoin", regexpmatchfjoin, false, 689 command("regexp", "rematchjoin", regexpmatchfjoin, false,
703 "Return the matching pairs from the 'val' and 'pat'\ncolumns", 690 "Return the matching pairs from the 'val' and 'pat'\ncolumns",
704 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))), 691 args(2,9, batarg("lr",oid),batarg("rr",oid),batarg("val",str),batarg("pat",str),arg("flags",str),batarg("sl",oid),batarg("sr",oid),arg("nil_matches",bit),arg("estimate",lng))),
705 command("batregexp", "rematch", regexpmatchbulk, false,
706 "Return a BAT with true for match and false for no match",
707 args(1,3, batarg("",bit),batarg("val",str),arg("pat",str))),
708 command("batregexp", "rematch", regexpmatchfbulk, false, 692 command("batregexp", "rematch", regexpmatchfbulk, false,
709 "Return a BAT with true for match and false for no match", 693 "Return a BAT with true for match and false for no match",
710 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))), 694 args(1,4, batarg("",bit),batarg("val",str),arg("pat",str),arg("flags",str))),
711 { .imp=NULL } /* sentinel */ 695 { .imp=NULL } /* sentinel */
712 }; 696 };