Hi Anandeshwar, thanks a lot for extending pathfinder with full-text functionality. I'm however a little bit taken by surprise and your checkin in my opinion leaves some questions open: - Why did you change the translation of PFcore_some in core.c? - Why did this rewrite touch any file in the algebra folder? - Whitespace removal is not ftcontains support as your message suggests. Additionally disabling consistency checks without an explanation isn't a nice thing. - Why did you duplicate the translation of a for loop? - Isn't the score just an optional modification like the positional variable? I at least don't like this code duplication as it will make code maintenance a lot harder. - As far as I can oversee you did break the compositionality of the compilation scheme. Now the different rules provide different result schemas. I think (mainly because of the last issue) this checkin should better go into a separate branch instead of the CVS HEAD until the translation is more mature. Jan On Nov 5, 2009, at 17:24, Anandeshwar Singh wrote:
Update of /cvsroot/monetdb/pathfinder/compiler/core In directory 23jxhf1.ch3.sourceforge.com:/tmp/cvs-serv7749/core
Modified Files: core.c coreopt.brg fs.brg simplify.brg Log Message: XQuery full-text search support initial version!
This initial version provides support to
-ftcontains keyword,
e.g., for $f in doc("menu.xml")//food[./name ftcontains "Belgian Waffles"] return $f The above query will return all the food nodes that has some relevancy over "Belgian Waffles"
-initial score variable support
e.g., for $f score $s in doc("menu.xml")//food[./name ftcontains "Belgian Waffles"] return $s The above query will return the relevancy score of all the matched food nodes, however since its an initial version, the support to this score variable is very limited.
Index: core.c =================================================================== RCS file: /cvsroot/monetdb/pathfinder/compiler/core/core.c,v retrieving revision 1.68 retrieving revision 1.69 diff -u -d -r1.68 -r1.69 --- core.c 7 May 2009 14:26:37 -0000 1.68 +++ core.c 5 Nov 2009 16:23:58 -0000 1.69 @@ -614,6 +614,7 @@ * @param bindvar variable bound in the for clause * @param pos positional variable */ + PFcnode_t * PFcore_forvars (const PFcnode_t *bindvar, const PFcnode_t *pos) { @@ -624,6 +625,23 @@ }
/** + * Variables bound in a for clause with score variable + * + * @param bindvar variable bound in the for clause + * @param pos positional variable + * @param score full-text score variable + */ +PFcnode_t * +PFcore_forvars2 (const PFcnode_t *bindvar, const PFcnode_t *pos, const PFcnode_t *score) +{ + assert (bindvar); assert (bindvar->kind == c_var); + assert (pos); assert (pos->kind == c_var || pos->kind == c_nil); + assert (score); assert (score->kind == c_var || score->kind == c_nil); + + return PFcore_wire2 (c_forvars, bindvar, PFcore_wire2(c_vars, pos, score)); +} + +/** * Create a core tree node representing a let binding: * * let v := e1 return e2 @@ -1669,20 +1687,26 @@ PFcnode_t * PFcore_some (const PFcnode_t *v, const PFcnode_t *expr, const PFcnode_t *qExpr) { - PFfun_t *fn_exists = PFcore_function (PFqname (PFns_fn, "exists"));
- return APPLY (fn_exists, - PFcore_flwr ( - PFcore_for ( - PFcore_forbind ( - PFcore_forvars ( - v, - PFcore_nil ()), - expr), - PFcore_nil ()), - PFcore_where ( - PFcore_ebv (qExpr), - PFcore_num (1)))); + PFfun_t *fn_not = PFcore_function (PFqname (PFns_fn, "not")); + PFfun_t *fn_empty = PFcore_function (PFqname (PFns_fn, "empty")); + + return APPLY (fn_not, + APPLY (fn_empty, + PFcore_flwr ( + PFcore_for ( + PFcore_forbind ( + PFcore_forvars ( + v, + PFcore_nil ()), + expr), + PFcore_nil ()), + PFcore_if + (PFcore_ebv (qExpr), + PFcore_then_else ( + PFcore_num (1), + PFcore_empty ()))))); + }
/**
Index: simplify.brg =================================================================== RCS file: /cvsroot/monetdb/pathfinder/compiler/core/simplify.brg,v retrieving revision 1.44 retrieving revision 1.45 diff -u -d -r1.44 -r1.45 --- simplify.brg 7 May 2009 14:26:52 -0000 1.44 +++ simplify.brg 5 Nov 2009 16:23:58 -0000 1.45 @@ -221,6 +221,9 @@ /* Pathfinder extension: XRPC */ %term xrpc = 76 /**< XRPC calls: "execute at" */
+ /* Associated For variable holders */ +%term vars = 77 /**< variable pair (position. var + score. var) of a for */ + %%
Query: main (FunctionDecls, CoreExpr) = 1 (10); @@ -230,7 +233,7 @@
CoreExpr: flwr (OptBindExpr, WhereExpr) = 32 (10); CoreExpr: flwr (OptBindExpr, OrdWhereExpr) = 33 (10); - + OptBindExpr: for_ (forbind (forvars (var, nil), LiteralValue), OptBindExpr) = 2 (10); @@ -378,6 +381,20 @@ /* Pathfinder extension: XRPC */ CoreExpr: xrpc (CoreExpr, FunExpr) = 124 (10);
+/* full-text score variable extension */ +OptBindExpr: for_ (forbind (forvars (var, + vars (nil, OptVar)), + LiteralValue), + OptBindExpr) = 125 (10); +OptBindExpr: for_ (forbind (forvars (var, + vars (var, OptVar)), + LiteralValue), + OptBindExpr) = 126 (10); +OptBindExpr: for_ (forbind (forvars (var, vars( + OptVar, OptVar)), + CoreExpr), + OptBindExpr) = 127 (10); + %%
/** Maximum number of pattern leaves */ @@ -719,6 +736,27 @@ */ p->sem.fun->core = R(p); break; + + /* OptBindExpr: for_ (forbind (forvars (var, + vars (nil, var)), + LiteralValue), + OptBindExpr) */ + case 125: + *p = *let (letbind (LLL(p), LR(p)), R(p)); + relabel (p, kids); + rewrite_again = true; + break; + + /* OptBindExpr: for_ (forbind (forvars (var, vars( + var, var)), + LiteralValue), + OptBindExpr) */ + case 126: + *p = *let (letbind (LLL(p), LR(p)), + let (letbind (LLRL(p), num (1)), R(p))); + relabel (p, kids); + rewrite_again = true; + break;
default: break;
Index: coreopt.brg =================================================================== RCS file: /cvsroot/monetdb/pathfinder/compiler/core/coreopt.brg,v retrieving revision 1.77 retrieving revision 1.78 diff -u -d -r1.77 -r1.78 --- coreopt.brg 12 Oct 2009 16:15:14 -0000 1.77 +++ coreopt.brg 5 Nov 2009 16:23:58 -0000 1.78 @@ -222,6 +222,9 @@ /* Pathfinder extension: XRPC */ %term xrpc = 76 /**< XRPC calls: "execute at" */
+ /* Associated For variable holders */ +%term vars = 77 /**< variable pair (position. var + score. var) of a for */ + %%
/* all rules starting from rule 40 are never used @@ -244,7 +247,7 @@ OptBindExpr: for_ (forbind (forvars (var, OptVar), CoreExpr), OptBindExpr) = 5 (10); - + OptVar: nil = 42(10); OptVar: var = 43(10);
@@ -383,6 +386,19 @@ /* Pathfinder extension: XRPC */ CoreExpr: xrpc (CoreExpr, FunExpr) = 116(10);
+/* Pathfinder full-text score var */ +CoreExpr: flwr (for_ (forbind (forvars (var, + vars (nil, nil)), + CoreExpr), + nil), var) = 117 (10); + +OptBindExpr: for_ (forbind (forvars (var, + vars (OptVar, + OptVar) + ), + CoreExpr), + OptBindExpr) = 118 (10); + %%
/** Type of a core tree node */ @@ -1141,7 +1157,7 @@ *p = *LL(p); TY(p) = ty; rewritten = true; - break; + break; } }
@@ -1543,6 +1559,56 @@ break; } break; + + /* CoreExpr: flwr (for_ (forbind (forvars (var, + vars (nil, nil)), + CoreExpr), + nil), var) */ + case 117: + /* replace for loops of the form + 'for $a in ... return $a' by its input '...' */ + if (LLLL(p)->sem.var == R(p)->sem.var) { + *p = *LLR(p); + relabel (p, kids); + rewritten = true; + } + break; + + /* OptBindExpr: for_ (forbind (forvars (var, + vars (OptVar, + OptVar) + ), + CoreExpr), + OptBindExpr) */ + case 118: + /* + * If we iterate over a sequence that we know (from static + * typing) to always have length 1, replace the `for' by + * a corresponding `let'. + */ + if (PFty_subtype (TY(LR(p)), PFty_xs_anyItem ())) { + + PFcnode_t *c = R(p); + + if (LLRL(p)->kind != c_nil ) + c = let (letbind (LLRL(p), num (1)), c); + + *p = *let (letbind (LLL(p), LR(p)), c); + + /* type-check what we just created */ + PFty_check (flwr (p, num(1))); + + rewritten = true; + + /* + * Re-label entire subtree. Type-checking may have + * modified all the state labels in the subtree, so + * we have to restore them. + */ + PFcoreopt_label (p); + break; + } + break;
default: break; @@ -1662,8 +1728,32 @@
/* a for loop increases the nesting depth */ base++; - - if (LLR(c)->kind == c_var) { + + if (LLR(c)->kind == c_vars) { + if (LLRL(c)->kind == c_var) { + /* add variable to the environment */ + *((bind_t *) PFarray_add (unused_var_env)) + = (bind_t) { .var = LLRL(c)->sem.var, + .atom = parent, + .child = child }; + /* reset reference counter */ + LLRL(c)->sem.var->used = 0; + /* record the current scope */ + LLRL(c)->sem.var->base = base; + } + + if (LLRR(c)->kind == c_var) { + /* add variable to the environment */ + *((bind_t *) PFarray_add (unused_var_env)) + = (bind_t) { .var = LLRR(c)->sem.var, + .atom = parent, + .child = child }; + /* reset reference counter */ + LLRR(c)->sem.var->used = 0; + /* record the current scope */ + LLRR(c)->sem.var->base = base; + } + } else if (LLR(c)-> kind == c_var) { /* add variable to the environment */ *((bind_t *) PFarray_add (unused_var_env)) = (bind_t) { .var = LLR(c)->sem.var, @@ -1791,7 +1881,18 @@
unused_var = true; } else if (n->kind == c_for) { - LLR(n) = nil (); + if(LLR(n)->kind == c_var) + LLR(n) = nil(); + else { + if(LLRL(n)->sem.var == var) + LLRL(n) = nil (); + else + LLRR(n) = nil (); + if(LLRL(n)->kind == c_nil && LLRR(n)->kind == c_nil) + LLR(n) = nil(); + else if(LLRR(n)->kind == c_nil) + LLR(n) = LLRL(n); + } unused_var = true; } } else if (var->used == 1 && n->kind == c_let)
Index: fs.brg =================================================================== RCS file: /cvsroot/monetdb/pathfinder/compiler/core/fs.brg,v retrieving revision 1.78 retrieving revision 1.79 diff -u -d -r1.78 -r1.79 --- fs.brg 19 Oct 2009 09:30:38 -0000 1.78 +++ fs.brg 5 Nov 2009 16:23:58 -0000 1.79 @@ -46,6 +46,7 @@ #include "abssyn.h" #include "qname.h" #include "mem.h" + #include "options.h" /* extract option declarations from parse tree */ #include "string_utils.h" /* trimming for pragmas */
@@ -220,6 +221,13 @@ %term seed = 128 %term xrpc = 129 %term docmgmt_ty = 130 +%term ftcontains = 131 +%term ftfilter = 132 +%term ftignore = 133 +%term ftor = 134 +%term ftand = 135 +%term ftmildnot = 136 +%term ftnot = 137
%%
@@ -355,7 +363,7 @@ OptPosVar), OptExpr), OptBindExpr) = 66 (10); - + OptPosVar: nil = 68 (10); OptPosVar: Var = 69 (10);
@@ -439,22 +447,22 @@ AndExpr: ComparisonExpr = 104 (10); AndExpr: and (ComparisonExpr, AndExpr) = 105 (10);
-ComparisonExpr: RangeExpr = 106 (10); -ComparisonExpr: eq (RangeExpr, RangeExpr) = 107 (10); -ComparisonExpr: ne (RangeExpr, RangeExpr) = 108 (10); -ComparisonExpr: lt (RangeExpr, RangeExpr) = 109 (10); -ComparisonExpr: le (RangeExpr, RangeExpr) = 110 (10); -ComparisonExpr: gt (RangeExpr, RangeExpr) = 111 (10); -ComparisonExpr: ge (RangeExpr, RangeExpr) = 112 (10); -ComparisonExpr: val_eq (RangeExpr, RangeExpr) = 113 (10); -ComparisonExpr: val_ne (RangeExpr, RangeExpr) = 114 (10); -ComparisonExpr: val_lt (RangeExpr, RangeExpr) = 115 (10); -ComparisonExpr: val_le (RangeExpr, RangeExpr) = 116 (10); -ComparisonExpr: val_gt (RangeExpr, RangeExpr) = 117 (10); -ComparisonExpr: val_ge (RangeExpr, RangeExpr) = 118 (10); -ComparisonExpr: is (RangeExpr, RangeExpr) = 119 (10); -ComparisonExpr: ltlt (RangeExpr, RangeExpr) = 121 (10); -ComparisonExpr: gtgt (RangeExpr, RangeExpr) = 122 (10); +ComparisonExpr: FTContainsExpr = 106 (10); +ComparisonExpr: eq (FTContainsExpr, FTContainsExpr) = 107 (10); +ComparisonExpr: ne (FTContainsExpr, FTContainsExpr) = 108 (10); +ComparisonExpr: lt (FTContainsExpr, FTContainsExpr) = 109 (10); +ComparisonExpr: le (FTContainsExpr, FTContainsExpr) = 110 (10); +ComparisonExpr: gt (FTContainsExpr, FTContainsExpr) = 111 (10); +ComparisonExpr: ge (FTContainsExpr, FTContainsExpr) = 112 (10); +ComparisonExpr: val_eq (FTContainsExpr, FTContainsExpr) = 113 (10); +ComparisonExpr: val_ne (FTContainsExpr, FTContainsExpr) = 114 (10); +ComparisonExpr: val_lt (FTContainsExpr, FTContainsExpr) = 115 (10); +ComparisonExpr: val_le (FTContainsExpr, FTContainsExpr) = 116 (10); +ComparisonExpr: val_gt (FTContainsExpr, FTContainsExpr) = 117 (10); +ComparisonExpr: val_ge (FTContainsExpr, FTContainsExpr) = 118 (10); +ComparisonExpr: is (FTContainsExpr, FTContainsExpr) = 119 (10); +ComparisonExpr: ltlt (FTContainsExpr, FTContainsExpr) = 121 (10); +ComparisonExpr: gtgt (FTContainsExpr, FTContainsExpr) = 122 (10);
RangeExpr: AdditiveExpr = 123 (10); RangeExpr: range (RangeExpr, RangeExpr) = 124 (10); @@ -634,6 +642,58 @@ /* Pathfinder extension: statement type for document management functions */ SequenceType: seq_ty (docmgmt_ty (nil)) = 238 (10);
+/* Pathfinder full-text support */ +FTContainsExpr: RangeExpr = 239 (10); +FTContainsExpr: ftcontains (RangeExpr, FTSelectionExpr) = 240 (10); +FTContainsExpr: ftcontains (RangeExpr, FTFilterExpr) = 241 (10); + +OptBindExpr: binds (bind (vars (vars (var_type (Var, + nil), + OptPosVar), + OptFTScoreVar), + OptExpr), + OptBindExpr) = 242 (10); +OptBindExpr: binds (bind (vars (vars (var_type + (Var, + TypeDeclaration), + OptPosVar), + OptFTScoreVar), + OptExpr), + OptBindExpr) = 243 (10); + +OptFTScoreVar: nil = 244 (10); +OptFTScoreVar: Var = 245 (10); + +FTFilterExpr: ftfilter (FTSelectionExpr, + FTIgnoreOption) = 246 (10); +FTIgnoreOption: ftignore (UnionExpr) = 247 (10); + +FTSelectionExpr: FTOrExpr = 248 (10); + +FTOrExpr: FTAndExpr = 249 (10); +FTOrExpr: ftor (FTOrExpr, FTAndExpr) = 250 (10); + +FTAndExpr: FTMildNot = 251 (10); +FTAndExpr: ftand (FTAndExpr, FTMildNot) = 252 (10); + +FTMildNot: FTUnaryNot = 253 (10); +FTMildNot: ftmildnot (FTMildNot, FTUnaryNot) = 254 (10); + +FTUnaryNot: FTPrimaryWithOptions = 255 (10); +FTUnaryNot: ftnot (FTPrimaryWithOptions) = 256 (10); + +FTPrimaryWithOptions: FTPrimaryExpr = 257 (10); + +FTPrimaryExpr: FTWordsExpr = 258 (10); +FTPrimaryExpr: FTSelectionExpr = 259 (10); + +FTWordsExpr: FTWordsValueExpr = 260 (10); + +FTWordsValueExpr: Literal = 261 (10); + +OptBindExpr: binds (let (Var, OptExpr), + OptBindExpr) = 262 (10); + %%
/** Access the Core representation of any node */ @@ -1446,7 +1506,7 @@ C(R(p))));
} break; - + /* OptBindExpr: binds (bind (vars (var_type (Var, nil), OptPosVar), @@ -3906,6 +3966,156 @@ C(p) = seqtype (PFty_star (PFty_docmgmt ())); break;
+ /* FTContainsExpr: RangeExpr */ + case 239: + break; + + /* FTContainsExpr: ftcontains (RangeExpr, FTSelectionExpr) */ + case 240: + { + PFfun_t *op_ftcontains = function (PFqname (PFns_fts, "ftcontains")); + two_arg_fun_worker (p, op_ftcontains); + + } break; + + /* FTContainsExpr: ftcontains (RangeExpr, FTFilterExpr) */ + case 241: + break; + + /* OptBindExpr: binds (bind (vars (vars (var_type (Var, + nil ), + OptPosVar), + OptFTScoreVar), + OptExpr), + OptBindExpr) */ + case 242: + C(p) = for_ (forbind (forvars2 (C(LLLLL(p)), C(LLLR(p)), C(LLR(p))), + C(LR(p))), + C(R(p))); + break; + + /* OptBindExpr: binds (bind (vars (vars (var_type ( + Var, + TypeDeclaration), + OptPosVar), + OptFTScoreVar), + OptExpr), + OptBindExpr) */ + case 243: + { + PFvar_t *v = new_var (NULL); + + C(p) = for_ (forbind (forvars2 (var (v), C(LLLR(p)), C(LLR(p))), + C(LR(p))), + let (letbind (C(LLLLL(p)), + proof (subty (var (v), + C(LLLLR(p))), + seqcast (C(LLLLR(p)), + var (v)))), + C(R(p)))); + } break; + + /* OptFTScoreVar: nil */ + case 244: + C(p) = nil (); + break; + + /* OptFTScoreVar: Var */ + case 245: + break; + + /*FTFilterExpr: ftfilter (FTSelectionExpr, + OptFTIgnoreOption) */ + case 246: + break; + + /*FTIgnoreOption: ftignore (UnionExpr) */ + case 247: + PFoops_loc (OOPS_NOTSUPPORTED, p->loc, + "Full text ignore options not yet implemented"); + break; + + /*FTSelectionExpr: FTOrExpr */ + case 248: + break; + + /*FTOrExpr: FTAndExpr */ + case 249: + break; + + /*FTOrExpr: ftor (FTAndExpr, FTAndExpr) */ + case 250: + PFoops_loc (OOPS_NOTSUPPORTED, p->loc, + "Full text Or operator not yet supported"); + break; + + /*FTAndExpr: FTMildNot */ + case 251: + break; + + /*FTAndExpr: ftand (FTMildNot, FTMildNot) */ + case 252: + PFoops_loc (OOPS_NOTSUPPORTED, p->loc, + "Full text And operator not yet supported"); + break; + + /*FTMildNot: FTUnaryNot */ + case 253: + break; + + /*FTMildNot: ftmildnot ( FTUnaryNot, FTUnaryNot) */ + case 254: + PFoops_loc (OOPS_NOTSUPPORTED, p->loc, + "Full text mild-not not yet supported"); + break; + + /*FTUnaryNot: FTPrimaryWithOptions */ + case 255: + break; + + /*FTUnaryNot: ftnot (FTPrimaryWithOptions) */ + case 256: + PFoops_loc (OOPS_NOTSUPPORTED, p->loc, + "Full text Not operator not yet supported"); + break; + + /*FTPrimaryWithOptions: FTPrimaryExpr */ + case 257: + break; + + /*FTPrimaryExpr: FTWordsExpr */ + case 258: + break; + + /*FTPrimaryExpr: FTSelectionExpr */ + case 259: + assert (C(p)); + break; + + /*FTWordsExpr: FTWordsValueExpr */ + case 260: + break; + + /*FTWordsValueExpr: Literal */ + case 261: + assert (C(p)); + break; + + /* OptBindExpr: binds (let (Var, OptExpr), + OptBindExpr) */ + case 262: + { + PFfun_t *op_score = function (PFqname (PFns_fts, "score")); + + /*C(p) = APPLY (op_plus, + num (0), + fs_convert_op_by_type ( + fn_data (C(L(p))), + PFty_xs_double ()));*/ + C(p) = let (letbind (C(LL(p)), APPLY (op_score, C(LR(p)))), C(R(p))); + + } break; + default: PFoops_loc (OOPS_FATAL, p->loc, "untranslated expression"); break;
------------------------------------------------------------------------------ Let Crystal Reports handle the reporting - Free Crystal Reports 2008 30-Day trial. Simplify your report design, integration and deployment - and focus on what you do best, core application coding. Discover what's new with Crystal Reports now. http://p.sf.net/sfu/bobj-july _______________________________________________ Monetdb-pf-checkins mailing list Monetdb-pf-checkins@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/monetdb-pf-checkins
-- Jan Rittinger Lehrstuhl Datenbanken und Informationssysteme Wilhelm-Schickard-Institut für Informatik Eberhard-Karls-Universität Tübingen http://www-db.informatik.uni-tuebingen.de/team/rittinger