Mercurial > hg > MonetDB-extend
changeset 3:de61daddf5ab
Updated copyrights and code, added comments, extended tutorial.
author | Sjoerd Mullender <sjoerd@acm.org> |
---|---|
date | Fri, 08 May 2015 12:40:55 +0200 (2015-05-08) |
parents | 22a95811ba05 |
children | d84d840b4f86 |
files | reverse/80_reverse.mal reverse/80_reverse.sql reverse/Makefile reverse/README.rst reverse/reverse.c reverse/reverse.mal |
diffstat | 6 files changed, 111 insertions(+), 109 deletions(-) [+] |
line wrap: on
line diff
--- a/reverse/80_reverse.mal Tue Mar 31 19:47:53 2015 +0200 +++ b/reverse/80_reverse.mal Fri May 08 12:40:55 2015 +0200 @@ -1,18 +1,7 @@ -# The contents of this file are subject to the MonetDB Public License -# Version 1.1 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.monetdb.org/Legal/MonetDBLicense +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. # -# Software distributed under the License is distributed on an "AS IS" -# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -# License for the specific language governing rights and limitations -# under the License. -# -# The Original Code is the MonetDB Database System. -# -# The Initial Developer of the Original Code is CWI. -# Portions created by CWI are Copyright (C) 1997-July 2008 CWI. -# Copyright August 2008-2013 MonetDB B.V. -# All Rights Reserved. +# Copyright 2008-2015 MonetDB B.V. include reverse;
--- a/reverse/80_reverse.sql Tue Mar 31 19:47:53 2015 +0200 +++ b/reverse/80_reverse.sql Fri May 08 12:40:55 2015 +0200 @@ -1,19 +1,8 @@ --- The contents of this file are subject to the MonetDB Public License --- Version 1.1 (the "License"); you may not use this file except in --- compliance with the License. You may obtain a copy of the License at --- http://www.monetdb.org/Legal/MonetDBLicense +-- This Source Code Form is subject to the terms of the Mozilla Public +-- License, v. 2.0. If a copy of the MPL was not distributed with this +-- file, You can obtain one at http://mozilla.org/MPL/2.0/. -- --- Software distributed under the License is distributed on an "AS IS" --- basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the --- License for the specific language governing rights and limitations --- under the License. --- --- The Original Code is the MonetDB Database System. --- --- The Initial Developer of the Original Code is CWI. --- Portions created by CWI are Copyright (C) 1997-July 2008 CWI. --- Copyright August 2008-2013 MonetDB B.V. --- All Rights Reserved. +-- Copyright 2008-2015 MonetDB B.V. CREATE FUNCTION reverse(src STRING) RETURNS STRING EXTERNAL NAME reverse.reverse;
--- a/reverse/Makefile Tue Mar 31 19:47:53 2015 +0200 +++ b/reverse/Makefile Fri May 08 12:40:55 2015 +0200 @@ -1,19 +1,8 @@ -# The contents of this file are subject to the MonetDB Public License -# Version 1.1 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.monetdb.org/Legal/MonetDBLicense +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. # -# Software distributed under the License is distributed on an "AS IS" -# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -# License for the specific language governing rights and limitations -# under the License. -# -# The Original Code is the MonetDB Database System. -# -# The Initial Developer of the Original Code is CWI. -# Portions created by CWI are Copyright (C) 1997-July 2008 CWI. -# Copyright August 2008-2013 MonetDB B.V. -# All Rights Reserved. +# Copyright 2008-2015 MonetDB B.V. LIBDIR = `pkg-config --variable=libdir monetdb5`
--- a/reverse/README.rst Tue Mar 31 19:47:53 2015 +0200 +++ b/reverse/README.rst Fri May 08 12:40:55 2015 +0200 @@ -1,19 +1,8 @@ -.. The contents of this file are subject to the MonetDB Public License -.. Version 1.1 (the "License"); you may not use this file except in -.. compliance with the License. You may obtain a copy of the License at -.. http://www.monetdb.org/Legal/MonetDBLicense +.. This Source Code Form is subject to the terms of the Mozilla Public +.. License, v. 2.0. If a copy of the MPL was not distributed with this +.. file, You can obtain one at http://mozilla.org/MPL/2.0/. .. -.. Software distributed under the License is distributed on an "AS IS" -.. basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -.. License for the specific language governing rights and limitations -.. under the License. -.. -.. The Original Code is the MonetDB Database System. -.. -.. The Initial Developer of the Original Code is CWI. -.. Portions created by CWI are Copyright (C) 1997-July 2008 CWI. -.. Copyright August 2008-2013 MonetDB B.V. -.. All Rights Reserved. +.. Copyright 2008-2015 MonetDB B.V. .. This document is written in reStructuredText (see http://docutils.sourceforge.net/ for more information). @@ -34,7 +23,7 @@ In this directory we show how to make a simple user-defined function (UDF) that can be used in MonetDB/SQL. The function is implemented in C. In order to make the function available to SQL, we need to provide -the correct C interface, a MAL interface, and a SQL interface. We +the correct C interface, a MAL interface, and an SQL interface. We will discuss them all, starting from SQL and going down towards the actual implementation. @@ -63,7 +52,8 @@ The statement tells the SQL system that there is a function called ``reverse`` which takes a ``STRING`` argument and produces a ``STRING`` result. The function is implemented using the MAL -interface ``reverse.reverse``. +interface ``reverse.reverse``. Note that ``STRING`` is equivalent to +``CHARACTER LARGE OBJECT`` or ``CLOB``. This statement will normally be executed once when the database is created, after which it is part of the SQL catalog. This is @@ -102,7 +92,7 @@ command reverse(b:bat[:oid,:str]):bat[:oid,:str] address UDFBATreverse - comment "Reverse a BAT of strings"; + comment "Reverse a column of strings"; At the SQL side we don't have to do anything more. @@ -174,12 +164,16 @@ extern __declspec(dllexport) char *UDFreverse(char **retval, const char **arg); extern __declspec(dllexport) char *UDFBATreverse(bat *retval, const bat *arg); +Scalar Version +~~~~~~~~~~~~~~ + We will now first focus on the implementation of the scalar function and return to the bulk version later. The input of the function ``UDFreverse`` is a (NULL-terminated) string. The function is called with a pointer to the string pointer, -so ``*arg`` is the actual string. +so ``*arg`` is the actual string and ``**arg`` the first byte of the +string. The result of the operation is also a (NULL-terminated) string. Since the caller does not know what the size of the result will be, it @@ -202,7 +196,11 @@ In the actual algorithm we have also taken into account that strings in MonetDB are always stored in the UTF-8 encoding. In addition, our implementation checks for the special value ``str_nil`` which is the C -representation of the SQL ``NULL`` value for strings. +representation of the SQL ``NULL`` value for strings (which in MAL is +called ``nil:str``). + +Bulk Version +~~~~~~~~~~~~ The bulk version gets as input a pointer to a BAT identifier (a value of type ``bat``). It also returns a BAT identifier of a newly created @@ -227,9 +225,9 @@ throw(MAL, "batreverse.reverse", RUNTIME_OBJECT_MISSING); When we're done with this BAT, we will need to decrement the physical -reference count again. We do that by calling ``BBPreleaseref``:: +reference count again. We do that by calling ``BBPunfix``:: - BBPreleaseref(b->batCacheid); + BBPunfix(b->batCacheid); Note that ``b->batCacheid`` is equal to ``*arg``. @@ -248,7 +246,8 @@ an efficiency point of view, it's better to create the BAT with the required size (growing a BAT can be expensive). -We then set the sequence base for the head column of the new BAT:: +We then set the sequence base for the head column of the new BAT to be +the same as that of the input BAT:: BATseqbase(bn, b->hseqbase); @@ -281,6 +280,12 @@ non-zero there if you know what you're doing (and if you need to read this document, you don't). +Note that the return value of ``BUNappend`` was changed starting with +the Jul2015 feature release. Before, ``BUNappend`` returned its first +argument on success or ``NULL`` on failure. Starting with the Jul2015 +release it returns ``GDK_SUCCEED`` or ``GDK_FAIL`` for success or +failure. + Makefile -------- @@ -290,5 +295,49 @@ Makefile works on Fedora Linux if you have the packages ``MonetDB-devel`` and ``MonetDB5-server-devel`` with all their dependencies installed (available in the feature release after the -Feb2013 release cycle). The file may need to be changed for other -systems. +Feb2013 release cycle), and on Debian/Ubuntu if you have the packages +``libmonetdb-dev`` and ``monetdb5-server-dev`` with all their +dependencies installed (available starting in the Oct2014-SP3 bugfix +release). The file may need to be changed for other systems. + +BAT Properties +-------------- + +MonetDB makes extensive use of a number of properties that can be set +on the columns of BATs. It is crucial that these properties don't +lie. + +Properties are Boolean flags, i.e. they are either true (set) or false +(not set). When a property is not set, it means that either the +property doesn't hold or it is unknown whether the property holds. + +The properties are + +``sorted`` + the column is sorted in ascending order + +``revsorted`` + the column is sorted in descending order + +``key`` + all values in the column are distinct + +``nonil`` + there are no nil values in the column + +``nil`` + there are nil values in the column (this property isn't used + internally) + +The properties ``sorted`` and ``revsorted`` may both be set at the +same time. When they are, it implies that all values are equal to +each other. + +The ``key`` property is actually two bits. The lower bit is set if +the property holds. If, in addition, the upper bit is also set, it +means that the property must hold, i.e. when an attempt is made to +insert a new value that already occurs, the insert must fail. + +Note that the function ``BUNappend`` maintains the properties the best +it can. That is why in the example we didn't do anything with the +properties.
--- a/reverse/reverse.c Tue Mar 31 19:47:53 2015 +0200 +++ b/reverse/reverse.c Fri May 08 12:40:55 2015 +0200 @@ -1,42 +1,35 @@ /* - * The contents of this file are subject to the MonetDB Public License - * Version 1.1 (the "License"); you may not use this file except in - * compliance with the License. You may obtain a copy of the License at - * http://www.monetdb.org/Legal/MonetDBLicense + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the - * License for the specific language governing rights and limitations - * under the License. - * - * The Original Code is the MonetDB Database System. - * - * The Initial Developer of the Original Code is CWI. - * Portions created by CWI are Copyright (C) 1997-July 2008 CWI. - * Copyright August 2008-2013 MonetDB B.V. - * All Rights Reserved. + * Copyright 2008-2015 MonetDB B.V. */ /* monetdb_config.h must be included as the first include file */ #include <monetdb_config.h> + /* mal_exception.h actually contains everything we need */ #include <mal_exception.h> /* system include files */ #include <string.h> -/* dst is a buffer of length larger than len, src is a UTF-8-encoded - * string of length exactly len bytes */ +/* dst is a buffer of length larger than len (i.e. dst[len] exists), + * src is a UTF-8-encoded string of length exactly len bytes */ static void do_reverse(char *dst, const char *src, size_t len) { dst[len] = 0; if (strcmp(src, str_nil) == 0) { - /* special case for nil */ + /* special case for nil:str */ + assert(len == strlen(str_nil)); strcpy(dst, str_nil); - assert(len == strlen(str_nil)); return; } + /* all strings in MonetDB are encoded using UTF-8; we must + * make sure that the reversed string is also encoded in valid + * UTF-8, so we treat multibyte characters as single units */ while (*src) { if ((*src & 0xF8) == 0xF0) { /* 4 byte UTF-8 sequence */ @@ -66,6 +59,7 @@ dst[len - 1] = *src++; len -= 2; } else { + /* 1 byte UTF-8 "sequence" */ assert(len >= 1); assert((*src & 0x80) == 0); dst[--len] = *src++; @@ -118,8 +112,10 @@ throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL); b = BATdescriptor(*arg); - if (b == NULL) + if (b == NULL) { + GDKfree(dst); throw(MAL, "batreverse.reverse", RUNTIME_OBJECT_MISSING); + } /* we should only get called for string BATs */ assert(b->ttype == TYPE_str); @@ -127,7 +123,8 @@ /* allocate result BAT */ bn = BATnew(TYPE_void, TYPE_str, BATcount(b), TRANSIENT); if (bn == NULL) { - BBPreleaseref(b->batCacheid); + BBPunfix(b->batCacheid); + GDKfree(dst); throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL); } @@ -163,7 +160,7 @@ } GDKfree(dst); - BBPreleaseref(b->batCacheid); + BBPunfix(b->batCacheid); *retval = bn->batCacheid; BBPkeepref(bn->batCacheid); @@ -174,7 +171,7 @@ /* we only get here in the case of an allocation error; clean * up the mess we've created and throw an exception */ GDKfree(dst); - BBPreleaseref(b->batCacheid); - BBPreleaseref(bn->batCacheid); + BBPunfix(b->batCacheid); + BBPunfix(bn->batCacheid); throw(MAL, "batreverse.reverse", MAL_MALLOC_FAIL); }
--- a/reverse/reverse.mal Tue Mar 31 19:47:53 2015 +0200 +++ b/reverse/reverse.mal Fri May 08 12:40:55 2015 +0200 @@ -1,19 +1,8 @@ -# The contents of this file are subject to the MonetDB Public License -# Version 1.1 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.monetdb.org/Legal/MonetDBLicense +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. # -# Software distributed under the License is distributed on an "AS IS" -# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the -# License for the specific language governing rights and limitations -# under the License. -# -# The Original Code is the MonetDB Database System. -# -# The Initial Developer of the Original Code is CWI. -# Portions created by CWI are Copyright (C) 1997-July 2008 CWI. -# Copyright August 2008-2013 MonetDB B.V. -# All Rights Reserved. +# Copyright 2008-2015 MonetDB B.V. module reverse;