LCOV - code coverage report
Current view: top level - common/utils - mstring.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 77 79 97.5 %
Date: 2024-12-20 21:24:02 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #ifndef _MSTRING_H_
      14             : #define _MSTRING_H_
      15             : 
      16             : #include <stdarg.h>               /* va_list etc. */
      17             : #include <string.h>               /* strlen */
      18             : 
      19             : #if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4))
      20             : /* not on CentOS 6 (GCC 4.4.7) */
      21             : #define GCC_Pragma(pragma)      _Pragma(pragma)
      22             : #else
      23             : #define GCC_Pragma(pragma)
      24             : #endif
      25             : 
      26             : #if defined(__has_attribute)
      27             : #if ! __has_attribute(__access__)
      28             : #define __access__(...)
      29             : #endif
      30             : #else
      31             : #define __access__(...)
      32             : #endif
      33             : 
      34             : /* copy at most (n-1) bytes from src to dst and add a terminating NUL
      35             :  * byte; return length of src (i.e. can be more than what is copied) */
      36             : __attribute__((__access__(write_only, 1, 3)))
      37             : static inline size_t
      38   330702390 : strcpy_len(char *restrict dst, const char *restrict src, size_t n)
      39             : {
      40   330702390 :         if (dst != NULL && n != 0) {
      41  6335312539 :                 for (size_t i = 0; i < n; i++) {
      42  6331135589 :                         if ((dst[i] = src[i]) == 0)
      43   326518684 :                                 return i;
      44             :                 }
      45     4176950 :                 dst[n - 1] = 0;
      46             :         }
      47     4183706 :         return strlen(src);
      48             : }
      49             : 
      50             : /* copy the NULL-terminated list of src strings with a maximum of n
      51             :  * bytes to dst; return the combined length of the src strings */
      52             : __attribute__((__access__(write_only, 1, 2)))
      53             : __attribute__((__sentinel__))
      54             : static inline size_t
      55    61005035 : strconcat_len(char *restrict dst, size_t n, const char *restrict src, ...)
      56             : {
      57    61005035 :         va_list ap;
      58    61005035 :         size_t i = 0;
      59             : 
      60    61005035 :         va_start(ap, src);
      61   278956454 :         while (src) {
      62   217449734 :                 size_t l;
      63   217449734 :                 if (dst && i < n)
      64   217439727 :                         l = strcpy_len(dst + i, src, n - i);
      65             :                 else
      66       10007 :                         l = strlen(src);
      67   218191829 :                 i += l;
      68   218191829 :                 src = va_arg(ap, const char *);
      69             :         }
      70    61506720 :         va_end(ap);
      71    61506720 :         return i;
      72             : }
      73             : 
      74             : #ifdef __has_builtin
      75             : #if __has_builtin(__builtin_expect)
      76             : /* __builtin_expect returns its first argument; it is expected to be
      77             :  * equal to the second argument */
      78             : #define unlikely(expr)  __builtin_expect((expr) != 0, 0)
      79             : #define likely(expr)    __builtin_expect((expr) != 0, 1)
      80             : #endif
      81             : #endif
      82             : #ifndef unlikely
      83             : #ifdef _MSC_VER
      84             : #define unlikely(expr)  (__assume(!(expr)), (expr))
      85             : #define likely(expr)    (__assume((expr)), (expr))
      86             : #else
      87             : #define unlikely(expr)  (expr)
      88             : #define likely(expr)    (expr)
      89             : #endif
      90             : #endif
      91             : 
      92             : /*
      93             :  * UTF-8 encoding is as follows:
      94             :  * U-00000000 - U-0000007F: 0xxxxxxx
      95             :  * U-00000080 - U-000007FF: 110zzzzx 10xxxxxx
      96             :  * U-00000800 - U-0000FFFF: 1110zzzz 10zxxxxx 10xxxxxx
      97             :  * U-00010000 - U-0010FFFF: 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx
      98             :  *
      99             :  * To be correctly coded UTF-8, the sequence should be the shortest
     100             :  * possible encoding of the value being encoded.  This means that at
     101             :  * least one of the z bits must be non-zero.  Also note that the four
     102             :  * byte sequence can encode more than is allowed and that the values
     103             :  * U+D800..U+DFFF are not allowed to be encoded.
     104             :  */
     105             : static inline bool
     106   100377915 : checkUTF8(const char *v)
     107             : {
     108             :         /* It is unlikely that this function returns false, because it is
     109             :          * likely that the string presented is a correctly coded UTF-8
     110             :          * string.  So we annotate the tests that are very (un)likely to
     111             :          * succeed, i.e. the ones that lead to a return of false.  This can
     112             :          * help the compiler produce more efficient code. */
     113   100377915 :         if (v != NULL) {
     114   100377915 :                 if (v[0] != '\200' || v[1] != '\0') {
     115             :                         /* check that string is correctly encoded UTF-8 */
     116  3902533777 :                         for (size_t i = 0; v[i]; i++) {
     117             :                                 /* we do not annotate all tests, only the ones
     118             :                                  * leading directly to an unlikely return
     119             :                                  * statement */
     120  3803197593 :                                 if ((v[i] & 0x80) == 0) {
     121             :                                         ;
     122      264221 :                                 } else if ((v[i] & 0xE0) == 0xC0) {
     123      251310 :                                         if (unlikely(((v[i] & 0x1E) == 0)))
     124             :                                                 return false;
     125      251310 :                                         if (unlikely(((v[++i] & 0xC0) != 0x80)))
     126             :                                                 return false;
     127       12911 :                                 } else if ((v[i] & 0xF0) == 0xE0) {
     128       12771 :                                         if ((v[i++] & 0x0F) == 0) {
     129          33 :                                                 if (unlikely(((v[i] & 0xE0) != 0xA0)))
     130             :                                                         return false;
     131             :                                         } else {
     132       12738 :                                                 if (unlikely(((v[i] & 0xC0) != 0x80)))
     133             :                                                         return false;
     134             :                                         }
     135       12771 :                                         if (unlikely(((v[++i] & 0xC0) != 0x80)))
     136             :                                                 return false;
     137         140 :                                 } else if (likely(((v[i] & 0xF8) == 0xF0))) {
     138         139 :                                         if ((v[i++] & 0x07) == 0) {
     139         137 :                                                 if (unlikely(((v[i] & 0x30) == 0)))
     140             :                                                         return false;
     141             :                                         }
     142         139 :                                         if (unlikely(((v[i] & 0xC0) != 0x80)))
     143             :                                                 return false;
     144         137 :                                         if (unlikely(((v[++i] & 0xC0) != 0x80)))
     145             :                                                 return false;
     146         137 :                                         if (unlikely(((v[++i] & 0xC0) != 0x80)))
     147             :                                                 return false;
     148             :                                 } else {
     149             :                                         return false;
     150             :                                 }
     151             :                         }
     152             :                 }
     153             :         }
     154             :         return true;
     155             : }
     156             : 
     157             : static inline int vreallocprintf(char **buf, size_t *pos, size_t *size, const char *fmt, va_list ap)
     158             :         __attribute__((__format__(__printf__, 4, 0)));
     159             : 
     160             : static inline int
     161        7156 : vreallocprintf(char **buf, size_t *pos, size_t *capacity, const char *fmt, va_list args)
     162             : {
     163        7156 :         va_list ap;
     164             : 
     165        7156 :         assert(*pos <= *capacity);
     166        7156 :         assert(*buf == NULL || *capacity > 0);
     167             : 
     168        7156 :         size_t need_at_least = strlen(fmt);
     169        7156 :         need_at_least += 1; // trailing NUL
     170        7156 :         need_at_least += 80; // some space for the items
     171        7156 :         while (1) {
     172             :                 // Common cases:
     173             :                 // 1. buf=NULL, pos=cap=0: allocate reasonable amount
     174             :                 // 2. buf=NULL, pos=0, cap=something: start with allocating cap
     175             :                 // 3. buf not NULL, cap=something: allocate larger cap
     176        7156 :                 if (*buf == NULL || need_at_least > *capacity - *pos) {
     177        1921 :                         size_t cap1 = *pos + need_at_least;
     178        1921 :                         size_t cap2 = *capacity;
     179        1921 :                         if (*buf)
     180          20 :                                 cap2 += cap2 / 2;
     181        1921 :                         size_t new_cap = cap1 > cap2 ? cap1 : cap2;
     182        1921 :                         char *new_buf = realloc(*buf, new_cap);
     183        1921 :                         if (new_buf == 0)
     184             :                                 return -1;
     185        1921 :                         *buf = new_buf;
     186        1921 :                         *capacity = new_cap;
     187             :                 }
     188        7156 :                 assert(*buf);
     189        7156 :                 assert(need_at_least <= *capacity - *pos);
     190        7156 :                 char *output = &(*buf)[*pos];
     191        7156 :                 size_t avail = *capacity - *pos;
     192        7156 :                 assert(avail >= 1);
     193             : 
     194        7156 :                 va_copy(ap, args);
     195        7156 :                 int n = vsnprintf(output, avail, fmt, ap);
     196        7156 :                 va_end(ap);
     197             : 
     198        7156 :                 if (n < 0)
     199           0 :                         return n;
     200        7156 :                 size_t needed = (size_t)n;
     201        7156 :                 if (needed <= avail - 1) {
     202             :                         // it wanted to print n chars and it could
     203        7156 :                         *pos += needed;
     204        7156 :                         return n;
     205             :                 }
     206           0 :                 need_at_least = needed + 1;
     207             :         }
     208             : }
     209             : 
     210             : static inline int reallocprintf(char **buf, size_t *pos, size_t *size, const char *fmt, ...)
     211             :         __attribute__((__format__(__printf__, 4, 5)));
     212             : 
     213             : static inline int
     214        6477 : reallocprintf(char **buf, size_t *pos, size_t *capacity, const char *fmt, ...)
     215             : {
     216        6477 :         int n;
     217        6477 :         va_list ap;
     218        6477 :         va_start(ap, fmt);
     219        6477 :         n = vreallocprintf(buf, pos, capacity, fmt, ap);
     220        6477 :         va_end(ap);
     221        6477 :         return n;
     222             : }
     223             : 
     224             : #undef unlikely
     225             : #undef likely
     226             : 
     227             : #endif

Generated by: LCOV version 1.14