LCOV - code coverage report
Current view: top level - common/utils - mutf8.h (source / functions) Hit Total Coverage
Test: coverage.info Lines: 14 31 45.2 %
Date: 2025-03-24 23:16:36 Functions: 2 3 66.7 %

          Line data    Source code
       1             : /*
       2             :  * SPDX-License-Identifier: MPL-2.0
       3             :  *
       4             :  * This Source Code Form is subject to the terms of the Mozilla Public
       5             :  * License, v. 2.0.  If a copy of the MPL was not distributed with this
       6             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       7             :  *
       8             :  * Copyright 2024, 2025 MonetDB Foundation;
       9             :  * Copyright August 2008 - 2023 MonetDB B.V.;
      10             :  * Copyright 1997 - July 2008 CWI.
      11             :  */
      12             : 
      13             : #ifndef mutils_export
      14             : #if defined(_MSC_VER) || defined(__CYGWIN__) || defined(__MINGW32__)
      15             : #ifndef LIBMUTILS
      16             : #define mutils_export extern __declspec(dllimport)
      17             : #else
      18             : #define mutils_export extern __declspec(dllexport)
      19             : #endif
      20             : #else
      21             : #define mutils_export extern
      22             : #endif
      23             : #endif
      24             : 
      25             : /* return display width of Unicode codepoint c */
      26             : mutils_export int charwidth(uint32_t c);
      27             : 
      28             : /* decode UTF-8 string byte by byte into *state and *codep, returns
      29             : *  state; UTF-8 sequence is complete (and value is in *codep) when state
      30             : *  is UTF8_ACCEPT, incorrect when state is UTF8_REJECT, and incomplete
      31             : *  for any other value of state */
      32             : 
      33             : /* this function and the table are copyright Bjoern Hoehrmann per the
      34             :  * below notice.  The layout was changed. */
      35             : 
      36             : // Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
      37             : // See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
      38             : 
      39             : mutils_export const uint8_t utf8d[364];
      40             : #define UTF8_ACCEPT 0
      41             : #define UTF8_REJECT 12
      42             : 
      43             : static inline uint32_t
      44   138552936 : decode(uint32_t *state, uint32_t *codep, uint32_t byte)
      45             : {
      46   138552936 :         uint32_t type = utf8d[byte];
      47             : 
      48   277105872 :         *codep = (*state != UTF8_ACCEPT) ?
      49   138552936 :                 (byte & 0x3fu) | (*codep << 6) :
      50   138542813 :                 (0xff >> type) & (byte);
      51             : 
      52   138552936 :         *state = utf8d[256 + *state + type];
      53   138552936 :         return *state;
      54             : }
      55             : /* end copyright Bjoern Hoehrmann */
      56             : 
      57             : /* return in *c the codepoint of the next character in string s, return
      58             :  * a pointer to the start of the following character */
      59             : static inline char *
      60         518 : nextchar(const char *s, uint32_t *c)
      61             : {
      62         518 :         uint32_t codepoint = 0, state = 0;
      63         518 :         while (*s) {
      64         518 :                 switch (decode(&state, &codepoint, (uint8_t) *s++)) {
      65         518 :                 case UTF8_ACCEPT:
      66         518 :                         *c = codepoint;
      67         518 :                         return (char *) s;
      68           0 :                 case UTF8_REJECT:
      69           0 :                         *c = 0;
      70           0 :                         return NULL;
      71             :                 default:
      72             :                         break;
      73             :                 }
      74             :         }
      75           0 :         *c = 0;
      76           0 :         return NULL;
      77             : }
      78             : 
      79             : /* like the above, but s is at most n bytes long */
      80             : static inline char *
      81           0 : nextcharn(const char *s, size_t n, uint32_t *c)
      82             : {
      83           0 :         uint32_t codepoint = 0, state = 0;
      84           0 :         while (n-- > 0 && *s) {
      85           0 :                 switch (decode(&state, &codepoint, (uint8_t) *s++)) {
      86           0 :                 case UTF8_ACCEPT:
      87           0 :                         *c = codepoint;
      88           0 :                         return (char *) s;
      89           0 :                 case UTF8_REJECT:
      90           0 :                         *c = 0;
      91           0 :                         return NULL;
      92             :                 default:
      93             :                         break;
      94             :                 }
      95             :         }
      96           0 :         *c = 0;
      97           0 :         return NULL;
      98             : }

Generated by: LCOV version 1.14