Line data Source code
/*
 * SPDX-License-Identifier: MPL-2.0
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * Copyright 2024, 2025 MonetDB Foundation;
 * Copyright August 2008 - 2023 MonetDB B.V.;
 * Copyright 1997 - July 2008 CWI.
 */

/* This header defines static inline functions, so it must be protected
 * against being included more than once in a translation unit. */
#ifndef MUTILS_UTF8_H
#define MUTILS_UTF8_H

#include <stddef.h>		/* size_t */
#include <stdint.h>		/* uint8_t, uint32_t */

#ifndef mutils_export
#if defined(_MSC_VER) || defined(__CYGWIN__) || defined(__MINGW32__)
#ifndef LIBMUTILS
#define mutils_export extern __declspec(dllimport)
#else
#define mutils_export extern __declspec(dllexport)
#endif
#else
#define mutils_export extern
#endif
#endif

/* return display width of Unicode codepoint c */
mutils_export int charwidth(uint32_t c);

/* decode UTF-8 string byte by byte into *state and *codep, returns
 * state; UTF-8 sequence is complete (and value is in *codep) when state
 * is UTF8_ACCEPT, incorrect when state is UTF8_REJECT, and incomplete
 * for any other value of state */

/* this function and the table are copyright Bjoern Hoehrmann per the
 * below notice.  The layout was changed. */

// Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.

/* utf8d[0..255] is indexed by byte value and yields the byte's type;
 * the entries from index 256 onwards are indexed by state + type and
 * yield the next state (see decode below) */
mutils_export const uint8_t utf8d[364];
#define UTF8_ACCEPT 0
#define UTF8_REJECT 12

/* feed one byte to the decoder
 *
 * state: in/out decoder state; callers start a sequence with
 *        UTF8_ACCEPT
 * codep: in/out codepoint accumulated so far
 * byte:  the next input byte; it is used directly as an index into
 *        utf8d, so callers pass a value in the range 0..255
 *
 * returns the new value of *state */
static inline uint32_t
decode(uint32_t *state, uint32_t *codep, uint32_t byte)
{
	uint32_t type = utf8d[byte];

	/* in the middle of a sequence: append the low six bits of the
	 * byte; at the start of a sequence: keep only the payload bits
	 * of the byte, as selected by its type */
	*codep = (*state != UTF8_ACCEPT) ?
		(byte & 0x3fu) | (*codep << 6) :
		(0xff >> type) & (byte);

	*state = utf8d[256 + *state + type];
	return *state;
}
/* end copyright Bjoern Hoehrmann */

/* return in *c the codepoint of the next character in string s, return
 * a pointer to the start of the following character
 *
 * if s is empty, if the NUL terminator is reached in the middle of a
 * sequence, or if the bytes do not form a valid sequence, *c is set to
 * 0 and NULL is returned
 *
 * note that the returned pointer is not const-qualified even though s
 * is */
static inline char *
nextchar(const char *s, uint32_t *c)
{
	uint32_t codepoint = 0, state = 0;

	while (*s) {
		switch (decode(&state, &codepoint, (uint8_t) *s++)) {
		case UTF8_ACCEPT:
			*c = codepoint;
			return (char *) s;
		case UTF8_REJECT:
			*c = 0;
			return NULL;
		default:
			/* sequence still incomplete, consume more bytes */
			break;
		}
	}
	/* ran out of input before a complete character was seen */
	*c = 0;
	return NULL;
}

/* like the above, but s is at most n bytes long
 *
 * decoding stops at the NUL terminator or after n bytes, whichever
 * comes first; if no complete character was decoded by then (this
 * includes n == 0), *c is set to 0 and NULL is returned */
static inline char *
nextcharn(const char *s, size_t n, uint32_t *c)
{
	uint32_t codepoint = 0, state = 0;

	while (n-- > 0 && *s) {
		switch (decode(&state, &codepoint, (uint8_t) *s++)) {
		case UTF8_ACCEPT:
			*c = codepoint;
			return (char *) s;
		case UTF8_REJECT:
			*c = 0;
			return NULL;
		default:
			/* sequence still incomplete, consume more bytes */
			break;
		}
	}
	/* ran out of input before a complete character was seen */
	*c = 0;
	return NULL;
}

#endif /* MUTILS_UTF8_H */