Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #ifndef _MSTRING_H_
14 : #define _MSTRING_H_
15 :
16 : #include <stdarg.h> /* va_list etc. */
17 : #include <string.h> /* strlen */
18 :
/* GCC_Pragma(p) expands to _Pragma(p) on GCC 4.5 and newer; on anything
 * else (e.g. GCC 4.4.7 as found on CentOS 6) it expands to nothing */
#if !defined(__GNUC__) || __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 4)
#define GCC_Pragma(pragma)
#else
#define GCC_Pragma(pragma)	_Pragma(pragma)
#endif
25 :
/* If the compiler does not know the "access" function attribute (or we
 * cannot ask it), define __access__ as a macro that swallows its
 * arguments, so that __attribute__((__access__(...))) becomes an empty
 * attribute list.  The __has_attribute probe stays on its own nested
 * line so that it is never parsed when __has_attribute is undefined. */
#ifndef __has_attribute
#define __access__(...)
#else
#if !__has_attribute(__access__)
#define __access__(...)
#endif
#endif
33 :
/* Copy at most (n-1) bytes from src to dst and add a terminating NUL
 * byte.
 *
 * dst  destination buffer of n bytes; may be NULL
 * src  NUL-terminated source string; must not be NULL
 * n    size of dst in bytes; may be 0
 *
 * Returns the length of src, which can be more than what was copied: a
 * return value >= n means the copy was truncated.  If dst is NULL or n
 * is 0, nothing is written and only the length of src is returned. */
__attribute__((__access__(write_only, 1, 3)))
static inline size_t
strcpy_len(char *restrict dst, const char *restrict src, size_t n)
{
	if (dst == NULL || n == 0)
		return strlen(src);

	for (size_t i = 0; i < n; i++) {
		/* copy including the terminator; stop as soon as it is seen */
		if ((dst[i] = src[i]) == 0)
			return i;
	}

	/* src did not fit: overwrite the last byte copied with the
	 * terminator.  The first n bytes of src are known to be non-NUL,
	 * so only the remainder still needs to be measured. */
	dst[n - 1] = 0;
	return n + strlen(src + n);
}
49 :
/* Concatenate a NULL-terminated argument list of strings into dst.
 *
 * dst  destination buffer of n bytes; may be NULL
 * n    size of dst in bytes
 * src  first string, followed by further strings and a final NULL
 *
 * Strings are copied with strcpy_len for as long as the running length
 * is below n; after that (or when dst is NULL) they are only measured.
 * Returns the combined length of all source strings, which can exceed
 * what fits in dst.  If src itself is NULL the loop body never runs, so
 * dst is left untouched and 0 is returned. */
__attribute__((__access__(write_only, 1, 2)))
__attribute__((__sentinel__))
static inline size_t
strconcat_len(char *restrict dst, size_t n, const char *restrict src, ...)
{
	size_t total = 0;
	va_list strings;

	va_start(strings, src);
	for (const char *s = src; s != NULL; s = va_arg(strings, const char *)) {
		/* each piece lands right behind the previous one, using
		 * whatever room is left in dst */
		total += (dst != NULL && total < n)
			? strcpy_len(dst + total, s, n - total)
			: strlen(s);
	}
	va_end(strings);

	return total;
}
73 :
/* Branch prediction hints used by checkUTF8 below (they are #undef'ed
 * again at the end of this header).  Both evaluate expr exactly once
 * and yield 0 or 1. */
#ifdef __has_builtin
#if __has_builtin(__builtin_expect)
/* __builtin_expect returns its first argument; it is expected to be
 * equal to the second argument */
#define unlikely(expr)	__builtin_expect((expr) != 0, 0)
#define likely(expr)	__builtin_expect((expr) != 0, 1)
#endif
#endif
#ifndef unlikely
/* No hint available.  Do NOT use MSVC's __assume() here: it is a
 * promise to the optimizer that the expression holds, not a hint, so
 * __assume(!(expr)) turns every case in which expr is true into
 * undefined behavior. */
#define unlikely(expr)	((expr) != 0)
#define likely(expr)	((expr) != 0)
#endif

/*
 * UTF-8 encoding is as follows:
 * U-00000000 - U-0000007F: 0xxxxxxx
 * U-00000080 - U-000007FF: 110zzzzx 10xxxxxx
 * U-00000800 - U-0000FFFF: 1110zzzz 10zxxxxx 10xxxxxx
 * U-00010000 - U-0010FFFF: 11110zzz 10zzxxxx 10xxxxxx 10xxxxxx
 *
 * To be correctly coded UTF-8, the sequence should be the shortest
 * possible encoding of the value being encoded.  This means that at
 * least one of the z bits must be non-zero.  Also note that the four
 * byte sequence can encode more than is allowed (anything above
 * U+10FFFF) and that the values U+D800..U+DFFF are not allowed to be
 * encoded.
 *
 * checkUTF8 returns true if v is NULL, if v consists of the single
 * byte 0x80 (accepted as-is; presumably the string nil marker, to be
 * confirmed against callers), or if v is a well-formed UTF-8 string:
 * no overlong forms, no surrogates, nothing above U+10FFFF, no stray
 * or missing continuation bytes.  It returns false otherwise.
 */
static inline bool
checkUTF8(const char *v)
{
	/* It is unlikely that this function returns false, because it is
	 * likely that the string presented is a correctly coded UTF-8
	 * string.  So we annotate the tests that lead directly to a
	 * return of false.  This can help the compiler produce more
	 * efficient code. */
	if (v == NULL)
		return true;

	/* look at the bytes as unsigned so that the comparisons below
	 * do not depend on the signedness of plain char */
	const unsigned char *s = (const unsigned char *) v;

	if (s[0] == 0x80 && s[1] == 0)
		return true;

	/* Every continuation byte is tested before the next one is
	 * fetched, so a string that ends in the middle of a sequence is
	 * rejected at its NUL byte and we never read beyond it. */
	for (size_t i = 0; s[i] != 0; i++) {
		const unsigned char lead = s[i];

		if ((lead & 0x80) == 0)
			continue;	/* plain ASCII */

		if ((lead & 0xE0) == 0xC0) {
			/* two bytes; C0 and C1 would be overlong */
			if (unlikely((lead & 0x1E) == 0))
				return false;
			if (unlikely((s[++i] & 0xC0) != 0x80))
				return false;
		} else if ((lead & 0xF0) == 0xE0) {
			/* three bytes */
			const unsigned char second = s[++i];
			unsigned char mask = 0xC0;	/* default: 80..BF */
			unsigned char want = 0x80;

			if (lead == 0xE0) {
				/* A0..BF only, lower would be overlong */
				mask = 0xE0;
				want = 0xA0;
			} else if (lead == 0xED) {
				/* 80..9F only, higher is a surrogate
				 * (U+D800..U+DFFF) */
				mask = 0xE0;
				want = 0x80;
			}
			if (unlikely((second & mask) != want))
				return false;
			if (unlikely((s[++i] & 0xC0) != 0x80))
				return false;
		} else if (likely((lead & 0xF8) == 0xF0)) {
			/* four bytes; F5..F7 encode values > U+10FFFF */
			if (unlikely(lead > 0xF4))
				return false;
			const unsigned char second = s[++i];
			if (unlikely((second & 0xC0) != 0x80))
				return false;
			/* F0: 90..BF only, lower would be overlong */
			if (lead == 0xF0 && unlikely((second & 0x30) == 0))
				return false;
			/* F4: 80..8F only, higher is > U+10FFFF */
			if (lead == 0xF4 && unlikely((second & 0x30) != 0))
				return false;
			if (unlikely((s[++i] & 0xC0) != 0x80))
				return false;
			if (unlikely((s[++i] & 0xC0) != 0x80))
				return false;
		} else {
			/* stray continuation byte or F8..FF */
			return false;
		}
	}
	return true;
}
156 :
/* Append formatted output to a growable heap buffer.
 *
 * buf       in/out: the buffer; *buf may be NULL, in which case it is
 *           allocated here (realloc of NULL)
 * pos       in/out: offset in *buf at which to start writing; advanced
 *           by the number of characters written (excluding the NUL)
 * capacity  in/out: allocated size of *buf; updated when the buffer is
 *           reallocated
 * fmt/args  printf-style format and its arguments
 *
 * Returns the number of characters written (as vsnprintf does), -1 if
 * realloc fails, or the negative vsnprintf result on a formatting
 * error.  When realloc fails, *buf, *pos and *capacity are unchanged. */
__attribute__((__format__(__printf__, 4, 0)))
static inline int
vreallocprintf(char **buf, size_t *pos, size_t *capacity, const char *fmt, va_list args)
{
	assert(*pos <= *capacity);
	assert(*buf == NULL || *capacity > 0);

	/* first guess: the format text, its trailing NUL and some space
	 * for the expanded items */
	size_t wanted = strlen(fmt) + 1 + 80;

	for (;;) {
		/* Common cases:
		 * 1. buf=NULL, pos=cap=0: allocate reasonable amount
		 * 2. buf=NULL, pos=0, cap=something: start with allocating cap
		 * 3. buf not NULL, cap=something: allocate larger cap */
		if (*buf == NULL || wanted > *capacity - *pos) {
			const size_t minimal = *pos + wanted;
			/* an existing buffer grows by half its size; a
			 * fresh one starts at the requested capacity */
			const size_t grown = *buf != NULL
				? *capacity + *capacity / 2
				: *capacity;
			const size_t bigger = minimal > grown ? minimal : grown;
			char *fresh = realloc(*buf, bigger);
			if (fresh == 0)
				return -1;
			*buf = fresh;
			*capacity = bigger;
		}
		assert(*buf);
		assert(wanted <= *capacity - *pos);
		const size_t room = *capacity - *pos;
		assert(room >= 1);

		/* args may be needed again for a retry, so print from a copy */
		va_list attempt;
		va_copy(attempt, args);
		const int printed = vsnprintf(*buf + *pos, room, fmt, attempt);
		va_end(attempt);

		if (printed < 0)
			return printed;
		if ((size_t) printed < room) {
			/* everything, including the NUL, fitted */
			*pos += (size_t) printed;
			return printed;
		}
		/* truncated: now we know exactly how much is needed */
		wanted = (size_t) printed + 1;
	}
}

/* Variadic front end for vreallocprintf: same arguments, results and
 * buffer handling, with the format arguments passed inline. */
__attribute__((__format__(__printf__, 4, 5)))
static inline int
reallocprintf(char **buf, size_t *pos, size_t *capacity, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	const int printed = vreallocprintf(buf, pos, capacity, fmt, args);
	va_end(args);
	return printed;
}
223 :
224 : #undef unlikely
225 : #undef likely
226 :
227 : #endif
|