Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /* Generic stream handling code such as init and close */
14 :
15 : #include "monetdb_config.h"
16 : #include "stream.h"
17 : #include "stream_internal.h"
18 :
19 :
20 : /* ------------------------------------------------------------------ */
21 : /* streams working on a substream, converting character sets using iconv */
22 :
23 : #ifdef HAVE_ICONV
24 :
/* Per-stream conversion state, stored in the wrapper's stream_data.p. */
struct icstream {
	/* iconv conversion descriptor; released with iconv_close() on close */
	iconv_t cd;
	/* scratch space: converted output while writing, and bytes that
	 * have not been converted yet (reading, or between two writes) */
	char buffer[BUFSIZ];
	/* number of not-yet-converted bytes currently kept in buffer */
	size_t buflen;
	/* set by the read side once the inner stream reported end of file */
	bool eof;
};
31 :
32 : static ssize_t
33 334 : ic_write(stream *restrict s, const void *restrict buf, size_t elmsize, size_t cnt)
34 : {
35 334 : struct icstream *ic = (struct icstream *) s->stream_data.p;
36 334 : char *inbuf;
37 334 : size_t inbytesleft = elmsize * cnt;
38 334 : char *bf = NULL;
39 :
40 334 : if (ic == NULL) {
41 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "stream already ended");
42 0 : goto bailout;
43 : }
44 :
45 : /* if unconverted data from a previous call remains, add it to
46 : * the start of the new data, using temporary space */
47 334 : if (ic->buflen > 0) {
48 7 : bf = malloc(ic->buflen + inbytesleft);
49 7 : if (bf == NULL) {
50 : /* cannot allocate memory */
51 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "out of memory");
52 0 : goto bailout;
53 : }
54 7 : memcpy(bf, ic->buffer, ic->buflen);
55 7 : memcpy(bf + ic->buflen, buf, inbytesleft);
56 7 : buf = bf;
57 7 : inbytesleft += ic->buflen;
58 7 : ic->buflen = 0;
59 : }
60 334 : inbuf = (char *) buf;
61 659 : while (inbytesleft > 0) {
62 334 : char *outbuf = ic->buffer;
63 334 : size_t outbytesleft = sizeof(ic->buffer);
64 :
65 334 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
66 9 : switch (errno) {
67 2 : case EILSEQ:
68 : /* invalid multibyte sequence encountered */
69 2 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "invalid multibyte sequence");
70 2 : goto bailout;
71 7 : case EINVAL:
72 : /* incomplete multibyte sequence
73 : * encountered flush what has been
74 : * converted */
75 7 : if (outbytesleft < sizeof(ic->buffer) &&
76 0 : mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
77 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "incomplete multibyte sequence");
78 0 : goto bailout;
79 : }
80 : /* remember what hasn't been converted */
81 7 : if (inbytesleft > sizeof(ic->buffer)) {
82 : /* ridiculously long multibyte
83 : * sequence, so return
84 : * error */
85 0 : mnstr_set_error(s, MNSTR_WRITE_ERROR, "multibyte sequence too long");
86 0 : goto bailout;
87 : }
88 7 : memcpy(ic->buffer, inbuf, inbytesleft);
89 7 : ic->buflen = inbytesleft;
90 7 : if (bf)
91 0 : free(bf);
92 7 : return (ssize_t) cnt;
93 : case E2BIG:
94 : /* not enough space in output buffer */
95 : break;
96 0 : default:
97 0 : mnstr_set_error_errno(s, MNSTR_WRITE_ERROR, "iconv reported an error");
98 0 : goto bailout;
99 : }
100 : }
101 325 : if (mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
102 0 : mnstr_copy_error(s, s->inner);
103 0 : goto bailout;
104 : }
105 : }
106 325 : if (bf)
107 6 : free(bf);
108 325 : return (ssize_t) cnt;
109 :
110 2 : bailout:
111 2 : assert(s->errkind != MNSTR_NO__ERROR);
112 2 : if (bf)
113 1 : free(bf);
114 : return -1;
115 : }
116 :
117 : static ssize_t
118 0 : ic_read(stream *restrict s, void *restrict buf, size_t elmsize, size_t cnt)
119 : {
120 0 : struct icstream *ic = (struct icstream *) s->stream_data.p;
121 0 : char *inbuf;
122 0 : size_t inbytesleft;
123 0 : char *outbuf;
124 0 : size_t outbytesleft;
125 :
126 0 : if (ic == NULL) {
127 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "stream already ended");
128 0 : return -1;
129 : }
130 0 : inbuf = ic->buffer;
131 0 : inbytesleft = ic->buflen;
132 0 : outbuf = (char *) buf;
133 0 : outbytesleft = elmsize * cnt;
134 0 : if (outbytesleft == 0)
135 : return 0;
136 0 : while (outbytesleft > 0 && !ic->eof) {
137 0 : if (ic->buflen == sizeof(ic->buffer)) {
138 : /* ridiculously long multibyte sequence, return error */
139 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "multibyte sequence too long");
140 0 : return -1;
141 : }
142 :
143 0 : switch (mnstr_read(s->inner, ic->buffer + ic->buflen, 1, 1)) {
144 0 : case 1:
145 : /* expected: read one byte */
146 0 : ic->buflen++;
147 0 : inbytesleft++;
148 0 : break;
149 0 : case 0:
150 : /* end of file */
151 0 : ic->eof = true;
152 0 : if (ic->buflen > 0) {
153 : /* incomplete input */
154 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "incomplete input");
155 0 : return -1;
156 : }
157 0 : if (iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1) {
158 : /* some error occurred */
159 0 : mnstr_set_error_errno(s, MNSTR_READ_ERROR, "iconv reported an error");
160 0 : return -1;
161 : }
162 0 : goto exit_func; /* double break */
163 0 : default:
164 : /* error */
165 0 : mnstr_copy_error(s, s->inner);
166 0 : return -1;
167 : }
168 0 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
169 0 : switch (errno) {
170 0 : case EILSEQ:
171 0 : mnstr_set_error(s, MNSTR_READ_ERROR, "invalid multibyte sequence");
172 0 : return -1;
173 : case EINVAL:
174 : /* incomplete multibyte sequence encountered */
175 : break;
176 0 : case E2BIG:
177 : /* not enough space in output buffer,
178 : * return what we have, saving what's in
179 : * the buffer */
180 0 : goto exit_func;
181 0 : default:
182 0 : mnstr_set_error_errno(s, MNSTR_READ_ERROR, "iconv reported an error");
183 0 : return -1;
184 : }
185 : }
186 0 : if (inbytesleft == 0) {
187 : /* converted complete buffer */
188 0 : inbuf = ic->buffer;
189 0 : ic->buflen = 0;
190 : }
191 : }
192 0 : exit_func:
193 0 : if (inbuf > ic->buffer)
194 0 : memmove(ic->buffer, inbuf, inbytesleft);
195 0 : ic->buflen = inbytesleft;
196 0 : if (outbytesleft == elmsize * cnt && !s->inner->eof) {
197 : /* if we're returning data, we must pass on EOF on the
198 : * next call (i.e. keep ic->eof set), otherwise we
199 : * must clear it so that the next call will cause the
200 : * underlying stream to be read again */
201 0 : ic->eof = false;
202 : }
203 0 : return (ssize_t) ((elmsize * cnt - outbytesleft) / elmsize);
204 : }
205 :
206 : static int
207 8 : ic_flush(stream *s, mnstr_flush_level flush_level)
208 : {
209 8 : struct icstream *ic = (struct icstream *) s->stream_data.p;
210 8 : char *outbuf;
211 8 : size_t outbytesleft;
212 :
213 8 : if (ic == NULL)
214 : return -1;
215 8 : outbuf = ic->buffer;
216 8 : outbytesleft = sizeof(ic->buffer);
217 : /* if unconverted data from a previous call remains, it was an
218 : * incomplete multibyte sequence, so an error */
219 16 : if (ic->buflen > 0 ||
220 8 : iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1 ||
221 8 : (outbytesleft < sizeof(ic->buffer) &&
222 0 : mnstr_write(s->inner, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0)) {
223 0 : mnstr_copy_error(s, s->inner);
224 0 : return -1;
225 : }
226 8 : return mnstr_flush(s->inner, flush_level);
227 : }
228 :
229 : static void
230 4 : ic_close(stream *s)
231 : {
232 4 : struct icstream *ic = (struct icstream *) s->stream_data.p;
233 :
234 4 : if (ic) {
235 4 : if (!s->readonly)
236 4 : ic_flush(s, MNSTR_FLUSH_DATA);
237 4 : iconv_close(ic->cd);
238 4 : close_stream(s->inner);
239 4 : s->inner = NULL;
240 4 : free(s->stream_data.p);
241 4 : s->stream_data.p = NULL;
242 : }
243 4 : }
244 :
245 : static void
246 4 : ic_destroy(stream *s)
247 : {
248 4 : ic_close(s);
249 4 : mnstr_destroy(s->inner);
250 4 : destroy_stream(s);
251 4 : }
252 :
253 : static stream *
254 4 : ic_open(iconv_t cd, stream *restrict ss, const char *restrict name)
255 : {
256 4 : stream *s;
257 4 : struct icstream *ic;
258 :
259 4 : if (ss->isutf8)
260 : return ss;
261 4 : if ((s = create_wrapper_stream(name, ss)) == NULL)
262 : return NULL;
263 4 : s->read = ic_read;
264 4 : s->write = ic_write;
265 4 : s->close = ic_close;
266 4 : s->destroy = ic_destroy;
267 4 : s->flush = ic_flush;
268 4 : ic = malloc(sizeof(struct icstream));
269 4 : if (ic == NULL) {
270 0 : mnstr_destroy(s);
271 0 : mnstr_set_open_error(s->name, errno, NULL);
272 0 : return NULL;
273 : }
274 4 : s->stream_data.p = ic;
275 4 : *ic = (struct icstream) {
276 : .cd = cd,
277 : .buflen = 0,
278 : .eof = false,
279 : };
280 4 : return s;
281 : }
282 :
283 : stream *
284 0 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
285 : {
286 0 : stream *s;
287 0 : iconv_t cd;
288 :
289 0 : if (ss == NULL || charset == NULL || name == NULL)
290 : return NULL;
291 : #ifdef STREAM_DEBUG
292 : fprintf(stderr, "iconv_rstream %s %s\n", charset, name);
293 : #endif
294 0 : if (ss->isutf8)
295 : return ss;
296 0 : cd = iconv_open("utf-8", charset);
297 0 : if (cd == (iconv_t) -1) {
298 0 : mnstr_set_open_error(name, errno, "iconv_open");
299 0 : return NULL;
300 : }
301 0 : s = ic_open(cd, ss, name);
302 0 : if (s == NULL) {
303 0 : iconv_close(cd);
304 0 : return NULL;
305 : }
306 0 : s->readonly = true;
307 0 : s->isutf8 = true;
308 0 : return s;
309 : }
310 :
311 : stream *
312 4 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
313 : {
314 4 : stream *s;
315 4 : iconv_t cd;
316 :
317 4 : if (ss == NULL || charset == NULL || name == NULL)
318 : return NULL;
319 : #ifdef STREAM_DEBUG
320 : fprintf(stderr, "iconv_wstream %s %s\n", charset, name);
321 : #endif
322 4 : if (ss->isutf8)
323 : return ss;
324 4 : cd = iconv_open(charset, "utf-8");
325 4 : if (cd == (iconv_t) -1) {
326 0 : mnstr_set_open_error(name, errno, "iconv_open");
327 0 : return NULL;
328 : }
329 4 : s = ic_open(cd, ss, name);
330 4 : if (s == NULL) {
331 0 : iconv_close(cd);
332 0 : return NULL;
333 : }
334 4 : s->readonly = false;
335 4 : return s;
336 : }
337 :
338 : #else
339 : stream *
340 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
341 : {
342 : if (ss == NULL || charset == NULL || name == NULL)
343 : return NULL;
344 : if (ss->isutf8 ||
345 : strcmp(charset, "utf-8") == 0 ||
346 : strcmp(charset, "UTF-8") == 0 ||
347 : strcmp(charset, "UTF8") == 0)
348 : return ss;
349 :
350 : mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
351 : return NULL;
352 : }
353 :
354 : stream *
355 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
356 : {
357 : if (ss == NULL || charset == NULL || name == NULL)
358 : return NULL;
359 : if (ss->isutf8 ||
360 : strcmp(charset, "utf-8") == 0 ||
361 : strcmp(charset, "UTF-8") == 0 ||
362 : strcmp(charset, "UTF8") == 0)
363 : return ss;
364 :
365 : mnstr_set_open_error(name, 0, "ICONV support has been left out of this MonetDB");
366 : return NULL;
367 : }
368 : #endif /* HAVE_ICONV */
|