Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024, 2025 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : #include <iconv.h>
14 :
15 : struct ic_priv_t {
16 : stream *s;
17 : iconv_t cd;
18 : bool eof:1;
19 : bool err:1;
20 : size_t buflen;
21 : char buffer[BUFSIZ];
22 : };
23 :
24 : static ssize_t
25 0 : ic_read(void *restrict private, void *restrict buf, size_t elmsize, size_t cnt)
26 : {
27 0 : struct ic_priv_t *ic = private;
28 0 : char *inbuf = ic->buffer;
29 0 : size_t inbytesleft = ic->buflen;
30 0 : char *outbuf = buf;
31 0 : size_t outbytesleft = elmsize * cnt;
32 :
33 0 : if (outbytesleft == 0)
34 : return 0;
35 0 : while (outbytesleft > 0 && !ic->eof) {
36 0 : if (ic->buflen == sizeof(ic->buffer)) {
37 : /* ridiculously long multibyte sequence, return error */
38 0 : fprintf(stderr, "multibyte sequence too long\n");
39 0 : return -1;
40 : }
41 :
42 0 : switch (mnstr_read(ic->s, ic->buffer + ic->buflen, 1, 1)) {
43 0 : case 1:
44 : /* expected: read one byte */
45 0 : ic->buflen++;
46 0 : inbytesleft++;
47 0 : break;
48 0 : case 0:
49 : /* end of file */
50 0 : ic->eof = true;
51 0 : if (ic->buflen > 0) {
52 : /* incomplete input */
53 0 : fprintf(stderr, "incomplete input\n");
54 0 : return -1;
55 : }
56 0 : if (iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1) {
57 : /* some error occurred */
58 0 : fprintf(stderr, "iconv reported an error\n");
59 0 : return -1;
60 : }
61 0 : goto exit_func; /* double break */
62 0 : default:
63 : /* error */
64 0 : if (mnstr_peek_error(ic->s))
65 0 : fprintf(stderr, "%s\n", mnstr_peek_error(ic->s));
66 0 : mnstr_clearerr(ic->s);
67 0 : return -1;
68 : }
69 0 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
70 0 : switch (errno) {
71 0 : case EILSEQ:
72 0 : fprintf(stderr, "invalid multibyte sequence\n");
73 0 : return -1;
74 : case EINVAL:
75 : /* incomplete multibyte sequence encountered */
76 : break;
77 0 : case E2BIG:
78 : /* not enough space in output buffer,
79 : * return what we have, saving what's in
80 : * the buffer */
81 0 : goto exit_func;
82 0 : default:
83 0 : fprintf(stderr, "iconv reported an error\n");
84 0 : return -1;
85 : }
86 : }
87 0 : if (inbytesleft == 0) {
88 : /* converted complete buffer */
89 0 : inbuf = ic->buffer;
90 0 : ic->buflen = 0;
91 : }
92 : }
93 0 : exit_func:
94 0 : if (inbuf > ic->buffer)
95 0 : memmove(ic->buffer, inbuf, inbytesleft);
96 0 : ic->buflen = inbytesleft;
97 0 : if (outbytesleft == elmsize * cnt && !mnstr_eof(ic->s)) {
98 : /* if we're returning data, we must pass on EOF on the
99 : * next call (i.e. keep ic->eof set), otherwise we
100 : * must clear it so that the next call will cause the
101 : * underlying stream to be read again */
102 0 : ic->eof = false;
103 : }
104 0 : return (ssize_t) ((elmsize * cnt - outbytesleft) / elmsize);
105 : }
106 :
107 : static ssize_t
108 372 : ic_write(void *restrict private, const void *restrict buf, size_t elmsize, size_t cnt)
109 : {
110 372 : struct ic_priv_t *ic = private;
111 372 : char *inbuf = (char *) buf; /* iconv requires non-const */
112 372 : size_t inbytesleft = elmsize * cnt;
113 372 : char *bf = NULL;
114 :
115 372 : if (ic == NULL) {
116 0 : fprintf(stderr, "stream already ended\n");
117 0 : return -1;
118 : }
119 :
120 372 : if (ic->err) {
121 27 : for (size_t i = 0; i < inbytesleft; i++)
122 21 : if (inbuf[i] == '\n') {
123 1 : ic->err = false;
124 1 : break;
125 : }
126 7 : return -1;
127 : }
128 :
129 : /* if unconverted data from a previous call remains, add it to
130 : * the start of the new data, using temporary space */
131 365 : if (ic->buflen > 0) {
132 0 : bf = malloc(ic->buflen + inbytesleft);
133 0 : if (bf == NULL) {
134 : /* cannot allocate memory */
135 0 : fprintf(stderr, "out of memory\n");
136 0 : goto bailout;
137 : }
138 0 : memcpy(bf, ic->buffer, ic->buflen);
139 0 : memcpy(bf + ic->buflen, buf, inbytesleft);
140 0 : inbuf = bf;
141 0 : inbytesleft += ic->buflen;
142 0 : ic->buflen = 0;
143 : }
144 728 : while (inbytesleft > 0) {
145 365 : char *outbuf = ic->buffer;
146 365 : size_t outbytesleft = sizeof(ic->buffer);
147 :
148 365 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
149 2 : switch (errno) {
150 2 : case EILSEQ:
151 : /* invalid multibyte sequence encountered */
152 2 : fprintf(stderr, "invalid multibyte sequence\n");
153 2 : goto bailout;
154 0 : case EINVAL:
155 : /* incomplete multibyte sequence
156 : * encountered flush what has been
157 : * converted */
158 0 : if (outbytesleft < sizeof(ic->buffer) &&
159 0 : mnstr_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
160 0 : fprintf(stderr, "incomplete multibyte sequence\n");
161 0 : goto bailout;
162 : }
163 : /* remember what hasn't been converted */
164 0 : if (inbytesleft > sizeof(ic->buffer)) {
165 : /* ridiculously long multibyte
166 : * sequence, so return
167 : * error */
168 0 : fprintf(stderr, "multibyte sequence too long\n");
169 0 : goto bailout;
170 : }
171 0 : memcpy(ic->buffer, inbuf, inbytesleft);
172 0 : ic->buflen = inbytesleft;
173 0 : if (bf)
174 0 : free(bf);
175 0 : return (ssize_t) cnt;
176 : case E2BIG:
177 : /* not enough space in output buffer */
178 : break;
179 0 : default:
180 0 : fprintf(stderr, "iconv reported an error\n");
181 0 : goto bailout;
182 : }
183 : }
184 363 : if (mnstr_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
185 0 : const char *e = mnstr_peek_error(ic->s);
186 0 : if (e)
187 0 : fprintf(stderr, "%s\n", e);
188 0 : mnstr_clearerr(ic->s);
189 0 : goto bailout;
190 : }
191 : }
192 363 : if (bf)
193 0 : free(bf);
194 363 : return (ssize_t) cnt;
195 :
196 2 : bailout:
197 2 : if (bf)
198 0 : free(bf);
199 2 : ic->err = true;
200 2 : return -1;
201 : }
202 :
203 : static void
204 4 : ic_close(void *private)
205 : {
206 4 : struct ic_priv_t *ic = private;
207 4 : if (ic->cd != (iconv_t) -1)
208 4 : iconv_close(ic->cd);
209 4 : ic->cd = (iconv_t) -1;
210 4 : mnstr_close(ic->s);
211 4 : }
212 :
213 : static void
214 4 : ic_destroy(void *private)
215 : {
216 4 : ic_close(private);
217 4 : mnstr_destroy(((struct ic_priv_t *) private)->s);
218 4 : free(private);
219 4 : }
220 :
221 : static stream *
222 0 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
223 : {
224 0 : if (ss == NULL || charset == NULL || name == NULL)
225 : return NULL;
226 :
227 0 : struct ic_priv_t *priv = malloc(sizeof(struct ic_priv_t));
228 0 : if (priv == NULL) {
229 0 : fprintf(stderr, "Cannot allocate memory\n");
230 0 : return NULL;
231 : }
232 0 : *priv = (struct ic_priv_t) {
233 : .s = ss,
234 0 : .cd = iconv_open("utf-8", charset),
235 : };
236 0 : if (priv->cd == (iconv_t) -1) {
237 0 : free(priv);
238 0 : fprintf(stderr, "Cannot initiate character set translation from %s\n",
239 : charset);
240 0 : return NULL;
241 : }
242 0 : stream *s = callback_stream(priv, ic_read, NULL, ic_close, ic_destroy, name);
243 0 : if (s == NULL) {
244 0 : fprintf(stderr, "Cannot allocate memory\n");
245 0 : iconv_close(priv->cd);
246 0 : free(priv);
247 0 : return NULL;
248 : }
249 : return s;
250 : }
251 :
252 : static stream *
253 4 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
254 : {
255 4 : if (ss == NULL || charset == NULL || name == NULL)
256 : return NULL;
257 :
258 4 : struct ic_priv_t *priv = malloc(sizeof(struct ic_priv_t));
259 4 : if (priv == NULL) {
260 0 : fprintf(stderr, "Cannot allocate memory\n");
261 0 : return NULL;
262 : }
263 8 : *priv = (struct ic_priv_t) {
264 : .s = ss,
265 4 : .cd = iconv_open(charset, "utf-8"),
266 : };
267 4 : if (priv->cd == (iconv_t) -1) {
268 0 : free(priv);
269 0 : fprintf(stderr, "Cannot initiate character set translation from %s\n",
270 : charset);
271 0 : return NULL;
272 : }
273 4 : stream *s = callback_stream(priv, NULL, ic_write, ic_close, ic_destroy, name);
274 4 : if (s == NULL) {
275 0 : fprintf(stderr, "Cannot allocate memory\n");
276 0 : iconv_close(priv->cd);
277 0 : free(priv);
278 0 : return NULL;
279 : }
280 : return s;
281 : }
|