Line data Source code
1 : #include <iconv.h>
2 :
3 : struct ic_priv_t {
4 : stream *s;
5 : iconv_t cd;
6 : bool eof:1;
7 : bool err:1;
8 : size_t buflen;
9 : char buffer[BUFSIZ];
10 : };
11 :
12 : static ssize_t
13 0 : ic_read(void *restrict private, void *restrict buf, size_t elmsize, size_t cnt)
14 : {
15 0 : struct ic_priv_t *ic = private;
16 0 : char *inbuf = ic->buffer;
17 0 : size_t inbytesleft = ic->buflen;
18 0 : char *outbuf = buf;
19 0 : size_t outbytesleft = elmsize * cnt;
20 :
21 0 : if (outbytesleft == 0)
22 : return 0;
23 0 : while (outbytesleft > 0 && !ic->eof) {
24 0 : if (ic->buflen == sizeof(ic->buffer)) {
25 : /* ridiculously long multibyte sequence, return error */
26 0 : fprintf(stderr, "multibyte sequence too long\n");
27 0 : return -1;
28 : }
29 :
30 0 : switch (mnstr_read(ic->s, ic->buffer + ic->buflen, 1, 1)) {
31 0 : case 1:
32 : /* expected: read one byte */
33 0 : ic->buflen++;
34 0 : inbytesleft++;
35 0 : break;
36 0 : case 0:
37 : /* end of file */
38 0 : ic->eof = true;
39 0 : if (ic->buflen > 0) {
40 : /* incomplete input */
41 0 : fprintf(stderr, "incomplete input\n");
42 0 : return -1;
43 : }
44 0 : if (iconv(ic->cd, NULL, NULL, &outbuf, &outbytesleft) == (size_t) -1) {
45 : /* some error occurred */
46 0 : fprintf(stderr, "iconv reported an error\n");
47 0 : return -1;
48 : }
49 0 : goto exit_func; /* double break */
50 0 : default:
51 : /* error */
52 0 : if (mnstr_peek_error(ic->s))
53 0 : fprintf(stderr, "%s\n", mnstr_peek_error(ic->s));
54 0 : mnstr_clearerr(ic->s);
55 0 : return -1;
56 : }
57 0 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
58 0 : switch (errno) {
59 0 : case EILSEQ:
60 0 : fprintf(stderr, "invalid multibyte sequence\n");
61 0 : return -1;
62 : case EINVAL:
63 : /* incomplete multibyte sequence encountered */
64 : break;
65 0 : case E2BIG:
66 : /* not enough space in output buffer,
67 : * return what we have, saving what's in
68 : * the buffer */
69 0 : goto exit_func;
70 0 : default:
71 0 : fprintf(stderr, "iconv reported an error\n");
72 0 : return -1;
73 : }
74 : }
75 0 : if (inbytesleft == 0) {
76 : /* converted complete buffer */
77 0 : inbuf = ic->buffer;
78 0 : ic->buflen = 0;
79 : }
80 : }
81 0 : exit_func:
82 0 : if (inbuf > ic->buffer)
83 0 : memmove(ic->buffer, inbuf, inbytesleft);
84 0 : ic->buflen = inbytesleft;
85 0 : if (outbytesleft == elmsize * cnt && !mnstr_eof(ic->s)) {
86 : /* if we're returning data, we must pass on EOF on the
87 : * next call (i.e. keep ic->eof set), otherwise we
88 : * must clear it so that the next call will cause the
89 : * underlying stream to be read again */
90 0 : ic->eof = false;
91 : }
92 0 : return (ssize_t) ((elmsize * cnt - outbytesleft) / elmsize);
93 : }
94 :
95 : static ssize_t
96 372 : ic_write(void *restrict private, const void *restrict buf, size_t elmsize, size_t cnt)
97 : {
98 372 : struct ic_priv_t *ic = private;
99 372 : char *inbuf = (char *) buf; /* iconv requires non-const */
100 372 : size_t inbytesleft = elmsize * cnt;
101 372 : char *bf = NULL;
102 :
103 372 : if (ic == NULL) {
104 0 : fprintf(stderr, "stream already ended\n");
105 0 : return -1;
106 : }
107 :
108 372 : if (ic->err) {
109 27 : for (size_t i = 0; i < inbytesleft; i++)
110 21 : if (inbuf[i] == '\n') {
111 1 : ic->err = false;
112 1 : break;
113 : }
114 7 : return -1;
115 : }
116 :
117 : /* if unconverted data from a previous call remains, add it to
118 : * the start of the new data, using temporary space */
119 365 : if (ic->buflen > 0) {
120 0 : bf = malloc(ic->buflen + inbytesleft);
121 0 : if (bf == NULL) {
122 : /* cannot allocate memory */
123 0 : fprintf(stderr, "out of memory\n");
124 0 : goto bailout;
125 : }
126 0 : memcpy(bf, ic->buffer, ic->buflen);
127 0 : memcpy(bf + ic->buflen, buf, inbytesleft);
128 0 : inbuf = bf;
129 0 : inbytesleft += ic->buflen;
130 0 : ic->buflen = 0;
131 : }
132 728 : while (inbytesleft > 0) {
133 365 : char *outbuf = ic->buffer;
134 365 : size_t outbytesleft = sizeof(ic->buffer);
135 :
136 365 : if (iconv(ic->cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) == (size_t) -1) {
137 2 : switch (errno) {
138 2 : case EILSEQ:
139 : /* invalid multibyte sequence encountered */
140 2 : fprintf(stderr, "invalid multibyte sequence\n");
141 2 : goto bailout;
142 0 : case EINVAL:
143 : /* incomplete multibyte sequence
144 : * encountered flush what has been
145 : * converted */
146 0 : if (outbytesleft < sizeof(ic->buffer) &&
147 0 : mnstr_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
148 0 : fprintf(stderr, "incomplete multibyte sequence\n");
149 0 : goto bailout;
150 : }
151 : /* remember what hasn't been converted */
152 0 : if (inbytesleft > sizeof(ic->buffer)) {
153 : /* ridiculously long multibyte
154 : * sequence, so return
155 : * error */
156 0 : fprintf(stderr, "multibyte sequence too long\n");
157 0 : goto bailout;
158 : }
159 0 : memcpy(ic->buffer, inbuf, inbytesleft);
160 0 : ic->buflen = inbytesleft;
161 0 : if (bf)
162 0 : free(bf);
163 0 : return (ssize_t) cnt;
164 : case E2BIG:
165 : /* not enough space in output buffer */
166 : break;
167 0 : default:
168 0 : fprintf(stderr, "iconv reported an error\n");
169 0 : goto bailout;
170 : }
171 : }
172 363 : if (mnstr_write(ic->s, ic->buffer, 1, sizeof(ic->buffer) - outbytesleft) < 0) {
173 0 : const char *e = mnstr_peek_error(ic->s);
174 0 : if (e)
175 0 : fprintf(stderr, "%s\n", e);
176 0 : mnstr_clearerr(ic->s);
177 0 : goto bailout;
178 : }
179 : }
180 363 : if (bf)
181 0 : free(bf);
182 363 : return (ssize_t) cnt;
183 :
184 2 : bailout:
185 2 : if (bf)
186 0 : free(bf);
187 2 : ic->err = true;
188 2 : return -1;
189 : }
190 :
191 : static void
192 4 : ic_close(void *private)
193 : {
194 4 : struct ic_priv_t *ic = private;
195 4 : if (ic->cd != (iconv_t) -1)
196 4 : iconv_close(ic->cd);
197 4 : ic->cd = (iconv_t) -1;
198 4 : mnstr_close(ic->s);
199 4 : }
200 :
201 : static void
202 4 : ic_destroy(void *private)
203 : {
204 4 : ic_close(private);
205 4 : mnstr_destroy(((struct ic_priv_t *) private)->s);
206 4 : free(private);
207 4 : }
208 :
209 : static stream *
210 0 : iconv_rstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
211 : {
212 0 : if (ss == NULL || charset == NULL || name == NULL)
213 : return NULL;
214 :
215 0 : struct ic_priv_t *priv = malloc(sizeof(struct ic_priv_t));
216 0 : if (priv == NULL) {
217 0 : fprintf(stderr, "Cannot allocate memory\n");
218 0 : return NULL;
219 : }
220 0 : *priv = (struct ic_priv_t) {
221 : .s = ss,
222 0 : .cd = iconv_open("utf-8", charset),
223 : };
224 0 : if (priv->cd == (iconv_t) -1) {
225 0 : free(priv);
226 0 : fprintf(stderr, "Cannot initiate character set translation from %s\n",
227 : charset);
228 0 : return NULL;
229 : }
230 0 : stream *s = callback_stream(priv, ic_read, NULL, ic_close, ic_destroy, name);
231 0 : if (s == NULL) {
232 0 : fprintf(stderr, "Cannot allocate memory\n");
233 0 : iconv_close(priv->cd);
234 0 : free(priv);
235 0 : return NULL;
236 : }
237 : return s;
238 : }
239 :
240 : static stream *
241 4 : iconv_wstream(stream *restrict ss, const char *restrict charset, const char *restrict name)
242 : {
243 4 : if (ss == NULL || charset == NULL || name == NULL)
244 : return NULL;
245 :
246 4 : struct ic_priv_t *priv = malloc(sizeof(struct ic_priv_t));
247 4 : if (priv == NULL) {
248 0 : fprintf(stderr, "Cannot allocate memory\n");
249 0 : return NULL;
250 : }
251 8 : *priv = (struct ic_priv_t) {
252 : .s = ss,
253 4 : .cd = iconv_open(charset, "utf-8"),
254 : };
255 4 : if (priv->cd == (iconv_t) -1) {
256 0 : free(priv);
257 0 : fprintf(stderr, "Cannot initiate character set translation from %s\n",
258 : charset);
259 0 : return NULL;
260 : }
261 4 : stream *s = callback_stream(priv, NULL, ic_write, ic_close, ic_destroy, name);
262 4 : if (s == NULL) {
263 0 : fprintf(stderr, "Cannot allocate memory\n");
264 0 : iconv_close(priv->cd);
265 0 : free(priv);
266 0 : return NULL;
267 : }
268 : return s;
269 : }
|