Line data Source code
1 : /*
2 : * SPDX-License-Identifier: MPL-2.0
3 : *
4 : * This Source Code Form is subject to the terms of the Mozilla Public
5 : * License, v. 2.0. If a copy of the MPL was not distributed with this
6 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 : *
8 : * Copyright 2024 MonetDB Foundation;
9 : * Copyright August 2008 - 2023 MonetDB B.V.;
10 : * Copyright 1997 - July 2008 CWI.
11 : */
12 :
13 : /* streams working on a gzip-compressed disk file */
14 :
15 : #include "monetdb_config.h"
16 : #include "stream.h"
17 : #include "stream_internal.h"
18 : #include "pump.h"
19 :
20 :
21 : #ifdef HAVE_LIBZ
22 :
23 : struct inner_state {
24 : z_stream strm;
25 : int (*indeflate)(z_streamp strm, int flush);
26 : int (*indeflateEnd)(z_streamp strm);
27 : void (*reset)(inner_state_t *inner_state);
28 : Bytef buf[64*1024];
29 : bool prev_was_stream_end;
30 : };
31 :
32 : static pump_buffer
33 3203 : get_src_win(inner_state_t *inner_state)
34 : {
35 3203 : return (pump_buffer) {
36 3203 : .start = (void*) inner_state->strm.next_in,
37 3203 : .count = inner_state->strm.avail_in,
38 : };
39 : }
40 :
41 : static void
42 773 : set_src_win(inner_state_t *inner_state, pump_buffer buf)
43 : {
44 773 : assert(buf.count < UINT_MAX);
45 773 : inner_state->strm.next_in = (Bytef*)buf.start;
46 773 : inner_state->strm.avail_in = (uInt)buf.count;
47 773 : }
48 :
49 : static pump_buffer
50 2220 : get_dst_win(inner_state_t *inner_state)
51 : {
52 2220 : return (pump_buffer) {
53 2220 : .start = (char*)inner_state->strm.next_out,
54 2220 : .count = inner_state->strm.avail_out,
55 : };
56 : }
57 :
58 : static void
59 139 : set_dst_win(inner_state_t *inner_state, pump_buffer buf)
60 : {
61 139 : assert(buf.count < UINT_MAX);
62 139 : inner_state->strm.next_out = (Bytef*)buf.start;
63 139 : inner_state->strm.avail_out = (uInt)buf.count;
64 139 : }
65 :
66 : static pump_buffer
67 863 : get_buffer(inner_state_t *inner_state)
68 : {
69 863 : return (pump_buffer) {
70 863 : .start = (char*)inner_state->buf,
71 : .count = sizeof(inner_state->buf),
72 : };
73 : }
74 :
75 : static pump_result
76 1104 : work(inner_state_t *inner_state, pump_action action)
77 : {
78 1104 : int a;
79 1104 : switch (action) {
80 : case PUMP_NO_FLUSH:
81 : a = Z_NO_FLUSH;
82 : break;
83 : case PUMP_FLUSH_DATA:
84 : a = Z_SYNC_FLUSH;
85 : break;
86 : case PUMP_FLUSH_ALL:
87 : a = Z_FULL_FLUSH;
88 : break;
89 : case PUMP_FINISH:
90 : a = Z_FINISH;
91 : break;
92 : default:
93 0 : assert(0 /* unknown action */);
94 : return PUMP_ERROR;
95 : }
96 :
97 1104 : if (inner_state->strm.next_in == NULL && inner_state->prev_was_stream_end) {
98 : // on the previous Z_STREAM_END we attempted to continue in case there
99 : // was a concatenated additional zstream but that is not the case.
100 : return PUMP_END;
101 : }
102 :
103 1096 : int ret = inner_state->indeflate(&inner_state->strm, a);
104 :
105 1096 : inner_state->prev_was_stream_end = false;
106 1096 : switch (ret) {
107 : case Z_OK:
108 : return PUMP_OK;
109 6 : case Z_STREAM_END:
110 6 : inner_state->prev_was_stream_end = true;
111 6 : if (action == PUMP_NO_FLUSH && inner_state->reset != NULL) {
112 : // zlib returns end, but maybe the input consists of multiple
113 : // gzipped files.
114 4 : inner_state->reset(inner_state);
115 4 : return PUMP_OK;
116 : } else {
117 : // no more incoming data
118 : return PUMP_END;
119 : }
120 : default:
121 : return PUMP_ERROR;
122 : }
123 : }
124 :
125 : static void
126 6 : finalizer(inner_state_t *inner_state)
127 : {
128 6 : inner_state->indeflateEnd(&inner_state->strm);
129 6 : free(inner_state);
130 6 : }
131 :
132 :
133 : static const char*
134 0 : get_error(inner_state_t *inner_state)
135 : {
136 0 : return inner_state->strm.msg;
137 : }
138 :
139 : static void
140 4 : inflate_reset(inner_state_t *inner_state)
141 : {
142 4 : pump_buffer src = get_src_win(inner_state);
143 4 : pump_buffer dst = get_dst_win(inner_state);
144 4 : inflateReset(&inner_state->strm);
145 4 : set_src_win(inner_state, src);
146 4 : set_dst_win(inner_state, dst);
147 4 : }
148 :
149 : stream *
150 6 : gz_stream(stream *inner, int level)
151 : {
152 6 : inner_state_t *gz = calloc(1, sizeof(inner_state_t));
153 6 : pump_state *state = calloc(1, sizeof(pump_state));
154 6 : if (gz == NULL || state == NULL) {
155 0 : free(gz);
156 0 : free(state);
157 0 : mnstr_set_open_error(inner->name, errno, "couldn't initialize gz stream");
158 0 : return NULL;
159 : }
160 :
161 6 : state->inner_state = gz;
162 6 : state->get_src_win = get_src_win;
163 6 : state->set_src_win = set_src_win;
164 6 : state->get_dst_win = get_dst_win;
165 6 : state->set_dst_win = set_dst_win;
166 6 : state->get_buffer = get_buffer;
167 6 : state->worker = work;
168 6 : state->get_error = get_error;
169 6 : state->finalizer = finalizer;
170 :
171 6 : int ret;
172 6 : if (inner->readonly) {
173 4 : gz->indeflate = inflate;
174 4 : gz->indeflateEnd = inflateEnd;
175 4 : gz->reset = inflate_reset;
176 4 : gz->strm.next_in = gz->buf;
177 4 : gz->strm.avail_in = 0;
178 4 : gz->strm.next_in = NULL;
179 4 : gz->strm.avail_in = 0;
180 4 : ret = inflateInit2(&gz->strm, 15 | 32); // 15 = allow all window sizes, 32 = accept gzip and zlib headers
181 : } else {
182 2 : gz->indeflate = deflate;
183 2 : gz->indeflateEnd = deflateEnd;
184 2 : gz->strm.next_out = gz->buf;
185 2 : gz->strm.avail_out = sizeof(gz->buf);
186 2 : if (level == 0)
187 2 : level = 6;
188 2 : ret = deflateInit2(&gz->strm, level, Z_DEFLATED, 15 | 16, 8, Z_DEFAULT_STRATEGY);
189 : }
190 :
191 6 : if (ret != Z_OK) {
192 0 : free(gz);
193 0 : free(state);
194 0 : mnstr_set_open_error(inner->name, 0, "failed to initialize gz stream: code %d", (int)ret);
195 0 : return NULL;
196 : }
197 :
198 6 : stream *s = pump_stream(inner, state);
199 :
200 6 : if (s == NULL) {
201 0 : gz->indeflateEnd(&gz->strm);
202 0 : free(gz);
203 0 : free(state);
204 0 : return NULL;
205 : }
206 :
207 : return s;
208 : }
209 :
210 : static stream *
211 0 : open_gzstream(const char *restrict filename, const char *restrict flags)
212 : {
213 0 : stream *inner;
214 :
215 0 : inner = open_stream(filename, flags);
216 0 : if (inner == NULL)
217 : return NULL;
218 :
219 0 : return gz_stream(inner, 0);
220 : }
221 :
222 : stream *
223 0 : open_gzrstream(const char *filename)
224 : {
225 0 : stream *s = open_gzstream(filename, "rb");
226 0 : if (s == NULL)
227 : return NULL;
228 :
229 0 : assert(s->readonly == true);
230 0 : assert(s->binary == true);
231 : return s;
232 : }
233 :
234 : stream *
235 0 : open_gzwstream(const char *restrict filename, const char *restrict mode)
236 : {
237 0 : stream *s = open_gzstream(filename, mode);
238 0 : if (s == NULL)
239 : return NULL;
240 :
241 0 : assert(s->readonly == false);
242 0 : assert(s->binary == true);
243 : return s;
244 : }
245 :
246 : stream *
247 0 : open_gzrastream(const char *filename)
248 : {
249 0 : stream *s = open_gzstream(filename, "r");
250 0 : s = create_text_stream(s);
251 0 : if (s == NULL)
252 : return NULL;
253 :
254 0 : assert(s->readonly == true);
255 0 : assert(s->binary == false);
256 : return s;
257 : }
258 :
259 : stream *
260 0 : open_gzwastream(const char *restrict filename, const char *restrict mode)
261 : {
262 0 : stream *s = open_gzstream(filename, mode);
263 0 : s = create_text_stream(s);
264 0 : if (s == NULL)
265 : return NULL;
266 0 : assert(s->readonly == false);
267 0 : assert(s->binary == false);
268 : return s;
269 : }
270 : #else
271 :
272 : stream *
273 : gz_stream(stream *inner, int preset)
274 : {
275 : (void) inner;
276 : (void) preset;
277 : mnstr_set_open_error(inner->name, 0, "GZ support has been left out of this MonetDB");
278 : return NULL;
279 : }
280 : stream *
281 : open_gzrstream(const char *filename)
282 : {
283 : mnstr_set_open_error(filename, 0, "GZ support has been left out of this MonetDB");
284 : return NULL;
285 : }
286 :
287 : stream *
288 : open_gzwstream(const char *restrict filename, const char *restrict mode)
289 : {
290 : (void) mode;
291 : mnstr_set_open_error(filename, 0, "GZ support has been left out of this MonetDB");
292 : return NULL;
293 : }
294 :
295 : stream *
296 : open_gzrastream(const char *filename)
297 : {
298 : mnstr_set_open_error(filename, 0, "GZ support has been left out of this MonetDB");
299 : return NULL;
300 : }
301 :
302 : stream *
303 : open_gzwastream(const char *restrict filename, const char *restrict mode)
304 : {
305 : (void) mode;
306 : mnstr_set_open_error(filename, 0, "GZ support has been left out of this MonetDB");
307 : return NULL;
308 : }
309 :
310 : #endif
|