Mercurial > hg > monetdb-java
changeset 595:36df3c89845d
Extended the FileTransferHandler utility class by adding support for reading from and writing to gzip-compressed files when the file name has the extension .gz.
Also improved error handling by testing for an empty file name and by providing more information when an invalid file name is given or another compression format is requested.
As the FileTransferHandler utility class is used by the JdbcClient application, JdbcClient now also supports this functionality.
author | Martin van Dinther <martin.van.dinther@monetdbsolutions.com> |
---|---|
date | Thu, 25 Nov 2021 19:06:48 +0100 (2021-11-25) |
parents | 613f94a13ad6 |
children | d30b69ed8e95 |
files | ChangeLog src/main/java/org/monetdb/util/FileTransferHandler.java |
diffstat | 2 files changed, 141 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,41 @@ # ChangeLog file for monetdb-java # This file is updated with Maddlog +* Thu Nov 25 2021 Martin van Dinther <martin.van.dinther@monetdbsolutions.com> +- Extended FileTransferHandler utility class by adding support for reading from + and writing to gzip compressed files when using file name extension .gz + Also improve error handling by testing on empty file name and provide more + information when invalid file name is given or other compression formats are + requested. As the FileTransferHandler utility class is used by JdbcClient + application, it will now also support this functionality. + +* Fri Nov 05 2021 Joeri van Ruth <joeri.van.ruth@monetdbsolutions.com> +- Extended the MonetDB JDBC driver with support for the ON CLIENT clause of + the COPY statements. To make use of this functionality you must first + register handlers for upload and download of data. + The MonetConnection class has been extended with 2 methods: + public void setUploadHandler(UploadHandler uploadHandler) + public void setDownloadHandler(DownloadHandler downloadHandler) + The MonetDB JDBC driver API has been extended with interfaces: + public interface org.monetdb.jdbc.MonetConnection.UploadHandler + public interface org.monetdb.jdbc.MonetConnection.DownloadHandler + See onclient.txt for more information on how to use these from Java. + We also provide a utility class: + public class org.monetdb.util.FileTransferHandler + which provides an example implementation of the MonetConnection.UploadHandler + and MonetConnection.DownloadHandler interfaces useable for reading files + from or writing files to a local file system. + +* Fri Nov 05 2021 Martin van Dinther <martin.van.dinther@monetdbsolutions.com> +- The JdbcClient application has been extended to support the new + COPY ... ON CLIENT functionality. 
+ However for security reasons you must provide an explicit new startup argument + --csvdir "/absolute/path/to/csvdatafiles" + or on MS Windows + --csvdir "C:\\path\\to\\csvdatafiles" + in order to activate the JdbcClient application to down/up load data to/from + the local file system. + * Mon Jun 14 2021 Martin van Dinther <martin.van.dinther@monetdbsolutions.com> - Compiled and released new jar files: monetdb-jdbc-3.1.jre8.jar, monetdb-mcl-1.20.jre8.jar and jdbcclient.jre8.jar
--- a/src/main/java/org/monetdb/util/FileTransferHandler.java +++ b/src/main/java/org/monetdb/util/FileTransferHandler.java @@ -13,6 +13,7 @@ import org.monetdb.jdbc.MonetConnection; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -20,13 +21,31 @@ import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.zip.GZIPInputStream; +import java.util.zip.GZIPOutputStream; /** - * Sample implementation of COPY ... INTO 'file-name' ON CLIENT handling + * Default implementation of UploadHandler and DownloadHandler interfaces + * for reading from and writing to files on the local file system. + * It enables support for: + * COPY .. INTO table FROM 'file-name' ON CLIENT ... + * and + * COPY SELECT_query INTO 'file-name' ON CLIENT ... + * handling. * - * Can be registered with {@link MonetConnection#setUploadHandler(MonetConnection.UploadHandler)} - * and {@link MonetConnection#setDownloadHandler(MonetConnection.DownloadHandler)}. - * Implements uploads and downloads by reading and writing files on the file system. + * Currently only file compression format .gz is supported. This is intentionally + * as other compression formats would introduce dependencies on external + * libraries which complicates usage of JDBC driver or JdbcClient application. + * Developers can of course build their own MyFileTransferHandler class + * and use it instead of this default implementation. + * + * A FileTransferHandler object needs to be registered via + * {@link MonetConnection#setUploadHandler(MonetConnection.UploadHandler)} and/or + * {@link MonetConnection#setDownloadHandler(MonetConnection.DownloadHandler)}. 
+ * + * @author Joeri van Ruth + * @author Martin van Dinther + * @version 1.1 */ public class FileTransferHandler implements MonetConnection.UploadHandler, MonetConnection.DownloadHandler { private final Path root; @@ -34,8 +53,10 @@ public class FileTransferHandler impleme /** * Create a new FileTransferHandler which serves the given directory. - * @param dir directory to read and write files from - * @param encoding set this to true if all files in the directory are known to be utf-8 encoded. + * + * @param dir directory Path to read and write files from + * @param encoding the specified characterSet encoding is used for all data files in the directory + * when null the Charset.defaultCharset() is used. */ public FileTransferHandler(final Path dir, final Charset encoding) { this.root = dir.toAbsolutePath().normalize(); @@ -45,59 +66,113 @@ public class FileTransferHandler impleme /** * Create a new FileTransferHandler which serves the given directory. * - * @param dir directory to read and write files from - * @param utf8Encoded set this to true if all files in the directory are known to be utf-8 encoded. + * @param dir directory String to read and write files from + * @param encoding the specified characterSet encoding is used for all data files in the directory + * when null the Charset.defaultCharset() is used. */ public FileTransferHandler(final String dir, final Charset encoding) { this(FileSystems.getDefault().getPath(dir), encoding); } + /** + * Read the data from the specified file (in the root directory) and upload it to the server. 
+ */ public void handleUpload(final MonetConnection.Upload handle, final String name, final boolean textMode, final long linesToSkip) throws IOException { + if (name == null || name.isEmpty()) { + handle.sendError("Missing file name"); + return; + } final Path path = root.resolve(name).normalize(); if (!path.startsWith(root)) { - handle.sendError("File is not in upload directory"); + handle.sendError("File is not in upload directory: " + root.toString()); return; } if (!Files.isReadable(path)) { - handle.sendError("Cannot read " + name); + handle.sendError("Cannot read file " + path.toString()); + return; + } + // In this implementation we ONLY support gzip compression format and none of the other compression formats. + if (name.endsWith(".bz2") || name.endsWith(".lz4") || name.endsWith(".xz") || name.endsWith(".zip")) { + final String extension = name.substring(name.lastIndexOf​(".")); + handle.sendError("Specified file compression format " + extension + " is not supported. Only .gz is supported."); return; } - if (!textMode) { - // must upload as a byte stream - handle.uploadFrom(Files.newInputStream(path)); - } else if (linesToSkip == 0 && utf8Encoded()) { - // more efficient to upload as a byte stream - handle.uploadFrom(Files.newInputStream(path)); + + final boolean useGZIP = name.endsWith(".gz"); + if (!textMode || (linesToSkip == 0 && utf8Encoded())) { + // when !textMode we must upload as a byte stream + // when utf8Encoded and linesToSkip is 0 it is more efficient to upload as a byte stream + final InputStream inputStream = Files.newInputStream(path); + if (useGZIP) { + final GZIPInputStream gzipInputStream = new GZIPInputStream(inputStream, 128 * 1024); + handle.uploadFrom(gzipInputStream); + gzipInputStream.close(); + } else { + handle.uploadFrom(inputStream); + } } else { - // cannot upload as a byte stream, must deal with encoding - final BufferedReader reader = Files.newBufferedReader(path, encoding); - handle.uploadFrom(reader, linesToSkip); + // 
cannot upload as a byte stream, must deal with encoding and/or linesToSkip + if (useGZIP) { + // TODO add support for: useGZIP + handle.sendError("Sorry, uploading compressed .gz file data with an offset or a none utf-8 encoding is not yet supported"); + return; + } else { + final BufferedReader reader = Files.newBufferedReader(path, encoding); + handle.uploadFrom(reader, linesToSkip); + reader.close(); + } } } + /** + * Download the data from the server and write it to a new created file in the root directory. + * When a file with the same name already exists the download request will send an error and NOT overwrite the existing file. + */ public void handleDownload(final MonetConnection.Download handle, final String name, final boolean textMode) throws IOException { + if (name == null || name.isEmpty()) { + handle.sendError("Missing file name"); + return; + } final Path path = root.resolve(name).normalize(); if (!path.startsWith(root)) { - handle.sendError("File is not in download directory"); + handle.sendError("File is not in download directory: " + root.toString()); return; } if (Files.exists(path)) { - handle.sendError("File already exists: " + name); + handle.sendError("File already exists: " + path.toString()); + return; + } + // In this implementation we ONLY support gzip compression format and none of the other compression formats. + if (name.endsWith(".bz2") || name.endsWith(".lz4") || name.endsWith(".xz") || name.endsWith(".zip")) { + final String extension = name.substring(name.lastIndexOf​(".")); + handle.sendError("Requested file compression format " + extension + " is not supported. 
Use .gz instead."); return; } - if (!textMode) { - // must download as a byte stream + + final boolean useGZIP = name.endsWith(".gz"); + if (!textMode || utf8Encoded()) { + // when !textMode we must download as a byte stream + // when utf8Encoded it is more efficient to download as a byte stream final OutputStream outputStream = Files.newOutputStream(path, StandardOpenOption.CREATE_NEW); - handle.downloadTo(outputStream); - } else if (utf8Encoded()) { - // more efficient to download as a byte stream - final OutputStream outputStream = Files.newOutputStream(path, StandardOpenOption.CREATE_NEW); - handle.downloadTo(outputStream); + if (useGZIP) { + final GZIPOutputStream gzipOutputStream = new GZIPOutputStream(outputStream, 128 * 1024); + handle.downloadTo(gzipOutputStream); + gzipOutputStream.flush(); + gzipOutputStream.close(); + } else { + handle.downloadTo(outputStream); + } } else { // cannot download as a byte stream, must deal with encoding - final BufferedWriter writer = Files.newBufferedWriter(path, encoding, StandardOpenOption.CREATE_NEW); - handle.downloadTo(writer); - writer.close(); + if (useGZIP) { + // TODO add support for: useGZIP + handle.sendError("Sorry, downloading data in a none utf-8 encoding to a compressed .gz file is not yet supported"); + return; + } else { + final BufferedWriter writer = Files.newBufferedWriter(path, encoding, StandardOpenOption.CREATE_NEW); + handle.downloadTo(writer); + writer.close(); + } } }