I've done that with a MonetDB C UDF. It takes a MonetDB table (temporary or persistent) and writes it out as Parquet.
Essentially, the C UDF passes the table's on-heap column pointers to a C++ function, and that C++ function writes the Parquet file using the Arrow C++ API.
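Before the per-column loops (the column-pointer struct and the CHAR case are shown below), the C++ side opens the output file, builds a Parquet schema matching the MonetDB columns, and appends a row group. Roughly like this — the path, field names and column list are illustrative, and the exact enum/macro spellings differ a bit between Arrow versions:

/* sketch of the writer setup that precedes the per-column loops */
#include <memory>
#include <arrow/io/file.h>
#include <parquet/api/writer.h>
#include <parquet/exception.h>

using parquet::Repetition;
using parquet::Type;
using parquet::ConvertedType;
using parquet::schema::GroupNode;
using parquet::schema::PrimitiveNode;

static std::shared_ptr<GroupNode> make_schema() {
    parquet::schema::NodeVector fields;
    // one node per exported MonetDB column, all OPTIONAL so NULLs can be encoded
    fields.push_back(PrimitiveNode::Make("id",   Repetition::OPTIONAL, Type::INT32,      ConvertedType::NONE));
    fields.push_back(PrimitiveNode::Make("name", Repetition::OPTIONAL, Type::BYTE_ARRAY, ConvertedType::UTF8));
    return std::static_pointer_cast<GroupNode>(
        GroupNode::Make("schema", Repetition::REQUIRED, fields));
}

void write_parquet(const char *path) {
    std::shared_ptr<arrow::io::FileOutputStream> out_file;
    PARQUET_ASSIGN_OR_THROW(out_file, arrow::io::FileOutputStream::Open(path));

    parquet::WriterProperties::Builder props;
    std::shared_ptr<parquet::ParquetFileWriter> file_writer =
        parquet::ParquetFileWriter::Open(out_file, make_schema(), props.build());

    parquet::RowGroupWriter *rg_writer = file_writer->AppendRowGroup();
    // ... per-column NextColumn() / WriteBatch() loops, as in the CHAR case below ...
    file_writer->Close();
}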
/* on-heap column pointers, one entry per column of the given type */
typedef struct _monetdata {
    /* C side fills these with pointers to the actual column data on the heap */
    char **boolheap;            /* bool columns */
    short **shortheap;          /* smallint columns */
    int **ptrheap;              /* int32 columns */
    float **fltheap;            /* float columns */
    double **dblheap;           /* double columns */
    long long **biheap;         /* int64 (bigint) columns */
    int **dateheap;             /* date columns (stored as int32) */
    /* string (char) columns: per-column offset width, the offset heaps of each
       possible width, and the base of each column's string heap */
    int *width_str_offset;      /* offset width per string column: 1, 2, 4 or 8 bytes */
    unsigned char **cheap;      /* 1-byte offsets into the string heap */
    unsigned short **usheap;    /* 2-byte offsets */
    unsigned int **uiheap;      /* 4-byte offsets */
    size_t **stheap;            /* 8-byte offsets */
    char **tvbase;              /* base pointer of each string column's heap */
} t_monetdata;
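On the C side these slots are filled straight from the BAT heaps before calling into C++. A rough sketch for one string column, assuming direct access to the GDK BAT internals (field names as in gdk.h; the surrounding UDF plumbing is omitted and the function name is illustrative):

/* sketch: filling the string-column slots of t_monetdata from a BAT *b
   (illustrative; note that depending on the MonetDB version, 1- and 2-byte
   offsets may carry a GDK_VAROFFSET bias that has to be accounted for) */
#include "gdk.h"

static void fill_str_column(t_monetdata *md, BAT *b, int char_count)
{
    md->width_str_offset[char_count] = b->twidth;   /* offset width: 1, 2, 4 or 8 */
    md->tvbase[char_count] = b->tvheap->base;       /* base of the string heap */
    switch (b->twidth) {
    case 1:  md->cheap[char_count]  = (unsigned char *)  Tloc(b, 0); break;
    case 2:  md->usheap[char_count] = (unsigned short *) Tloc(b, 0); break;
    case 4:  md->uiheap[char_count] = (unsigned int *)   Tloc(b, 0); break;
    default: md->stheap[char_count] = (size_t *)         Tloc(b, 0); break;
    }
}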
/* C++: loop over the rows of each MonetDB on-heap column; example for the CHAR (string) type */
case 6: // MonetDB char -> Parquet BYTE_ARRAY (string)
    vlj = len[icol];    // maximum string length for this column
    ba_writer = static_cast<parquet::ByteArrayWriter*>(rg_writer->NextColumn());
    for (i = minrow; i < maxrow; i++) {
        // resolve the string: the column stores offsets (1, 2, 4 or 8 bytes wide)
        // into its string heap, whose base is tvbase[char_count]
        if (width_str_offset[char_count] == 1)
            cptr = tvbase[char_count] + cheap[char_count][i];
        else if (width_str_offset[char_count] == 2)
            cptr = tvbase[char_count] + usheap[char_count][i];
        else if (width_str_offset[char_count] == 4)
            cptr = tvbase[char_count] + uiheap[char_count][i];
        else
            cptr = tvbase[char_count] + stheap[char_count][i];
        if (*cptr == -128) {
            // MonetDB marks a NULL string with '\200' (0x80, -128 as a signed char)
            definition_level = 0;
            ba_writer->WriteBatch(1, &definition_level, nullptr, nullptr);
        }
        else {
            // copy the NUL-terminated string into charbuf; k ends up as its length
            for (k = 0; k < vlj; k++) {
                c = *cptr++;
                if (c != 0)
                    charbuf[k] = c;
                else
                    break;
            }
            definition_level = 1;
            ba_value.ptr = reinterpret_cast<const uint8_t*>(charbuf);
            ba_value.len = k;
            ba_writer->WriteBatch(1, &definition_level, nullptr, &ba_value);
        }
    }
    char_count++;
    break;
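The fixed-width types follow the same pattern without the offset indirection. A sketch of what the int32 case could look like (the case tag, the counter variable and the NULL check here are illustrative, not copied from my code):

// sketch: MonetDB int -> Parquet INT32 (assumes MonetDB's int NULL marker
// is INT32_MIN, i.e. int_nil)
case 2:
{
    parquet::Int32Writer *i32_writer =
        static_cast<parquet::Int32Writer*>(rg_writer->NextColumn());
    for (i = minrow; i < maxrow; i++) {
        int32_t v = ptrheap[int_count][i];
        if (v == INT32_MIN) {
            definition_level = 0;   // NULL: definition level 0, no value written
            i32_writer->WriteBatch(1, &definition_level, nullptr, nullptr);
        } else {
            definition_level = 1;
            i32_writer->WriteBatch(1, &definition_level, nullptr, &v);
        }
    }
    int_count++;
    break;
}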
Anton