void TRI_InitCsvParser (TRI_csv_parser_t* parser, TRI_memory_zone_t* zone, void (*begin) (TRI_csv_parser_t*, size_t), void (*add) (TRI_csv_parser_t*, char const*, size_t, size_t, size_t, bool), void (*end) (TRI_csv_parser_t*, char const*, size_t, size_t, size_t, bool), void *vData) { size_t length; parser->_state = TRI_CSV_PARSER_BOL; parser->_data = vData; TRI_SetQuoteCsvParser(parser, '"', true); TRI_SetSeparatorCsvParser(parser, ';'); TRI_UseBackslashCsvParser(parser, false); length = 1024; parser->_row = 0; parser->_column = 0; parser->_memoryZone = zone; parser->_begin = static_cast<char*>(TRI_Allocate(zone, length, false)); if (parser->_begin == nullptr) { length = 0; } parser->_start = parser->_begin; parser->_written = parser->_begin; parser->_current = parser->_begin; parser->_stop = parser->_begin; parser->_end = parser->_begin + length; parser->_dataBegin = nullptr; parser->_dataAdd = nullptr; parser->_dataEnd = nullptr; parser->begin = begin; parser->add = add; parser->end = end; parser->_nResize = 0; parser->_nMemmove = 0; parser->_nMemcpy = 0; }
/// @brief imports a delimited (CSV or TSV) file into a collection
///
/// Resets the per-import counters and buffers, opens the input (standard input
/// when fileName is "-"), feeds it in 32 KB chunks through a CSV parser whose
/// callbacks are ProcessCsvBegin / ProcessCsvAdd / ProcessCsvEnd, and finally
/// sends whatever is left in the output buffer.
///
/// @param collectionName  name stored in _collectionName for this import
/// @param fileName        path of the input file, or "-" for standard input
/// @param typeImport      ImportHelper::CSV enables the configured quote
///                        character (if any); otherwise quoting is disabled
///
/// @return false if the file cannot be opened, the separator cannot be
///         unescaped, or a read fails (with _errorMessage set); otherwise
///         the negation of _hasError after the whole input was processed.
bool ImportHelper::importDelimited (string const& collectionName,
                                    string const& fileName,
                                    DelimitedImportType typeImport) {
  // reset all per-import state
  _collectionName = collectionName;
  _firstLine = "";
  _numberLines = 0;
  _numberOk = 0;
  _numberError = 0;
  _outputBuffer.clear();
  _lineBuffer.clear();
  _errorMessage = "";
  _hasError = false;

  // read and convert
  int fd;
  int64_t totalLength;

  if (fileName == "-") {
    // we don't have a filesize
    totalLength = 0;
    fd = STDIN_FILENO;
  }
  else {
    // read filesize
    totalLength = TRI_SizeFile(fileName.c_str());
    fd = TRI_OPEN(fileName.c_str(), O_RDONLY);

    if (fd < 0) {
      _errorMessage = TRI_LAST_ERROR_STR;
      return false;
    }
  }

  // progress display control variables
  int64_t totalRead = 0;
  double nextProgress = ProgressStep;

  size_t separatorLength;
  char* separator = TRI_UnescapeUtf8StringZ(TRI_UNKNOWN_MEM_ZONE,
                                            _separator.c_str(),
                                            _separator.size(),
                                            &separatorLength);

  if (separator == nullptr) {
    if (fd != STDIN_FILENO) {
      TRI_CLOSE(fd);
    }

    _errorMessage = "out of memory";
    return false;
  }

  // TRI_InitCsvParser takes six arguments; the trailing user-data pointer is
  // not used by this importer (the callbacks get `this` via _dataAdd below),
  // so pass nullptr explicitly.
  TRI_csv_parser_t parser;

  TRI_InitCsvParser(&parser,
                    TRI_UNKNOWN_MEM_ZONE,
                    ProcessCsvBegin,
                    ProcessCsvAdd,
                    ProcessCsvEnd,
                    nullptr);

  // only the first byte of the unescaped separator is used
  TRI_SetSeparatorCsvParser(&parser, separator[0]);

  // in csv, we'll use the quote char if set
  // in tsv, we do not use the quote char
  if (typeImport == ImportHelper::CSV && _quote.size() > 0) {
    TRI_SetQuoteCsvParser(&parser, _quote[0], true);
  }
  else {
    TRI_SetQuoteCsvParser(&parser, '\0', false);
  }

  parser._dataAdd = this;
  _rowOffset = 0;
  _rowsRead = 0;

  char buffer[32768];

  while (! _hasError) {
    ssize_t n = TRI_READ(fd, buffer, sizeof(buffer));

    if (n < 0) {
      // capture the error text first: the cleanup calls below may overwrite
      // the last-error state that TRI_LAST_ERROR_STR reports
      _errorMessage = TRI_LAST_ERROR_STR;

      TRI_Free(TRI_UNKNOWN_MEM_ZONE, separator);
      TRI_DestroyCsvParser(&parser);

      if (fd != STDIN_FILENO) {
        TRI_CLOSE(fd);
      }

      return false;
    }
    else if (n == 0) {
      // end of input
      break;
    }

    totalRead += static_cast<int64_t>(n);
    reportProgress(totalLength, totalRead, nextProgress);

    TRI_ParseCsvString2(&parser, buffer, n);
  }

  // flush whatever is still pending in the output buffer
  if (_outputBuffer.length() > 0) {
    sendCsvBuffer();
  }

  TRI_DestroyCsvParser(&parser);
  TRI_Free(TRI_UNKNOWN_MEM_ZONE, separator);

  if (fd != STDIN_FILENO) {
    TRI_CLOSE(fd);
  }

  _outputBuffer.clear();
  return !_hasError;
}