Example #1
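// Flush a buffer's worth of items into the row identified by rowId, writing one
// value per attribute for each item.  Chunk iterators are (re)acquired whenever
// the row's last chunk is full, advanced after each append, and flushed either
// when a chunk fills up or once all items have been written.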
void RowCollection<Group,Hash>::flushOneRowInBuffer(size_t rowId, Items const& items) {
    assert(rowId<_counts.size());

    vector<boost::shared_ptr<ChunkIterator> > chunkIterators(_attributes.size());

    try {
        if (! isLastChunkFull(rowId)) { // only get chunk iterators if there exists a non-full last chunk.
            getChunkIterators(chunkIterators, rowId);
        }

        for (size_t v=0; v<items.size(); ++v) {
            vector<Value> const& item = items[v];

            if (isLastChunkFull(rowId)) { // when the last chunk was full, get the iterators here (right before append)
                getChunkIterators(chunkIterators, rowId);
            }

            for (size_t i=0; i<_attributes.size(); ++i) {
                chunkIterators[i]->writeItem(item[i]);
            }
            ++ _counts[rowId];

            if (isLastChunkFull(rowId)) { // after append, flush and clear the chunk iterators if the last chunk becomes full
                for (size_t i=0; i<_attributes.size(); ++i) {
                    chunkIterators[i]->flush();
                    chunkIterators[i].reset();
                }
            } else {
                for (size_t i=0; i<_attributes.size(); ++i) {
                    ++ (*chunkIterators[i]);
                }
            }
        }

        if (items.size()>0 && !isLastChunkFull(rowId)) {
            assert(chunkIterators[0]);
            for (size_t i=0; i<_attributes.size(); ++i) {
                chunkIterators[i]->flush();
                chunkIterators[i].reset();
            }
        } else {
            assert(! chunkIterators[0]);
        }
    } catch (std::exception& e) {
        LOG4CXX_DEBUG(logger, "[RowCollection] std::exception in RowIterator::appendItems(): " << e.what());
        throw;
    } catch (...) {
        LOG4CXX_DEBUG(logger, "[RowCollection] (...) exception in RowIterator::appendItem()" );
        throw;
    }
}
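The append path above follows the usual chunk-writing protocol: obtain one iterator per attribute, writeItem() each value, advance the iterator if the chunk still has room, and flush() when a chunk fills up or the batch ends. Below is a minimal, self-contained sketch of that protocol, collapsed to a single attribute and using a hypothetical in-memory MockChunkIterator in place of SciDB's real ChunkIterator interface:

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in for a chunk iterator: buffers writes in memory and
// "flushes" by reporting the count.  A real ChunkIterator writes values into
// an array chunk at its current cell position.
class MockChunkIterator {
public:
    explicit MockChunkIterator(size_t capacity) : _capacity(capacity) {}
    bool full() const { return _values.size() >= _capacity; }
    bool pending() const { return !_values.empty(); }
    void writeItem(double v) { _values.push_back(v); }  // append at the current cell
    MockChunkIterator& operator++() { return *this; }   // advance to the next cell (no-op here)
    void flush() {                                      // persist the buffered values
        std::cout << "flushing " << _values.size() << " values" << std::endl;
        _values.clear();
    }
private:
    size_t _capacity;
    std::vector<double> _values;
};

int main() {
    MockChunkIterator it(4);              // pretend each chunk holds 4 cells
    std::vector<double> items;
    for (int v = 1; v <= 6; ++v) {
        items.push_back(v);
    }

    for (size_t i = 0; i < items.size(); ++i) {
        it.writeItem(items[i]);
        if (it.full()) {
            it.flush();                   // chunk became full: flush it and start fresh
        } else {
            ++it;                         // room left: just advance to the next cell
        }
    }
    if (it.pending()) {
        it.flush();                       // flush the final, partially filled chunk
    }
    return 0;
}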
Example #2
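// Load one chunk's worth of CSV records into the per-attribute lookahead
// chunks, writing through one chunk iterator per attribute.  Returns true if
// any field data was read, false at end of input.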
bool CsvChunkLoader::loadChunk(boost::shared_ptr<Query>& query, size_t chunkIndex)
{
    // Must do EOF check *before* nextImplicitChunkPosition() call, or
    // we risk stepping out of bounds.
    if (_csvParser.empty()) {
        int ch = ::getc(fp());
        if (ch == EOF) {
            return false;
        }
        ::ungetc(ch, fp());
    }

    // Reposition and make sure all is cool.
    nextImplicitChunkPosition(MY_CHUNK);
    enforceChunkOrder("csv loader");

    // Initialize a chunk and chunk iterator for each attribute.
    Attributes const& attrs = schema().getAttributes();
    size_t nAttrs = attrs.size();
    vector< boost::shared_ptr<ChunkIterator> > chunkIterators(nAttrs);
    for (size_t i = 0; i < nAttrs; i++) {
        Address addr(i, _chunkPos);
        MemChunk& chunk = getLookaheadChunk(i, chunkIndex);
        chunk.initialize(array(), &schema(), addr, attrs[i].getDefaultCompressionMethod());
        chunkIterators[i] = chunk.getIterator(query,
                                              ChunkIterator::NO_EMPTY_CHECK |
                                              ConstChunkIterator::SEQUENTIAL_WRITE);
    }

    char const *field = 0;
    int rc = 0;
    bool sawData = false;
    bool sawEof = false;

    while (!chunkIterators[0]->end()) {

        _column = 0;
        array()->countCell();
        
        // Parse and write out a line's worth of fields.  NB if you
        // have to 'continue;' after a writeItem() call, make sure the
        // iterator (and possibly the _column) gets incremented.
        //
        for (size_t i = 0; i < nAttrs; ++i) {
            try {
                // Handle empty tag...
                if (i == emptyTagAttrId()) {
                    attrVal(i).setBool(true);
                    chunkIterators[i]->writeItem(attrVal(i));
                    ++(*chunkIterators[i]); // ...but don't increment _column.
                    continue;
                }

                // Parse out next input field.
                rc = _csvParser.getField(field);
                if (rc == CsvParser::END_OF_FILE) {
                    sawEof = true;
                    break;
                }
                if (rc == CsvParser::END_OF_RECORD) {
                    // Got record terminator, but we have more attributes!
                    throw USER_EXCEPTION(SCIDB_SE_IMPORT_ERROR, SCIDB_LE_OP_INPUT_TOO_FEW_FIELDS)
                        << _csvParser.getFileOffset() << _csvParser.getRecordNumber() << _column;
                }
                if (rc > 0) {
                    // So long as we never call _csvParser.setStrict(true), we should never see this.
                    throw USER_EXCEPTION(SCIDB_SE_IMPORT_ERROR, SCIDB_LE_CSV_PARSE_ERROR)
                        << _csvParser.getFileOffset() << _csvParser.getRecordNumber()
                        << _column << csv_strerror(rc);
                }
                SCIDB_ASSERT(rc == CsvParser::OK);
                SCIDB_ASSERT(field);
                sawData = true;

                // Process input field.
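                // parseNullField maps a recognized null token to its missing
                // reason code (>= 0); a negative return means the field is not
                // a null token and is parsed normally below.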
                if (mightBeNull(field) && attrs[i].isNullable()) {
                    int8_t missingReason = parseNullField(field);
                    if (missingReason >= 0) {
                        attrVal(i).setNull(missingReason);
                        chunkIterators[i]->writeItem(attrVal(i));
                        ++(*chunkIterators[i]);
                        _column += 1;
                        continue;
                    }
                }
                if (converter(i)) {
                    Value v;
                    v.setString(field);
                    const Value* vp = &v;
                    (*converter(i))(&vp, &attrVal(i), NULL);
                    chunkIterators[i]->writeItem(attrVal(i));
                }
                else {
                    TypeId const &tid = typeIdOfAttr(i);
                    if (attrs[i].isNullable() &&
                        (*field == '\0' || (iswhitespace(field) && IS_NUMERIC(tid))))
                    {
                        // [csv2scidb compat] With csv2scidb, empty strings (or, for numeric
                        // fields, whitespace) became nulls if the target attribute was
                        // nullable.  We keep the same behavior.  (We should *not* do this for
                        // TSV; that format requires explicit nulls!)
                        attrVal(i).setNull();
                    } else {
                        StringToValue(tid, field, attrVal(i));
                    }
                    chunkIterators[i]->writeItem(attrVal(i));
                }
            }
            catch (Exception& ex) {
                _badField = field;
                _fileOffset = _csvParser.getFileOffset();
                array()->handleError(ex, chunkIterators[i], i);
            }

            _column += 1;
            ++(*chunkIterators[i]);
        }

        if (sawEof) {
            break;
        }

        // We should be at EOL now; otherwise there are too many fields on this line.  Post a
        // warning (just once): aborting the load or complaining about every bad row seems too
        // harsh, but we do want to mention the problem.
        //
        rc = _csvParser.getField(field);
        if (!_tooManyWarning && (rc != CsvParser::END_OF_RECORD)) {
            _tooManyWarning = true;
            query->postWarning(SCIDB_WARNING(SCIDB_LE_OP_INPUT_TOO_MANY_FIELDS)
                               << _csvParser.getFileOffset() << _csvParser.getRecordNumber() << _column);
        }

        array()->completeShadowArrayRow(); // done with cell/record
    }

    for (size_t i = 0; i < nAttrs; i++) {
        if (chunkIterators[i]) {
            chunkIterators[i]->flush();
        }
    }

    return sawData;
}
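The one-character look-ahead at the top of loadChunk() is a standard stdio idiom: read a byte with getc() to test for end-of-file, then push it back with ungetc() so the parser still sees the complete stream. A self-contained illustration of the same peek (the input file name is hypothetical):

#include <cstdio>

// Return true if the stream is at end-of-file, without consuming any input.
// C guarantees at least one character of pushback per stream.
static bool atEof(FILE* fp) {
    int ch = ::getc(fp);
    if (ch == EOF) {
        return true;
    }
    ::ungetc(ch, fp);
    return false;
}

int main() {
    FILE* fp = std::fopen("input.csv", "r");  // hypothetical input file
    if (!fp) {
        std::perror("fopen");
        return 1;
    }
    while (!atEof(fp)) {
        std::putchar(::getc(fp));             // the peeked byte is still available
    }
    std::fclose(fp);
    return 0;
}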