/*
 * Tells whether skipping one line from the scanner's current position
 * reaches the end of the input. The scanner is rewound to where it was
 * before the call, so the check does not consume anything.
 */
bool Scanner::isSingleLine (Scanner &scanner)
{
   const int start = scanner.tell();

   scanner.skipLine();
   const bool reached_end = scanner.isEOF();

   // Restore the original position for the caller
   scanner.seek(start, SEEK_SET);
   return reached_end;
}
/*
 * Reads the full LZW dictionary (including the hash chains, so that the
 * dictionary can be modified later).
 *
 * Stream layout, in the order it is consumed here:
 *   - alphabet size, next code, number of stored entries, bit code size
 *   - for every entry: prefix (dword) followed by the appended char (byte)
 *   - free pointer
 *   - until the end of input: a hash code, the index stored for that hash
 *     code, then a sequence of linked indices terminated by -1
 */
void LzwDict::load( Scanner &_scanner )
{
   _modified = false;

   // Header
   _alphabetSize = _scanner.readBinaryInt();
   _nextCode = _scanner.readBinaryInt();
   const int entryCount = _scanner.readBinaryInt();
   _bitcodeSize = _scanner.readBinaryInt();
   _maxCode = (1 << _bitcodeSize) - 1;

   // Dictionary entries
   _storage.clear();
   _storage.resize(entryCount);

   for (int idx = 0; idx < entryCount; ++idx)
   {
      const int prefix = _scanner.readBinaryDword();

      _storage[idx].Prefix = prefix;
      _storage[idx].AppendChar = _scanner.readByte();
   }

   _freePtr = _scanner.readBinaryInt();

   // Start with empty hash table: -1 marks an unused slot / end of chain
   _hashKeys.clear_resize(SIZE);
   _nextPointers.clear_resize(SIZE);

   for (int slot = 0; slot < SIZE; ++slot)
   {
      _nextPointers[slot] = -1;
      _hashKeys[slot] = -1;
   }

   // Hash chains: each record runs up to its -1 terminator
   while (!_scanner.isEOF())
   {
      const int hashCode = _scanner.readBinaryInt();
      int current = _scanner.readBinaryInt();

      _hashKeys[hashCode] = current;

      for (int next = _scanner.readBinaryInt(); next != -1;
           next = _scanner.readBinaryInt())
      {
         _nextPointers[current] = next;
         current = next;
      }
   }
}
/*
 * Parses a gross formula from the scanner into per-element counters.
 *
 * scanner - input positioned at the beginning of the formula; it is
 *           consumed up to the end of input.
 * gross   - output array; resized to ELEM_MAX and zero-filled, then
 *           gross[elem] accumulates the count of every element read.
 *
 * The input is a sequence of element symbols (read by Element::read),
 * each optionally followed by an unsigned count. A missing count means 1.
 * Whitespace around symbols and counts is skipped. Repeated elements are
 * summed.
 */
void GrossFormula::fromString (Scanner &scanner, Array<int> &gross)
{
   gross.clear_resize(ELEM_MAX);
   gross.zerofill();

   scanner.skipSpace();
   while (!scanner.isEOF())
   {
      int elem = Element::read(scanner);
      scanner.skipSpace();

      int counter = 1;

      // Compare against the digit range explicitly instead of calling
      // isdigit(): lookNext() yields an int, and passing isdigit() a value
      // that is neither EOF nor representable as unsigned char (e.g. a
      // negative value) is undefined behaviour. For every value where
      // isdigit() is defined, this test gives the same answer.
      const int next = scanner.lookNext();

      if (next >= '0' && next <= '9')
      {
         counter = scanner.readUnsigned();
         scanner.skipSpace();
      }

      gross[elem] += counter;
   }
}
/*
 * Imports a (possibly gzipped) text file into an Oracle table, one row
 * per input line.
 *
 * env        - Oracle environment used for statements and debug output
 * table      - name of the target table
 * smiles_col - column receiving the line text
 * id_col     - column receiving the identifier, or 0 to not write one
 * file_name  - path of the input file
 *
 * Each line is expected to start with a whitespace-delimited token,
 * optionally followed by a block enclosed in '|' characters; whatever
 * remains on the line is taken as the identifier. Note that the complete
 * line (not only the first token) is bound to :smiles. When id_col is
 * given but the line carries no identifier, NULL is inserted for it.
 * A COMMIT is issued after every 1000 rows and once more at the end for
 * the remaining rows.
 */
void _importSMILES (OracleEnv &env, const char *table, const char *smiles_col,
                    const char *id_col, const char *file_name)
{
   FileScanner fscanner(file_name);
   AutoPtr<GZipScanner> gzscanner;
   Scanner *input = &fscanner;
   int imported = 0;

   QS_DEF(Array<char>, id);
   QS_DEF(Array<char>, str);

   env.dbgPrintfTS("importing into table %s\n", table);

   // Peek at the first two bytes to see whether they are the gzip magic
   // number, then rewind so that the chosen scanner starts from the top
   byte magic[2];
   const int start = fscanner.tell();

   fscanner.readCharsFix(2, reinterpret_cast<char *>(magic));
   fscanner.seek(start, SEEK_SET);

   const bool gzipped = (magic[0] == 0x1f && magic[1] == 0x8b);

   if (gzipped)
   {
      gzscanner.reset(new GZipScanner(fscanner));
      input = gzscanner.get();
   }

   while (!input->isEOF())
   {
      id.clear();
      input->readLine(str, false);

      BufferScanner line(str);

      // Step over the first token, up to and including the whitespace
      // character that ends it
      line.skipSpace();
      while (!line.isEOF())
      {
         if (isspace(line.readChar()))
            break;
      }
      line.skipSpace();

      // Step over an optional |...| block
      if (line.lookNext() == '|')
      {
         line.readChar();
         while (!line.isEOF())
         {
            if (line.readChar() == '|')
               break;
         }
         line.skipSpace();
      }

      // The rest of the line, if any, is the identifier
      if (!line.isEOF() && id_col != 0)
         line.readLine(id, true);

      // id is zero-terminated by readLine(), so size 1 means empty string
      const bool has_id = id.size() > 1;

      OracleStatement statement(env);

      statement.append("INSERT INTO %s(%s", table, smiles_col);
      if (id_col != 0)
         statement.append(", %s", id_col);

      statement.append(") VALUES(:smiles");
      if (id_col != 0)
      {
         if (has_id)
            statement.append(", :id");
         else
            statement.append(", NULL");
      }
      statement.append(")");

      statement.prepare();

      // Zero-terminate the line only after the line scanner is done with it
      str.push(0);
      statement.bindStringByName(":smiles", str.ptr(), str.size());
      if (has_id)
         statement.bindStringByName(":id", id.ptr(), id.size());

      statement.execute();

      ++imported;

      if (imported % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", imported);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   // Commit the tail that did not fill a whole batch of 1000
   if (imported % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", imported);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}
bool MoleculeAutoLoader::tryMDLCT (Scanner &scanner, Array<char> &outbuf) { int pos = scanner.tell(); bool endmark = false; QS_DEF(Array<char>, curline); outbuf.clear(); while (!scanner.isEOF()) { int len = scanner.readByte(); if (len > 90) // Molfiles and Rxnfiles actually have 80 characters limit { scanner.seek(pos, SEEK_SET); // Garbage after endmark means end of data. // (See the note below about endmarks) if (endmark) return true; return false; } curline.clear(); while (len-- > 0) { if (scanner.isEOF()) { scanner.seek(pos, SEEK_SET); return false; } int c = scanner.readChar(); curline.push(c); } curline.push(0); if (endmark) { // We can not properly read the data to the end as there // is often garbage after the actual MDLCT data. // Instead, we are doing this lousy check: // "M END" or "$END MOL" can be followed only // by "$END CTAB" (in Markush queries), or // by "$MOL" (in Rxnfiles). Otherwise, this // is actually the end of data. if (strcmp(curline.ptr(), "$END CTAB") != 0 && strcmp(curline.ptr(), "$MOL") != 0) { scanner.seek(pos, SEEK_SET); return true; } } if (strcmp(curline.ptr(), "M END") == 0) endmark = true; else if (strcmp(curline.ptr(), "$END MOL") == 0) endmark = true; else endmark = false; outbuf.appendString(curline.ptr(), false); outbuf.push('\n'); } scanner.seek(pos, SEEK_SET); // It happened once that a valid Molfile had successfully // made its way through the above while() cycle, and thus // falsely recognized as MDLCT. To fight this case, we include // here a check that the last line was actually an endmark return endmark; }