/**
 * Builds an SDF loader that reads from the given scanner.
 *
 * The two leading bytes of the input are peeked (the read position is put
 * back afterwards) and compared with the gzip magic number 0x1f 0x8b. On a
 * match the input is read through a GZipScanner created here and flagged as
 * owned; otherwise the address of the caller's scanner is stored and flagged
 * as not owned.
 *
 * @param scanner input source; when the input is not gzipped its address is
 *                kept, so it has to outlive this loader.
 */
SdfLoader::SdfLoader (Scanner &scanner) :
TL_CP_GET(data),
TL_CP_GET(properties),
TL_CP_GET(_offsets),
TL_CP_GET(_preread)
{
   data.clear();
   properties.clear();

   // Peek at the first two bytes, then rewind so that nothing is consumed
   byte magic[2];
   int start = scanner.tell();

   scanner.readCharsFix(2, (char *)magic);
   scanner.seek(start, SEEK_SET);

   const bool gzipped = (magic[0] == 0x1f && magic[1] == 0x8b);

   if (!gzipped)
   {
      // Plain input: borrow the caller's scanner
      _scanner = &scanner;
      _own_scanner = false;
   }
   else
   {
      // Compressed input: read through a decompressing scanner created here
      _scanner = new GZipScanner(scanner);
      _own_scanner = true;
   }

   // Nothing has been read or indexed yet
   _current_number = 0;
   _max_offset = 0;
   _offsets.clear();
   _preread.clear();
}
/**
 * Tells whether the scanner reaches end-of-input after skipping one line
 * from its current position.
 *
 * The scanner is moved back to the position it had on entry before returning.
 *
 * @param scanner input to probe
 * @return true if the scanner is at EOF right after one skipLine() call
 */
bool Scanner::isSingleLine (Scanner &scanner)
{
   const int start = scanner.tell();

   // If one skipped line takes us to EOF, nothing follows that line
   scanner.skipLine();
   const bool reached_end = scanner.isEOF();

   // Rewind so that the caller sees the scanner where it left it
   scanner.seek(start, SEEK_SET);

   return reached_end;
}
SdfLoader::SdfLoader (Scanner &scanner) : CP_INIT, TL_CP_GET(data), TL_CP_GET(properties), TL_CP_GET(_offsets), TL_CP_GET(_preread) { data.clear(); properties.clear(); // detect if input is gzipped byte id[2]; int pos = scanner.tell(); scanner.readCharsFix(2, (char *)id); scanner.seek(pos, SEEK_SET); if (id[0] == 0x1f && id[1] == 0x8b) { _scanner = new GZipScanner(scanner); _own_scanner = true; } else { _scanner = &scanner; _own_scanner = false; } _current_number = 0; _max_offset = 0; _offsets.clear(); _preread.clear(); } SdfLoader::~SdfLoader() { if (_own_scanner) delete _scanner; } int SdfLoader::tell () { return _scanner->tell(); } int SdfLoader::currentNumber () { return _current_number; } int SdfLoader::count () { int offset = _scanner->tell(); int cn = _current_number; if (offset != _max_offset) { _scanner->seek(_max_offset, SEEK_SET); _preread.clear(); _current_number = _offsets.size(); } while (!isEOF()) readNext(); int res = _current_number; if (res != cn) { _scanner->seek(offset, SEEK_SET); _preread.clear(); _current_number = cn; } return res; }
bool MoleculeAutoLoader::tryMDLCT (Scanner &scanner, Array<char> &outbuf) { int pos = scanner.tell(); bool endmark = false; QS_DEF(Array<char>, curline); outbuf.clear(); while (!scanner.isEOF()) { int len = scanner.readByte(); if (len > 90) // Molfiles and Rxnfiles actually have 80 characters limit { scanner.seek(pos, SEEK_SET); // Garbage after endmark means end of data. // (See the note below about endmarks) if (endmark) return true; return false; } curline.clear(); while (len-- > 0) { if (scanner.isEOF()) { scanner.seek(pos, SEEK_SET); return false; } int c = scanner.readChar(); curline.push(c); } curline.push(0); if (endmark) { // We can not properly read the data to the end as there // is often garbage after the actual MDLCT data. // Instead, we are doing this lousy check: // "M END" or "$END MOL" can be followed only // by "$END CTAB" (in Markush queries), or // by "$MOL" (in Rxnfiles). Otherwise, this // is actually the end of data. if (strcmp(curline.ptr(), "$END CTAB") != 0 && strcmp(curline.ptr(), "$MOL") != 0) { scanner.seek(pos, SEEK_SET); return true; } } if (strcmp(curline.ptr(), "M END") == 0) endmark = true; else if (strcmp(curline.ptr(), "$END MOL") == 0) endmark = true; else endmark = false; outbuf.appendString(curline.ptr(), false); outbuf.push('\n'); } scanner.seek(pos, SEEK_SET); // It happened once that a valid Molfile had successfully // made its way through the above while() cycle, and thus // falsely recognized as MDLCT. To fight this case, we include // here a check that the last line was actually an endmark return endmark; }