static void dedup_test(const char *msg,char *base) { struct fs_dat fs; printf("TEST: %s\n",msg); fscanner_init(&fs, base, false); fscanner(&fs, NULL); assertEquals(inodetab_count(fs.itab),3); struct duptab *clusters = dedup_cluster(fs.dtab); assertTrue(clusters); if (!clusters) return; assertEquals(inodetab_count(fs.itab),3); duptab_free(fs.dtab); // We are only interested in the clusters! fs.dtab = clusters; duptab_sort(fs.dtab); // Sort by size.... assertEquals(duptab_count(fs.dtab),1); struct dedup_cb dpcb = { .do_dedup = do_dedup, .ext = NULL }; dedup_pass(&fs,&dpcb); fscanner_close(&fs); }
/**
 * Loads a SMILES file into a database table, issuing one INSERT per input line.
 *
 * The file may be plain text or gzip-compressed; compression is recognised by
 * the two leading bytes 0x1f 0x8b. Each line is scanned as: a leading
 * whitespace-delimited token, an optional section enclosed in '|' characters,
 * and whatever text remains. That remainder becomes the identifier when an id
 * column was requested. Note that the complete line (not just its first token)
 * is what gets bound to :smiles.
 *
 * A COMMIT is issued after every 1000 inserted rows and once more at the end
 * when the total is not a multiple of 1000.
 *
 * @param env         Oracle environment used for logging and statements
 * @param table       target table name (inserted into the SQL text as-is)
 * @param smiles_col  column that receives the line (inserted as-is)
 * @param id_col      column that receives the identifier, or 0 for none;
 *                    rows without an identifier get NULL in this column
 * @param file_name   path of the input file
 */
void _importSMILES (OracleEnv &env, const char *table, const char *smiles_col,
                    const char *id_col, const char *file_name)
{
   FileScanner fscanner(file_name);
   AutoPtr<GZipScanner> gzscanner;
   Scanner *input;
   int imported = 0;

   QS_DEF(Array<char>, id_text);
   QS_DEF(Array<char>, line);

   const bool want_id_col = (id_col != 0);

   env.dbgPrintfTS("importing into table %s\n", table);

   // Peek at the first two bytes to detect gzip, then rewind.
   byte signature[2];
   int start = fscanner.tell();

   fscanner.readCharsFix(2, (char *)signature);
   fscanner.seek(start, SEEK_SET);

   const bool gzipped = (signature[0] == 0x1f && signature[1] == 0x8b);

   if (gzipped)
   {
      gzscanner.reset(new GZipScanner(fscanner));
      input = gzscanner.get();
   }
   else
   {
      input = &fscanner;
   }

   while (!input->isEOF())
   {
      id_text.clear();
      input->readLine(line, false);

      BufferScanner fields(line);

      // Step over the leading token (this also consumes the first
      // whitespace character that follows it).
      fields.skipSpace();
      while (!fields.isEOF())
      {
         if (isspace(fields.readChar()))
            break;
      }
      fields.skipSpace();

      // Optional |...| section: consume it through the closing bar.
      if (fields.lookNext() == '|')
      {
         fields.readChar();
         while (!fields.isEOF())
         {
            if (fields.readChar() == '|')
               break;
         }
         fields.skipSpace();
      }

      // The rest of the line, if any, is the identifier.
      if (want_id_col && !fields.isEOF())
         fields.readLine(id_text, true);

      // id_text is zero-terminated when read, so size 1 means "empty".
      const bool have_id = (id_text.size() > 1);

      OracleStatement statement(env);

      statement.append("INSERT INTO %s(%s", table, smiles_col);
      if (want_id_col)
         statement.append(", %s", id_col);
      statement.append(") VALUES(:smiles");
      if (want_id_col)
         statement.append(have_id ? ", :id" : ", NULL");
      statement.append(")");
      statement.prepare();

      line.push(0);
      statement.bindStringByName(":smiles", line.ptr(), line.size());
      if (have_id)
         statement.bindStringByName(":id", id_text.ptr(), id_text.size());
      statement.execute();

      ++imported;

      if (imported % 1000 == 0)
      {
         env.dbgPrintfTS("imported %d items, commiting\n", imported);
         OracleStatement::executeSingle(env, "COMMIT");
      }
   }

   // Flush the trailing partial batch.
   if (imported % 1000 != 0)
   {
      env.dbgPrintfTS("imported %d items, commiting\n", imported);
      OracleStatement::executeSingle(env, "COMMIT");
   }
}