static void blastXmlToPsl(char *blastXmlFile, char *pslFile, char *scoreFile) /* blastXmlToPsl - convert blast XML output to PSLs. */ { struct xap *xap = xapNew(ncbiBlastStartHandler, ncbiBlastEndHandler, blastXmlFile); xapParseFile(xap, blastXmlFile); FILE *pslFh = mustOpen(pslFile, "w"); FILE *scoreFh = NULL; if (scoreFile != NULL) scoreFh = pslBuildScoresOpen(scoreFile, TRUE); if (xap->topObject == NULL) errAbort("empty BLAST XML file: %s", blastXmlFile); char *expectType = "BlastOutput"; if (!sameString(xap->topType, expectType)) errAbort("expected top XML element of type \"%s\", got \"%s\"", expectType, xap->topType); struct ncbiBlastBlastOutput *outputRec = xap->topObject; unsigned flags = getFlags(outputRec); if (flags & psiblast) convertPsiBlast(outputRec, flags, pslFh, scoreFh); else convertOnePassBlast(outputRec, flags, pslFh, scoreFh); carefulClose(&scoreFh); carefulClose(&pslFh); ncbiBlastBlastOutputFree(&outputRec); xapFree(&xap); }
void autoDtd(char *inXml, char *outDtd, char *outStats, char *treeFileName, char *atreeFileName) /* autoDtd - Give this a XML document to look at and it will come up with a * DTD to describe it.. */ { struct xap *xap = xapNew(startHandler, endHandler, inXml); typeHash = newHash(0); xapParseFile(xap, inXml); writeDtd(outDtd, outStats, inXml, topType); if (treeFileName != NULL) writeTree(treeFileName, topType, FALSE); if (atreeFileName != NULL) writeTree(atreeFileName, topType, TRUE); }
void xmlToSql(char *xmlFileName, char *dtdFileName, char *statsFileName, char *outDir) /* xmlToSql - Convert XML dump into a fairly normalized relational database. */ { struct elStat *elStatList = NULL; struct dtdElement *dtdList, *dtdEl; struct hash *dtdHash, *dtdMixedHash = hashNew(0); struct table *tableList = NULL, *table; struct hash *tableHash = hashNew(0); struct xap *xap = xapNew(startHandler, endHandler, xmlFileName); char outFile[PATH_LEN]; /* Load up dtd and stats file. */ elStatList = elStatLoadAll(statsFileName); verbose(2, "%d elements in %s\n", slCount(elStatList), statsFileName); dtdParse(dtdFileName, globalPrefix, textField, &dtdList, &dtdHash); dtdRenameMixedCase(dtdList); verbose(1, "%d elements in %s\n", dtdHash->elCount, dtdFileName); /* Build up hash of dtdElements keyed by mixed name rather * than tag name. */ for (dtdEl = dtdList; dtdEl != NULL; dtdEl = dtdEl->next) hashAdd(dtdMixedHash, dtdEl->mixedCaseName, dtdEl); /* Create list of tables that correspond to tag types. * This doesn't include any association tables we create * to handle lists of child elements. */ tableList = elsIntoTables(elStatList, dtdHash); verbose(2, "Made tableList\n"); /* Create hashes of the table lists - one keyed by the * table name, and one keyed by the tag name. */ xmlTableHash = hashNew(0); for (table = tableList; table != NULL; table = table->next) { hashAdd(tableHash, table->name, table); hashAdd(xmlTableHash, table->dtdElement->name, table); } verbose(2, "Made table hashes\n"); /* Find top level tag (which we won't actually output). */ countUsesAsChild(dtdList, tableHash); verbose(2, "Past countUsesAsChild\n"); rootTable = findRootTable(tableList); verbose(2, "Root table is %s\n", rootTable->name); /* Add stuff to support parent-child relationships. */ addParentKeys(rootTable->dtdElement, tableHash, &tableList); verbose(2, "Added parent keys\n"); /* Now that all the fields, both attributes and made up * keys are in place, figure out index of field in table. */ calcTablePosOfFields(tableList); /* Make output directory. */ makeDir(outDir); /* Make table creation SQL files. */ for (table = tableList; table != NULL; table = table->next) { if (!table->promoted) { safef(outFile, sizeof(outFile), "%s/%s.sql", outDir, table->name); writeCreateSql(outFile, table); } } verbose(2, "Made sql table creation files\n"); /* Set up output directory and open tab-separated files. */ for (table = tableList; table != NULL; table = table->next) { if (!table->promoted) { safef(outFile, sizeof(outFile), "%s/%s.tab", outDir, table->name); table->tabFile = mustOpen(outFile, "w"); } } verbose(2, "Created output files.\n"); /* Stream through XML adding to tab-separated files.. */ xapParseFile(xap, xmlFileName); verbose(2, "Streamed through XML\n"); /* Close down files */ for (table = tableList; table != NULL; table = table->next) carefulClose(&table->tabFile); verbose(2, "Closed tab files\n"); verbose(1, "All done\n"); }