Beispiel #1
0
static void blastXmlToPsl(char *blastXmlFile, char *pslFile, char *scoreFile)
/* blastXmlToPsl - convert blast XML output to PSLs.
 *   blastXmlFile - BLAST XML file to parse.
 *   pslFile - file that receives the PSL output (opened with mode "w").
 *   scoreFile - optional file handed to pslBuildScoresOpen; NULL to skip it.
 * Calls errAbort if parsing produced no top-level object, or if the top-level
 * element is not "BlastOutput". */
{
struct xap *xap = xapNew(ncbiBlastStartHandler, ncbiBlastEndHandler, blastXmlFile);
xapParseFile(xap, blastXmlFile);

/* Validate the parse result BEFORE opening any output file, so an unusable
 * input does not leave behind freshly created output files (mustOpen with
 * mode "w" presumably truncates an existing file the way fopen does). */
if (xap->topObject == NULL)
    errAbort("empty BLAST XML file: %s", blastXmlFile);
char *expectType = "BlastOutput";
if (!sameString(xap->topType, expectType))
    errAbort("expected top XML element of type \"%s\", got \"%s\"", expectType, xap->topType);
struct ncbiBlastBlastOutput *outputRec = xap->topObject;
unsigned flags = getFlags(outputRec);

/* Input looks usable; now it is safe to open the outputs. */
FILE *pslFh = mustOpen(pslFile, "w");
FILE *scoreFh = NULL;
if (scoreFile != NULL)
    scoreFh = pslBuildScoresOpen(scoreFile, TRUE);

/* The psiblast flag selects which of the two converters handles the record. */
if (flags & psiblast)
    convertPsiBlast(outputRec, flags, pslFh, scoreFh);
else
    convertOnePassBlast(outputRec, flags, pslFh, scoreFh);

carefulClose(&scoreFh);
carefulClose(&pslFh);
ncbiBlastBlastOutputFree(&outputRec);
xapFree(&xap);
}
void autoDtd(char *inXml, char *outDtd, char *outStats, char *treeFileName,
	char *atreeFileName)
/* autoDtd - Look through the XML document inXml and produce a DTD that
 * describes it.  treeFileName and atreeFileName are optional: each one that
 * is non-NULL additionally gets a tree written via writeTree (called with
 * FALSE for treeFileName and TRUE for atreeFileName). */
{
struct xap *parser = xapNew(startHandler, endHandler, inXml);

/* typeHash and topType are defined outside this function; the hash is
 * created fresh here before the parse runs. */
typeHash = newHash(0);
xapParseFile(parser, inXml);

writeDtd(outDtd, outStats, inXml, topType);

if (treeFileName != NULL)
    {
    writeTree(treeFileName, topType, FALSE);
    }
if (atreeFileName != NULL)
    {
    writeTree(atreeFileName, topType, TRUE);
    }
}
Beispiel #3
0
void xmlToSql(char *xmlFileName, char *dtdFileName, char *statsFileName,
	char *outDir)
/* xmlToSql - Convert XML dump into a fairly normalized relational database. */
{
struct elStat *elStatList = NULL;
struct dtdElement *dtdList, *dtdEl;
struct hash *dtdHash, *dtdMixedHash = hashNew(0);
struct table *tableList = NULL, *table;
struct hash *tableHash = hashNew(0);
struct xap *xap = xapNew(startHandler, endHandler, xmlFileName);
char outFile[PATH_LEN];

/* Load up dtd and stats file. */
elStatList = elStatLoadAll(statsFileName);
verbose(2, "%d elements in %s\n", slCount(elStatList), statsFileName);
dtdParse(dtdFileName, globalPrefix, textField,
	&dtdList, &dtdHash);
dtdRenameMixedCase(dtdList);
verbose(1, "%d elements in %s\n", dtdHash->elCount, dtdFileName);

/* Build up hash of dtdElements keyed by mixed name rather
 * than tag name. */
for (dtdEl = dtdList; dtdEl != NULL; dtdEl = dtdEl->next)
    hashAdd(dtdMixedHash, dtdEl->mixedCaseName, dtdEl);

/* Create list of tables that correspond to tag types. 
 * This doesn't include any association tables we create
 * to handle lists of child elements. */
tableList = elsIntoTables(elStatList, dtdHash);
verbose(2, "Made tableList\n");

/* Create hashes of the table lists - one keyed by the
 * table name, and one keyed by the tag name. */
xmlTableHash = hashNew(0);
for (table = tableList; table != NULL; table = table->next)
    {
    hashAdd(tableHash, table->name, table);
    hashAdd(xmlTableHash, table->dtdElement->name, table);
    }
verbose(2, "Made table hashes\n");

/* Find top level tag (which we won't actually output). */
countUsesAsChild(dtdList, tableHash);
verbose(2, "Past countUsesAsChild\n");
rootTable = findRootTable(tableList);
verbose(2, "Root table is %s\n", rootTable->name);

/* Add stuff to support parent-child relationships. */
addParentKeys(rootTable->dtdElement, tableHash, &tableList);
verbose(2, "Added parent keys\n");

/* Now that all the fields, both attributes and made up 
 * keys are in place, figure out index of field in table. */
calcTablePosOfFields(tableList);

/* Make output directory. */
makeDir(outDir);

/* Make table creation SQL files. */
for (table = tableList; table != NULL; table = table->next)
    {
    if (!table->promoted)
	{
	safef(outFile, sizeof(outFile), "%s/%s.sql", 
	  outDir, table->name);
	writeCreateSql(outFile, table);
	}
    }
verbose(2, "Made sql table creation files\n");

/* Set up output directory and open tab-separated files. */
for (table = tableList; table != NULL; table = table->next)
    {
    if (!table->promoted)
	{
	safef(outFile, sizeof(outFile), "%s/%s.tab", 
	  outDir, table->name);
	table->tabFile = mustOpen(outFile, "w");
	}
    }
verbose(2, "Created output files.\n");

/* Stream through XML adding to tab-separated files.. */
xapParseFile(xap, xmlFileName);
verbose(2, "Streamed through XML\n");

/* Close down files */
for (table = tableList; table != NULL; table = table->next)
    carefulClose(&table->tabFile);
verbose(2, "Closed tab files\n");

verbose(1, "All done\n");
}