void gapFileToTable(struct sqlConnection *conn, char *gapFileName, char *gapTableName) /* Build a single gap table from a single gap file. */ { struct lineFile *lf = lineFileOpen(gapFileName, TRUE); char tabFileName[256]; FILE *tabFile = NULL; char *words[16]; int wordCount; safef(tabFileName, sizeof(tabFileName), "%s.tab", gapTableName); tabFile = mustOpen(tabFileName, "w"); while ((wordCount = lineFileChop(lf, words)) > 0) { if (wordCount < 5) errAbort("Short line %d of %s", lf->lineIx, lf->fileName); if (words[4][0] == 'N' || words[4][0] == 'U') { int len = strlen(words[0]); if (len > maxChromNameSize) { maxChromNameSize = len; if (maxChromNameSize > 254) errAbort("ERROR: chrom name size is over 254(%d) characters: " "'%s'", maxChromNameSize, words[0]); } struct agpGap gap; agpGapStaticLoad(words, &gap); gap.chromStart -= 1; fprintf(tabFile, "%u\t", hFindBin(gap.chromStart, gap.chromEnd)); agpGapTabOut(&gap, tabFile); } } lineFileClose(&lf); fclose(tabFile); if (! noLoad) { struct dyString *ds = newDyString(2048); if (unsplit) sqlDyStringPrintf(ds, createGapUnsplit, gapTableName, maxChromNameSize, maxChromNameSize); else sqlDyStringPrintf(ds, createGapSplit, gapTableName); char query[1024]; sqlRemakeTable(conn, gapTableName, ds->string); sqlSafef(query, sizeof(query), "LOAD data local infile '%s' into table %s", tabFileName, gapTableName); sqlUpdate(conn, query); remove(tabFileName); freeDyString(&ds); } }
void splitAgp(char *agpName, char *goldFileName, char *gapFileName) /* Split up agp file into gold and gap files. */ { struct lineFile *lf; char *words[16]; int wordCount; FILE *goldTab, *gapTab; /* Scan through .agp file splitting it into gold * and gap components. */ goldTab = mustOpen(goldFileName, "w"); gapTab = mustOpen(gapFileName, "w"); lf = lineFileOpen(agpName, TRUE); while ((wordCount = lineFileChop(lf, words)) > 0) { int start, end; if (wordCount < 5) errAbort("Short line %d of %s", lf->lineIx, lf->fileName); int len = strlen(words[0]); if (len > maxChromNameSize) { maxChromNameSize = len; if (maxChromNameSize > 254) errAbort("ERROR: chrom name size is over 254(%d) characters: " "'%s'", maxChromNameSize, words[0]); } start = sqlUnsigned(words[1])-1; end = sqlUnsigned(words[2]); if (words[4][0] == 'N' || words[4][0] == 'U') { struct agpGap gap; agpGapStaticLoad(words, &gap); gap.chromStart -= 1; fprintf(gapTab, "%u\t", hFindBin(start, end)); agpGapTabOut(&gap, gapTab); verbose(3,"#GAP\t%s:%d-%d\n", gap.chrom, gap.chromStart, gap.chromEnd); } else { struct agpFrag gold; agpFragStaticLoad(words, &gold); agpFragValidate(&gold); len = strlen(words[5]); if (len > maxFragNameSize) { maxFragNameSize = len; if (maxFragNameSize > 254) errAbort("ERROR: fragment name size is over 254(%d) " "characters: '%s'", maxFragNameSize, words[5]); } // file is 1-based. agpFragLoad() now assumes 0-based. // and agpFragTabOut() will assume 1-based, but we will load // the generated file straight into the database, so // subtract 2: gold.chromStart -= 2; gold.fragStart -= 2; fprintf(goldTab, "%u\t", hFindBin(start, end)); agpFragTabOut(&gold, goldTab); } } lineFileClose(&lf); carefulClose(&goldTab); carefulClose(&gapTab); }