Пример #1
0
void gapFileToTable(struct sqlConnection *conn, char *gapFileName,
		    char *gapTableName)
/* Build a single gap table from a single gap file.
 *   conn         - connection handed to sqlRemakeTable() and sqlUpdate()
 *   gapFileName  - input file, chopped into whitespace-separated words;
 *                  only lines whose fifth word starts with 'N' or 'U'
 *                  are written out, all other lines are skipped
 *   gapTableName - table to (re)create; the intermediate file is named
 *                  <gapTableName>.tab
 * Raises the global maxChromNameSize when a longer first word is seen on
 * a gap line.  When the global noLoad is set, only the .tab file is
 * written and it is left in place; otherwise the table is remade, loaded
 * from the .tab file, and the .tab file is removed.
 * Aborts via errAbort() on short lines, over-long chrom names, or a
 * failed close of the .tab file. */
{
struct lineFile *lf = lineFileOpen(gapFileName, TRUE);
char tabFileName[256];
FILE *tabFile = NULL;
char *words[16];
int wordCount;

safef(tabFileName, sizeof(tabFileName), "%s.tab", gapTableName);
tabFile = mustOpen(tabFileName, "w");
while ((wordCount = lineFileChop(lf, words)) > 0)
    {
    /* Need at least five words to be able to look at words[4]. */
    if (wordCount < 5)
	errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
    if (words[4][0] == 'N' || words[4][0] == 'U')
	{
	/* words[] is an uninitialized stack array that is only filled in
	 * up to wordCount, so the loader below must never be handed a
	 * row shorter than it reads.
	 * NOTE(review): 8 is the AGP gap-line column count that
	 * agpGapStaticLoad() is expected to consume - confirm against
	 * agpGap.h. */
	if (wordCount < 8)
	    errAbort("Short gap line %d of %s: expecting at least 8 words, got %d",
		     lf->lineIx, lf->fileName, wordCount);
	int len = strlen(words[0]);
	if (len > maxChromNameSize)
	    {
	    maxChromNameSize = len;
	    if (maxChromNameSize > 254)
		errAbort("ERROR: chrom name size is over 254(%d) characters: "
			"'%s'", maxChromNameSize, words[0]);
	    }
	struct agpGap gap;
	agpGapStaticLoad(words, &gap);
	/* Start is shifted down by one before binning and output;
	 * presumably converting a 1-based file start to 0-based. */
	gap.chromStart -= 1;
	/* Each output row is the bin number followed by the gap record. */
	fprintf(tabFile, "%u\t", hFindBin(gap.chromStart, gap.chromEnd));
	agpGapTabOut(&gap, tabFile);
	}
    }
lineFileClose(&lf);
/* A failed close means buffered rows may not have reached disk; stop here
 * rather than load a truncated file into the database. */
if (fclose(tabFile) != 0)
    errAbort("Error closing %s", tabFileName);

if (! noLoad)
    {
    struct dyString *ds = newDyString(2048);
    /* The unsplit schema takes the chrom name size twice in addition to
     * the table name; the split schema takes only the table name. */
    if (unsplit)
	sqlDyStringPrintf(ds,  createGapUnsplit, gapTableName,
		maxChromNameSize, maxChromNameSize);
    else
	sqlDyStringPrintf(ds, createGapSplit, gapTableName);
    char query[1024];
    sqlRemakeTable(conn, gapTableName, ds->string);
    sqlSafef(query, sizeof(query), "LOAD data local infile '%s' into table %s", 
	  tabFileName, gapTableName);
    sqlUpdate(conn, query);
    /* The .tab file is only an intermediate once it has been loaded. */
    remove(tabFileName);
    freeDyString(&ds);
    }
}
void splitAgp(char *agpName, char *goldFileName, char *gapFileName)
/* Split up agp file into gold and gap files.
 *   agpName      - input .agp file, chopped into whitespace-separated words
 *   goldFileName - output file receiving one row per non-gap line
 *   gapFileName  - output file receiving one row per gap line (fifth word
 *                  starting with 'N' or 'U')
 * Every output row is prefixed with the bin number computed from the
 * line's start/end.  Raises the globals maxChromNameSize and
 * maxFragNameSize when longer names are seen.  Aborts via errAbort() on
 * short lines or names longer than 254 characters. */
{
    struct lineFile *lf;
    char *words[16];
    int wordCount;
    FILE *goldTab, *gapTab;

    /* Scan through .agp file splitting it into gold
     * and gap components. */
    goldTab = mustOpen(goldFileName, "w");
    gapTab = mustOpen(gapFileName, "w");
    lf = lineFileOpen(agpName, TRUE);
    while ((wordCount = lineFileChop(lf, words)) > 0)
    {
        int start, end;
        /* Need at least five words to read the coordinates and to look
         * at the line type in words[4]. */
        if (wordCount < 5)
            errAbort("Short line %d of %s", lf->lineIx, lf->fileName);
        int len = strlen(words[0]);
        if (len > maxChromNameSize)
        {
            maxChromNameSize = len;
            if (maxChromNameSize > 254)
                errAbort("ERROR: chrom name size is over 254(%d) characters: "
                         "'%s'", maxChromNameSize, words[0]);
        }

        /* Bin coordinates: start is shifted down by one, end is used as is. */
        start = sqlUnsigned(words[1])-1;
        end = sqlUnsigned(words[2]);
        if (words[4][0] == 'N' || words[4][0] == 'U')
        {
            /* words[] is an uninitialized stack array filled in only up
             * to wordCount, so never hand the loader a shorter row than
             * it reads.
             * NOTE(review): 8 is the AGP gap-line column count that
             * agpGapStaticLoad() is expected to consume - confirm
             * against agpGap.h. */
            if (wordCount < 8)
                errAbort("Short gap line %d of %s: expecting at least 8 words, got %d",
                         lf->lineIx, lf->fileName, wordCount);
            struct agpGap gap;
            agpGapStaticLoad(words, &gap);
            gap.chromStart -= 1;
            fprintf(gapTab, "%u\t", hFindBin(start, end));
            agpGapTabOut(&gap, gapTab);
            verbose(3,"#GAP\t%s:%d-%d\n", gap.chrom, gap.chromStart, gap.chromEnd);
        }
        else
        {
            /* Same reasoning as for gap lines: words[5] is dereferenced
             * below and the whole row goes to the loader.
             * NOTE(review): 9 is the AGP component-line column count
             * that agpFragStaticLoad() is expected to consume - confirm
             * against agpFrag.h. */
            if (wordCount < 9)
                errAbort("Short fragment line %d of %s: expecting at least 9 words, got %d",
                         lf->lineIx, lf->fileName, wordCount);
            struct agpFrag gold;
            agpFragStaticLoad(words, &gold);
            agpFragValidate(&gold);
            len = strlen(words[5]);
            if (len > maxFragNameSize)
            {
                maxFragNameSize = len;
                if (maxFragNameSize > 254)
                    errAbort("ERROR: fragment name size is over 254(%d) "
                             "characters: '%s'", maxFragNameSize, words[5]);
            }
            // file is 1-based. agpFragLoad() now assumes 0-based.
            // and agpFragTabOut() will assume 1-based, but we will load
            // the generated file straight into the database, so
            // subtract 2:
            gold.chromStart -= 2;
            gold.fragStart  -= 2;
            fprintf(goldTab, "%u\t", hFindBin(start, end));
            agpFragTabOut(&gold, goldTab);
        }
    }
    lineFileClose(&lf);
    carefulClose(&goldTab);
    carefulClose(&gapTab);

}