Пример #1
0
FILE *openXa(char *organism)
/* Return file handle for xeno-alignments for given organism.
 * The path opened is wormXenoDir() followed by organism and "/all.st";
 * the open itself is delegated to xaOpenVerify(). */
{
char path[512];

/* Bounded formatting: an over-long directory or organism name is cut to
 * fit path[] instead of being written past the end of the buffer.
 * NOTE(review): a truncated path is still handed to xaOpenVerify(), which
 * presumably fails on the nonexistent file - confirm. */
snprintf(path, sizeof(path), "%s%s/all.st", wormXenoDir(), organism);
return xaOpenVerify(path);
}
struct xaAli *getOneXaAli(char *organism, char *xaName)
/* Return a single named xaAli for organism.
 * Looks xaName up in the index at wormXenoDir() + organism + "/all" to get
 * a file offset, then reads one alignment at that offset from the data file
 * with the same base name plus xaAlignSuffix().  Calls errAbort() when a
 * path does not fit its buffer, when xaName is not in the index, or when
 * the seek fails.  The return value is whatever xaReadNext() yields. */
{
char ixFileName[512];
char dataFileName[512];
char *xDir;
struct snof *snof;
long offset;
FILE *f;
struct xaAli *xa;
int len;
int found;

/* Build both paths with bounded formatting so that a long directory or
 * organism name cannot overrun the fixed-size buffers. */
xDir = wormXenoDir();
len = snprintf(ixFileName, sizeof(ixFileName), "%s%s/all", xDir, organism);
if (len < 0 || len >= (int)sizeof(ixFileName))
    errAbort("Index path too long for %s", organism);
len = snprintf(dataFileName, sizeof(dataFileName), "%s%s/all%s", 
    xDir, organism, xaAlignSuffix());
if (len < 0 || len >= (int)sizeof(dataFileName))
    errAbort("Data path too long for %s", organism);

/* The index is only needed for the lookup, so release it before deciding
 * whether to abort. */
snof = snofMustOpen(ixFileName);
found = snofFindOffset(snof, xaName, &offset);
snofClose(&snof);
if (!found)
    errAbort("Couldn't find %s", xaName);

f = xaOpenVerify(dataFileName);
/* A failed seek would otherwise make xaReadNext() read from the wrong
 * position in the file. */
if (fseek(f, offset, SEEK_SET) != 0)
    {
    fclose(f);
    errAbort("Couldn't seek to %ld in %s", offset, dataFileName);
    }
xa = xaReadNext(f, FALSE);
fclose(f);
return xa;
}
Пример #3
0
struct xaAli *xaReadRange(char *rangeIndexFileName, char *dataFileName, 
    int start, int end, boolean condensed)
/* Open the range index and the alignment data file, pass both to
 * xaRdRange() along with start, end and condensed, then close the two
 * files and return what xaRdRange() produced: the list of xaAlis that
 * range from start to end.  If condensed, the query, target, qSym, tSym
 * and hSym fields are not filled in. */
{
FILE *indexFile;
FILE *alignFile;
struct xaAli *result;

/* Index first, then data - same order as the files are named in the
 * parameter list. */
indexFile = xaIxOpenVerify(rangeIndexFileName);
alignFile = xaOpenVerify(dataFileName);

result = xaRdRange(indexFile, alignFile, start, end, condensed);

/* Close in reverse order of opening. */
fclose(alignFile);
fclose(indexFile);
return result;
}
Пример #4
0
void hgWaba(char *database, char *species, char *chromosome, 
	int chromOffset, int wabaFileCount, char *wabaFile[])
/* hgWaba - load Waba alignments into database.
 *   database      - name passed to sqlConnect().
 *   species       - suffix of both table names ("waba<species>").
 *   chromosome    - prefix of the per-chromosome table name and the value
 *                   written to the chromosome column of the full table.
 *   chromOffset   - added to tStart/tEnd of every alignment read; the
 *                   chromosome table is emptied first only when this is 0.
 *   wabaFileCount - number of entries in wabaFile.
 *   wabaFile      - alignment files to read.
 * Writes full_waba.tab and chrom_waba.tab in the current directory and
 * loads them with LOAD DATA LOCAL INFILE.  The tab files are left on disk.
 * If either table name does not fit its buffer a message is written to
 * stderr and nothing is read or loaded. */
{
struct sqlConnection *conn = sqlConnect(database);
FILE *fullTab, *chromTab;
FILE *in;
struct xaAli *xa, *xaList = NULL;
char fullTabName[512], chromTabName[512];
char fullTable[128], chromTable[128];
char *inFile;
int i;
int fullLen, chromLen;
struct dyString *query = newDyString(2048);

/* Build the table names up front with bounded formatting, so an over-long
 * species or chromosome is detected before any file is read or any table
 * is touched, instead of overrunning the fixed-size buffers.
 * NOTE(review): this function has no way to report failure to its caller;
 * the project's abort routine may be a better fit here - confirm. */
fullLen = snprintf(fullTable, sizeof(fullTable), "waba%s", species);
chromLen = snprintf(chromTable, sizeof(chromTable), "%s_waba%s", 
    chromosome, species);
if (fullLen < 0 || fullLen >= (int)sizeof(fullTable)
    || chromLen < 0 || chromLen >= (int)sizeof(chromTable))
    {
    fprintf(stderr, "Table name too long for species %s, chromosome %s\n",
	species, chromosome);
    freeDyString(&query);
    sqlDisconnect(&conn);
    return;
    }

/* Loop through each waba file grabbing sequence into
 * memory, then sort. */
for (i = 0; i < wabaFileCount; ++i)
    {
    inFile = wabaFile[i];
    printf("Processing %s\n", inFile);
    in = xaOpenVerify(inFile);

    while ((xa = xaReadNext(in, FALSE)) != NULL)
        {
	/* Shift target coordinates by the caller-supplied offset. */
	xa->tStart += chromOffset;
	xa->tEnd += chromOffset;
	slAddHead(&xaList, xa);
	}
    carefulClose(&in);
    }
printf("Sorting %d alignments by chromosome position\n", slCount(xaList));
slSort(&xaList, xaAliCmpTstart);

/* Create the tables if needed.  Clear anything in the chrom table, but
 * only when loading at offset 0. */
dyStringClear(query);
sqlDyStringPrintf(query, wabaFullCreate, fullTable);
sqlMaybeMakeTable(conn, fullTable, query->string);
dyStringClear(query);
sqlDyStringPrintf(query, wabaChromCreate, chromTable);
sqlMaybeMakeTable(conn, chromTable, query->string);
if (chromOffset == 0)
    {
    dyStringClear(query);
    sqlDyStringPrintf(query, "DELETE from %s", chromTable);
    sqlUpdate(conn, query->string);
    }

/* Make a temp file for each table we'll update. */
strcpy(fullTabName, "full_waba.tab");
fullTab = mustOpen(fullTabName, "w");
strcpy(chromTabName, "chrom_waba.tab");	
chromTab = mustOpen(chromTabName, "w");

/* Write out tab-delimited files. */
printf("Writing tab-delimited files\n");
for (xa = xaList; xa != NULL; xa = xa->next)
    {
    int tSize = xa->tEnd - xa->tStart;
    /* hSym is passed both as an input and as the last argument, so it is
     * rewritten in place before either row is written.
     * NOTE(review): the full-table row therefore carries the squeezed hSym
     * next to the unsqueezed symCount - confirm this is intended. */
    int squeezedSize = squeezeSym(xa->tSym, xa->hSym, xa->symCount, xa->hSym);

    /* Report and skip alignments whose squeezed length disagrees with
     * the target span; only consistent ones reach the tab files. */
    if (squeezedSize != tSize)
        {
	printf("%s squeezedSize: %d, tEnd, tStart: %d, %d, diff: %d\n", xa->query, squeezedSize, xa->tEnd, xa->tStart, tSize);
	continue;
	}

    fprintf(fullTab, "%s\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n",
    	xa->name, xa->qStart, xa->qEnd, xa->qStrand,
	chromosome, xa->tStart, xa->tEnd,
	xa->milliScore, xa->symCount, 
	xa->qSym, xa->tSym, xa->hSym);
    fprintf(chromTab, "%s\t%d\t%d\t%c\t%d\t%s\n",
        xa->name, xa->tStart, xa->tEnd, xa->qStrand,
	xa->milliScore, xa->hSym);
    }
/* Close the outputs the same way the inputs are closed above, rather than
 * with an unchecked fclose(), before the database reads them back.
 * NOTE(review): presumably carefulClose() reports a failed close - confirm. */
carefulClose(&fullTab);
carefulClose(&chromTab);

printf("Loading %s table in %s\n", chromTable, database);
dyStringClear(query);
sqlDyStringPrintf(query, 
   "LOAD data local infile '%s' into table %s", chromTabName, chromTable);
sqlUpdate(conn, query->string);

printf("Loading %s table in %s\n", fullTable, database);
dyStringClear(query);
sqlDyStringPrintf(query, 
   "LOAD data local infile '%s' into table %s", fullTabName, fullTable);
sqlUpdate(conn, query->string);

printf("Done!\n");

/* The tab files are not removed here. */
sqlDisconnect(&conn);
freeDyString(&query);
}
Пример #5
0
int main(int argc, char *argv[])
/* Write an HTML report of C. briggsae / C. elegans alignments.
 *   argv[1] - sort key: "score", "briggsae" or "elegans".
 *   argv[2] - name of the HTML file to write.
 * Reads every alignment from wormXenoDir() + "cbriggsae/all" +
 * xaAlignSuffix(), rescales each score by the target span, sorts with the
 * comparator selected by argv[1], and writes one linked line per alignment.
 * Returns 0 on success, 1 on a usage or path-length error. */
{
char *outName;
char xaFileName[512];
FILE *xaFile, *out;
struct xaAli *xaList = NULL, *xa;
char *sortBy;
char *subtitle = NULL;
int (*cmp)(const void *va, const void *vb) = NULL;
int pathLen;

if (argc != 3)
    {
    usage();
    return 1;	/* Only reached if usage() returns. */
    }
sortBy = argv[1];
outName = argv[2];

if (sameWord(sortBy, "score"))
    {
    cmp = cmpXaScore;
    subtitle = "(sorted by alignment score)";
    }
else if (sameWord(sortBy, "briggsae"))
    {
    cmp = cmpXaQuery;
    subtitle = "(sorted by <I>C. briggsae</I> region)";
    }
else if (sameWord(sortBy, "elegans"))
    {
    cmp = cmpXaTarget;
    subtitle = "(sorted by <I>C. elegans</I> region)";
    }
else
    {
    usage();
    return 1;	/* Never sort or print with an unset cmp/subtitle. */
    }

/* Read in alignment file. */
pathLen = snprintf(xaFileName, sizeof(xaFileName), "%s%s/all%s", 
    wormXenoDir(), "cbriggsae", xaAlignSuffix());
if (pathLen < 0 || pathLen >= (int)sizeof(xaFileName))
    {
    fprintf(stderr, "Alignment file path is too long\n");
    return 1;
    }
printf("Scanning %s\n", xaFileName);
xaFile = xaOpenVerify(xaFileName);
while ((xa = xaReadNext(xaFile, FALSE)) != NULL)
    {
    /* Replace the per-thousand score with one scaled by target span. */
    xa->milliScore = round(0.001 * xa->milliScore * (xa->tEnd - xa->tStart));
    /* The symbol strings are not needed for the report; free them now and
     * clear the pointers so nothing can use them afterwards. */
    freeMem(xa->qSym);
    xa->qSym = NULL;
    freeMem(xa->tSym);
    xa->tSym = NULL;
    freeMem(xa->hSym);
    xa->hSym = NULL;
    slAddHead(&xaList, xa);
    }
fclose(xaFile);

/* Sort with the selected comparator. */
printf("Sorting...");
slSort(&xaList, cmp);
/* An empty alignment file leaves xaList NULL; do not dereference it. */
if (xaList != NULL)
    printf(" best score %d\n", xaList->milliScore);
else
    printf(" no alignments found\n");

/* Write out .html */
printf("Writing %s\n", outName);
out = mustOpen(outName, "w");
htmStart(out, "C. briggsae/C. elegans Homologies");
fprintf(out, "<H2>Regions with Sequenced <I>C. briggsae</I> Homologs</H2>\n");
fprintf(out, "<H3>%s</H3>\n", subtitle);
fprintf(out, "<TT><PRE><B>");
fprintf(out, "Score  <I>C. elegans Region</I>     <I>C. briggsae</I> Region </B>\n");
fprintf(out, "--------------------------------------------------------\n");
for (xa = xaList; xa != NULL; xa = xa->next)
    {
    /* The link text is the region right-justified in 21 columns.  Measure
     * the region first and emit the padding separately, so no fixed-size
     * scratch buffer is needed and a long target name cannot overflow. */
    int regionLen = snprintf(NULL, 0, "%s:%d-%d", 
	xa->target, xa->tStart, xa->tEnd);
    int pad = (regionLen >= 0 && regionLen < 21) ? 21 - regionLen : 0;

    fprintf(out, "%6d ", xa->milliScore);
    fprintf(out, 
	"<A HREF=\"../cgi-bin/tracks.exe?where=%s:%d-%d\">%*s%s:%d-%d</A> %s:%d-%d %c\n", 
	xa->target, xa->tStart, xa->tEnd, 
	pad, "", xa->target, xa->tStart, xa->tEnd, 
	xa->query, xa->qStart, xa->qEnd, xa->qStrand);
    }
htmEnd(out);
/* out is not closed explicitly; open streams are flushed and closed when
 * main returns. */
return 0;
}