struct xaAli *getOneXaAli(char *organism, char *xaName)
/* Return a single named xaAli for organism.
 * The name is looked up in the organism's "all" index under
 * wormXenoDir(), and the alignment is then read from the matching data
 * file at the offset the index reports.  Aborts via errAbort() if a
 * path does not fit its buffer, if xaName is not in the index, or if
 * the data file cannot be positioned at that offset. */
{
char ixFileName[512];
char dataFileName[512];
char *xDir = wormXenoDir();
struct snof *snof;
struct xaAli *xa;
long offset;
FILE *f;
int len, found;

/* Bounded formatting: a long directory or organism name must not be
 * allowed to write past the end of the fixed-size path buffers. */
len = snprintf(ixFileName, sizeof(ixFileName), "%s%s/all", xDir, organism);
if (len < 0 || len >= (int)sizeof(ixFileName))
    errAbort("Index file name too long for %s", organism);
len = snprintf(dataFileName, sizeof(dataFileName), "%s%s/all%s",
    xDir, organism, xaAlignSuffix());
if (len < 0 || len >= (int)sizeof(dataFileName))
    errAbort("Data file name too long for %s", organism);

/* Close the index before reporting a miss so the handle is released
 * on the failure path as well. */
snof = snofMustOpen(ixFileName);
found = (snofFindOffset(snof, xaName, &offset) != 0);
snofClose(&snof);
if (!found)
    errAbort("Couldn't find %s", xaName);

f = xaOpenVerify(dataFileName);
if (fseek(f, offset, SEEK_SET) != 0)
    {
    /* Reading from an unpositioned stream would return the wrong record. */
    fclose(f);
    errAbort("Couldn't seek to %ld in %s", offset, dataFileName);
    }
xa = xaReadNext(f, FALSE);
fclose(f);
return xa;
}
void liftWab(char *destFile, struct hash *liftHash, 
        int sourceCount, char *sources[], boolean querySide)
/* Lift up coordinates in .wab file.
 *   destFile    - output file, created/truncated here
 *   liftHash    - lookup passed to findLift() to get a spec per sequence
 *   sourceCount - number of entries in sources
 *   sources     - input alignment files, read in order
 *   querySide   - TRUE to lift the query coordinates; lifting the target
 *                 side is not implemented and aborts.
 * Aborts if no lift spec is found for an alignment's sequence name. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    char *source = sources[sourceIx];
    FILE *in = mustOpen(source, "r");
    struct xaAli *xa;

    /* NOTE(review): alignments read here are never released; consider
     * freeing each one (e.g. with xaAliFree) once it has been written. */
    while ((xa = xaReadNext(in, FALSE)) != NULL)
        {
        char *seqName = querySide ? xa->query : xa->target;
        struct liftSpec *spec = findLift(liftHash, seqName, NULL);

        if (spec == NULL)
            {
            /* Dump the offending alignment before giving up. */
            verbose(0,"name:\t%s\n", xa->name);
            verbose(0,"query:\t%s\n", xa->query);
            verbose(0,"qStart,qEnd:\t%d,%d\n", xa->qStart,xa->qEnd);
            verbose(0,"qStrand:\t%c\n", xa->qStrand);
            verbose(0,"target:\t%s\n", xa->target);
            verbose(0,"tStart,tEnd:\t%d,%d\n", xa->tStart,xa->tEnd);
            verbose(0,"tStrand:\t%c\n", xa->tStrand);
            verbose(0,"milliScore:\t%d\n", xa->milliScore);
            verbose(0,"symCount:\t%d\n", xa->symCount);
            errAbort("Can not find lift spec for %s", seqName);
            }

        if (querySide)
            {
            /* presumably aborts for reverse-strand specs -- confirm */
            cantHandleSpecRevStrand(spec);
            xa->qStart += spec->offset;
            xa->qEnd += spec->offset;
            /* Header line: score is written as whole percent plus one
             * decimal digit taken from milliScore. */
            fprintf(f, "%s align %d.%d%% of %d %s.fa %s:%d-%d %c %s:%d-%d %c\n",
                xa->name, xa->milliScore/10, xa->milliScore%10, xa->symCount,
                spec->newName, spec->newName, xa->qStart, xa->qEnd, xa->qStrand,
                xa->target, xa->tStart, xa->tEnd, xa->tStrand);
            }
        else
            {
            errAbort("Sorry, lift for WABA target not yet implemented");
            }

        /* Three symbol lines follow the header, each symCount bytes. */
        mustWrite(f, xa->qSym, xa->symCount);
        fputc('\n', f);
        mustWrite(f, xa->tSym, xa->symCount);
        fputc('\n', f);
        mustWrite(f, xa->hSym, xa->symCount);
        fputc('\n', f);
        }
    carefulClose(&in);
    }

/* The output file was previously left open; close it like the inputs. */
carefulClose(&f);
}
Example #3
0
struct xaAli *xaRdRange(FILE *ix, FILE *data, 
    int start, int end, boolean condensed)
/* Build a list of the xaAlis whose index range overlaps start..end.
 * Both ix and data must already be open.  The condensed flag is handed
 * straight to xaReadNext(); when set, query, target, qSym, tSym and
 * hSym are not filled in.  Results are returned in index order. */
{
struct xaAli *hits = NULL;
int recStart, recEnd;
long dataPos;

/* Index records start after a leading bits32 (presumably a signature --
 * confirm).  Each record is read as start, end, data-file offset. */
fseek(ix, sizeof(bits32), SEEK_SET);
while (readOne(ix, recStart))
    {
    int overlapStart, overlapEnd;

    mustReadOne(ix, recEnd);
    mustReadOne(ix, dataPos);

    /* Stop at the first record starting at or past the requested end;
     * this assumes the index is ordered by start -- confirm. */
    if (recStart >= end)
        break;

    overlapStart = max(recStart, start);
    overlapEnd = min(recEnd, end);
    if (overlapEnd - overlapStart > 0)
        {
        struct xaAli *xa;
        fseek(data, dataPos, SEEK_SET);
        xa = xaReadNext(data, condensed);
        slAddHead(&hits, xa);
        }
    }

/* Entries were pushed on the head, so flip back to index order. */
slReverse(&hits);
return hits;
}
Example #4
0
void hgWaba(char *database, char *species, char *chromosome, 
	int chromOffset, int wabaFileCount, char *wabaFile[])
/* hgWaba - load Waba alignments into database.
 *   database      - database to connect to
 *   species       - suffix used in the table names waba<species> and
 *                   <chromosome>_waba<species>
 *   chromosome    - chromosome name written to the full table and used
 *                   as the prefix of the per-chromosome table
 *   chromOffset   - added to every target start/end; when 0 the
 *                   per-chromosome table is emptied before loading
 *   wabaFileCount - number of entries in wabaFile
 *   wabaFile      - alignment files to read
 * Writes full_waba.tab and chrom_waba.tab in the current directory and
 * loads them with LOAD DATA LOCAL INFILE.  Aborts if a table name does
 * not fit its buffer. */
{
struct sqlConnection *conn;
struct dyString *query;
struct xaAli *xa, *xaList = NULL;
char fullTable[128], chromTable[128];
char *fullTabName = "full_waba.tab";
char *chromTabName = "chrom_waba.tab";
FILE *fullTab, *chromTab;
int i, len;

/* Build the table names up front with bounded writes, so an over-long
 * species/chromosome fails cleanly before any work is done instead of
 * overrunning the name buffers. */
len = snprintf(fullTable, sizeof(fullTable), "waba%s", species);
if (len < 0 || len >= (int)sizeof(fullTable))
    errAbort("Table name waba%s is too long", species);
len = snprintf(chromTable, sizeof(chromTable), "%s_waba%s", chromosome, species);
if (len < 0 || len >= (int)sizeof(chromTable))
    errAbort("Table name %s_waba%s is too long", chromosome, species);

conn = sqlConnect(database);
query = newDyString(2048);

/* Loop through each waba file grabbing sequence into
 * memory, then sort. */
for (i = 0; i < wabaFileCount; ++i)
    {
    char *inFile = wabaFile[i];
    FILE *in;

    printf("Processing %s\n", inFile);
    in = xaOpenVerify(inFile);
    while ((xa = xaReadNext(in, FALSE)) != NULL)
        {
        /* Shift target coordinates by the caller-supplied offset. */
        xa->tStart += chromOffset;
        xa->tEnd += chromOffset;
        slAddHead(&xaList, xa);
        }
    carefulClose(&in);
    }
printf("Sorting %d alignments by chromosome position\n", slCount(xaList));
slSort(&xaList, xaAliCmpTstart);

/* Create the tables themselves.  Clear anything in the chrom table
 * when loading at offset 0. */
dyStringClear(query);
sqlDyStringPrintf(query, wabaFullCreate, fullTable);
sqlMaybeMakeTable(conn, fullTable, query->string);
dyStringClear(query);
sqlDyStringPrintf(query, wabaChromCreate, chromTable);
sqlMaybeMakeTable(conn, chromTable, query->string);
if (chromOffset == 0)
    {
    dyStringClear(query);
    sqlDyStringPrintf(query, "DELETE from %s", chromTable);
    sqlUpdate(conn, query->string);
    }

/* Make a temp file for each table we'll update. */
fullTab = mustOpen(fullTabName, "w");
chromTab = mustOpen(chromTabName, "w");

/* Write out tab-delimited files. */
printf("Writing tab-delimited files\n");
for (xa = xaList; xa != NULL; xa = xa->next)
    {
    int targetSpan = xa->tEnd - xa->tStart;
    /* hSym is squeezed in place (it is both input and output here), so
     * the rows below carry the squeezed hSym. */
    int squeezedSize = squeezeSym(xa->tSym, xa->hSym, xa->symCount, xa->hSym);

    if (squeezedSize != targetSpan)
        {
        /* Inconsistent record: report it and leave it out of both tables. */
        printf("%s squeezedSize: %d, tEnd, tStart: %d, %d, diff: %d\n", xa->query, squeezedSize, xa->tEnd, xa->tStart, targetSpan);
        continue;
        }

    fprintf(fullTab, "%s\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n",
        xa->name, xa->qStart, xa->qEnd, xa->qStrand,
        chromosome, xa->tStart, xa->tEnd,
        xa->milliScore, xa->symCount, 
        xa->qSym, xa->tSym, xa->hSym);
    fprintf(chromTab, "%s\t%d\t%d\t%c\t%d\t%s\n",
        xa->name, xa->tStart, xa->tEnd, xa->qStrand,
        xa->milliScore, xa->hSym);
    }

/* Use carefulClose, as for the inputs, instead of an unchecked fclose:
 * these files are loaded into the database next, so a failed flush
 * must not pass unnoticed (assumes carefulClose reports close errors
 * -- confirm). */
carefulClose(&fullTab);
carefulClose(&chromTab);

printf("Loading %s table in %s\n", chromTable, database);
dyStringClear(query);
sqlDyStringPrintf(query, 
   "LOAD data local infile '%s' into table %s", chromTabName, chromTable);
sqlUpdate(conn, query->string);

printf("Loading %s table in %s\n", fullTable, database);
dyStringClear(query);
sqlDyStringPrintf(query, 
   "LOAD data local infile '%s' into table %s", fullTabName, fullTable);
sqlUpdate(conn, query->string);

printf("Done!\n");

/* The .tab files are intentionally left in place after loading. */
sqlDisconnect(&conn);
freeDyString(&query);
}
Example #5
0
int main(int argc, char *argv[])
/* Read every alignment from an xa file, collect its large gaps and
 * write gap, histogram and intron statistics to a report file.
 *   argv[1] - "elegans" or "briggsae"; for "elegans" the query and
 *             target sides of each alignment are exchanged first
 *   argv[2] - alignment file to read
 *   argv[3] - report file to write
 * intronHash and out are not declared here; presumably they are
 * file-scope variables shared with the reporting routines. */
{
long startTime = clock1000();
struct gapInfo *gapList = NULL;
struct xaAli *xa;
FILE *xaFile;
char *first, *xaName, *newName;
boolean cbFirst;
int count = 0;

if (argc != 4)
    usage();
first = argv[1];
xaName = argv[2];
newName = argv[3];

/* Only the two organism names are accepted. */
if (sameWord("elegans", first))
    {
    cbFirst = FALSE;
    }
else if (sameWord("briggsae", first))
    {
    cbFirst = TRUE;
    }
else
    {
    usage();
    }

dnaUtilOpen();
intronHash = newHash(0);
out = mustOpen(newName, "w");
xaFile = mustOpen(xaName, "r");

for (xa = xaReadNext(xaFile, FALSE); xa != NULL; xa = xaReadNext(xaFile, FALSE))
    {
    struct gapInfo *newGaps;
    char *chromName;

    if (!cbFirst)
        {
        /* Exchange the two sides field by field. */
        char *strTmp;
        int posTmp;
        char strandTmp;

        uglyf("Swapping....\n");

        strTmp = xa->query;
        xa->query = xa->target;
        xa->target = strTmp;

        posTmp = xa->qStart;
        xa->qStart = xa->tStart;
        xa->tStart = posTmp;

        posTmp = xa->qEnd;
        xa->qEnd = xa->tEnd;
        xa->tEnd = posTmp;

        strandTmp = xa->qStrand;
        xa->qStrand = xa->tStrand;
        xa->tStrand = strandTmp;

        strTmp = xa->qSym;
        xa->qSym = xa->tSym;
        xa->tSym = strTmp;

        swapSym(xa->hSym, xa->symCount);
        }

    uglyf("%d  query %s target %s\n", count, xa->query, xa->target);

    /* Replace the target with whatever chromFromPath() derives from it. */
    chromName = chromFromPath(xa->target);
    freeMem(xa->target);
    xa->target = chromName;

    ++count;
    if (count % 500 == 0)
        printf("Processing %d\n", count);

    /* New gaps go in front of the accumulated list; the whole list is
     * reversed once after the loop. */
    newGaps = findLargeGaps(xa, gapList);
    gapList = slCat(newGaps, gapList);
    xaAliFree(xa);
    }

slReverse(&gapList);
report(out, "Processing took %f seconds\n", (clock1000()-startTime)*0.001);

reportGaps(gapList, out);
printAllHistograms(out);
calcCeHomoCount();
printHomologousEndStats(out);
printSameIntronStats(out);
return 0;
}
Example #6
0
int main(int argc, char *argv[])
{
char *outName;
char xaFileName[512];
char region[64];
FILE *xaFile, *out;
struct xaAli *xaList = NULL, *xa;
char *sortBy;
char *subtitle;
int (*cmp)(const void *va, const void *vb);

if (argc != 3)
    {
    usage();
    }
sortBy = argv[1];
outName = argv[2];

if (sameWord(sortBy, "score"))
    {
    cmp = cmpXaScore;
    subtitle = "(sorted by alignment score)";
    }
else if (sameWord(sortBy, "briggsae"))
    {
    cmp = cmpXaQuery;
    subtitle = "(sorted by <I>C. briggsae</I> region)";
    }
else if (sameWord(sortBy, "elegans"))
    {
    cmp = cmpXaTarget;
    subtitle = "(sorted by <I>C. elegans</I> region)";
    }
else
    usage();

/* Read in alignment file. */
sprintf(xaFileName, "%s%s/all%s", wormXenoDir(), "cbriggsae", 
    xaAlignSuffix());
printf("Scanning %s\n", xaFileName);
xaFile = xaOpenVerify(xaFileName);
while ((xa = xaReadNext(xaFile, FALSE)) != NULL)
    {
    xa->milliScore = round(0.001 * xa->milliScore * (xa->tEnd - xa->tStart));
    freeMem(xa->qSym);
    freeMem(xa->tSym);
    freeMem(xa->hSym);
    slAddHead(&xaList, xa);
    }

/* Sort by score. */
printf("Sorting...");
slSort(&xaList, cmp);
printf(" best score %d\n", xaList->milliScore);

/* Write out .html */
printf("Writing %s\n", outName);
out = mustOpen(outName, "w");
htmStart(out, "C. briggsae/C. elegans Homologies");
fprintf(out, "<H2>Regions with Sequenced <I>C. briggsae</I> Homologs</H2>\n");
fprintf(out, "<H3>%s</H3>\n", subtitle);
fprintf(out, "<TT><PRE><B>");
fprintf(out, "Score  <I>C. elegans Region</I>     <I>C. briggsae</I> Region </B>\n");
fprintf(out, "--------------------------------------------------------\n");
for (xa = xaList; xa != NULL; xa = xa->next)
    {
    fprintf(out, "%6d ", xa->milliScore);
    sprintf(region, "%s:%d-%d", xa->target, xa->tStart, xa->tEnd);
    fprintf(out, "<A HREF=\"../cgi-bin/tracks.exe?where=%s\">%21s</A> %s:%d-%d %c", 
        region, region, xa->query, xa->qStart, xa->qEnd, xa->qStrand);
    fprintf(out, "\n");
    }
htmEnd(out);
return 0;
}