struct xaAli *getOneXaAli(char *organism, char *xaName) /* Return a single named xaAli for organism. */ { char ixFileName[512]; char dataFileName[512]; char *xDir; struct snof *snof; long offset; FILE *f; struct xaAli *xa; xDir = wormXenoDir(); sprintf(ixFileName, "%s%s/all", xDir, organism); sprintf(dataFileName, "%s%s/all%s", xDir, organism, xaAlignSuffix()); snof = snofMustOpen(ixFileName); if (!snofFindOffset(snof, xaName, &offset)) errAbort("Couldn't find %s", xaName); snofClose(&snof); f = xaOpenVerify(dataFileName); fseek(f, offset, SEEK_SET); xa = xaReadNext(f, FALSE); fclose(f); return xa; }
void liftWab(char *destFile, struct hash *liftHash, int sourceCount,
	char *sources[], boolean querySide)
/* Lift up coordinates in .wab file.
 * Every alignment read from each of the sourceCount files in sources is
 * looked up in liftHash by its query name (querySide) or target name.
 * If no lift spec is found the alignment's fields are dumped through
 * verbose() and the program aborts.  Only query-side lifting is
 * implemented; the target side aborts.  For each lifted alignment a header
 * line plus the qSym, tSym and hSym lines are written to destFile. */
{
FILE *f = mustOpen(destFile, "w");
int sourceIx;

for (sourceIx = 0; sourceIx < sourceCount; ++sourceIx)
    {
    struct xaAli *xa;
    char *source = sources[sourceIx];
    FILE *in = mustOpen(source, "r");

    /* NOTE(review): the xa records returned by xaReadNext are not released
     * inside this loop. */
    while ((xa = xaReadNext(in, FALSE)) != NULL)
	{
	char *seqName = querySide ? xa->query : xa->target;
	struct liftSpec *spec = findLift(liftHash, seqName, NULL);
	int offset;

	if (spec == NULL)
	    {
	    /* Dump everything known about the alignment before giving up. */
	    verbose(0,"name:\t%s\n", xa->name);
	    verbose(0,"query:\t%s\n", xa->query);
	    verbose(0,"qStart,qEnd:\t%d,%d\n", xa->qStart,xa->qEnd);
	    verbose(0,"qStrand:\t%c\n", xa->qStrand);
	    verbose(0,"target:\t%s\n", xa->target);
	    verbose(0,"tStart,tEnd:\t%d,%d\n", xa->tStart,xa->tEnd);
	    verbose(0,"tStrand:\t%c\n", xa->tStrand);
	    verbose(0,"milliScore:\t%d\n", xa->milliScore);
	    verbose(0,"symCount:\t%d\n", xa->symCount);
	    errAbort("Can not find lift spec for %s", seqName);
	    }
	if (querySide)
	    {
	    cantHandleSpecRevStrand(spec);
	    offset = spec->offset;
	    xa->qStart += offset;
	    xa->qEnd += offset;
	    /* milliScore is printed as a percentage with one decimal. */
	    fprintf(f, "%s align %d.%d%% of %d %s.fa %s:%d-%d %c %s:%d-%d %c\n",
		xa->name, xa->milliScore/10, xa->milliScore%10, xa->symCount,
		spec->newName, spec->newName, xa->qStart, xa->qEnd, xa->qStrand,
		xa->target, xa->tStart, xa->tEnd, xa->tStrand);
	    }
	else
	    {
	    errAbort("Sorry, lift for WABA target not yet implemented");
	    }
	mustWrite(f, xa->qSym, xa->symCount);
	fputc('\n', f);
	mustWrite(f, xa->tSym, xa->symCount);
	fputc('\n', f);
	mustWrite(f, xa->hSym, xa->symCount);
	fputc('\n', f);
	}
    carefulClose(&in);
    }
/* Close the destination too, so it is not left open on return. */
carefulClose(&f);
}
struct xaAli *xaRdRange(FILE *ix, FILE *data, int start, int end, boolean condensed)
/* Return list of all xaAlis that overlap the range start to end.
 * Both ix and data must already be open.  When condensed is set the
 * query, target, qSym, tSym and hSym fields are not filled in.
 * Index entries are visited in file order and the returned list keeps
 * that order. */
{
struct xaAli *hits = NULL;
int entryStart, entryEnd;
long dataOffset;

/* Entries begin after a bits32-sized field at the head of the index. */
fseek(ix, sizeof(bits32), SEEK_SET);

/* Each entry is (start, end, offset-into-data).  Stop at end of index or
 * at the first entry that begins at or beyond the requested end. */
while (readOne(ix, entryStart))
    {
    int overlapStart, overlapEnd;
    struct xaAli *xa;

    mustReadOne(ix, entryEnd);
    mustReadOne(ix, dataOffset);
    if (entryStart >= end)
	break;

    /* Skip entries that share no bases with the requested range. */
    overlapStart = max(entryStart, start);
    overlapEnd = min(entryEnd, end);
    if (overlapEnd - overlapStart <= 0)
	continue;

    fseek(data, dataOffset, SEEK_SET);
    xa = xaReadNext(data, condensed);
    slAddHead(&hits, xa);
    }

/* Records were pushed onto the head, so flip back to index order. */
slReverse(&hits);
return hits;
}
void hgWaba(char *database, char *species, char *chromosome, int chromOffset, int wabaFileCount, char *wabaFile[]) /* hgWaba - load Waba alignments into database. */ { struct sqlConnection *conn = sqlConnect(database); FILE *fullTab, *chromTab; FILE *in; struct xaAli *xa, *xaList = NULL; char fullTabName[512], chromTabName[512]; char fullTable[128], chromTable[128]; char *inFile; int i; struct dyString *query = newDyString(2048); /* Loop through each waba file grabbing sequence into * memory, then sort. */ for (i = 0; i < wabaFileCount; ++i) { inFile = wabaFile[i]; printf("Processing %s\n", inFile); in = xaOpenVerify(inFile); while ((xa = xaReadNext(in, FALSE)) != NULL) { xa->tStart += chromOffset; xa->tEnd += chromOffset; slAddHead(&xaList, xa); } carefulClose(&in); } printf("Sorting %d alignments by chromosome position\n", slCount(xaList)); slSort(&xaList, xaAliCmpTstart); /* Create names of tables and the tables themselves. * Clear anything in the chrom table. */ sprintf(fullTable, "waba%s", species); sprintf(chromTable, "%s_waba%s", chromosome, species); dyStringClear(query); sqlDyStringPrintf(query, wabaFullCreate, fullTable); sqlMaybeMakeTable(conn, fullTable, query->string); dyStringClear(query); sqlDyStringPrintf(query, wabaChromCreate, chromTable); sqlMaybeMakeTable(conn, chromTable, query->string); if (chromOffset == 0) { dyStringClear(query); sqlDyStringPrintf(query, "DELETE from %s", chromTable); sqlUpdate(conn, query->string); } /* Make a temp file for each table we'll update. */ strcpy(fullTabName, "full_waba.tab"); fullTab = mustOpen(fullTabName, "w"); strcpy(chromTabName, "chrom_waba.tab"); chromTab = mustOpen(chromTabName, "w"); /* Write out tab-delimited files. 
*/ printf("Writing tab-delimited files\n"); for (xa = xaList; xa != NULL; xa = xa->next) { int squeezedSize; squeezedSize = squeezeSym(xa->tSym, xa->hSym, xa->symCount, xa->hSym); if( squeezedSize != xa->tEnd - xa->tStart ) { printf("%s squeezedSize: %d, tEnd, tStart: %d, %d, diff: %d\n", xa->query, squeezedSize, xa->tEnd, xa->tStart, xa->tEnd - xa->tStart ); } else { fprintf(fullTab, "%s\t%d\t%d\t%c\t%s\t%d\t%d\t%d\t%d\t%s\t%s\t%s\n", /*xa->query, xa->qStart, xa->qEnd, xa->qStrand,*/ xa->name, xa->qStart, xa->qEnd, xa->qStrand, chromosome, xa->tStart, xa->tEnd, xa->milliScore, xa->symCount, xa->qSym, xa->tSym, xa->hSym); assert(squeezedSize == xa->tEnd - xa->tStart); fprintf(chromTab, "%s\t%d\t%d\t%c\t%d\t%s\n", /*xa->query, xa->tStart, xa->tEnd, xa->qStrand,*/ xa->name, xa->tStart, xa->tEnd, xa->qStrand, xa->milliScore, xa->hSym); } } fclose(fullTab); fclose(chromTab); printf("Loading %s table in %s\n", chromTable, database); dyStringClear(query); sqlDyStringPrintf(query, "LOAD data local infile '%s' into table %s", chromTabName, chromTable); sqlUpdate(conn, query->string); printf("Loading %s table in %s\n", fullTable, database); dyStringClear(query); sqlDyStringPrintf(query, "LOAD data local infile '%s' into table %s", fullTabName, fullTable); sqlUpdate(conn, query->string); printf("Done!\n"); // remove(fullTabName); // remove(chromTabName); sqlDisconnect(&conn); freeDyString(&query); }
int main(int argc, char *argv[]) { FILE *xaFile; struct xaAli *xa; struct gapInfo *gapList = NULL, *gaps; int count = 0; long startTime = clock1000(); char *xaName, *newName; char *first; boolean cbFirst; if (argc != 4) usage(); first = argv[1]; xaName = argv[2]; newName = argv[3]; if (sameWord("elegans", first)) cbFirst = FALSE; else if (sameWord("briggsae", first)) cbFirst = TRUE; else usage(); dnaUtilOpen(); intronHash = newHash(0); out = mustOpen(newName, "w"); xaFile = mustOpen(xaName, "r"); while ((xa = xaReadNext(xaFile, FALSE)) != NULL) { char *s; if (!cbFirst) { char *swaps; int swapi; char swapc; uglyf("Swapping....\n"); swaps = xa->query; xa->query = xa->target; xa->target = swaps; swapi = xa->qStart; xa->qStart = xa->tStart; xa->tStart = swapi; swapi = xa->qEnd; xa->qEnd = xa->tEnd; xa->tEnd = swapi; swapc = xa->qStrand; xa->qStrand = xa->tStrand; xa->tStrand = swapc; swaps = xa->qSym; xa->qSym = xa->tSym; xa->tSym = swaps; swapSym(xa->hSym, xa->symCount); } uglyf("%d query %s target %s\n", count, xa->query, xa->target); s = chromFromPath(xa->target); freeMem(xa->target); xa->target = s; if (++count % 500 == 0) printf("Processing %d\n", count); gaps = findLargeGaps(xa, gapList); gapList = slCat(gaps, gapList); xaAliFree(xa); } slReverse(&gapList); report(out, "Processing took %f seconds\n", (clock1000()-startTime)*0.001); reportGaps(gapList, out); printAllHistograms(out); calcCeHomoCount(); printHomologousEndStats(out); printSameIntronStats(out); return 0; }
int main(int argc, char *argv[]) { char *outName; char xaFileName[512]; char region[64]; FILE *xaFile, *out; struct xaAli *xaList = NULL, *xa; char *sortBy; char *subtitle; int (*cmp)(const void *va, const void *vb); if (argc != 3) { usage(); } sortBy = argv[1]; outName = argv[2]; if (sameWord(sortBy, "score")) { cmp = cmpXaScore; subtitle = "(sorted by alignment score)"; } else if (sameWord(sortBy, "briggsae")) { cmp = cmpXaQuery; subtitle = "(sorted by <I>C. briggsae</I> region)"; } else if (sameWord(sortBy, "elegans")) { cmp = cmpXaTarget; subtitle = "(sorted by <I>C. elegans</I> region)"; } else usage(); /* Read in alignment file. */ sprintf(xaFileName, "%s%s/all%s", wormXenoDir(), "cbriggsae", xaAlignSuffix()); printf("Scanning %s\n", xaFileName); xaFile = xaOpenVerify(xaFileName); while ((xa = xaReadNext(xaFile, FALSE)) != NULL) { xa->milliScore = round(0.001 * xa->milliScore * (xa->tEnd - xa->tStart)); freeMem(xa->qSym); freeMem(xa->tSym); freeMem(xa->hSym); slAddHead(&xaList, xa); } /* Sort by score. */ printf("Sorting..."); slSort(&xaList, cmp); printf(" best score %d\n", xaList->milliScore); /* Write out .html */ printf("Writing %s\n", outName); out = mustOpen(outName, "w"); htmStart(out, "C. briggsae/C. elegans Homologies"); fprintf(out, "<H2>Regions with Sequenced <I>C. briggsae</I> Homologs</H2>\n"); fprintf(out, "<H3>%s</H3>\n", subtitle); fprintf(out, "<TT><PRE><B>"); fprintf(out, "Score <I>C. elegans Region</I> <I>C. briggsae</I> Region </B>\n"); fprintf(out, "--------------------------------------------------------\n"); for (xa = xaList; xa != NULL; xa = xa->next) { fprintf(out, "%6d ", xa->milliScore); sprintf(region, "%s:%d-%d", xa->target, xa->tStart, xa->tEnd); fprintf(out, "<A HREF=\"../cgi-bin/tracks.exe?where=%s\">%21s</A> %s:%d-%d %c", region, region, xa->query, xa->qStart, xa->qEnd, xa->qStrand); fprintf(out, "\n"); } htmEnd(out); return 0; }