int main(int argc, char *argv[]) { char *genoListName; char *otherListName; char *oocFileName; char *typeName; char *outName; struct patSpace *patSpace; long startTime, endTime; char **genoList; int genoListSize; char *genoListBuf; char **otherList; int otherListSize; char *otherListBuf; char *genoName; int i; int blockCount = 0; struct dnaSeq **seqListList = NULL, *seq = NULL; char *outRoot; struct sqlConnection *conn; enum ffStringency stringency = ffCdna; int seedSize = 10; FILE *out; boolean noHead = FALSE; struct repeatTracker *rt; struct hash *repeatHash = newHash(10); hostName = getenv("HOST"); pushWarnHandler(warnHandler); startTime = clock1(); cgiSpoof(&argc, argv); minMatch = cgiOptionalInt("minMatch", minMatch); maxBad = cgiOptionalInt("maxBad", maxBad); minBases = cgiOptionalInt("minBases", minBases); dnaUtilOpen(); #ifdef DEBUG /* Hard wire command line input so don't have to type it in each * time run the stupid Gnu debugger. */ genoListName = "pFoo/geno.lst"; otherListName = "pFoo/bacend.lst"; typeName = "genomic"; oocFileName = "/d/biodata/human/10.ooc"; outName = "pFoo/pFoo.psl"; #else if (argc != 6 && argc != 7) usage(); genoListName = argv[1]; otherListName = argv[2]; typeName = argv[3]; oocFileName = argv[4]; if (sameWord(oocFileName, "none")) oocFileName = NULL; outName = argv[5]; if (argc == 7) { if (sameWord("noHead", argv[6])) noHead = TRUE; else usage(); } #endif if (sameWord(typeName, "mRNA") || sameWord(typeName, "cDNA")) { stringency = ffCdna; } else if (sameWord(typeName, "genomic")) { stringency = ffTight; } else if (sameWord(typeName, "g2g")) { stringency = ffTight; veryTight = TRUE; seedSize = 11; } else if (sameString(typeName, "asm")) { stringency = ffTight; avoidSelfSelf = TRUE; } else { warn("Unrecognized otherType %s\n", typeName); usage(); } readAllWordsOrFa(genoListName, &genoList, &genoListSize, &genoListBuf); filterMissingFiles(genoList, &genoListSize); if (genoListSize <= 0) errAbort("There are no files that exist in %s\n", genoListName); readAllWordsOrFa(otherListName, &otherList, &otherListSize, &otherListBuf); if (otherListSize <= 0) errAbort("There are no files that exist in %s\n", otherListName); filterMissingFiles(otherList, &otherListSize); out = mustOpen(outName, "w"); if (!noHead) pslWriteHead(out); AllocArray(seqListList, genoListSize); for (i=0; i<genoListSize; ++i) { genoName = genoList[i]; if (!startsWith("#", genoName) ) seqListList[i] = seq = faReadAllDna(genoName); for (;seq != NULL; seq = seq->next) { int size = seq->size; char *name = seq->name; struct hashEl *hel; AllocVar(rt); AllocArray(rt->repBytes, size); rt->seq = seq; if ((hel = hashLookup(repeatHash, name)) != NULL) errAbort("Duplicate %s in %s\n", name, genoName); hashAdd(repeatHash, name, rt); } storeMasked(repeatHash, genoName); } patSpace = makePatSpace(seqListList, genoListSize, seedSize, oocFileName, minMatch, 2000); endTime = clock1(); printf("Made index in %ld seconds\n", (endTime-startTime)); startTime = endTime; for (i=0; i<otherListSize; ++i) { FILE *f; char *otherName; int c; int dotCount = 0; struct dnaSeq otherSeq; ZeroVar(&otherSeq); otherName = otherList[i]; if (startsWith("#", otherName) ) continue; f = mustOpen(otherName, "r"); while ((c = fgetc(f)) != EOF) if (c == '>') break; printf("%s\n", otherName); fflush(stdout); while (faFastReadNext(f, &otherSeq.dna, &otherSeq.size, &otherSeq.name)) { aliSeqName = otherSeq.name; oneStrand(patSpace, repeatHash, &otherSeq, FALSE, stringency, out); reverseComplement(otherSeq.dna, otherSeq.size); oneStrand(patSpace, repeatHash, &otherSeq, TRUE, stringency, out); aliSeqName = NULL; } fclose(f); } freePatSpace(&patSpace); endTime = clock1(); printf("Alignment time is %ld sec\n", (endTime-startTime)); startTime = endTime; fclose(out); return 0; }
void firstPass(char *aList, char *bList, char *outName) /* Do first pass - find areas of homology between a and b, * save to outName. */ { char *aNameBuf, **aNames; char *bNameBuf, **bNames; int aCount, bCount; struct nt4Seq **bNts, *bNt, *bNtList = NULL; int bNtCount; int i; FILE *out = mustOpen(outName, "w"); /* Read in fa file lists . */ readAllWordsOrFa(aList, &aNames, &aCount, &aNameBuf); readAllWordsOrFa(bList, &bNames, &bCount, &bNameBuf); /* Convert second list to nt4 (packed) format in memory. */ printf("Loading and packing dna in %s\n", bList); for (i=0; i<bCount; ++i) { char *bName = bNames[i]; struct dnaSeq *seqList, *seq; seqList = faReadAllDna(bName); for (seq = seqList; seq != NULL; seq = seq->next) { char uniqName[512]; sprintf(uniqName, "%s@%s", seq->name, bName); bNt = newNt4(seq->dna, seq->size, uniqName); slAddHead(&bNtList, bNt); } freeDnaSeqList(&seqList); } slReverse(&bNtList); bNtCount = slCount(bNtList); AllocArray(bNts, bNtCount); for (i=0, bNt=bNtList; i<bNtCount; ++i, bNt=bNt->next) bNts[i] = bNt; printf("Loaded %d contigs from %d files\n", bNtCount, bCount); /* Align elements of A list one at a time against B list. */ for (i=0; i<aCount; ++i) { char *aName = aNames[i]; struct dnaSeq *seqList, *seq; printf("Aligning %s against %s\n", aName, bList); seqList = faReadAllDna(aName); for (seq = seqList; seq != NULL; seq = seq->next) { doCrude(aName, seq, bNts, bNtCount, out); } printf("\n"); freeDnaSeqList(&seqList); } /* Cleanup time. */ for (i=0; i<bNtCount; ++i) freeNt4(&bNts[i]); freeMem(bNts); freeMem(aNames); freeMem(bNames); freeMem(aNameBuf); freeMem(bNameBuf); fclose(out); }