int main(int argc, char *argv[])
{
char *genoListName;
char *otherListName;
char *oocFileName;
char *typeName;
char *outName;
struct patSpace *patSpace;
long startTime, endTime;
char **genoList;
int genoListSize;
char *genoListBuf;
char **otherList;
int otherListSize;
char *otherListBuf;
char *genoName;
int i;
int blockCount = 0;
struct dnaSeq **seqListList = NULL, *seq = NULL;
char *outRoot;
struct sqlConnection *conn;
enum ffStringency stringency = ffCdna;
int seedSize = 10;
FILE *out;
boolean noHead = FALSE;
struct repeatTracker *rt;
struct hash *repeatHash = newHash(10);

hostName = getenv("HOST");
pushWarnHandler(warnHandler);

startTime = clock1();
cgiSpoof(&argc, argv);
minMatch = cgiOptionalInt("minMatch", minMatch);
maxBad = cgiOptionalInt("maxBad", maxBad);
minBases = cgiOptionalInt("minBases", minBases);

dnaUtilOpen();

#ifdef DEBUG
/* Hard wire command line input so don't have to type it in each 
 * time run the stupid Gnu debugger. */

genoListName = "pFoo/geno.lst";
otherListName = "pFoo/bacend.lst";
typeName = "genomic";
oocFileName = "/d/biodata/human/10.ooc";
outName = "pFoo/pFoo.psl";

#else

if (argc != 6 && argc != 7)
    usage();

genoListName = argv[1];
otherListName = argv[2];
typeName = argv[3];
oocFileName = argv[4];
if (sameWord(oocFileName, "none"))
    oocFileName = NULL;
outName = argv[5];
if (argc == 7)
    {
    if (sameWord("noHead", argv[6]))
	noHead = TRUE;
    else
	usage();
    }

#endif 

if (sameWord(typeName, "mRNA") || sameWord(typeName, "cDNA"))
    {
    stringency = ffCdna;
    }
else if (sameWord(typeName, "genomic"))
    {
    stringency = ffTight;
    }
else if (sameWord(typeName, "g2g"))
    {
    stringency = ffTight;
    veryTight = TRUE;
    seedSize = 11;
    }
else if (sameString(typeName, "asm"))
    {
    stringency = ffTight;
    avoidSelfSelf = TRUE;
    }
else
    {
    warn("Unrecognized otherType %s\n", typeName);
    usage();
    }

readAllWordsOrFa(genoListName, &genoList, &genoListSize, &genoListBuf);
filterMissingFiles(genoList, &genoListSize);
if (genoListSize <= 0)
    errAbort("There are no files that exist in %s\n", genoListName);
readAllWordsOrFa(otherListName, &otherList, &otherListSize, &otherListBuf);
if (otherListSize <= 0)
    errAbort("There are no files that exist in %s\n", otherListName);
filterMissingFiles(otherList, &otherListSize);
out = mustOpen(outName, "w");
if (!noHead)
    pslWriteHead(out);

AllocArray(seqListList, genoListSize);
for (i=0; i<genoListSize; ++i)
    {
    genoName = genoList[i];
    if (!startsWith("#", genoName)  )
        seqListList[i] = seq = faReadAllDna(genoName);
    for (;seq != NULL; seq = seq->next)
	{
	int size = seq->size;
	char *name = seq->name;
	struct hashEl *hel;
	AllocVar(rt);
	AllocArray(rt->repBytes, size);
	rt->seq = seq;
	if ((hel = hashLookup(repeatHash, name)) != NULL)
	    errAbort("Duplicate %s in %s\n", name, genoName);
	hashAdd(repeatHash, name, rt);
	}
    storeMasked(repeatHash, genoName);
    }

patSpace = makePatSpace(seqListList, genoListSize, seedSize, oocFileName, minMatch, 2000);
endTime = clock1();
printf("Made index in %ld seconds\n",  (endTime-startTime));
startTime = endTime;

for (i=0; i<otherListSize; ++i)
    {
    FILE *f;
    char *otherName;
    int c;
    int dotCount = 0;
    struct dnaSeq otherSeq;
    ZeroVar(&otherSeq);

    otherName = otherList[i];
    if (startsWith("#", otherName)  )
	continue;
    f = mustOpen(otherName, "r");
    while ((c = fgetc(f)) != EOF)
	if (c == '>')
	    break;
    printf("%s\n", otherName);
    fflush(stdout);
    while (faFastReadNext(f, &otherSeq.dna, &otherSeq.size, &otherSeq.name))
        {
	aliSeqName = otherSeq.name;
	oneStrand(patSpace, repeatHash, &otherSeq, FALSE, stringency, out);
	reverseComplement(otherSeq.dna, otherSeq.size);
	oneStrand(patSpace, repeatHash, &otherSeq, TRUE, stringency, out);
	aliSeqName = NULL;
        }
    fclose(f);
    }
freePatSpace(&patSpace);
endTime = clock1();
printf("Alignment time is %ld sec\n", (endTime-startTime));
startTime = endTime;
fclose(out);
return 0;
}
Beispiel #2
0
void firstPass(char *aList, char *bList, char *outName)
/* Do first pass - find areas of homology between a and b,
 * save to outName. */
{
char *aNameBuf, **aNames;
char *bNameBuf, **bNames;
int aCount, bCount;
struct nt4Seq **bNts, *bNt, *bNtList = NULL;
int bNtCount;
int i;
FILE *out = mustOpen(outName, "w");

/* Read in fa file lists . */
readAllWordsOrFa(aList, &aNames, &aCount, &aNameBuf);
readAllWordsOrFa(bList, &bNames, &bCount, &bNameBuf);

/* Convert second list to nt4 (packed) format in memory. */
printf("Loading and packing dna in %s\n", bList);
for (i=0; i<bCount; ++i)
    {
    char *bName = bNames[i];
    struct dnaSeq *seqList, *seq;

    seqList = faReadAllDna(bName);
    for (seq = seqList; seq != NULL; seq = seq->next)
	{
	char uniqName[512];
	sprintf(uniqName, "%s@%s", seq->name, bName);
	bNt = newNt4(seq->dna, seq->size, uniqName);
	slAddHead(&bNtList, bNt);
	}
    freeDnaSeqList(&seqList);
    }
slReverse(&bNtList);
bNtCount = slCount(bNtList);
AllocArray(bNts, bNtCount);
for (i=0, bNt=bNtList; i<bNtCount; ++i, bNt=bNt->next)
    bNts[i] = bNt;
printf("Loaded %d contigs from %d files\n", bNtCount, bCount);

/* Align elements of A list one at a time against B list. */
for (i=0; i<aCount; ++i)
    {
    char *aName = aNames[i];
    struct dnaSeq *seqList, *seq;

    printf("Aligning %s against %s\n", aName, bList);
    seqList = faReadAllDna(aName);
    for (seq = seqList; seq != NULL; seq = seq->next)
	{
	doCrude(aName, seq, bNts, bNtCount, out);
	}
    printf("\n");
    freeDnaSeqList(&seqList);
    }


/* Cleanup time. */
for (i=0; i<bNtCount; ++i)
    freeNt4(&bNts[i]);
freeMem(bNts);
freeMem(aNames);
freeMem(bNames);
freeMem(aNameBuf);
freeMem(bNameBuf);
fclose(out);
}