void relPairs(char *allPairName, char *inPslName, char *outPairName, char *dirs[], int dirCount)
/* extract relevant pairs that could apply to a .psl file in each dir. */
{
struct hash *pairHash;
struct pair *pairList;
char inPslPath[512];
char outPairPath[512];
char *dir;
int i;

makePairHash(allPairName, &pairHash, &pairList);
for (i=0; i<dirCount; ++i)
    {
    dir = dirs[i];
    sprintf(inPslPath, "%s/%s", dir, inPslName);
    sprintf(outPairPath, "%s/%s", dir, outPairName);
    if (fileExists(inPslPath))
	extractUsedPairs(pairHash, inPslPath, outPairPath);
    }
freeHash(&pairHash);
slFreeList(&pairList);
}
Ejemplo n.º 2
0
int main(int argc, char *argv[])
{
char *genoListName;
char *cdnaListName;
char *oocFileName;
char *pairFileName;
struct patSpace *patSpace;
long startTime, endTime;
char **genoList;
int genoListSize;
char *genoListBuf;
char **cdnaList;
int cdnaListSize;
char *cdnaListBuf;
char *genoName;
int i;
int estIx = 0;
struct dnaSeq **seqListList = NULL, *seq;
static char hitFileName[512], mergerFileName[512], okFileName[512];
char *outRoot;
struct hash *pairHash;

if (dumpMe)
    {
    bigHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patAli.html", "w");
    littleHtmlFile = mustOpen("C:\\inetpub\\wwwroot\\test\\patSpace.html", "w");
    htmStart(bigHtmlFile, "PatSpace Alignments");
    htmStart(littleHtmlFile, "PatSpace Index");
    }

if ((hostName = getenv("HOST")) == NULL)
    hostName = "";

if (argc != 6)
    usage();

pushWarnHandler(patSpaceWarnHandler);
startTime = clock1000();
dnaUtilOpen();
makePolys();
genoListName = argv[1];
cdnaListName = argv[2];
oocFileName = argv[3];
pairFileName = argv[4];
outRoot = argv[5];

sprintf(hitFileName, "%s.hit", outRoot);
sprintf(mergerFileName, "%s.glu", outRoot);
sprintf(okFileName, "%s.ok", outRoot);

readAllWords(genoListName, &genoList, &genoListSize, &genoListBuf);
readAllWords(cdnaListName, &cdnaList, &cdnaListSize, &cdnaListBuf);
pairHash = makePairHash(pairFileName);

hitOut = mustOpen(hitFileName, "w");
mergerOut = mustOpen(mergerFileName, "w");
dumpOut = mustOpen("dump.out", "w");
seqListList = needMem(genoListSize*sizeof(seqListList[0]) );
fprintf(hitOut, "Pattern space 0.2 cDNA matcher\n");
fprintf(hitOut, "cDNA files: ", cdnaListSize);
for (i=0; i<cdnaListSize; ++i)
    fprintf(hitOut, " %s", cdnaList[i]);
fprintf(hitOut, "\n");
fprintf(hitOut, "%d genomic files\n", genoListSize);
for (i=0; i<genoListSize; ++i)
    {
    genoName = genoList[i];
    if (!startsWith("//", genoName)  )
        {
        seqListList[i] = seq = faReadAllDna(genoName);
        fprintf(hitOut, "%d els in %s ", slCount(seq), genoList[i]);
        for (; seq != NULL; seq = seq->next)
            fprintf(hitOut, "%d ", seq->size);
        fprintf(hitOut, "\n");
        }
    }

patSpace = makePatSpace(seqListList, genoListSize, oocFileName);

for (i=0; i<cdnaListSize; ++i)
    {
    FILE *f;
    char *estFileName;
    DNA *dna;
    char *estName;
    int size;
    int c;
    int maxSizeForFuzzyFind = 20000;
    int dotCount = 0;

    estFileName = cdnaList[i];
    if (startsWith("//", estFileName)  )
		continue;

    f = mustOpen(estFileName, "rb");
    while ((c = fgetc(f)) != EOF)
    if (c == '>')
        break;
    printf("%s", cdnaList[i]);
    fflush(stdout);
    while (fastFaReadNext(f, &dna, &size, &estName))
        {
	aliSeqName = estName;
        if (size < maxSizeForFuzzyFind)  /* Some day need to fix this somehow... */
            {
            struct hashEl *hel;
            struct cdnaAliList *calList = NULL;

            hel = hashLookup(pairHash, estName);
            if (hel != NULL)    /* Do pair processing. */
                {
                struct estPair *ep;
                struct seq *thisSeq, *otherSeq;

                ep = hel->val;
                if (hel->name == ep->name3)
                    {
                    thisSeq = &ep->seq3;
                    otherSeq = &ep->seq5;
                    }
                else
                    {
                    thisSeq = &ep->seq5;
                    otherSeq = &ep->seq3;
                    }
                if (otherSeq->dna == NULL)  /* First in pair - need to save sequence. */
                    {
                    thisSeq->size = size;
                    thisSeq->dna = needMem(size);
                    memcpy(thisSeq->dna, dna, size);
                    }
                else                        /* Second in pair - do gluing and free partner. */
                    {
                    char mergedName[64];
                    thisSeq->dna = dna;
                    thisSeq->size = size;
                    sprintf(mergedName, "%s_AND_%s", ep->name5, ep->name3);

                    patSpaceFindOne(patSpace, ep->seq5.dna, ep->seq5.size,
                        '+', '5', ep->name5, &calList);
                    reverseComplement(ep->seq5.dna, ep->seq5.size);
                    patSpaceFindOne(patSpace, ep->seq5.dna, ep->seq5.size,
                        '-', '5', ep->name5, &calList);
                    patSpaceFindOne(patSpace, ep->seq3.dna, ep->seq3.size,
                        '+', '3', ep->name3, &calList);
                    reverseComplement(ep->seq3.dna, ep->seq3.size);
                    patSpaceFindOne(patSpace, ep->seq3.dna, ep->seq3.size,
                        '-', '3', ep->name3, &calList);
                    slReverse(&calList);
                    writeMergers(calList, mergedName, genoList);

                    freez(&otherSeq->dna);
                    thisSeq->dna = NULL;
                    thisSeq->size =otherSeq->size = 0;
                    }
                }
            else
                {
                patSpaceFindOne(patSpace, dna, size, '+', '5', estName, &calList);
                reverseComplement(dna, size);
                patSpaceFindOne(patSpace, dna, size, '-', '5', estName, &calList);
                slReverse(&calList);
                writeMergers(calList, estName, genoList);
                }
            ++estIx;
            if ((estIx & 0xfff) == 0)
                {
                printf(".");
                ++dotCount;
                fflush(stdout);
                }
            }
        }
    printf("\n");
    }
aliSeqName = "";
printf("ffSubmitted %3d ffAccepted %3d ffOkScore %3d ffSolidMatch %2d\n",
    ffSubmitted, ffAccepted, ffOkScore, ffSolidMatch);

endTime = clock1000();

printf("Total time is %4.2f\n", 0.001*(endTime-startTime));

/* Write out file who's presense say's we succeeded */
    {
    FILE *f = mustOpen(okFileName, "w");
    fputs("ok", f);
    fclose(f);
    }

if (dumpMe)
    {
    htmEnd(bigHtmlFile);
    htmEnd(littleHtmlFile);
    }
return 0;
}