Пример #1
0
void wormOpenAliSnof()
/* Set up for fast access alignments of named cDNAs.
 * Opens the file "<wormCdnaDir()>good" with snofOpen() into the global
 * aliSnof, and sets the global aliFile from wormOpenGoodAli().  Each handle
 * is only opened while it is still NULL; a handle that is already set is
 * left untouched. */
{
if (aliSnof == NULL)
    {
    char fileName[512];
    /* Bounded write: an over-long directory prefix is truncated rather than
     * overrunning fileName. */
    snprintf(fileName, sizeof(fileName), "%sgood", wormCdnaDir());
    aliSnof = snofOpen(fileName);
    }
if (aliFile == NULL)
    aliFile = wormOpenGoodAli();
}
struct nameOff *scanIntronFile(char *preIntronQ, char *startIntronQ, 
    char *endIntronQ, char *postIntronQ, boolean invert)
/* Scan "<wormCdnaDir()>introns.txt" and return a list of the introns whose
 * flanking/interior sequence matches the query patterns.
 *
 * Parameters:
 *   preIntronQ   - pattern compared against the tail of the pre-intron field
 *                  (at most 25 characters).
 *   startIntronQ - pattern compared against the head of the intron-start
 *                  field (at most 40 characters).
 *   endIntronQ   - pattern compared against the tail of the intron-end field
 *                  (at most 40 characters).
 *   postIntronQ  - pattern compared against the head of the post-intron
 *                  field (at most 25 characters).
 *   invert       - when TRUE, keep the lines that do NOT match instead.
 * An empty pattern always matches.
 *
 * Each non-blank line of the file is split on white space and read as:
 *   word 0      number of cDNA names on the line
 *   word 1      chromosome
 *   words 2,3   start and end
 *   word 5      name
 *   words 6-9   pre-intron, intron start, intron end, post-intron sequence
 *   words 10+   the cDNA names
 *
 * Every kept line is also passed to addIntronToHistogram().  The returned
 * list is sorted with cmpCounts.  Calls errAbort() on over-long queries, on
 * a file path that does not fit the local buffer, and on malformed lines. */
{
char intronFileName[600];
FILE *f;
char lineBuf[4*1024];
char *words[4*128];
int wordCount;
int lineCount = 0;
int nameLen;
int preLenQ = strlen(preIntronQ);
int startLenQ = strlen(startIntronQ);
int endLenQ = strlen(endIntronQ);
int postLenQ = strlen(postIntronQ);
char *preIntronF, *startIntronF, *endIntronF, *postIntronF;
int preLenF, startLenF, endLenF, postLenF;
int preIx = 6, startIx = 7, endIx =8, postIx = 9;
struct nameOff *list = NULL, *el;
boolean addIt;
int i;

if (preLenQ > 25 || postLenQ > 25 || startLenQ > 40 || endLenQ > 40)
    {
    errAbort("Can only handle queries up to 25 bases on either side of the intron "
             "and 40 bases inside the intron.");
    }

/* Build the file name with a bounded write and refuse a truncated path
 * instead of overflowing the buffer or opening the wrong file. */
nameLen = snprintf(intronFileName, sizeof(intronFileName), "%s%s",
    wormCdnaDir(), "introns.txt");
if (nameLen < 0 || nameLen >= (int)sizeof(intronFileName))
    errAbort("Path to introns.txt is too long (limit %d characters)",
        (int)sizeof(intronFileName) - 1);
f = mustOpen(intronFileName, "r");

/* NOTE(review): a line longer than sizeof(lineBuf)-1 characters is returned
 * by fgets in pieces and each piece is parsed as its own line - confirm the
 * file never contains such lines. */
while (fgets(lineBuf, sizeof(lineBuf), f) != NULL)
    {
    ++lineCount;
    wordCount = chopByWhite(lineBuf, words, ArraySize(words));
    if (wordCount == ArraySize(words))
        {
        warn("May have truncated end of line %d of %s",
            lineCount, intronFileName);
        }
    if (wordCount == 0)
        continue;
    if (wordCount < 11)
        errAbort("Unexpected short line %d of %s", lineCount, intronFileName);
    preIntronF = words[preIx];
    startIntronF = words[startIx];
    endIntronF = words[endIx];
    postIntronF = words[postIx];
    preLenF = strlen(preIntronF);
    startLenF = strlen(startIntronF);
    endLenF = strlen(endIntronF);
    postLenF = strlen(postIntronF);

    /* The pre- and end-intron patterns are anchored relative to the END of
     * their fields, the start- and post-intron patterns at the beginning.
     * NOTE(review): the end-anchored offsets presumably assume the file
     * field is at least as long as the query once countSpecial() is
     * accounted for - verify against the file format. */
    addIt = FALSE;
    if (   (  preLenQ == 0 || patMatch(preIntronQ, preIntronF+preLenF-preLenQ+countSpecial(preIntronQ), preLenQ))
        && (startLenQ == 0 || patMatch(startIntronQ, startIntronF, startLenQ))
        && (  endLenQ == 0 || patMatch(endIntronQ, endIntronF+endLenF-endLenQ+countSpecial(endIntronQ), endLenQ))
        && ( postLenQ == 0 || patMatch(postIntronQ, postIntronF, postLenQ)) )
        {
        addIt = TRUE;
        }
    if (invert)
        addIt = !addIt;
    if (addIt)
        {
        int cdnaCount = atoi(words[0]);

        /* Two characters are copied from the head of the intron-start field
         * and from the tail of the intron-end field below; a shorter field
         * would make those copies read outside the field. */
        if (startLenF < 2 || endLenF < 2)
            errAbort("Intron sequence shorter than 2 bases line %d of %s",
                lineCount, intronFileName);
        /* This validates file contents, so it must not vanish in NDEBUG
         * builds the way an assert would. */
        if (wordCount != cdnaCount + 10)
            errAbort("Expecting %d cDNA names but got %d line %d of %s",
                cdnaCount, wordCount - 10, lineCount, intronFileName);

        /* The pre-intron and intron-end arguments point at the terminating
         * NUL of their fields, i.e. one past the last character. */
        addIntronToHistogram(preIntronF+preLenF, startIntronF, endIntronF+endLenF, postIntronF);
        AllocVar(el);
        el->chrom = cloneString(words[1]);
        el->name = cloneString(words[5]);
        el->start = atoi(words[2]);
        el->end = atoi(words[3]);
        el->cdnaCount = cdnaCount;
        memcpy(el->startI, startIntronF, 2);
        memcpy(el->endI, endIntronF + endLenF - 2, 2);
        /* Names are pushed on the head and then reversed so that the list
         * keeps file order. */
        for (i=10; i<wordCount; ++i)
            {
            struct slName *name = newSlName(words[i]);
            slAddHead(&el->cdnaNames, name);
            }
        slReverse(&el->cdnaNames);
        assert(slCount(el->cdnaNames) == el->cdnaCount);
        slAddHead(&list, el);
        }
    }
fclose(f);
slSort(&list, cmpCounts);
return list;
}