Beispiel #1
0
/* Build a heap-allocated duplicate of psl and return it.  Counts, sizes and
 * coordinates are copied by value; qName and tName are cloned with
 * cloneString, and the blockSizes, qStarts and tStarts arrays are cloned with
 * CloneArray using psl->blockCount elements.  The copy's next pointer is set
 * to NULL so it is not linked into the source's list.  The caller owns the
 * result and must free it. */
struct psl *copyPsl(struct psl *psl)
{
struct psl *dup = NULL;
int blocks = psl->blockCount;

AllocVar(dup);
dup->next = NULL;

/* Match statistics. */
dup->match = psl->match;
dup->misMatch = psl->misMatch;
dup->repMatch = psl->repMatch;
dup->nCount = psl->nCount;

/* Insert counts on the query and target side. */
dup->qNumInsert = psl->qNumInsert;
dup->qBaseInsert = psl->qBaseInsert;
dup->tNumInsert = psl->tNumInsert;
dup->tBaseInsert = psl->tBaseInsert;

strcpy(dup->strand, psl->strand);

/* Query description. */
dup->qName = cloneString(psl->qName);
dup->qSize = psl->qSize;
dup->qStart = psl->qStart;
dup->qEnd = psl->qEnd;

/* Target description. */
dup->tName = cloneString(psl->tName);
dup->tSize = psl->tSize;
dup->tStart = psl->tStart;
dup->tEnd = psl->tEnd;

/* Per-block arrays get their own storage. */
dup->blockCount = blocks;
dup->blockSizes = CloneArray(psl->blockSizes, blocks);
dup->qStarts = CloneArray(psl->qStarts, blocks);
dup->tStarts = CloneArray(psl->tStarts, blocks);
return dup;
}
Beispiel #2
0
 CHqlExprMultiGuard(const CHqlExprMultiGuard * other)
 {
     if (other)
     {
         CloneArray(guarded, other->guarded);
     }
 }
struct wigSection *wigSectionRead(struct lineFile *lf)
/* Read the next fixedStep section from lf.  Returns a newly allocated
 * wigSection, or NULL when lineFileNextReal yields no further line.  Aborts
 * via errAbort if the first line does not start with "fixedStep ".  The
 * value parsed from each data line is stored step times in a row, and
 * chromEnd is start plus the number of stored values. */
{
    /* Scratch buffer that persists across calls; it only ever grows. */
    static double *scratch = NULL;
    static int scratchSize = 0;

    char *header = "fixedStep ";
    int headerLen = 10;
    char *line;

    /* The section must open with a "fixedStep" declaration line. */
    if (!lineFileNextReal(lf, &line))
        return NULL;
    if (!startsWith(header, line))
        errAbort("Expecting fixedStep line %d of %s", lf->lineIx, lf->fileName);

    /* Parse the name=value settings that follow the keyword. */
    line += headerLen;
    struct hash *settings = hashVarLine(line, lf->lineIx);
    int step = sqlUnsigned(requiredVal(lf, settings, "step"));
    int start = sqlUnsigned(requiredVal(lf, settings, "start"));
    char *chrom = cloneString(requiredVal(lf, settings, "chrom"));
    hashFree(&settings);

    /* Collect values until end of input or the next fixedStep line, which is
     * pushed back for the following call. */
    int used = 0;
    while (lineFileNextReal(lf, &line))
    {
        int rep;
        if (startsWith(header, line))
        {
            lineFileReuse(lf);
            break;
        }
        for (rep = 0; rep < step; ++rep)
        {
            if (used >= scratchSize)
            {
                /* Grow the scratch buffer by 1024 elements at a time. */
                int grown = scratchSize + 1024;
                ExpandArray(scratch, scratchSize, grown);
                scratchSize = grown;
            }
            scratch[used] = lineFileNeedDouble(lf, &line, 0);
            ++used;
        }
    }

    /* Package the result; the section gets its own copy of the values. */
    struct wigSection *result;
    AllocVar(result);
    result->chrom = chrom;
    result->chromStart = start;
    result->chromEnd = start + used;
    result->vals = CloneArray(scratch, used);
    return result;
}
void maxTranscriptomeExps(char *files[], int numFiles, char *outputFile, char *trackName)
/* Load a sample list from each of the numFiles entries in files, build one
 * new sample per element of the first file's list, write the new list to
 * outputFile and free everything that was loaded or built.
 *
 * Each new sample copies chrom, chromStart, chromEnd, strand, sampleCount
 * and samplePosition from the matching element of the first list, is named
 * trackName, and gets a zero-filled sampleHeight array that fillInMaxVals
 * then populates from all loaded lists (given the element's index). */
{
struct sample **pSampList = NULL;
struct sample *s1 = NULL;
struct sample *maxListSamp = NULL;
struct sample *maxSamp = NULL;
int i;
FILE *out = NULL;

AllocArray(pSampList, numFiles);
for (i = 0; i < numFiles; i++)
    {
    warn("Reading %s.", files[i]);
    pSampList[i] = sampleLoadAll(files[i]);
    }

warn("Calculating Maxes.");
/* Walk the first list once, tracking the index alongside, instead of
 * re-scanning from the head for every index.  Do not touch pSampList[0]
 * when no files were given. */
s1 = (numFiles > 0) ? pSampList[0] : NULL;
for (i = 0; s1 != NULL; s1 = s1->next, i++)
    {
    AllocVar(maxSamp);
    maxSamp->chrom = cloneString(s1->chrom);
    maxSamp->chromStart = s1->chromStart;
    maxSamp->chromEnd = s1->chromEnd;
    maxSamp->name = cloneString(trackName);
    snprintf(maxSamp->strand, sizeof(maxSamp->strand), "%s", s1->strand);
    maxSamp->sampleCount = s1->sampleCount;
    maxSamp->samplePosition = CloneArray(s1->samplePosition, maxSamp->sampleCount);
    AllocArray(maxSamp->sampleHeight, maxSamp->sampleCount);
    fillInMaxVals(maxSamp, pSampList, numFiles, i);
    slAddHead(&maxListSamp, maxSamp);
    }
/* Elements were pushed on the head, so restore input order. */
slReverse(&maxListSamp);

warn("Saving Maxes");
out = mustOpen(outputFile, "w");
for (maxSamp = maxListSamp; maxSamp != NULL; maxSamp = maxSamp->next)
    sampleTabOut(maxSamp, out);
carefulClose(&out);

warn("Cleaning up");
sampleFreeList(&maxListSamp);
for (i = 0; i < numFiles; i++)
    sampleFreeList(&pSampList[i]);
freez(&pSampList);
warn("Done.");
}
struct improbRunInfo * analyseOneMotifRun(char *runName, char *seqDir,
    char *motifDir, int controlCount, char *controls[])
/* Bundle up data on one improbizer run and associated control runs.
 *
 * Reads sequence counts from seqDir/runName.fa, then every motif from
 * motifDir/runName, creating one improbRunInfo per motif named
 * "runName.N" (N counting from 1).  For each of the controlCount
 * directories in controls, controlDir/runName is read and must supply one
 * motif per improbRunInfo; its score is stored in controlScores.  Finally
 * the best, mean and standard deviation of the control scores are filled in.
 *
 * Returns the list of improbRunInfo in file order (NULL if the main run
 * holds no motifs).  Aborts if a path does not fit in the file name buffer
 * or a control file has too few motifs. */
{
char fileName[512];
char motifName[256];
int seqCount, baseCount;
struct improbRunInfo *iriList = NULL, *iri;
struct lineFile *lf = NULL;
struct motif motif;
int motifIx = 0;
int i;
int len;
float acc, best, mean, x;

printf("%s\n", runName);

/* Count bases in sequences - this will be used in each iri. */
len = snprintf(fileName, sizeof(fileName), "%s/%s.fa", seqDir, runName);
if (len < 0 || len >= (int)sizeof(fileName))
    errAbort("File name too long: %s/%s.fa", seqDir, runName);
countSeq(fileName, &seqCount, &baseCount);

/* Allocate iri and read the main run. */
len = snprintf(fileName, sizeof(fileName), "%s/%s", motifDir, runName);
if (len < 0 || len >= (int)sizeof(fileName))
    errAbort("File name too long: %s/%s", motifDir, runName);
lf = lineFileOpen(fileName, TRUE);
while (readMotif(lf, &motif))
    {
    AllocVar(iri);
    slAddTail(&iriList, iri);
    ++motifIx;
    snprintf(motifName, sizeof(motifName), "%s.%d", runName, motifIx);
    iri->name = cloneString(motifName);
    iri->seqCount = seqCount;
    iri->runScore = motif.score;
    iri->runPos = motif.pos;
    iri->runPosSd = motif.posSd;
    iri->columnCount = motif.size;
    iri->consensus = cloneString(motif.consensus);
    /* One probability column array per profile row, in index order 0..3. */
    iri->aProb = CloneArray(motif.profile[0], motif.size);
    iri->cProb = CloneArray(motif.profile[1], motif.size);
    iri->gProb = CloneArray(motif.profile[2], motif.size);
    iri->tProb = CloneArray(motif.profile[3], motif.size);
    iri->controlCount = controlCount;
    AllocArray(iri->controlScores, controlCount);
    }
lineFileClose(&lf);

/* Read the control runs. */
for (i=0; i<controlCount; ++i)
    {
    len = snprintf(fileName, sizeof(fileName), "%s/%s", controls[i], runName);
    if (len < 0 || len >= (int)sizeof(fileName))
        errAbort("File name too long: %s/%s", controls[i], runName);
    lf = lineFileOpen(fileName, TRUE);
    for (iri = iriList; iri != NULL; iri = iri->next)
        {
        if (!readMotif(lf, &motif))
            errAbort("%s doesn't contain the expected number of motifs", lf->fileName);
        iri->controlScores[i] = motif.score;
        }
    lineFileClose(&lf);
    }

/* Calculate best and mean on control runs.  Best starts at zero, so it
 * never goes below zero even if every control score is negative. */
for (iri = iriList; iri != NULL; iri = iri->next)
    {
    acc = best = 0;
    for (i=0; i<controlCount; ++i)
        {
        x = iri->controlScores[i];
        acc += x;
        if (x > best)
            best = x;
        }
    iri->bestControlScore = best;
    /* Avoid 0/0 when there are no control runs. */
    iri->meanControlScore = (controlCount > 0) ? acc/controlCount : 0;
    }

/* Calculate standard deviation of control runs (population form: the sum
 * of squared deviations is divided by controlCount). */
for (iri = iriList; iri != NULL; iri = iri->next)
    {
    acc = 0;
    mean = iri->meanControlScore;
    for (i=0; i<controlCount; ++i)
        {
        x = iri->controlScores[i] - mean;
        acc += x*x;
        }
    if (controlCount > 1)
        acc /= controlCount;
    iri->sdControlScore = sqrt(acc);
    }

return iriList;
}
Beispiel #6
0
void reportCassette(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out both an altGraphX and two bed files. For a cassette exon the
 edges are - 
 Name       Vertexes         Class
 ------     ----------       -----
 exon1:     startV->vs       constitutive (cons 0)
 junction1: vs->ve1          alternative1 (alt1 1)
 exon2:     ve1->altBpEnd    alternative1 (alt1 1)
 junction2: altBpEnd->ve2    alternative1 (alt1 1)
 exon3:     ve2->endV        constitutive (cons 0)
 junction3: vs->ve2          alternative2 (alt2 2)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int i =0;
struct dyString *dy = NULL;
if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 6;
agLoc->id = altCassette;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, vCLoc);
AllocArray(eEndsLoc, vCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, vCLoc);

/* Fill in the vertex positions. */
vPosLoc[0] = vPos[startV];
vPosLoc[1] = vPos[vs];
vPosLoc[2] = vPos[ve1];
vPosLoc[3] = vPos[altBpEnd];
vPosLoc[4] = vPos[ve2];
vPosLoc[5] = vPos[endV];

/* Fill in the vertex types. */
vTLoc[0] = vT[startV];
vTLoc[1] = vT[vs];
vTLoc[2] = vT[ve1];
vTLoc[3] = vT[altBpEnd];
vTLoc[4] = vT[ve2];
vTLoc[5] = vT[endV];

/* Fill in the edges. */
/* Constitutive first exon. */
eStartsLoc[0] = 0;
eEndsLoc[0] = 1;
eTLoc[0] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
/* Exon inclusion junction. */
eStartsLoc[1] = 1;
eEndsLoc[1] = 2;
eTLoc[1] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon exclusion junction. */
eStartsLoc[2] = 1;
eEndsLoc[2] = 4;
eTLoc[2] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Cassette exon. */
eStartsLoc[3] = 2;
eEndsLoc[3] = 3;
eTLoc[3] = 1;
ev = evidenceForEdge(ag, ve1, altBpEnd);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Exon inclusion junction. */
eStartsLoc[4] = 3;
eEndsLoc[4] = 4;
eTLoc[4] = 1;
ev = evidenceForEdge(ag, altBpEnd, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

/* Constitutive second exon. */
eStartsLoc[5] = 4;
eEndsLoc[5] = 5;
eTLoc[5] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);

slReverse(&agLoc->evidence);

dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}
Beispiel #7
0
void reportAlt3Prime(struct altGraphX *ag, bool **em, int vs, int ve1, int ve2, 
		    int altBpStart, int altBpEnd, int startV, int endV, FILE *out)
/* Write out an altGraphX record for an alt3Prime splicing
event. Variable names are consistent with the rest of the program, but
can be misleading. Specifically vs = start of alt splicing, ve1 =
first end of alt splicing, etc. even though "vs" is really the end of
an exon. For an alt5Prime splice the edges are:

 Name       Vertexes         Class
 ------     ----------       -----
exon1:      startV->vs       constituative (0)
junction1:  vs->ve1          alternative (1)
junction2:  vs->ve2          alternative (2)
exon2:      ve1->e2        alternative (1)
exon3:      ve2->endV        constituative (0)
*/
{
struct altGraphX *agLoc = NULL;  /* Local altGraphX. */
struct evidence *ev = NULL, *evLoc = NULL;
int *vPos = ag->vPositions;
unsigned char *vT = ag->vTypes;
int *vPosLoc = NULL;    /* Vertex Positions. */
int *eStartsLoc = NULL; /* Edge Starts. */
int *eEndsLoc = NULL;   /* Edge ends. */
unsigned char *vTLoc = NULL;      /* Vertex Types. */
int *eTLoc = NULL;      /* Edge Types. */
int vCLoc = 0;
int eCLoc = 0;
int edgeIx = 0, vertexIx = 0;
int i =0;
struct dyString *dy = NULL;

if(out == NULL)
    return;
AllocVar(agLoc);
agLoc->tName = cloneString(ag->tName);
agLoc->name = cloneString(ag->name);
agLoc->tStart = vPos[startV];
agLoc->tEnd = vPos[endV];
agLoc->strand[0] = ag->strand[0];
agLoc->vertexCount = vCLoc = 6;
agLoc->edgeCount = eCLoc = 5;
agLoc->id = alt3Prime;
/* Allocate some arrays. */
AllocArray(vPosLoc, vCLoc);
AllocArray(eStartsLoc, eCLoc);
AllocArray(eEndsLoc, eCLoc);
AllocArray(vTLoc, vCLoc);
AllocArray(eTLoc, eCLoc);

/* Fill in the vertex positions. */
vertexIx = 0;
vPosLoc[vertexIx++] = vPos[startV]; /* 0 */
vPosLoc[vertexIx++] = vPos[vs];     /* 1 */
vPosLoc[vertexIx++] = vPos[ve1];    /* 2 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 3 */
vPosLoc[vertexIx++] = vPos[ve2];    /* 4 */
vPosLoc[vertexIx++] = vPos[endV];   /* 5 */

/* Fill in the vertex types. */
vertexIx = 0;
vTLoc[vertexIx++] = vT[startV];
vTLoc[vertexIx++] = vT[vs];
vTLoc[vertexIx++] = vT[ve1];
vTLoc[vertexIx++] = vT[vs]; /* Faking a separate exon for the alt spliced portion. */
vTLoc[vertexIx++] = vT[ve2];
vTLoc[vertexIx++] = vT[endV];

edgeIx = 0;

/* Constitutive first exon. */
eStartsLoc[edgeIx] = 0;
eEndsLoc[edgeIx] = 1;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, startV, vs);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alternative1 junction (shorter). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 2;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, vs, ve1);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt2 junction (longer). */
eStartsLoc[edgeIx] = 1;
eEndsLoc[edgeIx] = 4;
eTLoc[edgeIx] = 2;
ev = evidenceForEdge(ag, vs, ve2);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Alt1 portion of second exon. */
eStartsLoc[edgeIx] = 2;
eEndsLoc[edgeIx] = 3;
eTLoc[edgeIx] = 1;
ev = evidenceForEdge(ag, ve1, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Exon 2 constitutive (shorter exon) */
eStartsLoc[edgeIx] = 4;
eEndsLoc[edgeIx] = 5;
eTLoc[edgeIx] = 0;
ev = evidenceForEdge(ag, ve2, endV);
evLoc = CloneVar(ev);
evLoc->mrnaIds = CloneArray(ev->mrnaIds, ev->evCount);
slAddHead(&agLoc->evidence, evLoc);
edgeIx++;

/* Package up the evidence, tissues, etc. */
slReverse(&agLoc->evidence);
dy = newDyString(ag->mrnaRefCount*36);
agLoc->mrnaRefCount = ag->mrnaRefCount;
for(i=0; i<ag->mrnaRefCount; i++)
    dyStringPrintf(dy, "%s,", ag->mrnaRefs[i]);
sqlStringDynamicArray(dy->string, &agLoc->mrnaRefs, &i);
dyStringFree(&dy);
agLoc->mrnaTissues = CloneArray(ag->mrnaTissues, ag->mrnaRefCount);
agLoc->mrnaLibs = CloneArray(ag->mrnaLibs, ag->mrnaRefCount);
agLoc->vPositions = vPosLoc;
agLoc->edgeStarts = eStartsLoc;
agLoc->edgeEnds = eEndsLoc;
agLoc->vTypes = vTLoc;
agLoc->edgeTypes = eTLoc;
altGraphXTabOut(agLoc, out);
altGraphXFree(&agLoc);
}