Beispiel #1
0
void filterPsls()
{
struct psl *origPslList=NULL, *pslList=NULL, *psl=NULL;
int startCount=0, stopCount=0;
char buff[256];
origPslList = pslLoadAll(pslIn);

/* some messages for the user */
startCount = slCount(origPslList);
sprintf(buff, "Filtering %d psl using seqIdent=%g and basePct=%g\n", 
	startCount, seqIdent, basePct);
msg(buff);

/* do our filtering */
pslList = filterBySeqIdentity(seqIdent, origPslList);
pslFreeList(&origPslList);
origPslList = filterByBasePct(basePct, pslList);


/* let the user know we're done */
if(origPslList != NULL)
{
stopCount = slCount(origPslList);
pslWriteAll(origPslList, pslOut, FALSE);
pslFreeList(&origPslList);
}
pslFreeList(&origPslList);
pslFreeList(&pslList);
sprintf(buff, "After filtering %d of %d are left\n", stopCount, startCount);
msg(buff);
}
Beispiel #2
0
void pslReps(char *inName, char *bestAliName, char *repName)
/* Analyse inName and put best alignments for eacmRNA in estAliName.
 * Put repeat info in repName. */
{
struct lineFile *in = pslFileOpen(inName);
FILE *bestFile = mustOpen(bestAliName, "w");
FILE *repFile = mustOpen(repName, "w");
int lineSize;
char *line;
char *words[32];
int wordCount;
struct psl *pslList = NULL, *psl = NULL;
char lastName[512];
int aliCount = 0;
quiet = sameString(bestAliName, "stdout") || sameString(repName, "stdout");
if (coverQSizeFile != NULL)
    loadCoverQSizes(coverQSizeFile);

if (!quiet)
    printf("Processing %s to %s and %s\n", inName, bestAliName, repName);
 if (!noHead)
     pslWriteHead(bestFile);
strcpy(lastName, "");
while (lineFileNext(in, &line, &lineSize))
    {
    if (((++aliCount & 0x1ffff) == 0) && !quiet)
        {
	printf(".");
	fflush(stdout);
	}
    wordCount = chopTabs(line, words);
    if (wordCount == 21)
	psl = pslLoad(words);
    else if (wordCount == 23)
	psl = pslxLoad(words);
    else
	errAbort("Bad line %d of %s\n", in->lineIx, in->fileName);
    if (!sameString(lastName, psl->qName))
	{
	doOneAcc(lastName, pslList, bestFile, repFile);
	pslFreeList(&pslList);
	safef(lastName, sizeof(lastName), "%s", psl->qName);
	}
    slAddHead(&pslList, psl);
    }
doOneAcc(lastName, pslList, bestFile, repFile);
pslFreeList(&pslList);
lineFileClose(&in);
fclose(bestFile);
fclose(repFile);
if (!quiet)
    printf("Processed %d alignments\n", aliCount);
}
void coordConvRepFree(struct coordConvRep **pEl)
/* free an individual coordinate conversion report */
{
struct coordConvRep *el;
if((el = *pEl) == NULL) return;
freeMem(el->msg);
coordConvFree(&el->to);
coordConvFree(&el->from);
freeDnaSeqList(&el->upSeq);
freeDnaSeqList(&el->midSeq);
freeDnaSeqList(&el->downSeq);
pslFreeList(&el->upPsl);
pslFreeList(&el->midPsl);
pslFreeList(&el->downPsl);
}
Beispiel #4
0
int main(int argc, char *argv[])
/* The program */
{
struct psl *pslList = NULL, *psl;
struct hash *queryHash, *targetHash;
struct lineFile *vulg;
aaSeq *querySeqs;
struct dnaSeq *targetSeqs;
if (argc != 5)
    usage();
/* Load up everything at beginning */
vulg = lineFileOpen(argv[1], TRUE);
querySeqs = dnaLoadAll(argv[2]);
targetSeqs = dnaLoadAll(argv[3]);
queryHash = seqHash(querySeqs);
targetHash = seqHash(targetSeqs);
/* Main business */
pslList = vulgarToPsl(vulg, queryHash, targetHash);
pslWriteAll(pslList, argv[4], FALSE);
/* Free up everything */
freeDnaSeqList(&querySeqs);
freeDnaSeqList(&targetSeqs);
freeHash(&targetHash);
freeHash(&queryHash);
pslFreeList(&pslList);
lineFileClose(&vulg);
return 0;
}
void outputChunk(struct psl **pPslList, char *tempDir, int midIx, boolean noHead)
/* Sort and write out pslList and free it. */
{
char fileName[512];
FILE *f;
struct psl *psl;

if (*pPslList == NULL)
    return; 	/* Empty. */
psl = *pPslList;
//slSort(pPslList, pslCmpTarget);
makeMidName(tempDir, midIx, fileName);
if (stripVer)
    {
    char *s = stringIn(".",psl->qName);
    if (s != NULL)
        *s = 0;
    }
if (chunkSize ==1)
    safef(fileName, sizeof(fileName), "%s/%s.psl",tempDir,psl->qName);
f = mustOpen(fileName, "w");
if (!noHead)
    pslWriteHead(f);
for (psl = *pPslList; psl != NULL; psl = psl->next)
    pslTabOut(psl, f);
fclose(f);
pslFreeList(pPslList);
}
Beispiel #6
0
static void pslPartsWrite(struct pslParts *parts, char *outDir)
/* write out a set of partitions and reset stated to empty. */
{
    char *partPath = getPartPslFile(outDir, parts->partNum++);
    pslWriteAll(parts->psls, partPath, FALSE);
    freeMem(partPath);
    pslFreeList(&parts->psls);
    parts->size = 0;
}
Beispiel #7
0
void gbGeneTblRebuild(struct gbGeneTbl *ggt, struct gbStatus* status,
                      struct sqlConnection *conn)
/* rebuild a gene from an alignment that is already loaded in a table */
{
char where[128];
sqlSafefFrag(where, sizeof(where), "qName = \"%s\"", status->acc);
struct psl *psls = pslReaderLoadQuery(conn, ggt->alnTbl, where);
struct psl *psl;
for (psl = psls; psl != NULL; psl = psl->next)
    gbGeneTblWrite(ggt, status, psl, conn);
pslFreeList(&psls);
}
Beispiel #8
0
static void getAligns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* get request alignments from database */
{
struct psl *psls = pslReaderLoadQuery(conn, "refSeqAli", NULL);
slSort(psls, pslCmpQuery);
FILE *fh = mustOpen(outFile, "w");
struct psl *psl;
for (psl = psls; psl != NULL; psl = psl->next)
    processPsl(fh, refSeqVerInfoTbl, psl);
carefulClose(&fh);
pslFreeList(&psls);
}
Beispiel #9
0
static void displayAligns(struct sqlConnection *conn, struct mappingInfo *mi)
/* display cDNA alignments */
{
int start = cartInt(cart, "o");
char alignTbl[128];
struct psl *psl;
safef(alignTbl, sizeof(alignTbl), "%s%sAli%s", mi->tblPre, mi->geneSet,mi->suffix);

/* this should only ever have one alignment */
psl = getAlignments(conn, alignTbl, mi->pg->name);
printf("<H3>Retro Locus/Parent mRNA Alignments</H3>");
printRetroAlignments(psl, start, "hgcRetroCdnaAli", alignTbl, mi->pg->name);
pslFreeList(&psl);
}
Beispiel #10
0
static float scoreHapRefPair(struct hapRegions *hr, struct cDnaAlign *refAln, struct refChrom *refChrom, struct cDnaAlign *hapAln, struct hapChrom *hapChrom)
/* Score a hapAln with refAln based on mapping a mapping to the ref chrom and
 * then a mapping of the two.  An issue is that the mapping a given hapAln to
 * the reference chromosome might be fragmented into multiple alignments due
 * to multiple mapping alignments.  So we count the total number of
 * non-redundent bases that are aligned. */
{
    float score = -1.0;
    struct psl *mappedHaps = mapToRef(hr, refChrom, hapAln, hapChrom);
    if (mappedHaps != NULL)
    {
        score = calcHapRefPairScore(hr, refAln, refChrom, hapAln, hapChrom, mappedHaps);
        pslFreeList(&mappedHaps);
    }
    return score;
}
Beispiel #11
0
void pslUnpile(char *inName, char *outName)
/* pslUnpile - Removes huge piles of alignments from sorted 
 * psl files (due to unmasked repeats presumably).. */
{
FILE *f = mustOpen(outName, "w");
enum gfType qType, tType;
struct lineFile *lf;
struct psl *list = NULL, *psl, *el;

pslxFileOpen(inName, &qType, &tType, &lf);
if (!noHead)
    pslxWriteHead(f, qType, tType);
for (;;)
    {
    psl = pslNext(lf);
    if (list != NULL && (psl == NULL || !pslOverlap(psl, list)))
        {
	if (list != NULL)
	    {
	    slReverse(&list);
	    if (checkPile(list))
	        {
		for (el = list; el != NULL; el = el->next)
		    {
		    pslTabOut(el, f);
		    }
		}
	    else
	        {
		for (el = list; el != NULL; el = el->next)
		    {
		    if (psl == NULL)
			pslTabOut(el, f);
		    else if (psl->tEnd - psl->tStart > 4000)
			pslTabOut(el, f);
		    }
		}
	    pslFreeList(&list);
	    }
	}
    if (psl == NULL)
        break;
    slAddHead(&list, psl);
    }
lineFileClose(&lf);
carefulClose(&f);
}
Beispiel #12
0
void outputAlignmentForStan(struct sqlConnection *conn, struct stanMad *sm, struct hash *iHash, FILE *out)
{
struct psl *pslList, *bestPsl = NULL;
char buff[1024];
int i;
struct imageClone *ic = NULL;
sprintf(buff, "%d", sm->clid);
printf("Looking for %s\n", buff);
ic = hashFindVal(iHash, buff);
if(ic != NULL) 
    {
    /* first try looking for the image clones themselves... */
    for(i=0; i<ic->numGenbank; i++) 
	{
	char query[1024];
	sqlSafef(query, sizeof query, "select * from all_est where qName='%s'", ic->genbankIds[i]);
	pslList = pslLoadByQuery(conn, buff);
	if(pslList != NULL) 
	    {
	    slSort(&pslList, pslCmpScore);	
	    if(bestPsl == NULL || (pslScore(pslList) > pslScore(bestPsl)))
		pslFree(&bestPsl);
		bestPsl = copyPsl(pslList);
	    }
	
	pslFreeList(&pslList);
	}

    if(bestPsl != NULL)
	{    
	freez(&bestPsl->qName);
	sprintf(buff, "%d", sm->clid);
	bestPsl->qName = cloneString(buff);
	pslTabOut(bestPsl,out);
	}
    else 
	{
	fprintf(out, "%d\talignment unknown\n", sm->clid);
	}
    
    }
else 
    {
    fprintf(out, "%d\tunknown\n", sm->clid);
    }
}
Beispiel #13
0
void retroClickHandler(struct trackDb *tdb, char *mappedId)
/* Handle click on a transMap tracks */
{
struct sqlConnection *conn = hAllocConn(database);
struct mappingInfo *mi = mappingInfoNew(conn, tdb->table, mappedId);
struct psl *pslList = NULL;
char *table;

genericHeader(tdb, mappedId);
printf("<TABLE border=0>\n");
printf("<TR CLASS=\"transMapLayout\">\n");
printf("<TD COLSPAN=3>\n");
displaySrcGene(conn, mi);
printf("</TR>\n");
printf("<TR CLASS=\"transMapLayout\">\n");
printf("<TD>\n");
displayMappingInfo(conn, mi);
printf("<TD>\n");
#if 0
struct geneCheck *gc = displayGeneCheck(conn, &mti, mappedId);
printf("<TD>\n");
displayProtSim(conn, &mti, mappedId);
#endif
printf("</TR>\n");
#if 0
if (!sameString(gc->stat, "ok"))
    {
    printf("<TR CLASS=\"transMapLayout\">\n");
    printf("<TD COLSPAN=3>\n");
    displayGeneCheckDetails(conn, &mti, gc);
    printf("</TR>\n");
    }
#endif
printf("</TABLE>\n");
displayRetroDetails(conn, mi);
displayAligns(conn, mi);
pslList = getParentAligns(conn, mi, &table);
displayParentAligns(mi, pslList, table);
pslFreeList(&pslList);
printTrackHtml(tdb);
#if 0
geneCheckFree(&gc);
#endif
mappingInfoFree(&mi);
hFreeConn(&conn);
}
Beispiel #14
0
void outputChunk(struct psl **pPslList, char *tempDir, int midIx)
/* Sort and write out pslList and free it. */
{
char fileName[512];
FILE *f;
struct psl *psl;

if (*pPslList == NULL)
    return; 	/* Empty. */
slSort(pPslList, pslCmpTarget);
makeMidName(tempDir, midIx, fileName);
f = mustOpen(fileName, "w");
pslWriteHead(f);
for (psl = *pPslList; psl != NULL; psl = psl->next)
    pslTabOut(psl, f);
fclose(f);
pslFreeList(pPslList);
}
boolean simpleOut(FILE *out, FILE *glue, struct psl **pList)
/* Write out relevant bits of pList and free pList.
 * Return TRUE if wrote anything. */
{
struct psl *psl, *lastPsl;
struct psl *list;
int minDiff = 15;
boolean isRelevant = FALSE;


slReverse(pList);
++outCount;
ltot += slCount(*pList);

/* Return if size of list less than 2. */
if ((lastPsl = list = *pList) == NULL)
    return FALSE;
if (list->next == NULL)
    return FALSE;

++mtot;
for (psl = lastPsl->next; psl != NULL; psl = psl->next)
    {
    if (psl->qStart - lastPsl->qStart >= minDiff
        && psl->qEnd - lastPsl->qEnd >= minDiff)
        {
        isRelevant = TRUE;
        break;
        }
    }

if (isRelevant)
    {
    for (psl = list; psl != NULL; psl = psl->next)
        {
        pslTabOut(psl, out);
        fprintf(glue, "%s\t%d\t%d\t%s %s\t%d\t%d\n",
            psl->qName, psl->qStart, psl->qEnd,
            psl->tName, psl->strand, psl->tStart, psl->tEnd);
        }
    }
pslFreeList(pList);
return isRelevant;
}
void dropNotBest(struct psl **pslList)
/* Sorts list and drops any score less than the best score */
{
struct psl *tail = NULL;
int bestScore =0;
slSort(pslList, pslCmpScoreDesc);
if(pslList == NULL) 
    return;
bestScore = pslScore(*pslList);
for(tail=*pslList; tail != NULL; tail=tail->next)
    {
    if(tail->next == NULL)
	break;
    if(pslScore(tail->next) < bestScore)
	{
	struct psl *tmp = tail->next;
	tail->next = NULL;
	pslFreeList(&tmp);
	break;
	}
    }
}
Beispiel #17
0
struct bed *createBedsFromPsls(char *pslFile, int expCount)
/** creates a list of beds from a pslfile, allocates memory for
arrays as determined by expCount */
{
struct psl *pslList = NULL, *psl = NULL;
struct bed *bedList = NULL, *bed = NULL;
pslList = pslLoadAll(pslFile);
for(psl = pslList; psl != NULL; psl = psl->next)
    {

    bed = bedFromPsl(psl);
    freez(&bed->name);
    bed->name=parseNameFromHgc(psl->qName);
    bed->score = 0;
    bed->expCount = 0;
    bed->expIds = needMem(sizeof(int)*expCount);
    bed->expScores = needMem(sizeof(float)*expCount);
    slAddHead(&bedList,bed);
    }
slReverse(&bedList);
pslFreeList(&pslList);
return bedList;
}
Beispiel #18
0
struct altGraphX *agFromGp(char *db, struct genePred *gp, struct sqlConnection *conn, 
			   int maxGap, FILE *out)
/** Create an altGraphX record by clustering psl records within coordinates
    specified by genePred record. */
{
struct altGraphX *ag = NULL;
struct dnaSeq *genoSeq = NULL;
struct ggMrnaAli *maList=NULL, *ma=NULL, *maNext=NULL, *maSameStrand=NULL;
struct psl *pslList = NULL, *psl = NULL, *pslCluster = NULL, *pslNext = NULL;
char *chrom = gp->chrom;
int chromStart = BIGNUM;
int chromEnd = -1;

verbose(2, "agFromGp on %s %s:%d-%d\n", gp->name, gp->chrom, gp->txStart, gp->txEnd);

pslList = getPsls(gp, conn);
verbose(3, "  got %d psls\n", slCount(pslList));
if(slCount(pslList) == 0)
    {
    verbose(2, "No available alignments for %s.", gp->name);
    return NULL;
    }
/* expand to find the furthest boundaries of alignments */
expandToMaxAlignment(pslList, chrom, &chromStart, &chromEnd);
verbose(3, "  expanded to %s:%d-%d\n", chrom, chromStart, chromEnd);

/* get the sequence */
genoSeq = dnaFromChrom(db, chrom, chromStart, chromEnd, dnaLower);

for(psl = pslList; psl != NULL; psl = pslNext)
    {
    pslNext = psl->next;
    if(singleExonOk || pslHasIntron(psl, genoSeq, chromStart))
	{
	slAddHead(&pslCluster, psl);
	}
    else 
	{
	if(!useChromKeeper)
	    pslFree(&psl);
	}
    }
verbose(3, "  got %d psls after intron/singleExon check\n", slCount(pslCluster));
/* load and check the alignments */
maList = pslListToGgMrnaAliList(pslCluster, gp->chrom, chromStart, chromEnd, genoSeq, maxGap);
verbose(3, "  got %d in maList\n", slCount(maList));

for(ma = maList; ma != NULL; ma = maNext)
    {
    maNext = ma->next;
    verbose(4, "      ma->strand %s, gp->strand %s\n", ma->strand, gp->strand);
    if(ma->strand[0] == gp->strand[0])
	{
	slSafeAddHead(&maSameStrand, ma);
	}
    else
	ggMrnaAliFree(&ma);
    }
slReverse(&maSameStrand);

verbose(3, "  got %d in ma on same strand\n", slCount(maSameStrand));

/* If there is a cluster to work with create an geneGraph */
if(maSameStrand != NULL)
    {
    ag = agFromAlignments(db, maSameStrand, genoSeq, conn, chromStart, chromEnd,  out);
    }
else
    {
    dnaSeqFree(&genoSeq);
    ggMrnaAliFreeList(&maSameStrand);
    }

/* Only free psls if not using cache... */
if(!useChromKeeper)
    pslFreeList(&pslCluster);
return ag;
}
Beispiel #19
0
void showAliPlaces(char *pslName, char *faName, char *database, 
		   enum gfType qType, enum gfType tType, 
		   char *organism, boolean feelingLucky)
/* Show all the places that align. */
{
struct lineFile *lf = pslFileOpen(pslName);
struct psl *pslList = NULL, *psl;
char *browserUrl = hgTracksName();
char *hgcUrl = hgcName();
char uiState[64];
char *vis;
char unhideTrack[64];
char *sort = cartUsualString(cart, "sort", sortList[0]);
char *output = cartUsualString(cart, "output", outputList[0]);
boolean pslOut = startsWith("psl", output);
boolean isStraightNuc = (qType == gftRna || qType == gftDna);
int  minThreshold = (isStraightNuc ? minMatchShown : 0);

sprintf(uiState, "%s=%s", cartSessionVarName(), cartSessionId(cart));

/* If user has hidden BLAT track, add a setting that will unhide the 
   track if user clicks on a browser link. */
vis = cartOptionalString(cart, "hgUserPsl");
if (vis != NULL && sameString(vis, "hide"))
    snprintf(unhideTrack, sizeof(unhideTrack), "&hgUserPsl=dense");
else
    unhideTrack[0] = 0;

while ((psl = pslNext(lf)) != NULL)
    {
    if (psl->match >= minThreshold)
	slAddHead(&pslList, psl);
    }
lineFileClose(&lf);
if (pslList == NULL)
    {
    puts("<table><tr><td><hr>Sorry, no matches found<hr><td></tr></table>");
    return;
    }

if (sameString(sort, "query,start"))
    {
    slSort(&pslList, pslCmpQuery);
    }
else if (sameString(sort, "query,score"))
    {
    slSort(&pslList, pslCmpQueryScore);
    }
else if (sameString(sort, "score"))
    {
    slSort(&pslList, pslCmpScore);
    }
else if (sameString(sort, "chrom,start"))
    {
    slSort(&pslList, pslCmpTargetStart);
    }
else if (sameString(sort, "chrom,score"))
    {
    slSort(&pslList, pslCmpTargetScore);
    }
else
    {
    slSort(&pslList, pslCmpQueryScore);
    }
if(feelingLucky)
    {
    /* If we found something jump browser to there. */
    if(slCount(pslList) > 0)
	printLuckyRedirect(browserUrl, pslList, database, pslName, faName, uiState, unhideTrack);
    /* Otherwise call ourselves again not feeling lucky to print empty 
       results. */
    else 
	{
	cartWebStart(cart, database, "%s BLAT Results", trackHubSkipHubName(organism));
	showAliPlaces(pslName, faName, database, qType, tType, organism, FALSE);
	cartWebEnd();
	}
    }
else if (pslOut)
    {
    printf("<TT><PRE>");
    if (!sameString(output, "psl no header"))
	pslxWriteHead(stdout, qType, tType);
    for (psl = pslList; psl != NULL; psl = psl->next)
	pslTabOut(psl, stdout);
    printf("</PRE></TT>");
    }
else
    {
    printf("<H2>BLAT Search Results</H2>");
    printf("<TT><PRE>");
    printf("   ACTIONS      QUERY           SCORE START  END QSIZE IDENTITY CHRO STRAND  START    END      SPAN\n");
    printf("---------------------------------------------------------------------------------------------------\n");
    for (psl = pslList; psl != NULL; psl = psl->next)
	{
	printf("<A HREF=\"%s?position=%s:%d-%d&db=%s&ss=%s+%s&%s%s\">",
	    browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database, 
	    pslName, faName, uiState, unhideTrack);
	printf("browser</A> ");
	printf("<A HREF=\"%s?o=%d&g=htcUserAli&i=%s+%s+%s&c=%s&l=%d&r=%d&db=%s&%s\">", 
	    hgcUrl, psl->tStart, pslName, cgiEncode(faName), psl->qName,  psl->tName,
	    psl->tStart, psl->tEnd, database, uiState);
	printf("details</A> ");
	printf("%-14s %5d %5d %5d %5d %5.1f%%  %4s  %2s  %9d %9d %6d\n",
	    psl->qName, pslScore(psl), psl->qStart+1, psl->qEnd, psl->qSize,
	    100.0 - pslCalcMilliBad(psl, TRUE) * 0.1,
	    skipChr(psl->tName), psl->strand, psl->tStart+1, psl->tEnd,
	    psl->tEnd - psl->tStart);
	}
    printf("</PRE></TT>");
    }
pslFreeList(&pslList);

}
Beispiel #20
0
void pslFreeListWrapper(void *val)
{
struct psl *pslList = val;
pslFreeList(&pslList);
}
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack,
              struct hash *otherHash, struct stats *stats)
/* Process one chromosome. */
{
    struct bed *bedList = NULL, *bed;
    struct sqlConnection *conn = hAllocConn(database);
    struct sqlResult *sr;
    char **row;
    int rowOffset;
    int chromSize = hChromSize(database, chrom);
    struct binKeeper *bk = binKeeperNew(0, chromSize);
    struct psl *pslList = NULL;
    struct dnaSeq *chromSeq = NULL;

    if (endsWith(bedTrack, ".bed"))
    {
        struct lineFile *lf = lineFileOpen(bedTrack, TRUE);
        char *row[3];
        while (lineFileRow(lf, row))
        {
            if (sameString(chrom, row[0]))
            {
                bed = bedLoad3(row);
                slAddHead(&bedList, bed);
            }
        }
        lineFileClose(&lf);
    }
    else
    {
        sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset);
        while ((row = sqlNextRow(sr)) != NULL)
        {
            bed = bedLoad3(row+rowOffset);
            slAddHead(&bedList, bed);
        }
        sqlFreeResult(&sr);
    }
    slReverse(&bedList);
    uglyf("Loaded beds\n");

    sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset);
    while ((row = sqlNextRow(sr)) != NULL)
    {
        struct psl *psl = pslLoad(row + rowOffset);
        slAddHead(&pslList, psl);
        binKeeperAdd(bk, psl->tStart, psl->tEnd, psl);
    }
    sqlFreeResult(&sr);
    uglyf("Loaded psls\n");

    chromSeq = hLoadChrom(database, chrom);
    /* Fetch entire chromosome into memory. */
    uglyf("Loaded human seq\n");

    for (bed = bedList; bed != NULL; bed = bed->next)
    {
        struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd);
        for (el = list; el != NULL; el = el->next)
        {
            struct psl *fullPsl = el->val;
            struct psl *psl = pslTrimToTargetRange(fullPsl,
                                                   bed->chromStart, bed->chromEnd);
            if (psl != NULL)
            {
                foldPslIntoStats(psl, chromSeq, otherHash, stats);
                pslFree(&psl);
            }
        }
        slFreeList(&list);
        stats->bedCount += 1;
        stats->bedBaseCount += bed->chromEnd - bed->chromStart;
        sqlFreeResult(&sr);
    }
    freeDnaSeq(&chromSeq);
    pslFreeList(&pslList);
    binKeeperFree(&bk);
    hFreeConn(&conn);
}
Beispiel #22
0
static void displayMappingInfo(struct sqlConnection *conn, struct mappingInfo *mi)
/* display information from a transMap table */
{
struct ucscRetroInfo *pg = mi->pg;
double  wt[12];     /* weights on score function*/
char query[512];
char *name;
char alignTbl[128];
char scoreSql[128];
struct psl *psl;
float coverFactor = 0;
float maxOverlap = 0;
if (mi->suffix == NULL)
    {
    safef(alignTbl, sizeof(alignTbl), "%s%sAli", mi->tblPre, mi->geneSet);
    sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo", mi->tblPre, mi->geneSet);
    }
else
    {
    safef(alignTbl, sizeof(alignTbl), "%s%sAli%s", mi->tblPre, mi->geneSet, mi->suffix);
    sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo%s", mi->tblPre, mi->geneSet, mi->suffix);
    }
printf("<TABLE class=\"transMap\">\n");
printf("<H3>Retrogene Statistics:</H3>\n");
printf("<THEAD>\n");
printf("<TR><TH>Feature<TH>Value </TR>\n");
printf("</THEAD><TBODY>\n");
if (sameString(pg->type, "singleExon"))
    printf("<TR><TH>Type of Parent<TD>%s</tr>\n",pg->type);
else 
    printf("<TR><TH>Expression of Retrogene<TD>%s</TR>\n",pg->type);
printf("<TR><TH>Score <TD>%d (range from 0 - %d)</TR>\n",  
        pg->score,
        sqlQuickNum(conn, scoreSql) );
printf("<TR><TH>Parent Gene Alignment Coverage (Bases&nbsp;Matching Parent) <TD>%d %% &nbsp;(%d bp) </TR>\n", pg->coverage, pg->matches);
printf("<TR><TH>Introns Processed Out <TD>%d out of %d (%d exons covered)\n", pg->processedIntrons, (pg->parentSpliceCount/2), pg->exonCover);
printf("<TR><TH>Possible Introns or Gaps in Retrogene<TD>%d,%d\n", pg->intronCount, pg->alignGapCount);
printf("<TR><TH>Conserved Splice Sites<TD>%d</TR>\n",  pg->conservedSpliceSites);
printf("<TR><TH>Parent Splice Sites<TD>%d</TR>\n",  pg->parentSpliceCount);
psl = getAlignments(conn, alignTbl, mi->pg->name);
if (psl != NULL)
    {
    maxOverlap = (float)pg->maxOverlap/(float)(psl->match+psl->misMatch+psl->repMatch)  ;
    coverFactor = ((float)(psl->qSize-psl->qEnd)/(float)psl->qSize);
    }
else 
    {
    maxOverlap = 0;
    }
wt[0] = 0; wt[1] = 0.85; wt[2] = 0.2; wt[3] = 0.3; wt[4] = 0.8; 
wt[5] = 1; wt[6] = 1  ; wt[7] = 0.5; wt[8] = 0.5; wt[9] = 1; wt[10] = 1;
#ifdef debug
char table[512];
struct psl *pslList = getParentAligns(conn, mi, &table);
if (psl != NULL)
    {
    printf("<TR><TH>Blocks in retro:gap%%/intronsSpliced <TD>\n");
    printBlocks(psl, MAXBLOCKGAP, pslList);
    printf("</td></TR>\n");  
    }
if (pslList != NULL)
    {
    printf("<TR><TH>Exons in parent:gap%% <TD>\n");
    printBlocks(pslList, MAXBLOCKGAP, NULL);
    printf("</td></TR>\n");  
    pslFreeList(&pslList);
    }
#endif
printf("<TR><TH>Length of PolyA Tail<TD>%d As&nbsp;out&nbsp;of&nbsp;%d&nbsp;bp </TR><TR><TH>%% A's from Parent PolyA tail (Position)<TD>%5.1f&nbsp;%%\n",pg->polyA,pg->polyAlen, (float)pg->polyA*100/(float)pg->polyAlen);
if (pg->polyAstart < 0)
    printf("&nbsp;(%d&nbsp;bp&nbsp;before&nbsp;end&nbsp;of&nbsp;retrogene)<br>\n",-(pg->polyAstart));
else
    printf("&nbsp;(%d&nbsp;bp&nbsp;past&nbsp;end&nbsp;of&nbsp;retrogene)<br>\n",pg->polyAstart);

printf("<tr><th>mRNA Expression Evidence<td>");
if (!sameString(pg->overName, "none"))
    printf("%s&nbsp;(overlap:&nbsp;&nbsp;%d&nbsp;bp)\n", pg->overName, pg->maxOverlap);
else
    printf("No&nbsp;overlapping");
printf("<TR><TH>BESTORF Score (>50 is good)<TD>%4.0f</td></TR>\n",pg->posConf);
#ifdef score
printf("<TR><TH>score function<TD>1:xon %d %4.1f conSS %d 2: ax %4.1f 3: pA %4.1f 4: net + %4.1f max (%d, %d) 5: procIntrons %d %4.1f 6:in.cnt %d -%4.1f 7:overlap - %4.1f  8:cov %d*(qe %d- qsz %d)/%d=%4.1f 9:tRep - %4.1f 10:oldintron %d %4.1f </td></TR>\n",
                pg->exonCover,
                wt[1]*(log(pg->exonCover+1)/log(2))*200 , 
                pg->conservedSpliceSites,
                wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000),
                wt[3]*(log(pg->polyAlen+2)*200) ,
                wt[4]*overlapOrtholog*10 , pg->overlapMouse, pg->overlapDog,
                pg->processedIntrons,
                wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) ,
                pg->intronCount, 
                wt[6]*pow(pg->intronCount,0.5)*750 ,
                wt[7]*(maxOverlap*300),
                pg->coverage, pg->qEnd, pg->qSize , pg->qSize,
                wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0),
                wt[9]*(pg->tReps*10), 
                pg->alignGapCount,
                wt[10]*pg->alignGapCount);
printf("<TR><TH>score function<TD>%4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f</td></TR>\n",
                wt[1]*(log(pg->exonCover+1)/log(2))*200 , 
                wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000),
                wt[3]*(log(pg->polyAlen+2)*200) ,
                wt[4]*overlapOrtholog*10 , 
                wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) ,
                (float)wt[6]*pow(pg->intronCount,0.5)*750 ,
                (float)wt[7]*(maxOverlap*300),
                wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0),
                wt[9]*(pg->tReps*10), 
                wt[10]*pg->alignGapCount);
if (pg->kaku > 0 && pg->kaku < 1000000)
    printf("<TR><TH>KA/KU mutation rate in non-syn sites vs utr with repect to parent gene<TD>%4.2f</TR>\n",  pg->kaku);
#endif
#ifdef xxx
sqlSafef(query, sizeof(query), "select * from refGene where chrom = '%d' and txEnd > %d and txStart %d and name = '%s'", 
        pg->chrom, pg->gStart, pg->gEnd , pg->overName );
sr = sqlGetResult(conn, query);
if ((row = sqlNextRow(sr)) != NULL)
    overlappingGene = genePredLoad(row);
if (overlappingGene != NULL)
    {
    printf ("CDS exons %d ",genePredcountCdsExons(overlappingGene));
    }

#endif
printf("</tr>\n");
if ( differentString("none",pg->overName) &&
    sqlFieldIndex(conn, "refGene", "exonFrames") != -1)
    {
    sqlSafef(query, sizeof(query), 
            "select concat(exonFrames,'(',cdsStart,')') from refGene where name = '%s' and chrom = '%s'" , 
            pg->overName, pg->chrom);
    if (sqlQuickString(conn, query) != NULL)
        printf("<TR><TH>Frame of retro %s (start)<TD>%s</TR>\n",  
            pg->overName, sqlQuickString(conn, query));
    }

name = cloneString(pg->name);
chopSuffix(name);
sqlSafef(query, sizeof(query), 
        "select concat(exonFrames,'(',cdsStart,')') from rbRetroParent where name like '%s%%' and chrom = '%s'" , 
        name, pg->chrom);
if (hTableExists(database, "rbRetroParent"))
    {
    if ( sqlQuickString(conn, query) != NULL)
        printf("<TR><TH>Frames of mapped parent %s (start)<TD>%s</TR>\n",  
            name, sqlQuickString(conn, query));
    }
printf("</TBODY></TABLE>\n");
}