void filterPsls() { struct psl *origPslList=NULL, *pslList=NULL, *psl=NULL; int startCount=0, stopCount=0; char buff[256]; origPslList = pslLoadAll(pslIn); /* some messages for the user */ startCount = slCount(origPslList); sprintf(buff, "Filtering %d psl using seqIdent=%g and basePct=%g\n", startCount, seqIdent, basePct); msg(buff); /* do our filtering */ pslList = filterBySeqIdentity(seqIdent, origPslList); pslFreeList(&origPslList); origPslList = filterByBasePct(basePct, pslList); /* let the user know we're done */ if(origPslList != NULL) { stopCount = slCount(origPslList); pslWriteAll(origPslList, pslOut, FALSE); pslFreeList(&origPslList); } pslFreeList(&origPslList); pslFreeList(&pslList); sprintf(buff, "After filtering %d of %d are left\n", stopCount, startCount); msg(buff); }
void pslReps(char *inName, char *bestAliName, char *repName) /* Analyse inName and put best alignments for eacmRNA in estAliName. * Put repeat info in repName. */ { struct lineFile *in = pslFileOpen(inName); FILE *bestFile = mustOpen(bestAliName, "w"); FILE *repFile = mustOpen(repName, "w"); int lineSize; char *line; char *words[32]; int wordCount; struct psl *pslList = NULL, *psl = NULL; char lastName[512]; int aliCount = 0; quiet = sameString(bestAliName, "stdout") || sameString(repName, "stdout"); if (coverQSizeFile != NULL) loadCoverQSizes(coverQSizeFile); if (!quiet) printf("Processing %s to %s and %s\n", inName, bestAliName, repName); if (!noHead) pslWriteHead(bestFile); strcpy(lastName, ""); while (lineFileNext(in, &line, &lineSize)) { if (((++aliCount & 0x1ffff) == 0) && !quiet) { printf("."); fflush(stdout); } wordCount = chopTabs(line, words); if (wordCount == 21) psl = pslLoad(words); else if (wordCount == 23) psl = pslxLoad(words); else errAbort("Bad line %d of %s\n", in->lineIx, in->fileName); if (!sameString(lastName, psl->qName)) { doOneAcc(lastName, pslList, bestFile, repFile); pslFreeList(&pslList); safef(lastName, sizeof(lastName), "%s", psl->qName); } slAddHead(&pslList, psl); } doOneAcc(lastName, pslList, bestFile, repFile); pslFreeList(&pslList); lineFileClose(&in); fclose(bestFile); fclose(repFile); if (!quiet) printf("Processed %d alignments\n", aliCount); }
void coordConvRepFree(struct coordConvRep **pEl)
/* Free an individual coordinate conversion report: its message, both
 * coordConv records, the three sequence lists, the three psl lists and
 * finally the report struct itself.  *pEl is set to NULL on return so a
 * repeated call is a no-op.  A NULL *pEl is accepted and ignored. */
{
struct coordConvRep *el = *pEl;

if (el == NULL)
    return;

freeMem(el->msg);
coordConvFree(&el->to);
coordConvFree(&el->from);
freeDnaSeqList(&el->upSeq);
freeDnaSeqList(&el->midSeq);
freeDnaSeqList(&el->downSeq);
pslFreeList(&el->upPsl);
pslFreeList(&el->midPsl);
pslFreeList(&el->downPsl);

/* Release the container too and clear the caller's pointer; without this
 * the struct leaked and a second call would free el->msg again. */
freeMem(el);
*pEl = NULL;
}
int main(int argc, char *argv[]) /* The program */ { struct psl *pslList = NULL, *psl; struct hash *queryHash, *targetHash; struct lineFile *vulg; aaSeq *querySeqs; struct dnaSeq *targetSeqs; if (argc != 5) usage(); /* Load up everything at beginning */ vulg = lineFileOpen(argv[1], TRUE); querySeqs = dnaLoadAll(argv[2]); targetSeqs = dnaLoadAll(argv[3]); queryHash = seqHash(querySeqs); targetHash = seqHash(targetSeqs); /* Main business */ pslList = vulgarToPsl(vulg, queryHash, targetHash); pslWriteAll(pslList, argv[4], FALSE); /* Free up everything */ freeDnaSeqList(&querySeqs); freeDnaSeqList(&targetSeqs); freeHash(&targetHash); freeHash(&queryHash); pslFreeList(&pslList); lineFileClose(&vulg); return 0; }
void outputChunk(struct psl **pPslList, char *tempDir, int midIx, boolean noHead)
/* Write out pslList to a file under tempDir and free it.  An empty list
 * is ignored.  The sort by target is currently commented out, so records
 * are written in list order.  The psl header is skipped when noHead is set. */
{
char path[512];
FILE *out;
struct psl *head = *pPslList;
struct psl *cur;

if (head == NULL)
    return;     /* Empty. */

/* slSort(pPslList, pslCmpTarget);  -- sort left disabled */
makeMidName(tempDir, midIx, path);

if (stripVer)
    {
    /* Truncate the first record's query name at the '.' located by stringIn. */
    char *dot = stringIn(".", head->qName);
    if (dot != NULL)
        *dot = 0;
    }

/* With chunkSize 1 the file is named after the first record's query name. */
if (chunkSize == 1)
    safef(path, sizeof(path), "%s/%s.psl", tempDir, head->qName);

out = mustOpen(path, "w");
if (!noHead)
    pslWriteHead(out);
cur = *pPslList;
while (cur != NULL)
    {
    pslTabOut(cur, out);
    cur = cur->next;
    }
fclose(out);
pslFreeList(pPslList);
}
static void pslPartsWrite(struct pslParts *parts, char *outDir)
/* Write out the current set of partitions to the next numbered part file
 * under outDir, then reset the state to empty.  The part counter is
 * advanced before the file path is built. */
{
int partIx = parts->partNum++;
char *path = getPartPslFile(outDir, partIx);

pslWriteAll(parts->psls, path, FALSE);
freeMem(path);

/* Drop what was just written and start a fresh partition. */
pslFreeList(&parts->psls);
parts->size = 0;
}
void gbGeneTblRebuild(struct gbGeneTbl *ggt, struct gbStatus* status,
                      struct sqlConnection *conn)
/* Rebuild a gene from an alignment that is already loaded in a table:
 * every row of ggt->alnTbl whose qName equals status->acc is loaded and
 * passed to gbGeneTblWrite(). */
{
char qNameClause[128];
struct psl *loaded, *cur;

sqlSafefFrag(qNameClause, sizeof(qNameClause), "qName = \"%s\"", status->acc);
loaded = pslReaderLoadQuery(conn, ggt->alnTbl, qNameClause);

cur = loaded;
while (cur != NULL)
    {
    gbGeneTblWrite(ggt, status, cur, conn);
    cur = cur->next;
    }
pslFreeList(&loaded);
}
static void getAligns(struct sqlConnection *conn, struct hash *refSeqVerInfoTbl, char *outFile)
/* Get requested alignments from database: load every row of refSeqAli,
 * sort by query, and pass each alignment to processPsl() together with
 * the output file handle and refSeqVerInfoTbl. */
{
struct psl *psls = pslReaderLoadQuery(conn, "refSeqAli", NULL);
struct psl *psl;
FILE *fh;

/* slSort needs the ADDRESS of the list head so it can store the new
 * head back.  Passing psls itself made it sort from psls->next on, left
 * the first record unsorted, and dereferenced NULL on an empty result. */
slSort(&psls, pslCmpQuery);

fh = mustOpen(outFile, "w");
for (psl = psls; psl != NULL; psl = psl->next)
    processPsl(fh, refSeqVerInfoTbl, psl);
carefulClose(&fh);
pslFreeList(&psls);
}
static void displayAligns(struct sqlConnection *conn, struct mappingInfo *mi)
/* Display cDNA alignments for the item described by mi.  The alignment
 * table name is built from mi->tblPre, mi->geneSet and mi->suffix. */
{
char tableName[128];
struct psl *aligns;
int start = cartInt(cart, "o");

safef(tableName, sizeof(tableName), "%s%sAli%s",
      mi->tblPre, mi->geneSet, mi->suffix);

/* this should only ever have one alignment */
aligns = getAlignments(conn, tableName, mi->pg->name);

printf("<H3>Retro Locus/Parent mRNA Alignments</H3>");
printRetroAlignments(aligns, start, "hgcRetroCdnaAli", tableName, mi->pg->name);
pslFreeList(&aligns);
}
static float scoreHapRefPair(struct hapRegions *hr, struct cDnaAlign *refAln,
                             struct refChrom *refChrom, struct cDnaAlign *hapAln,
                             struct hapChrom *hapChrom)
/* Score hapAln against refAln by first mapping hapAln onto the reference
 * chromosome and then comparing the two.  Mapping a given hapAln to the
 * reference chromosome might be fragmented into multiple alignments due
 * to multiple mapping alignments, so the total number of non-redundant
 * aligned bases is counted.  Returns -1.0 when mapToRef() yields nothing. */
{
float result;
struct psl *mapped = mapToRef(hr, refChrom, hapAln, hapChrom);

if (mapped == NULL)
    return -1.0;

result = calcHapRefPairScore(hr, refAln, refChrom, hapAln, hapChrom, mapped);
pslFreeList(&mapped);
return result;
}
void pslUnpile(char *inName, char *outName) /* pslUnpile - Removes huge piles of alignments from sorted * psl files (due to unmasked repeats presumably).. */ { FILE *f = mustOpen(outName, "w"); enum gfType qType, tType; struct lineFile *lf; struct psl *list = NULL, *psl, *el; pslxFileOpen(inName, &qType, &tType, &lf); if (!noHead) pslxWriteHead(f, qType, tType); for (;;) { psl = pslNext(lf); if (list != NULL && (psl == NULL || !pslOverlap(psl, list))) { if (list != NULL) { slReverse(&list); if (checkPile(list)) { for (el = list; el != NULL; el = el->next) { pslTabOut(el, f); } } else { for (el = list; el != NULL; el = el->next) { if (psl == NULL) pslTabOut(el, f); else if (psl->tEnd - psl->tStart > 4000) pslTabOut(el, f); } } pslFreeList(&list); } } if (psl == NULL) break; slAddHead(&list, psl); } lineFileClose(&lf); carefulClose(&f); }
void outputAlignmentForStan(struct sqlConnection *conn, struct stanMad *sm, struct hash *iHash, FILE *out) { struct psl *pslList, *bestPsl = NULL; char buff[1024]; int i; struct imageClone *ic = NULL; sprintf(buff, "%d", sm->clid); printf("Looking for %s\n", buff); ic = hashFindVal(iHash, buff); if(ic != NULL) { /* first try looking for the image clones themselves... */ for(i=0; i<ic->numGenbank; i++) { char query[1024]; sqlSafef(query, sizeof query, "select * from all_est where qName='%s'", ic->genbankIds[i]); pslList = pslLoadByQuery(conn, buff); if(pslList != NULL) { slSort(&pslList, pslCmpScore); if(bestPsl == NULL || (pslScore(pslList) > pslScore(bestPsl))) pslFree(&bestPsl); bestPsl = copyPsl(pslList); } pslFreeList(&pslList); } if(bestPsl != NULL) { freez(&bestPsl->qName); sprintf(buff, "%d", sm->clid); bestPsl->qName = cloneString(buff); pslTabOut(bestPsl,out); } else { fprintf(out, "%d\talignment unknown\n", sm->clid); } } else { fprintf(out, "%d\tunknown\n", sm->clid); } }
void retroClickHandler(struct trackDb *tdb, char *mappedId)
/* Handle a click on an item of this track: print the generic header, a
 * layout table holding the source-gene and mapping-info panels, the
 * retro details, the item's alignments, the parent alignments and the
 * track description. */
{
struct sqlConnection *conn = hAllocConn(database);
struct mappingInfo *mi = mappingInfoNew(conn, tdb->table, mappedId);
struct psl *parentAligns = NULL;
char *parentTable;

genericHeader(tdb, mappedId);

/* Row 1: source gene, spanning all columns. */
printf("<TABLE border=0>\n");
printf("<TR CLASS=\"transMapLayout\">\n");
printf("<TD COLSPAN=3>\n");
displaySrcGene(conn, mi);
printf("</TR>\n");

/* Row 2: mapping statistics. */
printf("<TR CLASS=\"transMapLayout\">\n");
printf("<TD>\n");
displayMappingInfo(conn, mi);
printf("<TD>\n");
#if 0
struct geneCheck *gc = displayGeneCheck(conn, &mti, mappedId);
printf("<TD>\n");
displayProtSim(conn, &mti, mappedId);
#endif
printf("</TR>\n");
#if 0
if (!sameString(gc->stat, "ok"))
    {
    printf("<TR CLASS=\"transMapLayout\">\n");
    printf("<TD COLSPAN=3>\n");
    displayGeneCheckDetails(conn, &mti, gc);
    printf("</TR>\n");
    }
#endif
printf("</TABLE>\n");

/* Sections below the layout table. */
displayRetroDetails(conn, mi);
displayAligns(conn, mi);
parentAligns = getParentAligns(conn, mi, &parentTable);
displayParentAligns(mi, parentAligns, parentTable);
pslFreeList(&parentAligns);
printTrackHtml(tdb);

#if 0
geneCheckFree(&gc);
#endif
mappingInfoFree(&mi);
hFreeConn(&conn);
}
void outputChunk(struct psl **pPslList, char *tempDir, int midIx)
/* Sort pslList by target, write it with a psl header to the file that
 * makeMidName() names for tempDir/midIx, and free it.  An empty list is
 * ignored and produces no file. */
{
char path[512];
FILE *out;
struct psl *cur;

if (*pPslList == NULL)
    return;     /* Empty. */

slSort(pPslList, pslCmpTarget);
makeMidName(tempDir, midIx, path);

out = mustOpen(path, "w");
pslWriteHead(out);
cur = *pPslList;
while (cur != NULL)
    {
    pslTabOut(cur, out);
    cur = cur->next;
    }
fclose(out);

pslFreeList(pPslList);
}
boolean simpleOut(FILE *out, FILE *glue, struct psl **pList) /* Write out relevant bits of pList and free pList. * Return TRUE if wrote anything. */ { struct psl *psl, *lastPsl; struct psl *list; int minDiff = 15; boolean isRelevant = FALSE; slReverse(pList); ++outCount; ltot += slCount(*pList); /* Return if size of list less than 2. */ if ((lastPsl = list = *pList) == NULL) return FALSE; if (list->next == NULL) return FALSE; ++mtot; for (psl = lastPsl->next; psl != NULL; psl = psl->next) { if (psl->qStart - lastPsl->qStart >= minDiff && psl->qEnd - lastPsl->qEnd >= minDiff) { isRelevant = TRUE; break; } } if (isRelevant) { for (psl = list; psl != NULL; psl = psl->next) { pslTabOut(psl, out); fprintf(glue, "%s\t%d\t%d\t%s %s\t%d\t%d\n", psl->qName, psl->qStart, psl->qEnd, psl->tName, psl->strand, psl->tStart, psl->tEnd); } } pslFreeList(pList); return isRelevant; }
void dropNotBest(struct psl **pslList)
/* Sorts list with pslCmpScoreDesc and drops any score less than the best
 * score, i.e. the score of the first record after sorting.  Records tied
 * with the best are kept.  A NULL pointer or an empty list is left alone. */
{
struct psl *el;
int bestScore;

/* Validate before touching the list.  The check used to come after
 * slSort() had already used the pointer, and an empty list went on to
 * pslScore(NULL). */
if (pslList == NULL || *pslList == NULL)
    return;

slSort(pslList, pslCmpScoreDesc);
bestScore = pslScore(*pslList);

/* Cut the list at the first record scoring below the best and free the
 * remainder. */
for (el = *pslList; el->next != NULL; el = el->next)
    {
    if (pslScore(el->next) < bestScore)
        {
        struct psl *rest = el->next;
        el->next = NULL;
        pslFreeList(&rest);
        break;
        }
    }
}
struct bed *createBedsFromPsls(char *pslFile, int expCount) /** creates a list of beds from a pslfile, allocates memory for arrays as determined by expCount */ { struct psl *pslList = NULL, *psl = NULL; struct bed *bedList = NULL, *bed = NULL; pslList = pslLoadAll(pslFile); for(psl = pslList; psl != NULL; psl = psl->next) { bed = bedFromPsl(psl); freez(&bed->name); bed->name=parseNameFromHgc(psl->qName); bed->score = 0; bed->expCount = 0; bed->expIds = needMem(sizeof(int)*expCount); bed->expScores = needMem(sizeof(float)*expCount); slAddHead(&bedList,bed); } slReverse(&bedList); pslFreeList(&pslList); return bedList; }
struct altGraphX *agFromGp(char *db, struct genePred *gp, struct sqlConnection *conn, int maxGap, FILE *out) /** Create an altGraphX record by clustering psl records within coordinates specified by genePred record. */ { struct altGraphX *ag = NULL; struct dnaSeq *genoSeq = NULL; struct ggMrnaAli *maList=NULL, *ma=NULL, *maNext=NULL, *maSameStrand=NULL; struct psl *pslList = NULL, *psl = NULL, *pslCluster = NULL, *pslNext = NULL; char *chrom = gp->chrom; int chromStart = BIGNUM; int chromEnd = -1; verbose(2, "agFromGp on %s %s:%d-%d\n", gp->name, gp->chrom, gp->txStart, gp->txEnd); pslList = getPsls(gp, conn); verbose(3, " got %d psls\n", slCount(pslList)); if(slCount(pslList) == 0) { verbose(2, "No available alignments for %s.", gp->name); return NULL; } /* expand to find the furthest boundaries of alignments */ expandToMaxAlignment(pslList, chrom, &chromStart, &chromEnd); verbose(3, " expanded to %s:%d-%d\n", chrom, chromStart, chromEnd); /* get the sequence */ genoSeq = dnaFromChrom(db, chrom, chromStart, chromEnd, dnaLower); for(psl = pslList; psl != NULL; psl = pslNext) { pslNext = psl->next; if(singleExonOk || pslHasIntron(psl, genoSeq, chromStart)) { slAddHead(&pslCluster, psl); } else { if(!useChromKeeper) pslFree(&psl); } } verbose(3, " got %d psls after intron/singleExon check\n", slCount(pslCluster)); /* load and check the alignments */ maList = pslListToGgMrnaAliList(pslCluster, gp->chrom, chromStart, chromEnd, genoSeq, maxGap); verbose(3, " got %d in maList\n", slCount(maList)); for(ma = maList; ma != NULL; ma = maNext) { maNext = ma->next; verbose(4, " ma->strand %s, gp->strand %s\n", ma->strand, gp->strand); if(ma->strand[0] == gp->strand[0]) { slSafeAddHead(&maSameStrand, ma); } else ggMrnaAliFree(&ma); } slReverse(&maSameStrand); verbose(3, " got %d in ma on same strand\n", slCount(maSameStrand)); /* If there is a cluster to work with create an geneGraph */ if(maSameStrand != NULL) { ag = agFromAlignments(db, maSameStrand, genoSeq, conn, chromStart, 
chromEnd, out); } else { dnaSeqFree(&genoSeq); ggMrnaAliFreeList(&maSameStrand); } /* Only free psls if not using cache... */ if(!useChromKeeper) pslFreeList(&pslCluster); return ag; }
void showAliPlaces(char *pslName, char *faName, char *database,
                   enum gfType qType, enum gfType tType,
                   char *organism, boolean feelingLucky)
/* Show all the places that align.
 * Reads the alignments in pslName, keeps those with at least
 * minMatchShown matches (no minimum for non-nucleotide queries), sorts
 * them according to the cart "sort" setting, and then either
 *   - redirects the browser to the first hit (feelingLucky),
 *   - prints raw psl (cart "output" starts with "psl"), or
 *   - prints the hyperlinked results table.
 * With no hits a "no matches" message is printed; in feelingLucky mode
 * that message is wrapped in cartWebStart()/cartWebEnd(). */
{
struct lineFile *lf = pslFileOpen(pslName);
struct psl *pslList = NULL, *psl;
char *browserUrl = hgTracksName();
char *hgcUrl = hgcName();
char uiState[64];
char *vis;
char unhideTrack[64];
char *sort = cartUsualString(cart, "sort", sortList[0]);
char *output = cartUsualString(cart, "output", outputList[0]);
boolean pslOut = startsWith("psl", output);
boolean isStraightNuc = (qType == gftRna || qType == gftDna);
int minThreshold = (isStraightNuc ? minMatchShown : 0);
char *noMatches = "<table><tr><td><hr>Sorry, no matches found<hr><td></tr></table>";

/* Bounded write, matching the snprintf already used for unhideTrack;
 * the session id comes from outside and uiState is only 64 bytes. */
snprintf(uiState, sizeof(uiState), "%s=%s", cartSessionVarName(), cartSessionId(cart));

/* If user has hidden BLAT track, add a setting that will unhide the
   track if user clicks on a browser link. */
vis = cartOptionalString(cart, "hgUserPsl");
if (vis != NULL && sameString(vis, "hide"))
    snprintf(unhideTrack, sizeof(unhideTrack), "&hgUserPsl=dense");
else
    unhideTrack[0] = 0;

while ((psl = pslNext(lf)) != NULL)
    {
    if (psl->match >= minThreshold)
        slAddHead(&pslList, psl);
    }
lineFileClose(&lf);

if (pslList == NULL)
    {
    if (feelingLucky)
        {
        /* Nothing to redirect to, so print the empty result as a regular
         * page.  The branch meant for this further down could never run,
         * because an empty list always returned here first. */
        cartWebStart(cart, database, "%s BLAT Results", trackHubSkipHubName(organism));
        puts(noMatches);
        cartWebEnd();
        }
    else
        {
        puts(noMatches);
        }
    return;
    }

/* Unrecognized sort settings fall back to query,score. */
if (sameString(sort, "query,start"))
    {
    slSort(&pslList, pslCmpQuery);
    }
else if (sameString(sort, "query,score"))
    {
    slSort(&pslList, pslCmpQueryScore);
    }
else if (sameString(sort, "score"))
    {
    slSort(&pslList, pslCmpScore);
    }
else if (sameString(sort, "chrom,start"))
    {
    slSort(&pslList, pslCmpTargetStart);
    }
else if (sameString(sort, "chrom,score"))
    {
    slSort(&pslList, pslCmpTargetScore);
    }
else
    {
    slSort(&pslList, pslCmpQueryScore);
    }

if (feelingLucky)
    {
    /* The list is known to be non-empty here: jump browser to the hit. */
    printLuckyRedirect(browserUrl, pslList, database, pslName, faName, uiState, unhideTrack);
    }
else if (pslOut)
    {
    printf("<TT><PRE>");
    if (!sameString(output, "psl no header"))
        pslxWriteHead(stdout, qType, tType);
    for (psl = pslList; psl != NULL; psl = psl->next)
        pslTabOut(psl, stdout);
    printf("</PRE></TT>");
    }
else
    {
    printf("<H2>BLAT Search Results</H2>");
    printf("<TT><PRE>");
    printf(" ACTIONS QUERY SCORE START END QSIZE IDENTITY CHRO STRAND START END SPAN\n");
    printf("---------------------------------------------------------------------------------------------------\n");
    for (psl = pslList; psl != NULL; psl = psl->next)
        {
        /* Link into the genome browser at the hit position. */
        printf("<A HREF=\"%s?position=%s:%d-%d&db=%s&ss=%s+%s&%s%s\">",
               browserUrl, psl->tName, psl->tStart + 1, psl->tEnd, database,
               pslName, faName, uiState, unhideTrack);
        printf("browser</A> ");
        /* Link to the alignment details page. */
        printf("<A HREF=\"%s?o=%d&g=htcUserAli&i=%s+%s+%s&c=%s&l=%d&r=%d&db=%s&%s\">",
               hgcUrl, psl->tStart, pslName, cgiEncode(faName), psl->qName,
               psl->tName, psl->tStart, psl->tEnd, database, uiState);
        printf("details</A> ");
        printf("%-14s %5d %5d %5d %5d %5.1f%% %4s %2s %9d %9d %6d\n",
               psl->qName, pslScore(psl), psl->qStart+1, psl->qEnd, psl->qSize,
               100.0 - pslCalcMilliBad(psl, TRUE) * 0.1,
               skipChr(psl->tName), psl->strand, psl->tStart+1, psl->tEnd,
               psl->tEnd - psl->tStart);
        }
    printf("</PRE></TT>");
    }
pslFreeList(&pslList);
}
void pslFreeListWrapper(void *val)
/* Free the psl list that val points at.  Takes an untyped pointer so it
 * can be used where a generic one-argument free function is expected.
 * Only the local copy of the pointer is cleared, not the caller's. */
{
struct psl *list = (struct psl *)val;

pslFreeList(&list);
}
void oneChrom(char *database, char *chrom, char *refAliTrack, char *bedTrack, struct hash *otherHash, struct stats *stats) /* Process one chromosome. */ { struct bed *bedList = NULL, *bed; struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; int rowOffset; int chromSize = hChromSize(database, chrom); struct binKeeper *bk = binKeeperNew(0, chromSize); struct psl *pslList = NULL; struct dnaSeq *chromSeq = NULL; if (endsWith(bedTrack, ".bed")) { struct lineFile *lf = lineFileOpen(bedTrack, TRUE); char *row[3]; while (lineFileRow(lf, row)) { if (sameString(chrom, row[0])) { bed = bedLoad3(row); slAddHead(&bedList, bed); } } lineFileClose(&lf); } else { sr = hChromQuery(conn, bedTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { bed = bedLoad3(row+rowOffset); slAddHead(&bedList, bed); } sqlFreeResult(&sr); } slReverse(&bedList); uglyf("Loaded beds\n"); sr = hChromQuery(conn, refAliTrack, chrom, NULL, &rowOffset); while ((row = sqlNextRow(sr)) != NULL) { struct psl *psl = pslLoad(row + rowOffset); slAddHead(&pslList, psl); binKeeperAdd(bk, psl->tStart, psl->tEnd, psl); } sqlFreeResult(&sr); uglyf("Loaded psls\n"); chromSeq = hLoadChrom(database, chrom); /* Fetch entire chromosome into memory. */ uglyf("Loaded human seq\n"); for (bed = bedList; bed != NULL; bed = bed->next) { struct binElement *el, *list = binKeeperFind(bk, bed->chromStart, bed->chromEnd); for (el = list; el != NULL; el = el->next) { struct psl *fullPsl = el->val; struct psl *psl = pslTrimToTargetRange(fullPsl, bed->chromStart, bed->chromEnd); if (psl != NULL) { foldPslIntoStats(psl, chromSeq, otherHash, stats); pslFree(&psl); } } slFreeList(&list); stats->bedCount += 1; stats->bedBaseCount += bed->chromEnd - bed->chromStart; sqlFreeResult(&sr); } freeDnaSeq(&chromSeq); pslFreeList(&pslList); binKeeperFree(&bk); hFreeConn(&conn); }
static void displayMappingInfo(struct sqlConnection *conn, struct mappingInfo *mi) /* display information from a transMap table */ { struct ucscRetroInfo *pg = mi->pg; double wt[12]; /* weights on score function*/ char query[512]; char *name; char alignTbl[128]; char scoreSql[128]; struct psl *psl; float coverFactor = 0; float maxOverlap = 0; if (mi->suffix == NULL) { safef(alignTbl, sizeof(alignTbl), "%s%sAli", mi->tblPre, mi->geneSet); sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo", mi->tblPre, mi->geneSet); } else { safef(alignTbl, sizeof(alignTbl), "%s%sAli%s", mi->tblPre, mi->geneSet, mi->suffix); sqlSafef(scoreSql, sizeof(scoreSql), "select max(score) from %s%sInfo%s", mi->tblPre, mi->geneSet, mi->suffix); } printf("<TABLE class=\"transMap\">\n"); printf("<H3>Retrogene Statistics:</H3>\n"); printf("<THEAD>\n"); printf("<TR><TH>Feature<TH>Value </TR>\n"); printf("</THEAD><TBODY>\n"); if (sameString(pg->type, "singleExon")) printf("<TR><TH>Type of Parent<TD>%s</tr>\n",pg->type); else printf("<TR><TH>Expression of Retrogene<TD>%s</TR>\n",pg->type); printf("<TR><TH>Score <TD>%d (range from 0 - %d)</TR>\n", pg->score, sqlQuickNum(conn, scoreSql) ); printf("<TR><TH>Parent Gene Alignment Coverage (Bases Matching Parent) <TD>%d %% (%d bp) </TR>\n", pg->coverage, pg->matches); printf("<TR><TH>Introns Processed Out <TD>%d out of %d (%d exons covered)\n", pg->processedIntrons, (pg->parentSpliceCount/2), pg->exonCover); printf("<TR><TH>Possible Introns or Gaps in Retrogene<TD>%d,%d\n", pg->intronCount, pg->alignGapCount); printf("<TR><TH>Conserved Splice Sites<TD>%d</TR>\n", pg->conservedSpliceSites); printf("<TR><TH>Parent Splice Sites<TD>%d</TR>\n", pg->parentSpliceCount); psl = getAlignments(conn, alignTbl, mi->pg->name); if (psl != NULL) { maxOverlap = (float)pg->maxOverlap/(float)(psl->match+psl->misMatch+psl->repMatch) ; coverFactor = ((float)(psl->qSize-psl->qEnd)/(float)psl->qSize); } else { maxOverlap = 0; } wt[0] = 0; wt[1] = 0.85; wt[2] 
= 0.2; wt[3] = 0.3; wt[4] = 0.8; wt[5] = 1; wt[6] = 1 ; wt[7] = 0.5; wt[8] = 0.5; wt[9] = 1; wt[10] = 1; #ifdef debug char table[512]; struct psl *pslList = getParentAligns(conn, mi, &table); if (psl != NULL) { printf("<TR><TH>Blocks in retro:gap%%/intronsSpliced <TD>\n"); printBlocks(psl, MAXBLOCKGAP, pslList); printf("</td></TR>\n"); } if (pslList != NULL) { printf("<TR><TH>Exons in parent:gap%% <TD>\n"); printBlocks(pslList, MAXBLOCKGAP, NULL); printf("</td></TR>\n"); pslFreeList(&pslList); } #endif printf("<TR><TH>Length of PolyA Tail<TD>%d As out of %d bp </TR><TR><TH>%% A's from Parent PolyA tail (Position)<TD>%5.1f %%\n",pg->polyA,pg->polyAlen, (float)pg->polyA*100/(float)pg->polyAlen); if (pg->polyAstart < 0) printf(" (%d bp before end of retrogene)<br>\n",-(pg->polyAstart)); else printf(" (%d bp past end of retrogene)<br>\n",pg->polyAstart); printf("<tr><th>mRNA Expression Evidence<td>"); if (!sameString(pg->overName, "none")) printf("%s (overlap: %d bp)\n", pg->overName, pg->maxOverlap); else printf("No overlapping"); printf("<TR><TH>BESTORF Score (>50 is good)<TD>%4.0f</td></TR>\n",pg->posConf); #ifdef score printf("<TR><TH>score function<TD>1:xon %d %4.1f conSS %d 2: ax %4.1f 3: pA %4.1f 4: net + %4.1f max (%d, %d) 5: procIntrons %d %4.1f 6:in.cnt %d -%4.1f 7:overlap - %4.1f 8:cov %d*(qe %d- qsz %d)/%d=%4.1f 9:tRep - %4.1f 10:oldintron %d %4.1f </td></TR>\n", pg->exonCover, wt[1]*(log(pg->exonCover+1)/log(2))*200 , pg->conservedSpliceSites, wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000), wt[3]*(log(pg->polyAlen+2)*200) , wt[4]*overlapOrtholog*10 , pg->overlapMouse, pg->overlapDog, pg->processedIntrons, wt[5]*(((log(pg->processedIntrons > 0 ? 
pg->processedIntrons : 1))/log(2))*600) , pg->intronCount, wt[6]*pow(pg->intronCount,0.5)*750 , wt[7]*(maxOverlap*300), pg->coverage, pg->qEnd, pg->qSize , pg->qSize, wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0), wt[9]*(pg->tReps*10), pg->alignGapCount, wt[10]*pg->alignGapCount); printf("<TR><TH>score function<TD>%4.1f+ %4.1f+ %4.1f+ %4.1f+ %4.1f - %4.1f - %4.1f+ %4.1f - %4.1f - %4.1f</td></TR>\n", wt[1]*(log(pg->exonCover+1)/log(2))*200 , wt[2]*(((log(pg->axtScore>0?pg->axtScore:1)/log(2))*170)-1000), wt[3]*(log(pg->polyAlen+2)*200) , wt[4]*overlapOrtholog*10 , wt[5]*(((log(pg->processedIntrons > 0 ? pg->processedIntrons : 1))/log(2))*600) , (float)wt[6]*pow(pg->intronCount,0.5)*750 , (float)wt[7]*(maxOverlap*300), wt[8]*((pg->coverage/100.0)*(1.0-coverFactor)*300.0), wt[9]*(pg->tReps*10), wt[10]*pg->alignGapCount); if (pg->kaku > 0 && pg->kaku < 1000000) printf("<TR><TH>KA/KU mutation rate in non-syn sites vs utr with repect to parent gene<TD>%4.2f</TR>\n", pg->kaku); #endif #ifdef xxx sqlSafef(query, sizeof(query), "select * from refGene where chrom = '%d' and txEnd > %d and txStart %d and name = '%s'", pg->chrom, pg->gStart, pg->gEnd , pg->overName ); sr = sqlGetResult(conn, query); if ((row = sqlNextRow(sr)) != NULL) overlappingGene = genePredLoad(row); if (overlappingGene != NULL) { printf ("CDS exons %d ",genePredcountCdsExons(overlappingGene)); } #endif printf("</tr>\n"); if ( differentString("none",pg->overName) && sqlFieldIndex(conn, "refGene", "exonFrames") != -1) { sqlSafef(query, sizeof(query), "select concat(exonFrames,'(',cdsStart,')') from refGene where name = '%s' and chrom = '%s'" , pg->overName, pg->chrom); if (sqlQuickString(conn, query) != NULL) printf("<TR><TH>Frame of retro %s (start)<TD>%s</TR>\n", pg->overName, sqlQuickString(conn, query)); } name = cloneString(pg->name); chopSuffix(name); sqlSafef(query, sizeof(query), "select concat(exonFrames,'(',cdsStart,')') from rbRetroParent where name like '%s%%' and chrom = '%s'" , name, 
pg->chrom); if (hTableExists(database, "rbRetroParent")) { if ( sqlQuickString(conn, query) != NULL) printf("<TR><TH>Frames of mapped parent %s (start)<TD>%s</TR>\n", name, sqlQuickString(conn, query)); } printf("</TBODY></TABLE>\n"); }