/*
 * Assemble the cDNA string of a prediction transcript.
 *
 * The exons returned by PredictionTranscript_getAllExons(trans, 0) are
 * walked in order and their sequence strings are concatenated.  Before the
 * first real exon is appended, SeqUtil_addNs() is called with that exon's
 * phase.  Only the first exon is padded this way; phase changes between
 * later exons are not compensated.
 *
 * NULL entries in the exon vector are skipped while the cDNA is still
 * empty; once something has been appended, the first NULL entry ends the
 * walk.
 *
 * Returns the string built via StrUtil_copyString/StrUtil_appendString.
 * NOTE(review): the exon vector is not freed here - ownership of the
 * vector returned by getAllExons should be confirmed.
 */
char *PredictionTranscript_getcDNA(PredictionTranscript *trans) {
  Vector *exonList = PredictionTranscript_getAllExons(trans, 0);
  char *cdna = StrUtil_copyString(&cdna, "", 0);
  int padPending = 1;
  int idx;

  for (idx = 0; idx < Vector_getNumElement(exonList); idx++) {
    PredictionExon *curExon = Vector_getElementAt(exonList, idx);
    int exonPhase;

    if (curExon == NULL) {
      /* leading gaps are ignored, a gap after real sequence stops the walk */
      if (cdna[0] != '\0') {
        break;
      }
      continue;
    }

    exonPhase = PredictionExon_getPhase(curExon);

    /* start padding is applied once, for the first real exon */
    if (padPending) {
      cdna = SeqUtil_addNs(cdna, exonPhase);
      padPending = 0;
    }

    cdna = StrUtil_appendString(cdna, PredictionExon_getSeqString(curExon));
  }

  return cdna;
}
/*
 * Helper: create one derived GenomicAlign from the accumulated result
 * coordinates and cigar string, and add it to mergedAligns.
 *
 * The consensus side is taken from alignA and the query side from alignB.
 * The query strand is the product of both query strands.  rqs/rqe are
 * relative to alignB's query and are converted to absolute coordinates
 * here (counted from the query start on strand 1, from the query end
 * otherwise).  Score is the smaller of the two input scores; percent
 * identity is the truncated product of both percent identities over 100.
 */
static void GenomicAlignAdaptor_pushDerivedAlign(GenomicAlignAdaptor *gaa, Vector *mergedAligns,
                                                 GenomicAlign *alignA, GenomicAlign *alignB,
                                                 char *cigar,
                                                 int rcs, int rce, int rqs, int rqe) {
  int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
  int queryStart, queryEnd;
  double percId;
  double score;
  GenomicAlign *ga;

  if (queryStrand == 1) {
    queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
    queryEnd   = rqe + GenomicAlign_getQueryStart(alignB) - 1;
  } else {
    queryEnd   = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
    queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
  }

  score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ?
           GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
  percId = (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);

  ga = GenomicAlign_new();
  GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
  GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
  GenomicAlign_setCigarString(ga, cigar);
  GenomicAlign_setConsensusStart(ga, rcs);
  GenomicAlign_setConsensusEnd(ga, rce);
  GenomicAlign_setQueryStrand(ga, queryStrand);
  GenomicAlign_setQueryStart(ga, queryStart);
  GenomicAlign_setQueryEnd(ga, queryEnd);
  GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
  GenomicAlign_setPercentId(ga, percId);
  GenomicAlign_setScore(ga, score);

  Vector_addElement(mergedAligns, ga);
}

/*
 * Derive alignments between alignA's consensus and alignB's query by
 * walking both cigar strings in parallel over the sequence they share
 * (alignA's query / alignB's consensus), and add every derived
 * GenomicAlign to mergedAligns.  Nothing is returned.
 *
 * Variable name explanation:
 *   q - query      c - consensus   s - start   e - end   l - last
 *   o, ov - overlap   j - jump_in_   r - result
 *
 * The cigar pieces of both inputs are consumed through
 * GenomicAlignAdaptor_nextCig, which receives pointers to the current
 * start/end coordinates.
 * NOTE(review): nextCig is not visible here; it is assumed to advance the
 * cursor and update the coordinates it is given - confirm.
 */
void GenomicAlignAdaptor_addDerivedAlignments(GenomicAlignAdaptor *gaa, Vector *mergedAligns,
                                              GenomicAlign *alignA, GenomicAlign *alignB) {
  int qs, qe, lqe, cs, ce, lce, ocs, oce, oqs, oqe, jc, jq, ovs, ove, rcs, rce, rqs, rqe;
  int currentMatch = 0;
  int newMatch;
  int cigAPos = 0, cigBPos = 0;
  char *resultCig;
  char tmpStr[128];

  // initialization phase
  Vector *cigA = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignA));
  Vector *cigB = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignB));

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    Vector_reverse(cigB);
  }

  // need a 'normalized' start for qs, qe, oxs so we don't
  // have to check strandedness all the time

  // consensus is strand 1 and is not compared to anything,
  // can keep its original coordinate system
  lce = GenomicAlign_getConsensusStart(alignA) - 1;
  ce = lce;
  cs = ce + 1;

  // alignB's query can be + or -, just keep relative coords for now
  lqe = 0;
  qe = 0;
  qs = 1;

  // ocs will be found relative to oce and has to be comparable
  // to oqs. But it could be that we have to move downwards if we
  // are not on the - strand. That's why coordinates are transformed here
  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    // query_end is first basepair of alignment
    if (GenomicAlign_getQueryEnd(alignA) < GenomicAlign_getConsensusEnd(alignB)) {
      oce = 0;
      ocs = 1;
      oqe = GenomicAlign_getConsensusEnd(alignB) - GenomicAlign_getQueryEnd(alignA);
      oqs = oqe + 1;
    } else {
      oqe = 0;
      oqs = 1;
      oce = GenomicAlign_getQueryEnd(alignA) - GenomicAlign_getConsensusEnd(alignB);
      ocs = oce + 1;
    }
  } else {
    // in theory no coordinate magic necessary :-)
    oqs = GenomicAlign_getQueryStart(alignA);
    oqe = oqs - 1;
    ocs = GenomicAlign_getConsensusStart(alignB);
    oce = ocs - 1;
  }

  // initializing result
  rcs = rce = rqs = rqe = 0;
  resultCig = StrUtil_copyString(&resultCig,"",0);

  while (1) {
    int advanceB;
    int advanceA;

    // exit if a new piece of alignment is requested and the cig list is empty
    if (oce < ocs || oce < oqs) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe );
      continue;
    }
    if (oqe < oqs || oqe < ocs) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
      continue;
    }

    // now matching region overlap in reference genome
    ovs = ocs < oqs ? oqs : ocs;
    ove = oce < oqe ? oce : oqe;

    // jumps on consensus / query since the last stored position
    if (currentMatch) {
      jc = cs + (ovs - oqs) - lce - 1;
      jq = qs + (ovs - ocs) - lqe - 1;
    } else {
      jc = jq = 0;
    }

    newMatch = ove - ovs + 1;

    if (jc==0) {
      if (jq==0) {
        // contiguous on both sides: extend the running match
        currentMatch += newMatch;
      } else {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

        // jq deletions;
        if (jq == 1) {
          resultCig = StrUtil_appendString(resultCig,"D");
        } else {
          sprintf(tmpStr,"%dD",jq);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
        currentMatch = newMatch;
      }
    } else {
      if (jq==0) {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

        // jc insertions;
        if (jc==1) {
          resultCig = StrUtil_appendString(resultCig,"I");
        } else {
          sprintf(tmpStr,"%dI",jc);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
        currentMatch = newMatch;
      } else {
        // jump on both sides: close the current alignment and start a new one
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

        GenomicAlignAdaptor_pushDerivedAlign(gaa, mergedAligns, alignA, alignB,
                                             resultCig, rcs, rce, rqs, rqe);

        rcs = rce = rqs = rqe = 0;
        resultCig[0] = '\0';

        currentMatch = newMatch;
      }
    }

    if (!rcs) rcs = cs+(ovs-oqs);
    rce = cs+(ove-oqs);
    if (!rqs) rqs = qs+(ovs-ocs);
    rqe = qs+(ove-ocs);

    // update the last positions
    lce = rce;
    lqe = rqe;

    // Next piece on the one that ends earlier (both when they end together).
    // Both decisions are taken BEFORE advancing: nextCig is handed &oce / &oqe,
    // so comparing again after the first advance could wrongly advance cigA
    // as well and skip an overlap.
    advanceB = (oce <= oqe);
    advanceA = (oce >= oqe);

    if (advanceB) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe );
    }
    if (advanceA) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
    }
  } // end of while loop

  // if there is a last floating current match
  if (currentMatch) {
    sprintf(tmpStr,"%dM",currentMatch);
    resultCig = StrUtil_appendString(resultCig, tmpStr);

    GenomicAlignAdaptor_pushDerivedAlign(gaa, mergedAligns, alignA, alignB,
                                         resultCig, rcs, rce, rqs, rqe);
  }

  free(resultCig);

  Vector_free(cigA);
  Vector_free(cigB);

  // nothing to return, all in mergedAligns
}
/*
 * Store all GenomicAligns of a vector in the genomic_align_block table
 * with a single multi-row INSERT statement.
 *
 * The store is all-or-nothing with respect to validation: if any
 * alignment references a DNAFrag without a dbID, has an alignment type
 * with no method_link entry, or produces a row that does not fit the
 * formatting buffer, an error is printed to stderr and nothing is sent to
 * the database.  An empty vector is a no-op.
 *
 * gaa            adaptor used to prepare the statement and to look up
 *                method link ids
 * genomicAligns  Vector of GenomicAlign pointers to store
 */
void GenomicAlignAdaptor_store(GenomicAlignAdaptor *gaa, Vector *genomicAligns) {
  enum { ROW_BUF_SIZE = 65556 };
  int ok = 1;
  char *qStr = NULL;
  StatementHandle *sth;
  char commaStr[2] = {'\0','\0'};
  int i;
  char *tmpStr = NULL;

  if ((tmpStr = (char *)calloc(ROW_BUF_SIZE,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating tmpStr\n");
    ok = 0;
  }

  if (ok) {
    StrUtil_copyString(&qStr, "INSERT INTO genomic_align_block"
               " (consensus_dnafrag_id, consensus_start, consensus_end,"
               "  query_dnafrag_id, query_start, query_end, query_strand, method_link_id,"
               "  score, perc_id, cigar_line) VALUES ",0);

    // first pass: check that everything has dbIDs before building any row
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      GenomicAlign *ga = Vector_getElementAt(genomicAligns,i);
      DNAFrag *consDNAFrag  = GenomicAlign_getConsensusDNAFrag(ga);
      DNAFrag *queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);

      if (!DNAFrag_getDbID(consDNAFrag) || !DNAFrag_getDbID(queryDNAFrag)) {
        fprintf(stderr, "Error: dna_fragment in GenomicAlign is not in DB\n");
        ok = 0;
        break;
      }
    }
  }

  if (ok) {
    // all clear for storing: append one VALUES tuple per alignment
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      GenomicAlign *ga = Vector_getElementAt(genomicAligns,i);
      DNAFrag *consDNAFrag  = GenomicAlign_getConsensusDNAFrag(ga);
      DNAFrag *queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);
      IDType methodLinkId = GenomicAlignAdaptor_methodLinkIdByAlignmentType(gaa, GenomicAlign_getAlignmentType(ga));
      int rowLen;

      if (!methodLinkId) {
        fprintf(stderr, "Error: There is no method_link with this type [%s] in the DB.\n",
                GenomicAlign_getAlignmentType(ga));
        ok = 0;
        break;
      }

      // bounded formatting: the cigar line has no fixed maximum length
      rowLen = snprintf(tmpStr, ROW_BUF_SIZE,
                        " %s(" IDFMTSTR ", %d, %d, " IDFMTSTR ", %d, %d, %d, " IDFMTSTR ", %f, %f, '%s')",
                        commaStr,
                        DNAFrag_getDbID(consDNAFrag),
                        GenomicAlign_getConsensusStart(ga),
                        GenomicAlign_getConsensusEnd(ga),
                        DNAFrag_getDbID(queryDNAFrag),
                        GenomicAlign_getQueryStart(ga),
                        GenomicAlign_getQueryEnd(ga),
                        GenomicAlign_getQueryStrand(ga),
                        methodLinkId,
                        GenomicAlign_getScore(ga),
                        GenomicAlign_getPercentId(ga),
                        GenomicAlign_getCigarString(ga));

      if (rowLen < 0 || rowLen >= ROW_BUF_SIZE) {
        fprintf(stderr, "Error: GenomicAlign row is too long to store\n");
        ok = 0;
        break;
      }

      qStr = StrUtil_appendString(qStr, tmpStr);
      // every tuple after the first is preceded by a comma
      commaStr[0] = ',';
    }
  }

  // only talk to the database with a complete statement holding at least one row
  if (ok && Vector_getNumElement(genomicAligns) > 0) {
    sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
    sth->execute(sth);
    sth->finish(sth);
  }

  free(qStr);
  free(tmpStr);
}
Vector *GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect( GenomicAlignAdaptor *gaa, DNAFrag *dnaFrag, GenomeDB *targetGenome, int *startP, int *endP, IDType methodLinkId) { IDType dnaFragId; GenomeDB *genomeDB; char *qStr = NULL; char tmpStr[512]; Vector *results; StatementHandle *sth; int ok = 0; if (!dnaFrag) { fprintf(stderr, "Error: Input dnafrag must not be NULL\n"); ok = 0; } if (ok) { // formatting the dnafrag dnaFragId = DNAFrag_getDbID(dnaFrag); genomeDB = DNAFrag_getGenomeDB(dnaFrag); StrUtil_copyString(&qStr, "SELECT gab.consensus_dnafrag_id," " gab.consensus_start," " gab.consensus_end," " gab.query_dnafrag_id," " gab.query_start," " gab.query_end," " gab.query_strand," " gab.method_link_id," " gab.score," " gab.perc_id," " gab.cigar_line" " FROM genomic_align_block gab ",0); if (targetGenome) { qStr = StrUtil_appendString(qStr,", dnafrag d"); } sprintf(tmpStr," WHERE gab.method_link_id = " IDFMTSTR, methodLinkId); qStr = StrUtil_appendString(qStr,tmpStr); results = Vector_new(); if (!targetGenome || GenomeDB_hasQuery(genomeDB, targetGenome, methodLinkId)) { Vector *qres; sprintf(tmpStr," AND gab.consensus_dnafrag_id = " IDFMTSTR, dnaFragId); qStr = StrUtil_appendString(qStr, tmpStr); if (startP && endP) { int lowerBound = *startP - gaa->maxAlignmentLength; sprintf(tmpStr, " AND gab.consensus_start <= %d" " AND gab.consensus_start >= %d" " AND gab.consensus_end >= %d", *endP, lowerBound, *startP ) ; qStr = StrUtil_appendString(qStr, tmpStr); } if (targetGenome) { sprintf(tmpStr, " AND gab.query_dnafrag_id = d.dnafrag_id" " AND d.genome_db_id = " IDFMTSTR, GenomeDB_getDbID(targetGenome)); qStr = StrUtil_appendString(qStr, tmpStr); } sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr)); sth->execute(sth); qres = GenomicAlignAdaptor_objectsFromStatementHandle(gaa, sth, 0); Vector_append(results,qres); Vector_free(qres); sth->finish(sth); } if (!targetGenome || GenomeDB_hasConsensus(genomeDB, targetGenome, methodLinkId)) { Vector *cres; 
sprintf(tmpStr," AND gab.query_dnafrag_id = " IDFMTSTR, dnaFragId); qStr = StrUtil_appendString(qStr, tmpStr); if (startP && endP) { int lowerBound = *startP - gaa->maxAlignmentLength; sprintf(tmpStr, " AND gab.query_start <= %d" " AND gab.query_start >= %d" " AND gab.query_end >= %d", *endP, lowerBound, *startP ) ; qStr = StrUtil_appendString(qStr, tmpStr); } if (targetGenome) { sprintf(tmpStr, " AND gab.consensus_dnafrag_id = d.dnafrag_id" " AND d.genome_db_id = " IDFMTSTR, GenomeDB_getDbID(targetGenome)); qStr = StrUtil_appendString(qStr, tmpStr); } sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr)); sth->execute(sth); cres = GenomicAlignAdaptor_objectsFromStatementHandle(gaa, sth, 1); Vector_append(results,cres); Vector_free(cres); sth->finish(sth); } } if (qStr) free(qStr); return results; }