Example #1
0
char *PredictionTranscript_getcDNA(PredictionTranscript *trans) {
    Vector *exons = PredictionTranscript_getAllExons(trans,0);
    char *cdna = StrUtil_copyString(&cdna, "", 0);
    //int lastPhase = 0;
    int i;
    int first = 1;

    int cdnaStart;
    int pepStart;

    cdnaStart = 1;
    pepStart = 1;

    for (i=0; i<Vector_getNumElement(exons); i++) {
        PredictionExon *exon = Vector_getElementAt(exons, i);
        int phase;
        if (!exon) {
            if (cdna[0] == '\0') {
                continue;
            } else {
                break;
            }
        }

        phase = 0;

// NIY    if (defined($exon->phase)) {
        phase = PredictionExon_getPhase(exon);
//    }

        //fprintf(stderr, " phase for exon %d is %d\n", i, phase);

        if (first) {
            cdna = SeqUtil_addNs(cdna,phase);
            first = 0;
        }

        /*
        // Hack for now - should never happen
            if (phase != lastPhase ) {

              if (lastPhase == 1) {
        	cdna = StrUtil_appendString(cdna,"NN");
              } else if (lastPhase == 2) {
        	cdna = StrUtil_appendString(cdna,"N");
              }

              // startpadding for this exon
              cdna = SeqUtil_addNs(cdna,phase);
            }
        */

        cdna = StrUtil_appendString(cdna, PredictionExon_getSeqString(exon));
        //lastPhase = PredictionExon_getEndPhase(exon);
        //lastPhase = phase;
    }

// NIY Freeing exons vector?
    return cdna;
}
Example #2
0
void GenomicAlignAdaptor_addDerivedAlignments(GenomicAlignAdaptor *gaa, 
                     Vector *mergedAligns, GenomicAlign *alignA, GenomicAlign *alignB) {

  // variable name explanation
  // q - query c - consensus s - start e - end l - last
  // o, ov overlap j - jump_in_
  // r - result

  int  qs, qe, lqs, lqe, cs, ce, lce,
       ocs, oce, oqs, oqe, jc, jq, ovs, ove,
       rcs, rce, rqs, rqe;
  int currentMatch = 0;
  int newMatch;
  int cigAPos = 0, cigBPos = 0;
  char *resultCig;
  char tmpStr[128];

  // initialization phase
  Vector *cigA = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignA));
  Vector *cigB = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignB));

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    Vector_reverse(cigB);
  }

  // need a 'normalized' start for qs, qe, oxs so I dont 
  // have to check strandedness all the time  

  // consensus is strand 1 and is not compared to anything,
  // can keep its original coordinate system
 
  lce = GenomicAlign_getConsensusStart(alignA) - 1;
  ce = lce;
  cs = ce + 1;
  
  // alignBs query can be + or - just keep relative coords for now
  lqe = 0; lqs = 1;
  qe = 0; qs = 1;

  // ocs will be found relative to oce and has to be comparable
  // to oqs. But it could be that we have to move downwards if we
  // are not - strand. thats why coordinates are transformed here

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    // query_end is first basepair of alignment
    if (GenomicAlign_getQueryEnd(alignA) < GenomicAlign_getConsensusEnd(alignB)) {
      oce = 0; ocs = 1;
      oqe = GenomicAlign_getConsensusEnd(alignB) - GenomicAlign_getQueryEnd(alignA);
      oqs = oqe + 1;
    } else {
      oqe = 0; oqs = 1;
      oce = GenomicAlign_getQueryEnd(alignA) - GenomicAlign_getConsensusEnd(alignB);
      ocs = oce + 1;
    }
  } else {
    // in theory no coordinate magic necessary :-)
    oqs = GenomicAlign_getQueryStart(alignA);
    oqe = oqs - 1; 
    ocs = GenomicAlign_getConsensusStart(alignB);
    oce = ocs - 1;
  }

  // initializing result
  rcs = rce = rqs = rqe = 0;
  resultCig= StrUtil_copyString(&resultCig,"",0);

  while (1) {
    int newGa;
    // exit if you request a new piece of alignment and the cig list is 
    // empty

    if (oce < ocs || oce < oqs) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
      continue;
    }
    if (oqe < oqs || oqe < ocs) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
      continue;
    }

    // now matching region overlap in reference genome
    ovs = ocs < oqs ? oqs : ocs;
    ove = oce < oqe ? oce : oqe;
    
    if (currentMatch) {
      jc = cs + (ovs - oqs) - lce - 1;
      jq = qs + (ovs - ocs) - lqe - 1;
    } else {
      jc = jq = 0;
    }

    newMatch = ove - ovs + 1;
    newGa = 0;

    if (jc==0) {
      if (jq==0) {
	currentMatch += newMatch;
      } else {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// jq deletions;
	if (jq == 1) {
          resultCig = StrUtil_appendString(resultCig,"D");
        } else {
          sprintf(tmpStr,"%dD",jq);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
	currentMatch = newMatch;
      }
    } else {
      if (jq==0) {
        // store current match;
        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// jc insertions;
	if (jc==1) {
          resultCig = StrUtil_appendString(resultCig,"I");
        } else {
          sprintf(tmpStr,"%dI",jc);
          resultCig = StrUtil_appendString(resultCig,tmpStr);
        }
	currentMatch = newMatch;
         
      } else {
        double percId;
        double score;
        GenomicAlign *ga;

        sprintf(tmpStr,"%dM",currentMatch);
        resultCig = StrUtil_appendString(resultCig,tmpStr);

	// new GA
	int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
	int queryStart, queryEnd;
	if (queryStrand == 1) {
	  queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
	  queryEnd = rqe + GenomicAlign_getQueryStart(alignB) - 1;
	} else {
	  queryEnd = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
	  queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
	}
      
        score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ? 
          GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
        percId =  (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);
        
        ga = GenomicAlign_new();
    
        GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
        GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
        GenomicAlign_setCigarString(ga, resultCig);
        GenomicAlign_setConsensusStart(ga, rcs);
        GenomicAlign_setConsensusEnd(ga, rce);
        GenomicAlign_setQueryStrand(ga, queryStrand);
        GenomicAlign_setQueryStart(ga, queryStart);
        GenomicAlign_setQueryEnd(ga, queryEnd);
        GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
        GenomicAlign_setPercentId(ga, percId);
        GenomicAlign_setScore(ga, score);

	Vector_addElement(mergedAligns, ga);

        rcs = rce = rqs = rqe = 0;
	resultCig[0] = '\0';
	
	currentMatch = newMatch;
      }
    }


    
    if (!rcs) rcs = cs+(ovs-oqs);
    rce = cs+(ove-oqs);
    if (!rqs) rqs = qs+(ovs-ocs);
    rqe = qs+(ove-ocs);

    // update the last positions
    lce = rce; 
    lqe = rqe;

    // next piece on the one that end earlier
 
    if (oce <= oqe) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
    }
    if (oce >= oqe) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
    } 
  } // end of while loop

  // if there is a last floating current match
  if (currentMatch) {
    
    // new GA
    int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
    int queryStart, queryEnd;
    double percId;
    double score;
    GenomicAlign *ga;

    sprintf(tmpStr,"%dM",currentMatch);
    resultCig = StrUtil_appendString(resultCig, tmpStr);

    if (queryStrand == 1) {
      queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
      queryEnd = rqe + GenomicAlign_getQueryStart(alignB) - 1;
    } else {
      queryEnd = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
      queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
    }
  
    score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ? 
      GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
    percId =  (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);
    
    ga = GenomicAlign_new();

    GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
    GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
    GenomicAlign_setCigarString(ga, resultCig);
    GenomicAlign_setConsensusStart(ga, rcs);
    GenomicAlign_setConsensusEnd(ga, rce);
    GenomicAlign_setQueryStrand(ga, queryStrand);
    GenomicAlign_setQueryStart(ga, queryStart);
    GenomicAlign_setQueryEnd(ga, queryEnd);
    GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
    GenomicAlign_setPercentId(ga, percId);
    GenomicAlign_setScore(ga, score);

    Vector_addElement(mergedAligns, ga);
  }

  free(resultCig);

  Vector_free(cigA);
  Vector_free(cigB);

  // nothing to return all in merged_aligns
}
Example #3
0
void GenomicAlignAdaptor_store(GenomicAlignAdaptor *gaa, Vector *genomicAligns) {
  int ok = 1;
  char *qStr = NULL;
  StatementHandle *sth;
  char commaStr[2] = {'\0','\0'};
  int i;
  char *tmpStr = NULL;
  
  if ((tmpStr = (char *)calloc(65556,sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating tmpStr\n");
    ok = 0;
  }

  if (ok) {
    StrUtil_copyString(&qStr, "INSERT INTO genomic_align_block"
                       " (consensus_dnafrag_id, consensus_start, consensus_end,"
                       "  query_dnafrag_id, query_start, query_end, query_strand, method_link_id,"
                       "  score, perc_id, cigar_line) VALUES ",0);
  
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      GenomicAlign *ga = Vector_getElementAt(genomicAligns,i);
      DNAFrag *consDNAFrag  = GenomicAlign_getConsensusDNAFrag(ga);
      DNAFrag *queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);

      // check that everything has dbIDs
      if (!DNAFrag_getDbID(consDNAFrag) || !DNAFrag_getDbID(queryDNAFrag)) {
        fprintf(stderr, "Error: dna_fragment in GenomicAlign is not in DB\n");
        ok = 0;
        break;
      }
    }
  }

  GenomicAlign *ga = NULL;
  DNAFrag *consDNAFrag = NULL;
  DNAFrag *queryDNAFrag = NULL;
  IDType methodLinkId = 0;

  if (ok) {
    // all clear for storing
  
    for (i=0; i<Vector_getNumElement(genomicAligns); i++) {
      ga = Vector_getElementAt(genomicAligns,i);
      consDNAFrag  = GenomicAlign_getConsensusDNAFrag(ga);
      queryDNAFrag = GenomicAlign_getQueryDNAFrag(ga);

      methodLinkId = GenomicAlignAdaptor_methodLinkIdByAlignmentType(gaa, GenomicAlign_getAlignmentType(ga));

      if (!methodLinkId) {
        fprintf(stderr, "Error: There is no method_link with this type [%s] in the DB.\n",
                GenomicAlign_getAlignmentType(ga));
        ok = 0;
        break;
      }
    }
    
    if (ok) {
      sprintf(tmpStr," %s(" IDFMTSTR ", %d, %d, " IDFMTSTR ", %d, %d, %d, " IDFMTSTR ", %f, %f, '%s')", 
              commaStr, 
              DNAFrag_getDbID(consDNAFrag),
              GenomicAlign_getConsensusStart(ga),
              GenomicAlign_getConsensusEnd(ga),
              DNAFrag_getDbID(queryDNAFrag),  
              GenomicAlign_getQueryStart(ga),
              GenomicAlign_getQueryEnd(ga),
              GenomicAlign_getQueryStrand(ga),
              methodLinkId,
              GenomicAlign_getScore(ga),
              GenomicAlign_getPercentId(ga),
              GenomicAlign_getCigarString(ga));

      qStr = StrUtil_appendString(qStr, tmpStr);
      commaStr[0] = ','; 
    }
  
    sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
    sth->execute(sth);
    sth->finish(sth);
  }

  if (qStr)
    free(qStr);

  if (tmpStr)
    free(tmpStr);
}
Example #4
0
Vector *GenomicAlignAdaptor_fetchAllByDNAFragGenomeDBDirect( GenomicAlignAdaptor *gaa, 
     DNAFrag *dnaFrag, GenomeDB *targetGenome, int *startP, int *endP, IDType methodLinkId) {
  IDType dnaFragId;
  GenomeDB *genomeDB;
  char *qStr = NULL;
  char tmpStr[512];
  Vector *results;
  StatementHandle *sth;
  int ok = 0;

  if (!dnaFrag) {
    fprintf(stderr, "Error: Input dnafrag must not be NULL\n");
    ok = 0;
  }

  if (ok) {
    // formatting the dnafrag
    dnaFragId = DNAFrag_getDbID(dnaFrag);

    genomeDB = DNAFrag_getGenomeDB(dnaFrag);

    StrUtil_copyString(&qStr,
                       "SELECT gab.consensus_dnafrag_id,"
                       "       gab.consensus_start," 
                       "       gab.consensus_end,"
                       "       gab.query_dnafrag_id," 
                       "       gab.query_start," 
                       "       gab.query_end,"
                       "       gab.query_strand,"
                       "       gab.method_link_id,"
                       "       gab.score,"
                       "       gab.perc_id," 
                       "       gab.cigar_line"
                       " FROM genomic_align_block gab ",0);

    if (targetGenome) {
      qStr = StrUtil_appendString(qStr,", dnafrag d");
    }
    sprintf(tmpStr," WHERE gab.method_link_id = " IDFMTSTR, methodLinkId);
    qStr = StrUtil_appendString(qStr,tmpStr);

    results = Vector_new();

    if (!targetGenome ||
        GenomeDB_hasQuery(genomeDB, targetGenome, methodLinkId)) {
      Vector *qres;

      sprintf(tmpStr," AND gab.consensus_dnafrag_id = " IDFMTSTR, dnaFragId);
      qStr = StrUtil_appendString(qStr, tmpStr);

      if (startP && endP) {
        int lowerBound = *startP - gaa->maxAlignmentLength;
        sprintf(tmpStr,
                " AND gab.consensus_start <= %d"
                " AND gab.consensus_start >= %d"
                " AND gab.consensus_end >= %d", *endP, lowerBound, *startP ) ;
        qStr = StrUtil_appendString(qStr, tmpStr);
      }

      if (targetGenome) {
        sprintf(tmpStr,
                " AND gab.query_dnafrag_id = d.dnafrag_id"
                " AND d.genome_db_id = " IDFMTSTR, GenomeDB_getDbID(targetGenome));
        qStr = StrUtil_appendString(qStr, tmpStr);
      }

      sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
      sth->execute(sth);

      qres = GenomicAlignAdaptor_objectsFromStatementHandle(gaa, sth, 0);
      Vector_append(results,qres);
      Vector_free(qres);

      sth->finish(sth);
    }

    if (!targetGenome ||
        GenomeDB_hasConsensus(genomeDB, targetGenome, methodLinkId)) {
      Vector *cres;

      sprintf(tmpStr," AND gab.query_dnafrag_id = " IDFMTSTR, dnaFragId);
      qStr = StrUtil_appendString(qStr, tmpStr);

      if (startP && endP) {
        int lowerBound = *startP - gaa->maxAlignmentLength;
        sprintf(tmpStr,
                " AND gab.query_start <= %d"
                " AND gab.query_start >= %d"
                " AND gab.query_end >= %d", *endP, lowerBound, *startP ) ;
        qStr = StrUtil_appendString(qStr, tmpStr);
      }
      if (targetGenome) {
        sprintf(tmpStr,
                " AND gab.consensus_dnafrag_id = d.dnafrag_id"
                " AND d.genome_db_id = " IDFMTSTR, GenomeDB_getDbID(targetGenome));
        qStr = StrUtil_appendString(qStr, tmpStr);
      }
      sth = gaa->prepare((BaseAdaptor *)gaa, qStr, strlen(qStr));
      sth->execute(sth);

      cres = GenomicAlignAdaptor_objectsFromStatementHandle(gaa, sth, 1);
      Vector_append(results,cres);
      Vector_free(cres);

      sth->finish(sth);
    }
  }

  if (qStr)
    free(qStr);

  return results;
}