Beispiel #1
0
// Feeds the ints {1, 2, 3} into the vector `v` (declared outside this test,
// presumably by the VectorMethods fixture), reverses it into a freshly
// initialised vector and expects the first three ints of the result to read
// 3, 2, 1. Error codes written by the vector calls are deliberately ignored.
TEST_F(VectorMethods, ReverseCompletes) {
  int input[3] = { 1, 2, 3 };
  SystemErr ignoredErr = S_E_CLEAR;

  Vector out = {};
  initVector(&out, sizeof(int), NULL, NULL, &ignoredErr);

  Vector_catPrimitive(&v, input, 3, &ignoredErr);
  Vector_reverse(&v, &out, &ignoredErr);

  // View the raw storage of the result as an int array.
  const int *got = (int*) out.arr;
  EXPECT_EQ(got[0], 3);
  EXPECT_EQ(got[1], 2);
  EXPECT_EQ(got[2], 1);

  deinitVector(&out);
}
Beispiel #2
0
/*
 * Append one cigar piece covering `len` positions of operation `op` to the
 * string `cig` and return the pointer handed back by StrUtil_appendString
 * (callers must continue with the returned pointer, not with `cig`).
 * When elideSingle is non-zero, a length of exactly 1 is written as the bare
 * operation letter ("D" rather than "1D").
 */
static char *GenomicAlignAdaptor_appendCigPiece(char *cig, int len, char op, int elideSingle) {
  char piece[32];

  if (elideSingle && len == 1) {
    snprintf(piece, sizeof(piece), "%c", op);
  } else {
    snprintf(piece, sizeof(piece), "%d%c", len, op);
  }
  return StrUtil_appendString(cig, piece);
}

/*
 * Build one derived GenomicAlign from the accumulated result coordinates
 * (rcs/rce on the consensus side, rqs/rqe relative on the query side) and the
 * finished cigar string, then add it to mergedAligns.
 *
 * The consensus fragment comes from alignA, the query fragment from alignB.
 * The query strand is the product of both query strands; relative query
 * coordinates are mapped onto alignB's query range accordingly. Score is the
 * smaller of the two scores; percent id is the product of both percentages
 * divided by 100 and truncated to a whole number.
 */
static void GenomicAlignAdaptor_storeDerivedAlign(GenomicAlignAdaptor *gaa, Vector *mergedAligns,
                     GenomicAlign *alignA, GenomicAlign *alignB, char *resultCig,
                     int rcs, int rce, int rqs, int rqe) {
  int queryStrand = GenomicAlign_getQueryStrand(alignA) * GenomicAlign_getQueryStrand(alignB);
  int queryStart, queryEnd;
  double percId;
  double score;
  GenomicAlign *ga;

  if (queryStrand == 1) {
    queryStart = rqs + GenomicAlign_getQueryStart(alignB) - 1;
    queryEnd = rqe + GenomicAlign_getQueryStart(alignB) - 1;
  } else {
    // opposite strands: relative coordinates count backwards from the query end
    queryEnd = GenomicAlign_getQueryEnd(alignB) - rqs + 1;
    queryStart = GenomicAlign_getQueryEnd(alignB) - rqe + 1;
  }

  score = (GenomicAlign_getScore(alignA) < GenomicAlign_getScore(alignB)) ?
    GenomicAlign_getScore(alignA) : GenomicAlign_getScore(alignB);
  percId = (int)(GenomicAlign_getPercentId(alignA)*GenomicAlign_getPercentId(alignB)/100.0);

  ga = GenomicAlign_new();

  GenomicAlign_setConsensusDNAFrag(ga, GenomicAlign_getConsensusDNAFrag(alignA));
  GenomicAlign_setQueryDNAFrag(ga, GenomicAlign_getQueryDNAFrag(alignB));
  GenomicAlign_setCigarString(ga, resultCig);
  GenomicAlign_setConsensusStart(ga, rcs);
  GenomicAlign_setConsensusEnd(ga, rce);
  GenomicAlign_setQueryStrand(ga, queryStrand);
  GenomicAlign_setQueryStart(ga, queryStart);
  GenomicAlign_setQueryEnd(ga, queryEnd);
  GenomicAlign_setAdaptor(ga, (BaseAdaptor *)gaa);
  GenomicAlign_setPercentId(ga, percId);
  GenomicAlign_setScore(ga, score);

  Vector_addElement(mergedAligns, ga);
}

/*
 * Derive alignments from alignA and alignB by walking both cigar strings in
 * parallel and add every resulting GenomicAlign to mergedAligns.
 *
 * The overlap test compares alignA's query coordinates with alignB's
 * consensus coordinates (presumably both refer to the same shared sequence -
 * confirm against the caller). Each derived alignment takes its consensus
 * fragment from alignA and its query fragment from alignB.
 *
 * Nothing is returned; all results end up in mergedAligns.
 *
 * NOTE(review): this function relies on GenomicAlign_setCigarString not
 * keeping the passed pointer, because the result buffer is reused and finally
 * freed here - confirm.
 */
void GenomicAlignAdaptor_addDerivedAlignments(GenomicAlignAdaptor *gaa, 
                     Vector *mergedAligns, GenomicAlign *alignA, GenomicAlign *alignB) {

  // variable name explanation
  // q - query c - consensus s - start e - end l - last
  // o, ov overlap j - jump_in_
  // r - result

  int  qs, qe, lqe, cs, ce, lce,
       ocs, oce, oqs, oqe, jc, jq, ovs, ove,
       rcs, rce, rqs, rqe;
  int currentMatch = 0;
  int newMatch;
  int cigAPos = 0, cigBPos = 0;
  char *resultCig = NULL;

  // initialization phase
  Vector *cigA = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignA));
  Vector *cigB = CigarStrUtil_getPieces(GenomicAlign_getCigarString(alignB));

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    Vector_reverse(cigB);
  }

  // need a 'normalized' start for qs, qe, oxs so we don't
  // have to check strandedness all the time

  // consensus is strand 1 and is not compared to anything,
  // can keep its original coordinate system

  lce = GenomicAlign_getConsensusStart(alignA) - 1;
  ce = lce;
  cs = ce + 1;

  // alignB's query can be + or -; just keep relative coords for now
  lqe = 0;
  qe = 0; qs = 1;

  // ocs will be found relative to oce and has to be comparable
  // to oqs. But it could be that we have to move downwards if we
  // are not on the - strand. That's why coordinates are transformed here

  if (GenomicAlign_getQueryStrand(alignA) == -1 ) {
    // query_end is first basepair of alignment
    if (GenomicAlign_getQueryEnd(alignA) < GenomicAlign_getConsensusEnd(alignB)) {
      oce = 0; ocs = 1;
      oqe = GenomicAlign_getConsensusEnd(alignB) - GenomicAlign_getQueryEnd(alignA);
      oqs = oqe + 1;
    } else {
      oqe = 0; oqs = 1;
      oce = GenomicAlign_getQueryEnd(alignA) - GenomicAlign_getConsensusEnd(alignB);
      ocs = oce + 1;
    }
  } else {
    // in theory no coordinate magic necessary :-)
    oqs = GenomicAlign_getQueryStart(alignA);
    oqe = oqs - 1; 
    ocs = GenomicAlign_getConsensusStart(alignB);
    oce = ocs - 1;
  }

  // initializing result
  rcs = rce = rqs = rqe = 0;
  resultCig = StrUtil_copyString(&resultCig,"",0);

  while (1) {
    int advanceA, advanceB;

    // exit if you request a new piece of alignment and the cig list is 
    // empty

    if (oce < ocs || oce < oqs) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
      continue;
    }
    if (oqe < oqs || oqe < ocs) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
      continue;
    }

    // now matching region overlap in reference genome
    ovs = ocs < oqs ? oqs : ocs;
    ove = oce < oqe ? oce : oqe;
    
    // jump sizes between the end of the previous overlap and this one
    if (currentMatch) {
      jc = cs + (ovs - oqs) - lce - 1;
      jq = qs + (ovs - ocs) - lqe - 1;
    } else {
      jc = jq = 0;
    }

    newMatch = ove - ovs + 1;

    if (jc == 0 && jq == 0) {
      // contiguous on both sides: extend the running match
      currentMatch += newMatch;
    } else if (jc == 0) {
      // store current match, then jq deletions
      resultCig = GenomicAlignAdaptor_appendCigPiece(resultCig, currentMatch, 'M', 0);
      resultCig = GenomicAlignAdaptor_appendCigPiece(resultCig, jq, 'D', 1);
      currentMatch = newMatch;
    } else if (jq == 0) {
      // store current match, then jc insertions
      resultCig = GenomicAlignAdaptor_appendCigPiece(resultCig, currentMatch, 'M', 0);
      resultCig = GenomicAlignAdaptor_appendCigPiece(resultCig, jc, 'I', 1);
      currentMatch = newMatch;
    } else {
      // jump on both sides: close the current alignment and start a new one
      resultCig = GenomicAlignAdaptor_appendCigPiece(resultCig, currentMatch, 'M', 0);

      GenomicAlignAdaptor_storeDerivedAlign(gaa, mergedAligns, alignA, alignB, resultCig,
                                            rcs, rce, rqs, rqe);

      rcs = rce = rqs = rqe = 0;
      resultCig[0] = '\0';

      currentMatch = newMatch;
    }

    // start coordinates are only set for the first overlap of a result
    if (!rcs) rcs = cs+(ovs-oqs);
    rce = cs+(ove-oqs);
    if (!rqs) rqs = qs+(ovs-ocs);
    rqe = qs+(ove-ocs);

    // update the last positions
    lce = rce; 
    lqe = rqe;

    // next piece on the one that ends earlier (both on a tie).
    // The comparison must be taken before either side is advanced, because
    // GenomicAlignAdaptor_nextCig rewrites oce / oqe.
    advanceB = (oce <= oqe);
    advanceA = (oce >= oqe);

    if (advanceB) {
      // next M area in cigB
      if (cigBPos == Vector_getNumElement(cigB)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigB, &cigBPos, &ocs, &oce, &qs, &qe ); 
    }
    if (advanceA) {
      // next M area in cigA
      if (cigAPos == Vector_getNumElement(cigA)) break;
      GenomicAlignAdaptor_nextCig(gaa, cigA, &cigAPos, &cs, &ce, &oqs, &oqe );
    } 
  } // end of while loop

  // if there is a last floating current match
  if (currentMatch) {
    resultCig = GenomicAlignAdaptor_appendCigPiece(resultCig, currentMatch, 'M', 0);

    GenomicAlignAdaptor_storeDerivedAlign(gaa, mergedAligns, alignA, alignB, resultCig,
                                          rcs, rce, rqs, rqe);
  }

  free(resultCig);

  Vector_free(cigA);
  Vector_free(cigB);

  // nothing to return, all in mergedAligns
}