Example #1
0
/*
 * PredictionTranscript_genomic2cDNA
 *
 * Maps the range start..end (with the given strand) through the transcript's
 * cDNA coordinate mapper, using "genomic" as the source type.
 *
 *   trans   - prediction transcript whose cDNA coord mapper is used
 *   start   - first position of the range to map
 *   end     - last position of the range to map
 *   strand  - strand of the range to map
 *   contig  - contig whose pointer value is used as the mapper id; if NULL,
 *             the slice of the first translateable exon is used instead
 *
 * Returns the MapperRangeSet produced by Mapper_mapCoordinates, or a newly
 * created empty MapperRangeSet when contig is NULL and the transcript has no
 * translateable exons.
 */
MapperRangeSet *PredictionTranscript_genomic2cDNA(PredictionTranscript *trans, int start, int end, int strand, BaseContig *contig) {
    Mapper *mapper;

    // "ids" in mapper are contigs of exons, so use the same contig that should
    // be attached to all of the exons...
    if (!contig) {
        Vector *translateable = PredictionTranscript_getAllTranslateableExons(trans);
        PredictionExon *firstExon;

        if (!Vector_getNumElement(translateable)) {
            // Nothing to map against: release the temporary vector (it is freed
            // on the normal path too) before handing back an empty result.
            Vector_free(translateable);
            return MapperRangeSet_new();
        }

        firstExon = Vector_getElementAt(translateable, 0);
        contig = (BaseContig*)PredictionExon_getSlice(firstExon);

        // Only the first exon's slice is needed; the vector itself is done with.
        Vector_free(translateable);
    }

    mapper = PredictionTranscript_getcDNACoordMapper(trans);

    // The contig pointer itself is cast to IDType and used as the mapper id.
    return Mapper_mapCoordinates(mapper,(IDType)contig, start, end, strand, "genomic");
}
Example #2
0
// NIY: The insert path may need reworking, because the way mapInsert returns
// its data was changed. Intention is to go back to returning a MapperRangeSet.
/*
 * Mapper_fastMap
 *
 * Quick mapping of start..end on 'id' for the simple case where the whole
 * range lies inside a single mapper pair.
 *
 *   m       - the mapper
 *   id      - id to look up in the pair hash for the source side
 *   start   - first position of the range
 *   end     - last position of the range
 *   strand  - strand of the range
 *   type    - source type; if it matches the mapper's "to" type the mapping
 *             goes to -> from, otherwise from -> to is used
 *
 * Returns a MapperRangeSet holding one MapperCoordinate when an enclosing
 * pair is found, or an empty set when none is. When start == end + 1 the
 * call is forwarded to Mapper_mapInsert with its fastmap flag set, and that
 * function's result is returned as is.
 *
 * Exits the program if no pair hash exists for the source side or if 'id'
 * is not present in it.
 *
 * NOTE(review): unlike Mapper_mapCoordinates, 'type' is not checked against
 * the mapper's "from" type here - any non-"to" type is mapped from -> to.
 */
MapperRangeSet *Mapper_fastMap(Mapper *m, IDType id, long start, long end, int strand, char *type) {
  int srcInd;
  int dstInd;
  int idx;
  CoordSystem *dstCs;

  // An insert: let mapInsert deal with it, in fastmap mode
  if (start == end + 1) {
    return Mapper_mapInsert(m, id, start, end, strand, type, 1);
  }

  // Work out which side of each pair is the source and which the target
  if (Mapper_compareType(type, Mapper_getTo(m))) {
    srcInd = MAPPER_FROM_IND;
    dstInd = MAPPER_TO_IND;
    dstCs  = Mapper_getToCoordSystem(m);
  } else {
    srcInd = MAPPER_TO_IND;
    dstInd = MAPPER_FROM_IND;
    dstCs  = Mapper_getFromCoordSystem(m);
  }

  IDHash *pairHash = Mapper_getPairHash(m, srcInd);

  if (pairHash == NULL) {
    fprintf(stderr,"ERROR: Type %s is neither to or from coordinate system\n",type);
    exit(1);
  }

  if (!Mapper_getIsSorted(m)) {
    Mapper_sort(m);
  }

  if (!IDHash_contains(pairHash, id)) {
    fprintf(stderr,"ERROR: Fastmap expects to be able to find an id. It couldnt for " IDFMTSTR "\n",id);
    exit(1);
  }

  MapperPairSet *pairSet = IDHash_getValue(pairHash, id);
  MapperRangeSet *mapped = MapperRangeSet_new();

  for (idx = 0; idx < MapperPairSet_getNumPair(pairSet); idx++) {
    MapperPair *candidate = MapperPairSet_getPairAt(pairSet, idx);
    MapperUnit *src       = MapperPair_getUnit(candidate, srcInd);

    // Only the trivial case is handled: the first pair that fully contains
    // the requested range is used, and the search stops there.
    if (start >= src->start && end <= src->end) {
      MapperUnit *dst = MapperPair_getUnit(candidate, dstInd);
      long mappedStart;
      long mappedEnd;
      int  mappedStrand;

      if (candidate->ori == 1) {
        // Same orientation: offsets are measured from the target start
        mappedStart  = dst->start + start - src->start;
        mappedEnd    = dst->start + end   - src->start;
        mappedStrand = strand;
      } else {
        // Opposite orientation: offsets are measured back from the target
        // end, and the strand is flipped
        mappedStart  = dst->end - (end - src->start);
        mappedEnd    = dst->end - (start - src->start);
        mappedStrand = -strand;
      }

      // Perl didn't set rank, so use 0
      MapperCoordinate *hit = MapperCoordinate_new(dst->id, mappedStart, mappedEnd, mappedStrand, dstCs, 0);

      MapperRangeSet_addRange(mapped, (MapperRange *)hit);
      break;
    }
  }

  // NIY: Here we return empty set, in mapInsert it returns NULL for empty fastmap - need to work out which is right
  return mapped;
}
Example #3
0
/*
 * Mapper_mapInsert
 *
 * Maps an insert (a position where start == end + 1). The start and end are
 * swapped to form a two base range, that range is mapped with
 * Mapper_mapCoordinates, and the result is converted back into an insert.
 *
 *   m        - the mapper
 *   id       - id of the sequence the insert is on
 *   start    - insert start (expected to be end + 1)
 *   end      - insert end
 *   strand   - strand of the insert
 *   type     - source type, passed through to Mapper_mapCoordinates
 *   fastmap  - non-zero when called on behalf of Mapper_fastMap
 *
 * Returns a newly created MapperRangeSet of MapperCoordinates. Ranges of the
 * two base mapping which are not coordinates are dropped when that mapping
 * produced two ranges. In fastmap mode the result is only returned if it
 * holds exactly one coordinate; otherwise it is freed and NULL is returned.
 *
 * Exits the program if the two base mapping produces neither one nor two
 * ranges, or (fastmap mode only) if its single range is not a coordinate.
 */
MapperRangeSet *Mapper_mapInsert(Mapper *m, IDType id, long start, long end, int strand, char *type, int fastmap) {

  // swap start/end and map the resultant 2bp coordinate
  long tmp;
  tmp = start;
  start = end;
  end = tmp;

  MapperRangeSet *coords = Mapper_mapCoordinates(m, id, start, end, strand, type);

  MapperRangeSet *retSet = MapperRangeSet_new();

  if (MapperRangeSet_getNumRange(coords) == 1) {
    MapperRange *only = MapperRangeSet_getRangeAt(coords, 0);

    // Type check - not sure what we're supposed to have here so belt and braces seem appropriate
    if (fastmap && only->rangeType != MAPPERRANGE_COORD) {
      fprintf(stderr, "Expected a Coordinate range, got a %d range (look up in MapperRange.h)\n", only->rangeType);
      exit(1);
    }

    // NOTE(review): outside fastmap mode the range is still assumed to be a
    // MapperCoordinate without checking - a single gap would be read through
    // the wrong type here. Confirm whether that can happen and how it should
    // be represented.
    MapperCoordinate *c = (MapperCoordinate *)only;

    // swap start and end to convert back into insert
    MapperCoordinate *newC = MapperCoordinate_new(c->id, c->end, c->start, c->strand, c->coordSystem, 0); // Perl didn't set rank so use 0
    MapperRangeSet_addRange(retSet, (MapperRange *)newC);

  } else {
    if (MapperRangeSet_getNumRange(coords) != 2) {
      fprintf(stderr, "Unexpected: Got %d expected 2.\n", MapperRangeSet_getNumRange(coords));
      exit(1);
    }

    // adjust coordinates, remove gaps
    MapperRange *c1, *c2;

    // On the reverse strand the mapped ranges come back in reverse order
    if (strand == -1) {
      c1 = MapperRangeSet_getRangeAt(coords,1);
      c2 = MapperRangeSet_getRangeAt(coords,0);
    } else {
      c1 = MapperRangeSet_getRangeAt(coords,0);
      c2 = MapperRangeSet_getRangeAt(coords,1);
    }

    // Was ref($c1) eq 'Bio::EnsEMBL::Mapper::Coordinate' so think this WON'T include MAPPERRANGE_INDEL

    MapperCoordinate *newC1 = NULL;
    if (c1->rangeType == MAPPERRANGE_COORD) {
      MapperCoordinate *mcC1 = (MapperCoordinate *)c1;
      // insert is after first coord
      if (mcC1->strand * strand == -1) {
        mcC1->end--;
      } else {
        mcC1->start++;
      }
      newC1 = MapperCoordinate_new(mcC1->id, mcC1->start, mcC1->end, mcC1->strand, mcC1->coordSystem, 0); // Perl didn't set rank, so use 0
    }

    // (see above for note on this condition if (ref($c2) eq 'Bio::EnsEMBL::Mapper::Coordinate')
    MapperCoordinate *newC2 = NULL;
    if (c2->rangeType == MAPPERRANGE_COORD) {
      MapperCoordinate *mcC2 = (MapperCoordinate *)c2;
      // insert is before second coord
      if(mcC2->strand * strand == -1) {
        mcC2->start++;
      } else {
        mcC2->end--;
      }
      newC2 = MapperCoordinate_new(mcC2->id, mcC2->start, mcC2->end, mcC2->strand, mcC2->coordSystem, 0); // Perl didn't set rank, so use 0
    }

    if (strand == -1) { // Add in 2, 1 order
      if (newC2) MapperRangeSet_addRange(retSet, (MapperRange *)newC2);
      if (newC1) MapperRangeSet_addRange(retSet, (MapperRange *)newC1);
    } else {  // Add in 1, 2 order
      if (newC1) MapperRangeSet_addRange(retSet, (MapperRange *)newC1);
      if (newC2) MapperRangeSet_addRange(retSet, (MapperRange *)newC2);
    }

  }

  // The Perl fastmap check is made on the adjusted list (return undef if
  // @coords != 1), i.e. on what has been collected in retSet, not on the raw
  // two base mapping. retSet already holds the converted coordinate, so
  // nothing further is added to it here.
  if (fastmap && MapperRangeSet_getNumRange(retSet) != 1) {
    MapperRangeSet_free(retSet);
    retSet = NULL;
  }

  // Everything kept was copied into retSet, so the raw mapping can go
  MapperRangeSet_free(coords);

  return retSet;
}
Example #4
0
/*
 * Mapper_mapCoordinates
 *
 * Maps the range start..end on 'id' from the coordinate system named by
 * 'type' to the other side of the mapper.
 *
 *   m       - the mapper (sorted here first if it is not already sorted)
 *   id      - id to look up in the pair hash for the source side
 *   start   - first position of the range
 *   end     - last position of the range
 *   strand  - strand of the range; the result order is reversed for -1
 *   type    - must match the mapper's "to" or "from" type
 *
 * Returns a newly created MapperRangeSet. Mapped parts are added as
 * MapperCoordinates (or IndelCoordinates for indel pairs) and unmapped parts
 * as MapperGaps. If 'id' has no pairs the set holds one gap for start..end.
 * When start == end + 1 the call is forwarded to Mapper_mapInsert and that
 * function's result is returned.
 *
 * Exits the program if start > end + 1, if 'type' matches neither side of
 * the mapper, or if no pair hash exists for the source side.
 */
MapperRangeSet *Mapper_mapCoordinates(Mapper *m, IDType id, long start, long end, int strand, char *type) {


  // special case for handling inserts:
  if ( start == end+1 ) {
    return Mapper_mapInsert(m, id, start, end, strand, type, 0 /*fastmap flag */);
  } else if (start > end+1) {
    fprintf(stderr,"ERROR: Start is greater than end for id " IDFMTSTR ", start %ld, end %ld\n",id,start,end);
    exit(1);
  }


  IDHash *hash;
  CoordSystem *cs;
  if( Mapper_getIsSorted(m) == 0 ) {
    Mapper_sort(m);
  }

  // Decide which side of each pair is the source ("from") and which is the
  // target ("to"); a zero result from Mapper_compareType is treated as a match.
  // cs is the coordinate system given to the mapped coordinates.
  int from, to;
  if (!Mapper_compareType(type,Mapper_getTo(m))) {
    from = MAPPER_TO_IND;
    to   = MAPPER_FROM_IND;
    cs   = Mapper_getFromCoordSystem(m);
  } else if (!Mapper_compareType(type,Mapper_getFrom(m))) {
    from = MAPPER_FROM_IND;
    to   = MAPPER_TO_IND;
    cs   = Mapper_getToCoordSystem(m);
  } else {
    fprintf(stderr, "Invalid type [%s] in mapper (not from [%s] or to [%s])\n", type, Mapper_getFrom(m), Mapper_getTo(m));
    exit(1);
  }

  hash = Mapper_getPairHash(m, from);

  if (!hash) {
    fprintf(stderr,"ERROR: Type %s is neither to or from coordinate system\n",type);
    exit(1);
  }


  MapperRangeSet *results = MapperRangeSet_new();


// Was upcasing the id - its a number in C, I haven't found a case yet where its a string
  if (!IDHash_contains(hash, id)) {
    // one big gap!
    MapperRange *gap = (MapperRange *)MapperGap_new(start,end,0); // Perl didn't set rank so use 0
    MapperRangeSet_addRange(results,gap);
    return results;
  }


  MapperPairSet *pairs = IDHash_getValue(hash,id); //my $lr = $hash->{ uc($id) };


  // Stays NULL if no pair contributes a range to the results
  MapperPair *lastUsedPair = NULL;

  int startIdx, endIdx, midIdx;
  MapperPair *pair;
  MapperUnit *selfCoord;


  startIdx = 0;
  endIdx   = MapperPairSet_getNumPair(pairs)-1;

  // binary search the relevant pairs
  // helps if the list is big
  // (only startIdx is used afterwards; the scan below re-checks every pair
  // from there on, so this just skips pairs that end before start)
  while ( ( endIdx - startIdx ) > 1 ) {
    midIdx = ( startIdx + endIdx ) >> 1;
    pair   = MapperPairSet_getPairAt(pairs, midIdx);

    selfCoord = MapperPair_getUnit(pair, from);

    if ( selfCoord->end < start ) {
      startIdx = midIdx;
    } else {
      endIdx = midIdx;
    }
  }

  // rank is never changed below (the Perl increment is in the commented out
  // fragment inside the loop), so every range created with it gets rank 0
  int rank       = 0;
  long origStart = start;
  IDType lastTargetCoord;
  int lastTargetCoordIsSet = 0; // guards reads of lastTargetCoord until it has been assigned

  int i;
  for (i=startIdx; i<MapperPairSet_getNumPair(pairs); i++) {
    MapperPair *pair = MapperPairSet_getPairAt(pairs,i);
    MapperUnit *selfCoord   = MapperPair_getUnit(pair, from);
    MapperUnit *targetCoord = MapperPair_getUnit(pair, to);

    //
    // But not the case for haplotypes!! need to test for this case???
    // so removing this till a better solution is found
    // 
    //
    //     if($self_coord->{'start'} < $start){
    //       $start = $orig_start;
    //       $rank++;
    //     }

    // The remembered target id is set from the first pair visited and is only
    // refreshed while the target id stays the same. Once a pair with a
    // different target id turns up, start is rewound to the original start if
    // this pair begins before the current start.
    if ( lastTargetCoordIsSet && targetCoord->id != lastTargetCoord ) {
      if ( selfCoord->start < start ) {    // i.e. the same bit is being mapped to another assembled bit
        start = origStart;
      }
    } else {
      lastTargetCoord = targetCoord->id;
      lastTargetCoordIsSet = 1;
    }

    // if we haven't even reached the start, move on
    if (selfCoord->end < origStart) {
      continue;
    }

    // if we have over run, break
    if (selfCoord->start > end) {
      break;
    }

// Check is start not origStart
    if (start < selfCoord->start) {
      // gap detected
      MapperRange *gap = (MapperRange *)MapperGap_new(start, selfCoord->start-1, rank);
      MapperRangeSet_addRange(results,gap);
      start = gap->end+1; // continue from the first position of this pair
    }

    long targetStart, targetEnd;

    MapperRange *res;

    if ( MapperPair_isIndel(pair) ) {
      // When next pair is an IndelPair and not a Coordinate, create the
      // new mapping Coordinate, the IndelCoordinate.
      // The whole target unit is used, without any offset adjustment.
      targetStart = targetCoord->start;
      targetEnd   = targetCoord->end;

      // create a Gap object
      // (in source coordinates, clipped to the end of the requested range)
      MapperGap *gap = MapperGap_new(start,
                                     selfCoord->end < end ? selfCoord->end : end,
                                     0); // Perl didn't set rank - don't know if need to

      // create the Coordinate object
      MapperCoordinate *coord = MapperCoordinate_new(targetCoord->id,
                                                     targetStart,
                                                     targetEnd,
                                                     pair->ori * strand, 
                                                     cs, 
                                                     0); // Perl didn't set rank - don't know if need to

      //and finally, the IndelCoordinate object with
      res = (MapperRange *)IndelCoordinate_new(gap, coord);
    } else {
      // start is somewhere inside the region
      // For ori == 1 the offset from the source start fixes the target start;
      // otherwise it is measured back from the target end and fixes the
      // target end. The other bound is filled in by the block below.
      if (pair->ori == 1) {
        targetStart = targetCoord->start + (start - selfCoord->start);
      } else {
        targetEnd = targetCoord->end - (start - selfCoord->start);
      }

      // Either we are enveloping this map or not.  If yes, then end
      // point (self perspective) is determined solely by target.  If
      // not we need to adjust.
      if (end > selfCoord->end) {
        // enveloped
        if( pair->ori == 1 ) {
          targetEnd = targetCoord->end;
        } else {
          targetStart = targetCoord->start;
        }
      } else {
        // need to adjust end
        if (pair->ori == 1) {
          targetEnd = targetCoord->start + (end - selfCoord->start);
        } else {
          targetStart = targetCoord->end - (end - selfCoord->start);
        }
      }

      res = (MapperRange *)MapperCoordinate_new(targetCoord->id,
                                                targetStart,
                                                targetEnd,
                                                pair->ori * strand, 
                                                cs, 
                                                rank);
    } // end else [ if ( exists $pair->{'indel'...})]

    MapperRangeSet_addRange(results, res);

    // Remember the pair and carry on from just after its source end
    lastUsedPair = pair;
    start = selfCoord->end+1;
  }

  if (lastUsedPair == NULL) {
    // No pair was used at all: the remaining range is one gap
    MapperRange *gap = (MapperRange *)MapperGap_new(start,end, 0); // Perl doesn't set rank, so use 0
    MapperRangeSet_addRange(results,gap);

  } else if (MapperPair_getUnit(lastUsedPair, from)->end < end) {
    // gap at the end
    MapperRange *gap = (MapperRange *)MapperGap_new(
                           MapperPair_getUnit(lastUsedPair,from)->end + 1,
                           end, 0 ); // Perl didn't set rank so use 0
    MapperRangeSet_addRange(results,gap);
  }

  // Ranges were collected in ascending source order; flip them for the reverse strand
  if (strand == -1) {
    MapperRangeSet_reverse(results);
  }

  return results;
}