Esempio n. 1
0
void printCoords(MapperRangeSet *results) {
  int i;
  for (i=0;i<MapperRangeSet_getNumRange(results);i++) {
    MapperRange *range = MapperRangeSet_getRangeAt(results, i);
    switch (range->rangeType) {
      case MAPPERRANGE_COORD :
        {
          MapperCoordinate *mc = (MapperCoordinate *)range;
          fprintf(stderr, "Coord: "IDFMTSTR" %ld %ld %d\n", mc->id, mc->start, mc->end, mc->strand);
        }
        break;
      case MAPPERRANGE_GAP :
        {
          MapperGap *mg = (MapperGap *)range;
          fprintf(stderr, "Gap: %ld %ld\n", mg->start, mg->end);
        }
        break;
      default:
        {
          fprintf(stderr, "Unhandled range type %d\n",range->rangeType);
        }
        break;
    }
  }
}
Esempio n. 2
0
int testTransform(Mapper *mapper, int srcId, int srcStart, int srcEnd, int srcStrand, char *srcType, int dest[][4], int nDest ) {

  MapperRangeSet *results = Mapper_mapCoordinates(mapper, srcId, srcStart, srcEnd, srcStrand, srcType);

  printf("\nNew test\n");

  int diff = 0;
  printf("Number of results = %d nDest = %d\n",MapperRangeSet_getNumRange(results), nDest);
  if (MapperRangeSet_getNumRange(results) != nDest) {
    diff =1;
  
  } else{
    int i;
    for (i=0;i<MapperRangeSet_getNumRange(results) && !diff;i++) {
      MapperRange *range = MapperRangeSet_getRangeAt(results, i);

      switch (range->rangeType) {
        case MAPPERRANGE_COORD :
          {
            MapperCoordinate *mc = (MapperCoordinate *)range;
            printf("Coord: "IDFMTSTR" %ld %ld %d\n", mc->id, mc->start, mc->end, mc->strand);
            if (dest[i][0] != mc->id || dest[i][1] != mc->start || dest[i][2] != mc->end || dest[i][3] != mc->strand) {
              diff=1;
            }
          } 
          break;
        case MAPPERRANGE_GAP :
          {
            MapperGap *mg = (MapperGap *)range;
            
            printf("Gap: %ld %ld\n", mg->start, mg->end);
            if (dest[i][0] != srcId || dest[i][1] != mg->start || dest[i][2] != mg->end || dest[i][3] != 0) {
              diff=1;
            }
          } 
          break;
        default:
          {
            printf("Unhandled range type %d\n",range->rangeType);
            diff=1;
          }
          break;
      }
    }
  }
  if (diff) {
    printf("DIFFERENCE\n");
  }
  return 1;
}
Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, 
                                                                   StatementHandle *sth,
                                                                   AssemblyMapper *assMapper,
                                                                   Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(isea->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(isea->dba);

  Vector *features = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);
  
/* Unneccesary
  my %analysis_hash;
  my %sr_name_hash;
  my %sr_cs_hash;
*/
  

  
/* Unused
  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }
*/

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  //CoordSystem *destSliceCs;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;
  //AssemblyMapperAdaptor *asma;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    //??destSliceCs     = Slice_getCoordSystem(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
    //??asma            = DBAdaptor_getAssemblyMapperAdaptor(ea->dba);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id =           row->getLongLongAt(row,0);
    IDType analysisId =   row->getLongLongAt(row,1);
    IDType seqRegionId =  row->getLongLongAt(row,2);
    long seqRegionStart = row->getLongAt(row,3);
    long seqRegionEnd =   row->getLongAt(row,4);
    int seqRegionStrand = row->getIntAt(row,5);
    char *hitName =       row->getStringAt(row,6);
    double score =        row->getDoubleAt(row,7);
    char *scoreType =     row->getStringAt(row,8);
    int spliceCanonical = row->getIntAt(row,9); 

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

/*
    // need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    #get the slice object
    my $slice = $slice_hash{"ID:".$seq_region_id};
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
*/
    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *iseSlice = slice;
    
    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    // 
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      //next FEATURE if (!defined($seq_region_id));
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      
/* Was - but identical if and else so why test???
      #get a slice in the coord system we just mapped to
      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      } else {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
      }
*/
// Instead...
      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      iseSlice = IDHash_getValue(sliceHash, seqRegionId);
    }


    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    // 
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
       
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      iseSlice = destSlice;
    }
    
    IntronSupportingEvidence *ise = IntronSupportingEvidence_new();

    IntronSupportingEvidence_setStart             (ise, seqRegionStart);
    IntronSupportingEvidence_setEnd               (ise, seqRegionEnd);
    IntronSupportingEvidence_setStrand            (ise, seqRegionStrand);
    IntronSupportingEvidence_setSlice             (ise, iseSlice);
    IntronSupportingEvidence_setAnalysis          (ise, analysis);
    IntronSupportingEvidence_setAdaptor           (ise, (BaseAdaptor *)isea);
    IntronSupportingEvidence_setDbID              (ise, id);
    IntronSupportingEvidence_setHitName           (ise, hitName);
    IntronSupportingEvidence_setScore             (ise, score);
    IntronSupportingEvidence_setScoreType         (ise, scoreType);
    IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical);

    Vector_addElement(features, ise);
  }
  
  return features;
}
Esempio n. 4
0
MapperRangeSet *Mapper_mapInsert(Mapper *m, IDType id, long start, long end, int strand, char *type, int fastmap) {

  // swap start/end and map the resultant 2bp coordinate
  long tmp;
  tmp = start;
  start = end;
  end = tmp;
  
  MapperRangeSet *coords = Mapper_mapCoordinates(m, id, start, end, strand, type);

  MapperRangeSet *retSet = MapperRangeSet_new(); 

  if (MapperRangeSet_getNumRange(coords) == 1) {
// Note assuming its a mapper coordinate - not sure if this is always true
    MapperCoordinate *c = (MapperCoordinate *)MapperRangeSet_getRangeAt(coords,0);

    // swap start and end to convert back into insert
    MapperCoordinate *newC = MapperCoordinate_new(c->id, c->end, c->start, c->strand, c->coordSystem, 0); // Perl didn't set rank so use 0
    MapperRangeSet_addRange(retSet, (MapperRange *)newC);

  } else {
    if (MapperRangeSet_getNumRange(coords) != 2) {
      fprintf(stderr, "Unexpected: Got %d expected 2.\n", MapperRangeSet_getNumRange(coords));
      exit(1);
    }

    // adjust coordinates, remove gaps
    MapperRange *c1, *c2;

    if (strand == -1) {
      c1 = MapperRangeSet_getRangeAt(coords,1);
      c2 = MapperRangeSet_getRangeAt(coords,0);
    } else {
      c1 = MapperRangeSet_getRangeAt(coords,0);
      c2 = MapperRangeSet_getRangeAt(coords,1);
    }

    //@coords = ();

    // Was ref($c1) eq 'Bio::EnsEMBL::Mapper::Coordinate' so think this WON'T include MAPPERRANGE_INDEL

    MapperCoordinate *newC1 = NULL;
    if (c1->rangeType == MAPPERRANGE_COORD) {
      MapperCoordinate *mcC1 = (MapperCoordinate *)c1;
      // insert is after first coord
      if (mcC1->strand * strand == -1) {
        mcC1->end--;
      } else {
        mcC1->start++;
      }
      newC1 = MapperCoordinate_new(mcC1->id, mcC1->start, mcC1->end, mcC1->strand, mcC1->coordSystem, 0); // Perl didn't set rank, so use 0
    }

    // (see above for note on this condition if (ref($c2) eq 'Bio::EnsEMBL::Mapper::Coordinate') 
    MapperCoordinate *newC2 = NULL;
    if (c2->rangeType == MAPPERRANGE_COORD) {
      MapperCoordinate *mcC2 = (MapperCoordinate *)c2;
      // insert is before second coord
      if(mcC2->strand * strand == -1) {
        mcC2->start++;
      } else {
        mcC2->end--;
      }
      newC2 = MapperCoordinate_new(mcC2->id, mcC2->start, mcC2->end, mcC2->strand, mcC2->coordSystem, 0); // Perl didn't set rank, so use 0
    }
   
    if (strand == -1) { // Add in 2, 1 order
      if (newC2) MapperRangeSet_addRange(retSet, (MapperRange *)newC2);
      if (newC1) MapperRangeSet_addRange(retSet, (MapperRange *)newC1);
    } else {  // Add in 1, 2 order
      if (newC1) MapperRangeSet_addRange(retSet, (MapperRange *)newC1);
      if (newC2) MapperRangeSet_addRange(retSet, (MapperRange *)newC2);
    }

  }

  if (fastmap) {
    if (MapperRangeSet_getNumRange(coords) != 1) {
      MapperRangeSet_free(retSet);
      retSet = NULL;
    } else {
      MapperRange *c = MapperRangeSet_getRangeAt(coords, 0);

      // Type check - not sure what we're supposed to have here so belt and braces seem appropriate
      if (c->rangeType != MAPPERRANGE_COORD) {
        fprintf(stderr, "Expected a Coordinate range, got a %d range (look up in MapperRange.h)\n", c->rangeType);
        exit(1);
      }
      
      MapperCoordinate *mcC = (MapperCoordinate *)c;

      MapperCoordinate *newC = MapperCoordinate_new(mcC->id, mcC->start, mcC->end, mcC->strand, mcC->coordSystem, 0); // Perl didn't set rank, so use 0
      MapperRangeSet_addRange(retSet, (MapperRange *)newC);

/*
      return ($c->{'id'}, $c->{'start'}, $c->{'end'},
              $c->{'strand'}, $c->{'coord_system'});
*/
    }
  }

  MapperRangeSet_free(coords);

  return retSet;
}
Esempio n. 5
0
/*
=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth 
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper 
               An mapper to be used to convert contig coordinates
               to assembly coordinates.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.   
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement 
               handle must be in the same order as the columns defined by the
               _columns method.  If the slice argument is provided then the
               the prediction transcripts will be in returned in the coordinate
               system of the $slice argument.  Otherwise the prediction 
               transcripts will be returned in the RawContig coordinate system.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch
  Status     : Stable

=cut
*/
Vector *PredictionTranscriptAdaptor_objectsFromStatementHandle(PredictionTranscriptAdaptor *pta, 
                                                               StatementHandle *sth, 
                                                               AssemblyMapper *assMapper, 
                                                               Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(pta->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(pta->dba);

  Vector *pTranscripts = Vector_new();
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);

  long         destSliceStart;
  long         destSliceEnd;
  int          destSliceStrand;
  long         destSliceLength;
  char *       destSliceSrName;
  IDType       destSliceSrId = 0;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    destSliceSrName = Slice_getSeqRegionName(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType predictionTranscriptId = row->getLongLongAt(row,0);
    IDType seqRegionId            = row->getLongLongAt(row,1);
    long seqRegionStart           = row->getLongAt(row,2);
    long seqRegionEnd             = row->getLongAt(row,3);
    int seqRegionStrand           = row->getIntAt(row,4);
    IDType analysisId             = row->getLongLongAt(row,5);
    char *displayLabel            = row->getStringAt(row,6);

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *ptSlice = slice;

    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    //
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      ptSlice = IDHash_getValue(sliceHash, seqRegionId);
    }

    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is foward strand, nothing needs doing
    //
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      ptSlice = destSlice;
    }
    
    // Finally, create the new PredictionTranscript.
    PredictionTranscript *pt = PredictionTranscript_new();

    PredictionTranscript_setStart       (pt, seqRegionStart);
    PredictionTranscript_setEnd         (pt, seqRegionEnd);
    PredictionTranscript_setStrand      (pt, seqRegionStrand);
    PredictionTranscript_setSlice       (pt, ptSlice);
    PredictionTranscript_setAnalysis    (pt, analysis);
    PredictionTranscript_setAdaptor     (pt, (BaseAdaptor *)pta);
    PredictionTranscript_setDbID        (pt, predictionTranscriptId);
    PredictionTranscript_setDisplayLabel(pt, displayLabel);

    Vector_addElement(pTranscripts, pt);
  }

  IDHash_free(sliceHash, NULL);
  return pTranscripts;
}