示例#1
0
/*
=head2 add_map_coordinates

    Arg  1      int $id
                id of 'source' sequence
    Arg  2      int $start
                start coordinate of 'source' sequence
    Arg  3      int $end
                end coordinate of 'source' sequence
    Arg  4      int $strand
                relative orientation of source and target (+/- 1)
    Arg  5      int $id
                id of 'target' sequence
    Arg  6      int $start
                start coordinate of 'target' sequence
    Arg  7      int $end
                end coordinate of 'target' sequence
    Function    Stores details of mapping between
                'source' and 'target' regions.
    Returntype  none
    Exceptions  none
    Caller      Bio::EnsEMBL::Mapper

=cut
*/
/*
 * Store one equal-length mapping between a 'source' (contig) region and a
 * 'target' (chromosome) region in the mapper.
 *
 *   m            mapper to add the mapping to
 *   contigId     id of the source sequence
 *   contigStart  start coordinate on the source sequence
 *   contigEnd    end coordinate on the source sequence
 *   contigOri    relative orientation of source and target
 *   chrId        id of the target sequence
 *   chrStart     start coordinate on the target sequence
 *   chrEnd       end coordinate on the target sequence
 *
 * The two regions must span the same length; otherwise an error is printed
 * to stderr and the process exits with status 1.  The same pair object is
 * registered in both the 'to' and the 'from' pair hashes, the mapper's pair
 * count is incremented and its sorted flag is cleared.
 */
void Mapper_addMapCoordinates(Mapper *m, IDType contigId, int contigStart, int contigEnd,
                              int contigOri, IDType chrId, int chrStart, int chrEnd) {
  if ((contigEnd - contigStart) != (chrEnd - chrStart)) {
    fprintf(stderr,"ERROR: Cannot deal with mis-lengthed mappings so far\n");
    fprintf(stderr,"Contig %d to %d and chromosome %d to %d\n",contigStart,contigEnd,
            chrStart,chrEnd);
    exit(1);
  }

  MapperPair *newPair = MapperPair_new();

  // Source side of the mapping
  MapperUnit *srcUnit = MapperUnit_new();
  srcUnit->start = contigStart;
  srcUnit->end   = contigEnd;
  srcUnit->id    = contigId;

  // Target side of the mapping
  MapperUnit *tgtUnit = MapperUnit_new();
  tgtUnit->start = chrStart;
  tgtUnit->end   = chrEnd;
  tgtUnit->id    = chrId;

  MapperPair_setUnit(newPair, MAPPER_TO_IND, tgtUnit);
  MapperPair_setUnit(newPair, MAPPER_FROM_IND, srcUnit);
  newPair->ori = contigOri;

  IDHash *srcHash = Mapper_getPairHash(m, MAPPER_FROM_IND);
  IDHash *tgtHash = Mapper_getPairHash(m, MAPPER_TO_IND);

  // Register the pair under the target id, creating the pair set on first use
  if (!IDHash_contains(tgtHash, chrId)) {
    IDHash_add(tgtHash, chrId, MapperPairSet_new());
  }
  MapperPairSet *tgtSet = (MapperPairSet *)IDHash_getValue(tgtHash, chrId);
  MapperPairSet_addPair(tgtSet, newPair);

  // ... and under the source id
  if (!IDHash_contains(srcHash, contigId)) {
    IDHash_add(srcHash, contigId, MapperPairSet_new());
  }
  MapperPairSet *srcSet = (MapperPairSet *)IDHash_getValue(srcHash, contigId);
  MapperPairSet_addPair(srcSet, newPair);

  Mapper_incPairCount(m);
  Mapper_setIsSorted(m, 0);
}
示例#2
0
/*
 * Store an indel mapping between a 'source' (contig) region and a 'target'
 * (chromosome) region in the mapper.
 *
 * This is almost identical to Mapper_addMapCoordinates: it differs in that
 * it performs no length check, flags the pair as an indel, takes long
 * coordinates and returns a value.  It is a candidate for refactoring.
 *
 * The same pair object is registered in both the 'to' and the 'from' pair
 * hashes, the mapper's pair count is incremented and its sorted flag is
 * cleared.
 *
 * Always returns 1.
 */
int Mapper_addIndelCoordinates( Mapper *m, IDType contigId, long contigStart, long contigEnd, int contigOri, IDType chrId, long chrStart, long chrEnd) {
  // The indel pair object is shared by both lists, to and from
  MapperPair *indelPair = MapperPair_new();

  MapperUnit *srcUnit = MapperUnit_new();
  srcUnit->start = contigStart;
  srcUnit->end   = contigEnd;
  srcUnit->id    = contigId;

  MapperUnit *tgtUnit = MapperUnit_new();
  tgtUnit->start = chrStart;
  tgtUnit->end   = chrEnd;
  tgtUnit->id    = chrId;

  MapperPair_setUnit(indelPair, MAPPER_TO_IND, tgtUnit);
  MapperPair_setUnit(indelPair, MAPPER_FROM_IND, srcUnit);

  indelPair->ori     = contigOri;
  indelPair->isIndel = 1;

  IDHash *srcHash = Mapper_getPairHash(m, MAPPER_FROM_IND);
  IDHash *tgtHash = Mapper_getPairHash(m, MAPPER_TO_IND);

  // Register under the target id, creating the pair set on first use
  if (!IDHash_contains(tgtHash, chrId)) {
    IDHash_add(tgtHash, chrId, MapperPairSet_new());
  }
  MapperPairSet *tgtSet = (MapperPairSet *)IDHash_getValue(tgtHash, chrId);
  MapperPairSet_addPair(tgtSet, indelPair);

  // ... and under the source id
  if (!IDHash_contains(srcHash, contigId)) {
    IDHash_add(srcHash, contigId, MapperPairSet_new());
  }
  MapperPairSet *srcSet = (MapperPairSet *)IDHash_getValue(srcHash, contigId);
  MapperPairSet_addPair(srcSet, indelPair);

  Mapper_incPairCount(m);
  Mapper_setIsSorted(m, 0);

  return 1;
}
示例#3
0
/*
=head2 register_assembled

  Arg [1]    : integer $asm_seq_region
               The dbID of the sequence region to register.
  Arg [2]    : int $chunk_id
               The chunk number of the provided seq_region to register.
  Example    : $asm_mapper->register_assembled( 'X', 4 );
  Description: Flags a given assembled region as registered in this
               assembly mapper.  This should only be called by this
               class or the AssemblyMapperAdaptor.  Do not call this
               method unless you really know what you are doing.
  Return type: None
  Exceptions : Throws on incorrect arguments
  Caller     : Internal, AssemblyMapperAdaptor
  Status     : Stable

=cut
*/
/*
 * Flag chunk 'chunkId' of assembled sequence region 'asmSeqRegionId' as
 * registered in this assembly mapper.
 *
 * The assembled register is a two-level hash: seq region id -> (chunk id ->
 * marker).  The inner hash is created on first use for a region; a chunk that
 * is already present is left untouched.
 */
void AssemblyMapper_registerAssembled(AssemblyMapper *am, IDType asmSeqRegionId, int chunkId) {
  IDHash *regionRegister = AssemblyMapper_getAssembledRegister(am);
  IDType  chunkKey       = (IDType)chunkId;

  if ( !IDHash_contains(regionRegister, asmSeqRegionId) ) {
    IDHash_add(regionRegister, asmSeqRegionId, IDHash_new(IDHASH_MEDIUM));
  }

  IDHash *chunks = IDHash_getValue(regionRegister, asmSeqRegionId);

  if (IDHash_contains(chunks, chunkKey)) {
    return; // already registered
  }

  IDHash_add(chunks, chunkKey, &trueVal);
}
示例#4
0
/*
 * Report whether chunk 'chunkId' of assembled sequence region
 * 'asmSeqRegionId' is present in this assembly mapper's assembled register.
 *
 * Returns 1 if both the region and the chunk are present, 0 otherwise.
 */
int AssemblyMapper_haveRegisteredAssembled(AssemblyMapper *am, IDType asmSeqRegionId, int chunkId) {
  IDHash *regionRegister = AssemblyMapper_getAssembledRegister(am);

  if ( !IDHash_contains(regionRegister, asmSeqRegionId) ) {
    return 0; // nothing registered for this region at all
  }

  IDHash *chunks = IDHash_getValue(regionRegister, asmSeqRegionId);

  return IDHash_contains(chunks, (IDType)chunkId) ? 1 : 0;
}
示例#5
0
/*
 * Flag component sequence region 'cmpSeqRegionId' as registered in this
 * assembly mapper.  A region that is already present is left untouched.
 */
void AssemblyMapper_registerComponent(AssemblyMapper *am, IDType cmpSeqRegionId) {
  IDHash *cmpRegister = AssemblyMapper_getComponentRegister(am);

  if ( IDHash_contains(cmpRegister, cmpSeqRegionId) ) {
    return; // already registered
  }

  IDHash_add(cmpRegister, cmpSeqRegionId, &trueVal);
}
示例#6
0
/*
 * Fetch a chromosome by its database id.
 *
 * The adaptor's chromosome cache is consulted first; on a miss the
 * 'chromosome' table is queried and the first returned row is converted with
 * ChromosomeAdaptor_chromosomeFromRow.
 *
 * Returns the chromosome, or NULL if the query returned no row.
 */
Chromosome *ChromosomeAdaptor_fetchByDbID(ChromosomeAdaptor *ca, IDType dbID) {
  // Cache hit: no database round trip needed
  if (IDHash_contains(ca->chrCache,dbID)) {
    return IDHash_getValue(ca->chrCache, dbID);
  }

  char query[256];
  sprintf(query,"SELECT chromosome_id, name, length"
    " FROM chromosome"
    " WHERE  chromosome_id = "
    IDFMTSTR, dbID);

  StatementHandle *sth = ca->prepare((BaseAdaptor *)ca,query,strlen(query));
  sth->execute(sth);

  Chromosome *result = NULL;
  ResultRow *row = sth->fetchRow(sth);
  if (row != NULL) {
    result = ChromosomeAdaptor_chromosomeFromRow(ca, row);
  }

  // The statement handle is finished whether or not a row was found
  sth->finish(sth);

  return result;
}
示例#7
0
/*
 * Report whether component sequence region 'cmpSeqRegionId' is present in
 * this assembly mapper's component register.
 *
 * Returns 1 if present, 0 otherwise.
 */
int AssemblyMapper_haveRegisteredComponent(AssemblyMapper *am, IDType cmpSeqRegionId) {
  IDHash *cmpRegister = AssemblyMapper_getComponentRegister(am);

  return IDHash_contains(cmpRegister, cmpSeqRegionId) ? 1 : 0;
}
示例#8
0
/*
 * Low level accessor for the list of ranges stored for 'id'.
 * Only use for read access.
 *
 * Returns the Vector stored in the registry for 'id', or NULL if the
 * registry has no entry for that id.
 */
Vector *RangeRegistry_getRanges(RangeRegistry *registry, IDType id) {
  IDHash *regReg = RangeRegistry_getRegistry(registry);

  if (!IDHash_contains(regReg, id)) {
    return NULL;
  }

  return IDHash_getValue(regReg, id);
}
示例#9
0
/*
 * Replace the translation's start and end exons with their transformed
 * counterparts.
 *
 * 'exonTransforms' is keyed by the exon pointer value cast to IDType; the
 * stored value is passed to the corresponding setter.  An exon with no entry
 * in the hash (i.e. one that was not mapped) is left as it is.
 */
void Translation_transform(Translation *translation, IDHash *exonTransforms) {
  // Both keys are taken before either exon is replaced
  IDType startKey = (IDType)Translation_getStartExon(translation);
  IDType endKey   = (IDType)Translation_getEndExon(translation);

/* CHECK */
  if (IDHash_contains(exonTransforms, startKey)) {
    Translation_setStartExon(translation, IDHash_getValue(exonTransforms, startKey));
  }

  if (IDHash_contains(exonTransforms, endKey)) {
    Translation_setEndExon(translation, IDHash_getValue(exonTransforms, endKey));
  }
}
示例#10
0
/*
 * Fast-map a region from one of this assembly mapper's two coordinate
 * systems to the other.
 *
 *   am                assembly mapper to use
 *   frmSeqRegionName  name of the sequence region being mapped from
 *   frmStart, frmEnd  region to map
 *   frmStrand         strand of the region
 *   frmCs             coordinate system the region is in
 *   toSlice           not used by this function
 *
 * The source sequence region is registered with the adaptor (if needed)
 * before the mapping is delegated to Mapper_fastMap.
 *
 * Returns the result of Mapper_fastMap, or NULL (after printing an error to
 * stderr) if frmCs is neither the assembled nor the component coordinate
 * system of this mapper.
 */
MapperRangeSet *AssemblyMapper_fastMapImpl(AssemblyMapper *am, char *frmSeqRegionName, long frmStart, long frmEnd, int frmStrand, CoordSystem *frmCs, Slice *toSlice) {
  Mapper *mapper  = AssemblyMapper_getMapper(am);
  CoordSystem *asmCs  = AssemblyMapper_getAssembledCoordSystem(am);
  CoordSystem *cmpCs  = AssemblyMapper_getComponentCoordSystem(am);
  AssemblyMapperAdaptor *adaptor = AssemblyMapper_getAdaptor(am);
  char *frm = NULL;

  IDType seqRegionId = AssemblyMapper_getSeqRegionId(am, frmSeqRegionName, frmCs);


  // Speed critical section:
  // Try to do simple pointer equality comparisons of the coord system
  // objects first since this is likely to work most of the time and is
  // much faster than a function call.
  // NOTE(review): CoordSystem_compare is used as "0 means equal" here - confirm.

  if ( frmCs == cmpCs
       || ( frmCs != asmCs && !CoordSystem_compare(frmCs,  cmpCs)) ) {
    if ( !IDHash_contains(AssemblyMapper_getComponentRegister(am), seqRegionId) ) {
      AssemblyMapperAdaptor_registerComponent( adaptor, am, seqRegionId);
    }
    frm = "component";

  } else if ( frmCs == asmCs || !CoordSystem_compare(frmCs, asmCs) ) {
    // This can be probably be sped up some by only calling registered
    // assembled if needed.
    AssemblyMapperAdaptor_registerAssembled( adaptor, am, seqRegionId, frmStart, frmEnd);

    frm = "assembled";
  } else {
    fprintf(stderr,"Coordinate system %s %s is neither the assembled nor the component coordinate system of this AssemblyMapper\n",
            CoordSystem_getName(frmCs), CoordSystem_getVersion(frmCs) );

    // There is no valid mapping direction, so do not call Mapper_fastMap
    // with an undefined type string.
    return NULL;
  }

  return Mapper_fastMap( mapper, seqRegionId, frmStart, frmEnd, frmStrand, frm );
}
示例#11
0
/*
 * Fetch all prediction transcripts on a slice, optionally preloading their
 * exons.
 *
 *   pta        adaptor to fetch with
 *   slice      slice to fetch transcripts from
 *   logicName  passed through to BaseFeatureAdaptor_fetchAllBySlice
 *   loadExons  if non-zero and at least two transcripts were found, all exons
 *              are loaded now with one query instead of lazily later
 *
 * Returns the Vector of transcripts from BaseFeatureAdaptor_fetchAllBySlice.
 * If the query buffer cannot be allocated or filled, an error is printed and
 * the transcripts are returned without preloaded exons.  If an exon cannot be
 * transferred onto the transcript slice the process exits with status 1.
 */
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) {

  //my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name);
  Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName);

  // if there are 0 or 1 transcripts still do lazy-loading
  if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) {
    return transcripts;
  }

  // preload all of the exons now, instead of lazy loading later
  // faster than 1 query per transcript

  // get extent of region spanned by transcripts
  long minStart =  2000000000;
  long maxEnd   = -2000000000;

  int i;
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if (PredictionTranscript_getSeqRegionStart((SeqFeature*)t) < minStart) {
      minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t);
    }
    if (PredictionTranscript_getSeqRegionEnd((SeqFeature*)t) > maxEnd) {
      maxEnd = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t);
    }
  }

  // Use the caller's slice if it already covers every transcript, otherwise
  // fetch a slice spanning the full extent
  Slice *extSlice;

  if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) {
    extSlice = slice;
  } else {
    SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba);
    extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice),
                                          minStart, maxEnd, Slice_getStrand(slice), CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0);
  }

  // associate exon identifiers with transcripts
  IDHash *trHash = IDHash_new(IDHASH_MEDIUM);
  for (i=0; i<Vector_getNumElement(transcripts); i++) {
    PredictionTranscript *t  = Vector_getElementAt(transcripts, i);
    if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) {
      IDHash_add(trHash, PredictionTranscript_getDbID(t), t);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(trHash);
  int numIds = IDHash_getNumValues(trHash);

  // Size the query buffer from the number of ids rather than relying on a
  // fixed-size buffer: prefix + per-id (digits + ", ") + ")" + terminator.
  static const char queryPrefix[] = "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE  prediction_transcript_id IN (";
  const size_t prefixLen  = sizeof(queryPrefix) - 1;
  const int    maxIdChars = 32; // generous upper bound for one formatted id
  size_t qCap = prefixLen + (size_t)numIds * ((size_t)maxIdChars + 2) + 2;

  char *qStr = calloc(qCap, sizeof(char));
  if (qStr == NULL) {
    fprintf(stderr,"Failed allocating qStr\n");
    free(uniqueIds);
    IDHash_free(trHash, NULL);
    return transcripts;
  }

  memcpy(qStr, queryPrefix, prefixLen);
  size_t endPoint = prefixLen;
  for (i=0; i<numIds; i++) {
    if (i!=0) {
      qStr[endPoint++] = ',';
      qStr[endPoint++] = ' ';
    }
    int lenNum = snprintf(qStr + endPoint, qCap - endPoint, IDFMTSTR, uniqueIds[i]);
    // Every id must fit the per-id budget used to size the buffer; otherwise
    // later writes could run past the end of qStr.
    if (lenNum < 0 || lenNum > maxIdChars) {
      fprintf(stderr,"Failed formatting prediction transcript id into query\n");
      free(qStr);
      free(uniqueIds);
      IDHash_free(trHash, NULL);
      return transcripts;
    }
    endPoint += (size_t)lenNum;
  }
  qStr[endPoint++] = ')';
  qStr[endPoint] = '\0';

  free(uniqueIds);

  StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr));
  sth->execute(sth);

  // Map exon id -> list of (transcript, rank) pairs that use that exon
  IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM);
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType trId = row->getLongLongAt(row,0);
    IDType exId = row->getLongLongAt(row,1);
    int    rank = row->getIntAt(row,2);

    if (! IDHash_contains(exTrHash, exId)) {
      Vector *vec = Vector_new();
      Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free);
      IDHash_add(exTrHash, exId, vec);
    }
    Vector *exVec = IDHash_getValue(exTrHash, exId);
    PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank);
    Vector_addElement(exVec, trp);
  }

  IDHash_free(trHash, NULL);

  sth->finish(sth);

  PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba);
  Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice);

  // move exons onto transcript slice, and add them to transcripts
  for (i=0; i<Vector_getNumElement(exons); i++) {
    PredictionExon *ex = Vector_getElementAt(exons, i);

  // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it
    if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue;

    PredictionExon *newEx;
    if (slice != extSlice) {
      newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice);
      if (newEx == NULL) {
        fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n");
        exit(1);
      }
    } else {
      newEx = ex;
    }

    Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx));
    int j;
    for (j=0; j<Vector_getNumElement(exVec); j++) {
      PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j);
      PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank);
    }
  }

  IDHash_free(exTrHash, Vector_free);
  free(qStr);

  return transcripts;
}
示例#12
0
/*
 * Compute how many positions of [start, end] are covered by the ranges
 * stored in the registry for 'id'.
 *
 * Returns 0 if start > end, if the registry has no entry for 'id', or if the
 * stored list is empty (in which case a message is also printed to stderr).
 * NOTE(review): the binary search and the early break on pStart > end
 * presumably rely on the stored ranges being sorted - confirm.
 */
long RangeRegistry_overlapSize(RangeRegistry *registry, IDType id, long start, long end)  {
  if ( start > end ) {
    return 0;
  }

  IDHash *regReg = RangeRegistry_getRegistry(registry);
  if (!IDHash_contains(regReg, id)) {
    return 0; // No list for this id, so can't be any overlap
  }

  Vector *ranges = IDHash_getValue(regReg, id);
  int numRanges = Vector_getNumElement(ranges);

  if ( numRanges == 0 ) {
    fprintf(stderr, "Odd have zero length list in RangeRegistry_overlapSize\n");
    return 0;
  }

  // Binary search to narrow down the first candidate range;
  // helps if the list is big
  int lo = 0;
  int hi = numRanges - 1;
  while ( hi - lo > 1 ) {
    int mid = ( lo + hi ) >> 1;
    CoordPair *probe = Vector_getElementAt(ranges, mid);
    if ( CoordPair_getEnd(probe) < start ) {
      lo = mid;
    } else {
      hi = mid;
    }
  }

  long total = 0;
  int idx;
  for (idx = lo; idx < numRanges; idx++) {
    CoordPair *cur = Vector_getElementAt(ranges, idx);
    long curStart = CoordPair_getStart(cur);
    long curEnd   = CoordPair_getEnd(cur);

    // Past the query region: stop scanning
    if ( curStart > end ) {
      break;
    }

    // Query region fully enclosed by this range: the overlap is the whole query
    if ( curStart <= start && curEnd >= end ) {
      total = end - start + 1;
      break;
    }

    // Otherwise clip the range to the query and count it if anything is left
    long clipStart = ( start < curStart ? curStart : start );
    long clipEnd   = ( end   < curEnd   ? end      : curEnd );

    if (clipEnd >= clipStart) {
      total += ( clipEnd - clipStart + 1 );
    }
  }

  return total;
}
示例#13
0
/*
=head2 list_pairs

    Arg  1      int $id
                id of 'source' sequence
    Arg  2      int $start
                start coordinate of 'source' sequence
    Arg  3      int $end
                end coordinate of 'source' sequence
    Arg  4      string $type
                nature of transform - gives the type of
                coordinates to be transformed *from*
    Function    list all pairs of mappings in a region
    Returntype  list of Bio::EnsEMBL::Mapper::Pair
    Exceptions  none
    Caller      Bio::EnsEMBL::Mapper

=cut
*/
/*
 * List the mapping pairs stored for 'id' that fall in [start, end] on the
 * side named by 'type'.
 *
 * The mapper is sorted first if it is not already.  A type that matches the
 * mapper's 'to' type selects the 'to' side; any other type selects the 'from'
 * side.  Passing start == -1 and end == -1 returns every pair for the id.
 *
 * Returns a newly created MapperPairSet holding the selected pairs, or NULL
 * if there are no pairs stored for 'id'.  A start greater than end is
 * reported on stderr but does not stop processing.  Exits with status 1 if
 * no pair hash exists for the selected side.
 */
MapperPairSet *Mapper_listPairs(Mapper *m, IDType id, long start, long end, char *type) {
  if (start > end) {
    fprintf(stderr,"ERROR: Start is greater than end for id " IDFMTSTR ", start %ld, end %ld\n",id,start,end);
  }

  if (!Mapper_getIsSorted(m)) {
    Mapper_sort(m);
  }

  // Mapper_compareType is used as "0 means equal" here
  int fromInd = Mapper_compareType(type, Mapper_getTo(m)) ? MAPPER_FROM_IND : MAPPER_TO_IND;

  IDHash *pairHash = Mapper_getPairHash(m, fromInd);
  if (!pairHash) {
    fprintf(stderr,"ERROR: Type %s is neither to or from coordinate system\n",type);
    exit(1);
  }

  if (!IDHash_contains(pairHash, id)) {
    return NULL;
  }

  MapperPairSet *allPairs = IDHash_getValue(pairHash, id);
  MapperPairSet *selected = MapperPairSet_new();

  int wantAll  = (start == -1 && end == -1);
  int numPairs = MapperPairSet_getNumPair(allPairs);
  int idx;

  for (idx = 0; idx < numPairs; idx++) {
    MapperPair *candidate = MapperPairSet_getPairAt(allPairs, idx);

    if (!wantAll) {
      MapperUnit *fromCoord = MapperPair_getUnit(candidate, fromInd);

      // Entirely before the requested region: skip it
      if (fromCoord->end < start) {
        continue;
      }
      // Starts after the requested region: stop scanning
      if (fromCoord->start > end) {
        break;
      }
    }

    MapperPairSet_addPair(selected, candidate);
  }

  return selected;
}
示例#14
0
/*
 * Fetch the external database entries (xrefs) attached to an Ensembl object.
 *
 *   dbea     adaptor to fetch with
 *   ensObj   id of the Ensembl object; must be non-zero
 *   ensType  Ensembl object type string matched against
 *            object_xref.ensembl_object_type; must be non-NULL
 *
 * Returns a new Vector of DBEntry objects, one per distinct xref id, with
 * any synonyms from the joined rows added to the matching entry.
 *
 * Exits with status 1 if ensObj or ensType is missing, or if the query does
 * not fit into the query buffer.
 */
Vector *DBEntryAdaptor_fetchByObjectType(DBEntryAdaptor *dbea, IDType ensObj, char *ensType) {
  Vector *out;
  char qStr[1024];
  int qLen;
  StatementHandle *sth;
  ResultRow *row;
  IDHash *seen;

  if (!ensObj) {
    fprintf(stderr,"Error: Can't fetchByObjectType without an object\n");
    exit(1);
  }

  if (!ensType) {
    fprintf(stderr,"Error: Can't fetchByObjectType without a type\n");
    exit(1);
  }

// Not sure if idt identities are right way round
// NOTE(review): ensType is spliced into the SQL text unescaped - confirm that
// callers only ever pass trusted type names.
  qLen = snprintf(qStr, sizeof(qStr),
    "SELECT xref.xref_id, xref.dbprimary_acc, xref.display_label, xref.version,"
    "       xref.description,"
    "       exDB.db_name, exDB.db_release, exDB.status,"
    "       oxr.object_xref_id,"
    "       es.synonym,"
    "       idt.xref_identity, idt.ensembl_identity"
    " FROM  (external_db exDB, object_xref oxr, xref xref)"
    " LEFT JOIN external_synonym es on es.xref_id = xref.xref_id"
    " LEFT JOIN identity_xref idt on idt.object_xref_id = oxr.object_xref_id"
    " WHERE  xref.xref_id = oxr.xref_id"
    "  AND  xref.external_db_id = exDB.external_db_id"
    "  AND  oxr.ensembl_id = " IDFMTSTR
    "  AND  oxr.ensembl_object_type = '%s'",
    ensObj,
    ensType);

  // A long ensType must not overflow the buffer or send a truncated query
  if (qLen < 0 || (size_t)qLen >= sizeof(qStr)) {
    fprintf(stderr,"Error: Query too long in fetchByObjectType\n");
    exit(1);
  }

  sth = dbea->prepare((BaseAdaptor *)dbea,qStr,strlen(qStr));

  sth->execute(sth);

  seen = IDHash_new(IDHASH_SMALL);
  out = Vector_new();

  while ((row = sth->fetchRow(sth))) {
    DBEntry *exDB;
    IDType refID = row->getLongLongAt(row,0);

    // using an outer join on the synonyms as well as on identity_xref, we
    // now have to filter out the duplicates (see v.1.18 for
    // original). Since there is at most one identity_xref row per xref,
    // this is easy enough; all the 'extra' bits are synonyms

    if (!IDHash_contains(seen,refID))  {
      exDB = DBEntry_new();
      DBEntry_setAdaptor(exDB,(BaseAdaptor *)dbea);
      DBEntry_setDbID(exDB, refID);
      DBEntry_setPrimaryId(exDB, row->getStringAt(row,1));
      DBEntry_setDisplayId(exDB, row->getStringAt(row,2));
      DBEntry_setVersion(exDB, row->getStringAt(row,3));
      DBEntry_setDbName(exDB, row->getStringAt(row,5));
      DBEntry_setRelease(exDB, row->getStringAt(row,6));

      // Identity columns come from a LEFT JOIN and may be absent
      if (row->col(row,10)) {
        IdentityXref *idx = IdentityXref_new();
        DBEntry_setIdentityXref(exDB,idx);
        IdentityXref_setQueryIdentity(idx, row->getDoubleAt(row,10));
        IdentityXref_setTargetIdentity(idx, row->getDoubleAt(row,11));
      }

      if (row->col(row,4)) DBEntry_setDescription(exDB, row->getStringAt(row,4));
      if (row->col(row,7)) DBEntry_setStatus(exDB, row->getStringAt(row,7));

      Vector_addElement(out, exDB);
      IDHash_add(seen, refID, exDB);
    }

    // Every row, first or duplicate, may carry a synonym for the entry
    exDB = IDHash_getValue(seen, refID);

    if (row->col(row,9)) {
      DBEntry_addSynonym(exDB,row->getStringAt(row,9));
    }
  }

  // The entries are owned by 'out'; only the lookup hash is released
  IDHash_free(seen, NULL);

  sth->finish(sth);

  return out;
}
示例#15
0
// NIY: May need some reworking to handle mapInsert because I'd changed the way it returns data
// Change back to returning MapperRangeSet
/*
 * Map [start, end] on 'id' to the other side of the mapper, handling only
 * the simple case where the whole region lies inside a single pair.
 *
 * An insert (start == end + 1) is delegated to Mapper_mapInsert with the
 * fastmap flag set.  A type that matches the mapper's 'to' type maps from the
 * 'to' side to the 'from' side; any other type maps 'from' -> 'to'.
 *
 * Returns a new MapperRangeSet holding at most one coordinate: the mapping
 * through the first pair that fully contains the region.  The set is empty
 * if no pair contains it.  Exits with status 1 if no pair hash exists for the
 * selected side or if 'id' is not in it.
 */
MapperRangeSet *Mapper_fastMap(Mapper *m, IDType id, long start, long end, int strand, char *type) {
  if (start == end+1) {
    return Mapper_mapInsert(m, id, start, end, strand, type, 1);
  }

  int selfInd;
  int targetInd;
  CoordSystem *targetCs;

  // Mapper_compareType is used as "0 means equal" here
  if (Mapper_compareType(type, Mapper_getTo(m))) {
    selfInd   = MAPPER_FROM_IND;
    targetInd = MAPPER_TO_IND;
    targetCs  = Mapper_getToCoordSystem(m);
  } else {
    selfInd   = MAPPER_TO_IND;
    targetInd = MAPPER_FROM_IND;
    targetCs  = Mapper_getFromCoordSystem(m);
  }

  IDHash *pairHash = Mapper_getPairHash(m, selfInd);
  if (!pairHash) {
    fprintf(stderr,"ERROR: Type %s is neither to or from coordinate system\n",type);
    exit(1);
  }

  if (!Mapper_getIsSorted(m)) {
    Mapper_sort(m);
  }

  if (!IDHash_contains(pairHash, id)) {
    fprintf(stderr,"ERROR: Fastmap expects to be able to find an id. It couldnt for " IDFMTSTR "\n",id);
    exit(1);
  }

  MapperPairSet *pairSet = IDHash_getValue(pairHash, id);
  MapperRangeSet *mapped = MapperRangeSet_new();

  int numPairs = MapperPairSet_getNumPair(pairSet);
  int idx;
  for (idx = 0; idx < numPairs; idx++) {
    MapperPair *pair   = MapperPairSet_getPairAt(pairSet, idx);
    MapperUnit *self   = MapperPair_getUnit(pair, selfInd);
    MapperUnit *target = MapperPair_getUnit(pair, targetInd);

    // only super easy mapping is done: the region must sit inside this pair
    if (start >= self->start && end <= self->end) {
      long mappedStart;
      long mappedEnd;
      int  mappedStrand;

      if (pair->ori == 1) {
        // Same orientation: offsets are measured from the target start
        mappedStart  = target->start + start - self->start;
        mappedEnd    = target->start + end   - self->start;
        mappedStrand = strand;
      } else {
        // Opposite orientation: offsets are measured back from the target
        // end and the strand is flipped
        mappedStart  = target->end - (end - self->start);
        mappedEnd    = target->end - (start - self->start);
        mappedStrand = -strand;
      }

      MapperCoordinate *coord = MapperCoordinate_new(target->id,
                                                     mappedStart,
                                                     mappedEnd,
                                                     mappedStrand,
                                                     targetCs,
                                                     0); // Perl didn't set rank, so use 0

      MapperRangeSet_addRange(mapped, (MapperRange *)coord);
      break; // only the first containing pair is used
    }
  }

  // NIY: Here we return empty set, in mapInsert it returns NULL for empty fastmap - need to work out which is right
  return mapped;
}
示例#16
0
MapperRangeSet *Mapper_mapCoordinates(Mapper *m, IDType id, long start, long end, int strand, char *type) {


  // special case for handling inserts:
  if ( start == end+1 ) {
    return Mapper_mapInsert(m, id, start, end, strand, type, 0 /*fastmap flag */);
  } else if (start > end+1) {
    fprintf(stderr,"ERROR: Start is greater than end for id " IDFMTSTR ", start %ld, end %ld\n",id,start,end);
    exit(1);
  }


  IDHash *hash;
  CoordSystem *cs;
  if( Mapper_getIsSorted(m) == 0 ) {
    Mapper_sort(m);
  }

  int from, to;
  if (!Mapper_compareType(type,Mapper_getTo(m))) {
    from = MAPPER_TO_IND;
    to   = MAPPER_FROM_IND;
    cs   = Mapper_getFromCoordSystem(m);
  } else if (!Mapper_compareType(type,Mapper_getFrom(m))) {
    from = MAPPER_FROM_IND;
    to   = MAPPER_TO_IND;
    cs   = Mapper_getToCoordSystem(m);
  } else {
    fprintf(stderr, "Invalid type [%s] in mapper (not from [%s] or to [%s])\n", type, Mapper_getFrom(m), Mapper_getTo(m));
    exit(1);
  }

  hash = Mapper_getPairHash(m, from);

  if (!hash) {
    fprintf(stderr,"ERROR: Type %s is neither to or from coordinate system\n",type);
    exit(1);
  }


  MapperRangeSet *results = MapperRangeSet_new();


// Was upcasing the id - its a number in C, I haven't found a case yet where its a string
  if (!IDHash_contains(hash, id)) {
    // one big gap!
    MapperRange *gap = (MapperRange *)MapperGap_new(start,end,0); // Perl didn't set rank so use 0
    MapperRangeSet_addRange(results,gap);
    return results;
  }


  MapperPairSet *pairs = IDHash_getValue(hash,id); //my $lr = $hash->{ uc($id) };


  MapperPair *lastUsedPair = NULL;

  int startIdx, endIdx, midIdx;
  MapperPair *pair;
  MapperUnit *selfCoord;


  startIdx = 0;
  endIdx   = MapperPairSet_getNumPair(pairs)-1;

  // binary search the relevant pairs
  // helps if the list is big
  while ( ( endIdx - startIdx ) > 1 ) {
    midIdx = ( startIdx + endIdx ) >> 1;
    pair   = MapperPairSet_getPairAt(pairs, midIdx);

    selfCoord = MapperPair_getUnit(pair, from);

    if ( selfCoord->end < start ) {
      startIdx = midIdx;
    } else {
      endIdx = midIdx;
    }
  }

  int rank       = 0;
  long origStart = start;
  IDType lastTargetCoord;
  int lastTargetCoordIsSet = 0;

  int i;
  for (i=startIdx; i<MapperPairSet_getNumPair(pairs); i++) {
    MapperPair *pair = MapperPairSet_getPairAt(pairs,i);
    MapperUnit *selfCoord   = MapperPair_getUnit(pair, from);
    MapperUnit *targetCoord = MapperPair_getUnit(pair, to);

    //
    // But not the case for haplotypes!! need to test for this case???
    // so removing this till a better solution is found
    // 
    //
    //     if($self_coord->{'start'} < $start){
    //       $start = $orig_start;
    //       $rank++;
    //     }

    if ( lastTargetCoordIsSet && targetCoord->id != lastTargetCoord ) {
      if ( selfCoord->start < start ) {    // i.e. the same bit is being mapped to another assembled bit
        start = origStart;
      }
    } else {
      lastTargetCoord = targetCoord->id;
      lastTargetCoordIsSet = 1;
    }

    // if we haven't even reached the start, move on
    if (selfCoord->end < origStart) {
      continue;
    }

    // if we have over run, break
    if (selfCoord->start > end) {
      break;
    }

// Check is start not origStart
    if (start < selfCoord->start) {
      // gap detected
      MapperRange *gap = (MapperRange *)MapperGap_new(start, selfCoord->start-1, rank);
      MapperRangeSet_addRange(results,gap);
      start = gap->end+1;
    }

    long targetStart, targetEnd;

    MapperRange *res;

    if ( MapperPair_isIndel(pair) ) {
      // When next pair is an IndelPair and not a Coordinate, create the
      // new mapping Coordinate, the IndelCoordinate.
      targetStart = targetCoord->start;
      targetEnd   = targetCoord->end;

      // create a Gap object
      MapperGap *gap = MapperGap_new(start,
                                     selfCoord->end < end ? selfCoord->end : end,
                                     0); // Perl didn't set rank - don't know if need to

      // create the Coordinate object
      MapperCoordinate *coord = MapperCoordinate_new(targetCoord->id,
                                                     targetStart,
                                                     targetEnd,
                                                     pair->ori * strand, 
                                                     cs, 
                                                     0); // Perl didn't set rank - don't know if need to

      //and finally, the IndelCoordinate object with
      res = (MapperRange *)IndelCoordinate_new(gap, coord);
    } else {
      // start is somewhere inside the region
      if (pair->ori == 1) {
        targetStart = targetCoord->start + (start - selfCoord->start);
      } else {
        targetEnd = targetCoord->end - (start - selfCoord->start);
      }

      // Either we are enveloping this map or not.  If yes, then end
      // point (self perspective) is determined solely by target.  If
      // not we need to adjust.
      if (end > selfCoord->end) {
        // enveloped
        if( pair->ori == 1 ) {
          targetEnd = targetCoord->end;
        } else {
          targetStart = targetCoord->start;
        }
      } else {
        // need to adjust end
        if (pair->ori == 1) {
          targetEnd = targetCoord->start + (end - selfCoord->start);
        } else {
          targetStart = targetCoord->end - (end - selfCoord->start);
        }
      }

      res = (MapperRange *)MapperCoordinate_new(targetCoord->id,
                                                targetStart,
                                                targetEnd,
                                                pair->ori * strand, 
                                                cs, 
                                                rank);
    } // end else [ if ( exists $pair->{'indel'...})]

    MapperRangeSet_addRange(results, res);

    lastUsedPair = pair;
    start = selfCoord->end+1;
  }

  if (lastUsedPair == NULL) {
    MapperRange *gap = (MapperRange *)MapperGap_new(start,end, 0); // Perl doesn't set rank, so use 0
    MapperRangeSet_addRange(results,gap);

  } else if (MapperPair_getUnit(lastUsedPair, from)->end < end) {
    // gap at the end
    MapperRange *gap = (MapperRange *)MapperGap_new(
                           MapperPair_getUnit(lastUsedPair,from)->end + 1,
                           end, 0 ); // Perl didn't set rank so use 0
    MapperRangeSet_addRange(results,gap);
  }

  if (strand == -1) {
    MapperRangeSet_reverse(results);
  }

  return results;
}
示例#17
0
/*
=head2 _objs_from_sth

  Arg [1]    : DBI:st $sth 
               An executed DBI statement handle
  Arg [2]    : (optional) Bio::EnsEMBL::Mapper $mapper 
               An mapper to be used to convert contig coordinates
               to assembly coordinates.
  Arg [3]    : (optional) Bio::EnsEMBL::Slice $slice
               A slice to map the prediction transcript to.   
  Example    : $p_transcripts = $self->_objs_from_sth($sth);
  Description: Creates a list of Prediction transcripts from an executed DBI
               statement handle.  The columns retrieved via the statement 
               handle must be in the same order as the columns defined by the
               _columns method.  If the slice argument is provided then the
               the prediction transcripts will be in returned in the coordinate
               system of the $slice argument.  Otherwise the prediction 
               transcripts will be returned in the RawContig coordinate system.
  Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts
  Exceptions : none
  Caller     : superclass generic_fetch
  Status     : Stable

=cut
*/
/*
 * C port notes.
 *
 * Builds one PredictionTranscript per row fetched from sth and returns them
 * in a newly created Vector.
 *
 *   pta        adaptor; its dba supplies the slice and analysis adaptors
 *   sth        executed statement handle; columns are read by position as
 *              (prediction transcript id, seq region id, start, end, strand,
 *              analysis id, display label)
 *   assMapper  optional (may be NULL); when given, each row's coordinates are
 *              mapped through it and the first returned range is used
 *   destSlice  optional (may be NULL); when given, coordinates are made
 *              relative to it and rows falling outside it, or on a different
 *              seq region, are skipped
 *
 * Rows for which the mapper returns no ranges are skipped.  If the first
 * mapped range is a gap the process exits with status 1.
 */
Vector *PredictionTranscriptAdaptor_objectsFromStatementHandle(PredictionTranscriptAdaptor *pta, 
                                                               StatementHandle *sth, 
                                                               AssemblyMapper *assMapper, 
                                                               Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(pta->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(pta->dba);

  Vector *pTranscripts = Vector_new();

  // Slices fetched so far, keyed by seq region id, so each is fetched only once
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);

  // Properties of the destination slice; only read when destSlice is non-NULL
  long         destSliceStart  = 0;
  long         destSliceEnd    = 0;
  int          destSliceStrand = 0;
  long         destSliceLength = 0;
  IDType       destSliceSrId   = 0;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType predictionTranscriptId = row->getLongLongAt(row,0);
    IDType seqRegionId            = row->getLongLongAt(row,1);
    long seqRegionStart           = row->getLongAt(row,2);
    long seqRegionEnd             = row->getLongAt(row,3);
    int seqRegionStrand           = row->getIntAt(row,4);
    IDType analysisId             = row->getLongLongAt(row,5);
    char *displayLabel            = row->getStringAt(row,6);

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *ptSlice = slice;

    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    //
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        // release the (empty) range set before moving to the next row
        MapperRangeSet_free(mrs);
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        // copy out everything needed before the range set is freed
        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      // get a slice in the coord system we just mapped to
      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      ptSlice = IDHash_getValue(sliceHash, seqRegionId);
    }

    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is forward strand, nothing needs doing
    //
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          // reverse strand slice: flip the coordinates around the slice end
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      ptSlice = destSlice;
    }
    
    // Finally, create the new PredictionTranscript.
    PredictionTranscript *pt = PredictionTranscript_new();

    PredictionTranscript_setStart       (pt, seqRegionStart);
    PredictionTranscript_setEnd         (pt, seqRegionEnd);
    PredictionTranscript_setStrand      (pt, seqRegionStrand);
    PredictionTranscript_setSlice       (pt, ptSlice);
    PredictionTranscript_setAnalysis    (pt, analysis);
    PredictionTranscript_setAdaptor     (pt, (BaseAdaptor *)pta);
    PredictionTranscript_setDbID        (pt, predictionTranscriptId);
    PredictionTranscript_setDisplayLabel(pt, displayLabel);

    Vector_addElement(pTranscripts, pt);
  }

  // NULL is passed as the value-free function: the cached slices are still
  // referenced by the transcripts created above
  IDHash_free(sliceHash, NULL);
  return pTranscripts;
}
示例#18
0
/*
 * Combines two sets of alignments by sweeping over their start/end points.
 *
 * Every align contributes two list elements: one at its start and one at
 * end+0.5.  Aligns from alignSet1 are placed using their query DNAFrag and
 * query coordinates (set number 0); aligns from alignSet2 are placed using
 * their consensus DNAFrag and consensus coordinates (set number 1).
 *
 * The combined list is sorted with GenomicAlignListElem_compFunc and walked
 * in order.  The first time an align's dbID is met it is added to its set's
 * "currently open" hash and paired, via
 * GenomicAlignAdaptor_addDerivedAlignments, with every align currently open
 * in the other set; the second time it is met it is removed again.
 *
 * Returns a new Vector to which GenomicAlignAdaptor_addDerivedAlignments has
 * been given the chance to add results.
 */
Vector *GenomicAlignAdaptor_mergeAlignsets(GenomicAlignAdaptor *gaa, Vector *alignSet1, Vector *alignSet2) {
  int i;
  Vector *bigList = Vector_new();
  IDHash *overlappingSets[2];
  Vector *mergedAligns;


  for (i=0;i<Vector_getNumElement(alignSet1); i++) {
    GenomicAlign *align = Vector_getElementAt(alignSet1, i);
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(align)),
                                                        GenomicAlign_getQueryStart(align), align, 0));
    // +0.5 places the end marker after any start marker at the same position
    // (presumably so that abutting aligns are not treated as overlapping - TODO confirm)
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getQueryDNAFrag(align)),
                                                        GenomicAlign_getQueryEnd(align)+0.5, align, 0));
  }

  for (i=0;i<Vector_getNumElement(alignSet2); i++) {
    GenomicAlign *align = Vector_getElementAt(alignSet2, i);
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(align)),
                                                        GenomicAlign_getConsensusStart(align), align, 1));
    Vector_addElement(bigList, GenomicAlignListElem_new(DNAFrag_getDbID(GenomicAlign_getConsensusDNAFrag(align)),
                                                        GenomicAlign_getConsensusEnd(align)+0.5, align, 1));
  }
  
  Vector_sort(bigList, GenomicAlignListElem_compFunc);

  // walking from start to end through sortlist and keep track of the 
  // currently overlapping set of Alignments
 
  overlappingSets[0] = IDHash_new(IDHASH_SMALL);
  overlappingSets[1] = IDHash_new(IDHASH_SMALL);

  mergedAligns = Vector_new();

  for (i=0; i<Vector_getNumElement(bigList); i++) {
    GenomicAlignListElem *gale  = Vector_getElementAt(bigList,i);

    GenomicAlign *align = gale->align;
    IDType alignID      = GenomicAlign_getDbID(align);
    int setNo           = gale->setNum;

    if (IDHash_contains(overlappingSets[setNo], alignID)) {
      // second visit (the end marker): remove from current overlapping set
      IDHash_remove(overlappingSets[setNo], alignID, NULL);
    } else {
      int j;
      IDHash *otherSet = overlappingSets[1-setNo];
      int nOther       = IDHash_getNumValues(otherSet);
      void **values    = IDHash_getValues(otherSet);

      // insert into the set and do all the overlap business
      IDHash_add(overlappingSets[setNo], alignID, align);

      // the other set contains everything this align overlaps with
      for (j=0; j<nOther; j++) {
        GenomicAlign *align2 = values[j];
        // the set 0 align is always passed first
        if (setNo == 0) {
          GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, align, align2);
        } else {
          GenomicAlignAdaptor_addDerivedAlignments(gaa, mergedAligns, align2, align);
        }
      }
      free(values);
    }
  }

  // The hashes only hold borrowed GenomicAlign pointers, so release the
  // hashes themselves and pass NULL as the value-free function.
  IDHash_free(overlappingSets[0], NULL);
  IDHash_free(overlappingSets[1], NULL);

  // TODO: bigList and its GenomicAlignListElem entries are still not released
  // here; how those elements must be freed is not visible from this function.

  return mergedAligns;
}
示例#19
0
/*
 * Registers the range rStart..rEnd for 'id' and reports which parts of it
 * were not registered before.
 *
 *   start, end     the range actually required; must lie inside rStart..rEnd
 *   rStart, rEnd   the (possibly larger) range to register
 *   wantGaps       non-zero if the caller wants the list of gaps back.  The
 *                  flag exists because the result is quite often unused in
 *                  the caller and would otherwise leak memory.
 *
 * Return value:
 *   wantGaps == 0  always NULL
 *   wantGaps != 0  a new Vector of new CoordPairs, one per part of
 *                  rStart..rEnd that was not covered by an existing entry.
 *                  The Vector is empty (not NULL - the Perl returns undef
 *                  here) when an existing entry already covers start..end;
 *                  in that case nothing is registered.
 *
 * Exits with status 1 if the arguments are inconsistent.
 */
Vector *RangeRegistry_checkAndRegister(RangeRegistry *registry, IDType id, long start, long end, 
                                       long rStart, long rEnd, int wantGaps) {

  // The following was commented out due to Ensembl Genomes requirements
  // for bacterial genomes.
  // The following was uncommented because I'm not caring about those requirements
  if ( start > end ) {
    fprintf(stderr, "start argument [%ld] must be less than (or equal to) end argument [%ld]\n", start, end);
    exit(1);
  }
  
  if ( rStart > rEnd ) {
    fprintf(stderr, "rStart argument [%ld] must be less than (or equal to) rEnd argument [%ld]\n", rStart, rEnd);
    exit(1);
  }

  if ( rStart > start ) {
    fprintf(stderr, "rStart argument [%ld] must be less than (or equal to) start [%ld]\n", rStart, start);
    exit(1);
  }

  if ( rEnd < end ) {
    fprintf(stderr, "rEnd argument [%ld] must be greater than (or equal to) end [%ld]\n", rEnd, end);
    exit(1);
  }

  // Fetch, or create on first use, the list of registered ranges for this id
  IDHash *regReg = RangeRegistry_getRegistry(registry);
  Vector *list;
  if (IDHash_contains(regReg, id)) {
    list = IDHash_getValue(regReg, id);
  } else {
    list = Vector_new();
    IDHash_add(regReg, id, list);
  }

  int len = Vector_getNumElement(list);

  if (len == 0) {
    // this is the first request for this id, return a gap pair for the
    // entire range and register it as seen
    Vector_addElement(list, CoordPair_new(rStart, rEnd));

    if (!wantGaps) {
      return NULL;
    }

    // A separate CoordPair is handed back so the caller's vector never
    // shares an element with the registry's own list
    Vector *wholeRange = Vector_new();
    Vector_addElement(wholeRange, CoordPair_new(rStart, rEnd));
    return wholeRange;
  }

  //####
  // loop through the list of existing ranges recording any "gaps" where
  // the existing range does not cover part of the requested range
  // 

  int startIdx = 0;
  int endIdx   = len-1;
  int midIdx;
  CoordPair *range;

  // binary search the relevant pairs
  // helps if the list is big
  while ( ( endIdx - startIdx ) > 1 ) {
    midIdx = ( startIdx + endIdx ) >> 1;
    range  = Vector_getElementAt(list, midIdx);

    if ( CoordPair_getEnd(range) < rStart ) {
      startIdx = midIdx;
    } else {
      endIdx = midIdx;
    }
  }

  int i;

  // no work needs to be done at all if we find a range pair that
  // entirely overlaps the requested region.  This is tested over exactly the
  // entries the main loop below visits, but before any gap is allocated, so
  // that nothing has to be thrown away when it succeeds.
  for (i=startIdx; i < len ; i++ ) {
    CoordPair *pRange = Vector_getElementAt(list, i);

    if ( CoordPair_getStart(pRange) <= start && CoordPair_getEnd(pRange) >= end ) {
      return wantGaps ? Vector_new() : NULL; // perl returns undef, but that causes me problems
    }
    if ( CoordPair_getEnd(pRange) >= rEnd ) {
      break;
    }
  }

  Vector *gapPairs = wantGaps ? Vector_new() : NULL;

  long gapStart  = rStart;
  int  rIdx      = -1;  // index of the entry that reaches rEnd, if any
  int  rStartIdx = -1;  // first entry adjacent to or overlapping rStart..rEnd
  int  rEndIdx   = -1;  // last such entry; only meaningful when rStartIdx >= 0

  for (i=startIdx; i < len ; i++ ) {
    CoordPair *pRange = Vector_getElementAt(list, i);
    long pStart = CoordPair_getStart(pRange);
    long pEnd   = CoordPair_getEnd(pRange);

    // find adjacent or overlapping regions already registered
    if ( pEnd >= ( rStart - 1 ) && pStart <= ( rEnd + 1 ) ) {
      if ( rStartIdx < 0 ) { // Not yet been set
        rStartIdx = i;
      }
      rEndIdx = i;
    }

    if ( pStart > rStart ) {
      long gapEnd = ( rEnd < pStart ) ? rEnd : pStart - 1;
      if (wantGaps) {
        Vector_addElement(gapPairs, CoordPair_new(gapStart, gapEnd));
      }
    }

    gapStart = ( rStart > pEnd ) ? rStart : pEnd + 1;

    if ( pEnd >= rEnd ) {
      rIdx = i;
      break;
    }
  }

  // do we have to make another gap?
  if ( gapStart <= rEnd ) {
    if (wantGaps) {
      Vector_addElement(gapPairs, CoordPair_new(gapStart, rEnd));
    }
  }

  // 
  // Merge the new range into the registered list
  // 
  if (rStartIdx >= 0 ) { // rStartIdx has been set to something 
    long newStart;
    long newEnd;
    CoordPair *rStartIdxRange = Vector_getElementAt(list, rStartIdx); 
    CoordPair *rEndIdxRange   = Vector_getElementAt(list, rEndIdx); 

    if ( rStart < CoordPair_getStart(rStartIdxRange)) {
      newStart = rStart;
    } else {
      newStart = CoordPair_getStart(rStartIdxRange);
    }

    if ( rEnd > CoordPair_getEnd(rEndIdxRange)) {
      newEnd = rEnd;
    } else {
      newEnd = CoordPair_getEnd(rEndIdxRange);
    }

    CoordPair *cp = CoordPair_new(newStart, newEnd);

    // Replace entries rStartIdx..rEndIdx (inclusive) with the merged range;
    // equivalent to the Perl
    //   splice( @$list, $rstart_idx, $rend_idx - $rstart_idx + 1, [ $new_start, $new_end ] );
    for (i=rStartIdx; i<=rEndIdx; i++) {
      Vector_removeElementAt(list, rStartIdx); // Always remove from rStartIdx as array is shrinking by one each time called
    }
    Vector_insertElementAt(list, rStartIdx, cp);

  } else if (rIdx >= 0) {
    // nothing touches the new range: insert it in front of the entry that
    // reaches rEnd.  Perl: splice( @$list, $r_idx, 0, [ $rstart, $rend ] );
    CoordPair *cp = CoordPair_new(rStart, rEnd);
    Vector_insertElementAt(list, rIdx, cp);
  } else {
    // no entry reaches rEnd: the new range goes on the end of the list
    CoordPair *cp = CoordPair_new(rStart, rEnd);
    Vector_addElement(list, cp);
  }

  // Note if wantGaps is not set then gapPairs will be NULL - but you said you didn't want it so that should be OK
  return gapPairs;
}
/*
 * Builds one IntronSupportingEvidence per row fetched from sth and returns
 * them in a newly created Vector.
 *
 *   isea       adaptor; its dba supplies the slice and analysis adaptors
 *   sth        executed statement handle; columns are read by position as
 *              (id, analysis id, seq region id, start, end, strand, hit name,
 *              score, score type, is-splice-canonical flag)
 *   assMapper  optional (may be NULL); when given, each row's coordinates are
 *              mapped through it and the first returned range is used
 *   destSlice  optional (may be NULL); when given, coordinates are made
 *              relative to it and rows falling outside it, or on a different
 *              seq region, are skipped
 *
 * Rows for which the mapper returns no ranges are skipped.  If the first
 * mapped range is a gap the process exits with status 1.
 */
Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, 
                                                                   StatementHandle *sth,
                                                                   AssemblyMapper *assMapper,
                                                                   Slice *destSlice) {
  SliceAdaptor *sa     = DBAdaptor_getSliceAdaptor(isea->dba);
  AnalysisAdaptor *aa  = DBAdaptor_getAnalysisAdaptor(isea->dba);

  Vector *features = Vector_new();

  // Slices fetched so far, keyed by seq region id, so each is fetched only
  // once.  The Perl version's separate analysis / seq region name / coord
  // system caches are not needed here.
  IDHash *sliceHash = IDHash_new(IDHASH_SMALL);

  // Properties of the destination slice; only read when destSlice is non-NULL
  long         destSliceStart  = 0;
  long         destSliceEnd    = 0;
  int          destSliceStrand = 0;
  long         destSliceLength = 0;
  IDType       destSliceSrId   = 0;

  if (destSlice) {
    destSliceStart  = Slice_getStart(destSlice);
    destSliceEnd    = Slice_getEnd(destSlice);
    destSliceStrand = Slice_getStrand(destSlice);
    destSliceLength = Slice_getLength(destSlice);
    destSliceSrId   = Slice_getSeqRegionId(destSlice);
  }

  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType id =           row->getLongLongAt(row,0);
    IDType analysisId =   row->getLongLongAt(row,1);
    IDType seqRegionId =  row->getLongLongAt(row,2);
    long seqRegionStart = row->getLongAt(row,3);
    long seqRegionEnd =   row->getLongAt(row,4);
    int seqRegionStrand = row->getIntAt(row,5);
    char *hitName =       row->getStringAt(row,6);
    double score =        row->getDoubleAt(row,7);
    char *scoreType =     row->getStringAt(row,8);
    int spliceCanonical = row->getIntAt(row,9); 

    // get the analysis object
    Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId);

    // get the slice object for the row's seq region
    // (the Perl first converts the id with get_seq_region_id_internal; that
    // step is not done here)
    if (! IDHash_contains(sliceHash, seqRegionId)) {
      IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
    }
    Slice *slice = IDHash_getValue(sliceHash, seqRegionId);

    Slice *iseSlice = slice;
    
    char *srName      = Slice_getSeqRegionName(slice);
    CoordSystem *srCs = Slice_getCoordSystem(slice);

    // 
    // remap the feature coordinates to another coord system
    // if a mapper was provided
    //
    if (assMapper != NULL) {
      MapperRangeSet *mrs;

      // Slightly suspicious about need for this if statement so left in perl statements for now
      if (destSlice != NULL &&
          assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) {
        mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice);
      } else {
        mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL);
      }

      // skip features that map to gaps or coord system boundaries
      if (MapperRangeSet_getNumRange(mrs) == 0) {
        // release the (empty) range set before moving to the next row
        MapperRangeSet_free(mrs);
        continue;
      }
      MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0);
      if (range->rangeType == MAPPERRANGE_GAP) {
        fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n");
        exit(1);
      } else {
        MapperCoordinate *mc = (MapperCoordinate *)range;

        // copy out everything needed before the range set is freed
        seqRegionId     = mc->id;
        seqRegionStart  = mc->start;
        seqRegionEnd    = mc->end;
        seqRegionStrand = mc->strand;
      }

      MapperRangeSet_free(mrs);

      // get a slice in the coord system we just mapped to.  The Perl tests
      // the coord systems here but does the same thing in both branches, so
      // no test is made.
      if (! IDHash_contains(sliceHash, seqRegionId)) {
        IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF));
      }
      iseSlice = IDHash_getValue(sliceHash, seqRegionId);
    }


    //
    // If a destination slice was provided convert the coords
    // If the dest_slice starts at 1 and is forward strand, nothing needs doing
    // 
    if (destSlice != NULL) {
      if (destSliceStart != 1 || destSliceStrand != 1) {
        if (destSliceStrand == 1) {
          seqRegionStart = seqRegionStart - destSliceStart + 1;
          seqRegionEnd   = seqRegionEnd - destSliceStart + 1;
        } else {
          // reverse strand slice: flip the coordinates around the slice end
          long tmpSeqRegionStart = seqRegionStart;
          seqRegionStart = destSliceEnd - seqRegionEnd + 1;
          seqRegionEnd   = destSliceEnd - tmpSeqRegionStart + 1;

          seqRegionStrand = -seqRegionStrand;
        }
      }
       
      // throw away features off the end of the requested slice
      if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) {
        continue;
      }
      iseSlice = destSlice;
    }
    
    IntronSupportingEvidence *ise = IntronSupportingEvidence_new();

    IntronSupportingEvidence_setStart             (ise, seqRegionStart);
    IntronSupportingEvidence_setEnd               (ise, seqRegionEnd);
    IntronSupportingEvidence_setStrand            (ise, seqRegionStrand);
    IntronSupportingEvidence_setSlice             (ise, iseSlice);
    IntronSupportingEvidence_setAnalysis          (ise, analysis);
    IntronSupportingEvidence_setAdaptor           (ise, (BaseAdaptor *)isea);
    IntronSupportingEvidence_setDbID              (ise, id);
    IntronSupportingEvidence_setHitName           (ise, hitName);
    IntronSupportingEvidence_setScore             (ise, score);
    IntronSupportingEvidence_setScoreType         (ise, scoreType);
    IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical);

    Vector_addElement(features, ise);
  }

  // NULL is passed as the value-free function: the cached slices are still
  // referenced by the features created above
  IDHash_free(sliceHash, NULL);

  return features;
}
示例#21
0
/*
 * Fetches the objects whose dbIDs are listed in idList, bypassing any cache,
 * by issuing one or more generic fetches with an "<id column> = ..." or
 * "<id column> IN (...)" constraint.
 *
 *   ba      adaptor; its first table's synonym and name form the id column
 *           "<synonym>.<name>_id"
 *   idList  Vector of pointers to IDType values; duplicates are removed
 *           before querying
 *   slice   passed through unchanged to BaseAdaptor_genericFetch
 *
 * Returns a new Vector holding the results of all the fetches (empty if
 * idList is empty), or NULL if idList is NULL or the id column name does not
 * fit its buffer.
 *
 * Note I didn't implement the stable id fetching uggliness here. I'll
 * probably make a separate method for that if necessary.
 */
Vector *BaseAdaptor_uncachedFetchAllByDbIDList(BaseAdaptor *ba, Vector *idList, Slice *slice) {
  if ( idList == NULL) {
    fprintf(stderr, "id_list list reference argument is required - bye!");
    return NULL;
  }

  if (!Vector_getNumElement(idList)) {
    return Vector_new();
  }

  NameTableType *tables = ba->getTables();
  char **t = (*tables)[0];

  // Every constraint starts with the same "<synonym>.<name>_id " prefix
  char constraintPref[1024];
  int prefLen = snprintf(constraintPref, sizeof(constraintPref), "%s.%s_id ", t[SYN], t[NAME] );
  if (prefLen < 0 || (size_t)prefLen >= sizeof(constraintPref)) {
    fprintf(stderr, "Table synonym/name too long to build id constraint\n");
    return NULL;
  }

  // Ensure that we do not exceed MySQL's max_allowed_packet (defaults to
  // 1 MB) splitting large queries into smaller queries of at most 256 KB.
  // Assuming a (generous) average dbID string
  // length of 16, this means 16384 dbIDs in each query.
  int maxSize = 16384;

  // Uniquify the list
  IDHash *idListHash = IDHash_new(IDHASH_MEDIUM);

  int i;
  for (i=0; i<Vector_getNumElement(idList); i++) {
    IDType id = *(IDType *)(Vector_getElementAt(idList, i));
    if (!IDHash_contains(idListHash, id)) {
      IDHash_add(idListHash, id, &trueVal);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(idListHash);
  int nUniqueId = IDHash_getNumValues(idListHash);

  IDHash_free(idListHash, NULL);

  Vector *out = Vector_new();

  // One buffer is reused for every chunk.  Its size leaves room for the
  // prefix plus maxSize ids with their ", " separators (assuming an id prints
  // in at most 20 characters, i.e. IDType is no wider than 64 bits - TODO confirm).
  char *constraint = calloc(655500, sizeof(char));
  if (constraint == NULL) {
    fprintf(stderr,"Failed allocating constraint\n");
    free(uniqueIds);
    return out;
  }

  // The prefix is written once; each chunk only rewrites what follows it
  memcpy(constraint, constraintPref, (size_t)prefLen + 1);

  for (i=0; i<nUniqueId; i+=maxSize) {
    // Always append at a known offset: passing the buffer to sprintf as both
    // destination and "%s" argument is undefined behaviour
    int endPoint = prefLen;

    // Special case for one remaining Id
    if (i == nUniqueId-1) {
      sprintf(constraint + endPoint, " = "IDFMTSTR, uniqueIds[i]);
    } else {
      endPoint += sprintf(constraint + endPoint, " IN (");
      int j;
      for (j=0; j<maxSize && j+i<nUniqueId; j++) {
        if (j!=0) {
          constraint[endPoint++] = ',';
          constraint[endPoint++] = ' ';
        }
        endPoint += sprintf(constraint + endPoint, IDFMTSTR, uniqueIds[i+j]);
      }
      constraint[endPoint++] = ')';
      constraint[endPoint] = '\0';
    }

    Vector *resChunk = BaseAdaptor_genericFetch(ba, constraint, NULL, slice);

    // move this chunk's results into the combined list, then drop the
    // now-redundant chunk container
    Vector_append(out, resChunk);

    Vector_free(resChunk);
  }

  free(constraint);
  free(uniqueIds);

  return out;
}