/*
 * Return the cached "name:version" string of a coordinate system, building
 * and caching it on first use.
 *
 * cs       coordinate system to describe; may be NULL.
 * returns  cs->nameColonVersion (owned by cs), or NULL when cs is NULL.
 *
 * A missing (NULL) version is rendered as an empty string, giving "name:".
 * The length of the cached text is stored in cs->lenNameColonVersion.
 */
char *CoordSystem_getNameColonVersion(CoordSystem *cs) {
  if (cs == NULL) {
    return NULL;
  }

  if (cs->nameColonVersion == NULL) {
    char tmpStr[1024];
    const char *version = CoordSystem_getVersion(cs);

    /* Bounded formatting: an over-long name/version pair is truncated to
     * fit tmpStr instead of writing past the end of the stack buffer. */
    int written = snprintf(tmpStr, sizeof tmpStr, "%s:%s",
                           CoordSystem_getName(cs),
                           version ? version : "");
    if (written < 0) {
      /* Formatting error: cache an empty string rather than garbage. */
      tmpStr[0] = '\0';
    }

    /* NOTE(review): StrUtil_copyString presumably stores a copy of tmpStr in
     * cs->nameColonVersion - confirm against its definition. */
    StrUtil_copyString(&cs->nameColonVersion, tmpStr, 0);
    cs->lenNameColonVersion = strlen(tmpStr);
  }

  return cs->nameColonVersion;
}
/*
 * List the identifiers on the other side of the assembly mapping that
 * overlap the given region.
 *
 * am                assembly mapper to query.
 * frmSeqRegionName  name of the sequence region the range lies on.
 * frmStart, frmEnd  range on that sequence region.
 * frmCs             coordinate system of the range; must compare equal to
 *                   either the component or the assembled coordinate system
 *                   of am.
 * returns           the Vector produced by MapperPairSet_getFromIds (range in
 *                   the component system) or MapperPairSet_getToIds (range in
 *                   the assembled system).
 *
 * If frmCs matches neither coordinate system an error is written to stderr
 * and the process exits with status 1.
 */
Vector *AssemblyMapper_listIdsImpl(AssemblyMapper *am, char *frmSeqRegionName, long frmStart, long frmEnd, CoordSystem *frmCs) {
  IDType seqRegionId = AssemblyMapper_getSeqRegionId(am, frmSeqRegionName, frmCs);
  AssemblyMapperAdaptor *adaptor = AssemblyMapper_getAdaptor(am);

  if (!CoordSystem_compare(frmCs, AssemblyMapper_getComponentCoordSystem(am))) {
    /* Components are registered once per sequence region. */
    if (!AssemblyMapper_haveRegisteredComponent(am, seqRegionId)) {
      AssemblyMapperAdaptor_registerComponent(adaptor, am, seqRegionId);
    }

    /* Pull out the 'from' identifiers of the mapper pairs; per the original
     * comment, the assembled side was loaded as the 'from' side in the
     * constructor.
     * NOTE(review): mps is not released here - confirm who owns it. */
    MapperPairSet *mps = Mapper_listPairs(AssemblyMapper_getMapper(am), seqRegionId,
                                          frmStart, frmEnd, "component");
    return MapperPairSet_getFromIds(mps);
  }

  if (!CoordSystem_compare(frmCs, AssemblyMapper_getAssembledCoordSystem(am))) {
    AssemblyMapperAdaptor_registerAssembled(adaptor, am, seqRegionId, frmStart, frmEnd);

    /* Pull out the 'to' identifiers of the mapper pairs; per the original
     * comment, the component side was loaded as the 'to' coord system in
     * the constructor.
     * NOTE(review): mps is not released here - confirm who owns it. */
    MapperPairSet *mps = Mapper_listPairs(AssemblyMapper_getMapper(am), seqRegionId,
                                          frmStart, frmEnd, "assembled");
    return MapperPairSet_getToIds(mps);
  }

  /* A coordinate system may have no version; never hand NULL to %s. */
  const char *version = CoordSystem_getVersion(frmCs);
  fprintf(stderr, "Coordinate system %s %s is neither the assembled nor the component coordinate system of this AssemblyMapper\n",
          CoordSystem_getName(frmCs), version ? version : "");
  exit(1);
}
/*
 * Compare two coordinate systems by version and name.
 *
 * returns  0 when both the versions and the names compare equal under
 *          EcoString_strcmp, 1 otherwise.
 *
 * The version is checked first; the names are only compared when the
 * versions match. The DAS coordinate-system special case of the Perl
 * implementation is not ported.
 */
int CoordSystem_compare(CoordSystem *cs1, CoordSystem *cs2) {
  /* Different versions: not the same coordinate system. */
  if (EcoString_strcmp(CoordSystem_getVersion(cs1), CoordSystem_getVersion(cs2))) {
    return 1;
  }

  /* Same version but different names: still different. */
  if (EcoString_strcmp(CoordSystem_getName(cs1), CoordSystem_getName(cs2))) {
    return 1;
  }

  /* Version and name both match. */
  return 0;
}
/*
 * Map a region from one side of the assembly mapping to the other using
 * Mapper_fastMap.
 *
 * am                assembly mapper to use.
 * frmSeqRegionName  name of the sequence region being mapped from.
 * frmStart, frmEnd  range to map.
 * frmStrand         strand of the range.
 * frmCs             coordinate system of the range; must be the component or
 *                   the assembled coordinate system of am.
 * toSlice           not used by this implementation.
 * returns           the result of Mapper_fastMap for the range.
 *
 * If frmCs matches neither coordinate system an error is written to stderr
 * and the process exits with status 1, as AssemblyMapper_listIdsImpl does.
 * Previously execution fell through and handed an uninitialised direction
 * string to Mapper_fastMap.
 */
MapperRangeSet *AssemblyMapper_fastMapImpl(AssemblyMapper *am, char *frmSeqRegionName, long frmStart, long frmEnd, int frmStrand, CoordSystem *frmCs, Slice *toSlice) {
  Mapper *mapper = AssemblyMapper_getMapper(am);
  CoordSystem *asmCs = AssemblyMapper_getAssembledCoordSystem(am);
  CoordSystem *cmpCs = AssemblyMapper_getComponentCoordSystem(am);
  AssemblyMapperAdaptor *adaptor = AssemblyMapper_getAdaptor(am);
  char *frm = NULL;

  IDType seqRegionId = AssemblyMapper_getSeqRegionId(am, frmSeqRegionName, frmCs);

  /* Speed critical section:
   * try simple pointer equality on the coord system objects first, since
   * that is likely to succeed most of the time and is much cheaper than
   * calling CoordSystem_compare. */
  if (frmCs == cmpCs || (frmCs != asmCs && !CoordSystem_compare(frmCs, cmpCs))) {
    if (!IDHash_contains(AssemblyMapper_getComponentRegister(am), seqRegionId)) {
      AssemblyMapperAdaptor_registerComponent(adaptor, am, seqRegionId);
    }
    frm = "component";
  } else if (frmCs == asmCs || !CoordSystem_compare(frmCs, asmCs)) {
    /* This could probably be sped up by only registering the assembled
     * region when needed. */
    AssemblyMapperAdaptor_registerAssembled(adaptor, am, seqRegionId, frmStart, frmEnd);
    frm = "assembled";
  } else {
    /* A coordinate system may have no version; never hand NULL to %s. */
    const char *version = CoordSystem_getVersion(frmCs);
    fprintf(stderr,"Coordinate system %s %s is neither the assembled nor the component coordinate system of this AssemblyMapper\n",
            CoordSystem_getName(frmCs), version ? version : "");
    /* There is no valid mapping direction, so do not continue. */
    exit(1);
  }

  return Mapper_fastMap(mapper, seqRegionId, frmStart, frmEnd, frmStrand, frm);
}
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) { //my $transcripts = $self->SUPER::fetch_all_by_Slice($slice,$logic_name); Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName); // if there are 0 or 1 transcripts still do lazy-loading if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) { return transcripts; } // preload all of the exons now, instead of lazy loading later // faster than 1 query per transcript // get extent of region spanned by transcripts long minStart = 2000000000; long maxEnd = -2000000000; int i; for (i=0; i<Vector_getNumElement(transcripts); i++) { PredictionTranscript *t = Vector_getElementAt(transcripts, i); if (PredictionTranscript_getSeqRegionStart((SeqFeature*)t) < minStart) { minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t); } if (PredictionTranscript_getSeqRegionEnd((SeqFeature*)t) > maxEnd) { maxEnd = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t); } } Slice *extSlice; if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) { extSlice = slice; } else { SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba); extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice), minStart, maxEnd, Slice_getStrand(slice), CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0); } // associate exon identifiers with transcripts IDHash *trHash = IDHash_new(IDHASH_MEDIUM); for (i=0; i<Vector_getNumElement(transcripts); i++) { PredictionTranscript *t = Vector_getElementAt(transcripts, i); if ( ! 
IDHash_contains(trHash, PredictionTranscript_getDbID(t))) { IDHash_add(trHash, PredictionTranscript_getDbID(t), t); } } IDType *uniqueIds = IDHash_getKeys(trHash); char tmpStr[1024]; char *qStr = NULL; if ((qStr = (char *)calloc(655500,sizeof(char))) == NULL) { fprintf(stderr,"Failed allocating qStr\n"); return transcripts; } int lenNum; int endPoint = sprintf(qStr, "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE prediction_transcript_id IN ("); for (i=0; i<IDHash_getNumValues(trHash); i++) { if (i!=0) { qStr[endPoint++] = ','; qStr[endPoint++] = ' '; } lenNum = sprintf(tmpStr,IDFMTSTR,uniqueIds[i]); memcpy(&(qStr[endPoint]), tmpStr, lenNum); endPoint+=lenNum; } qStr[endPoint++] = ')'; qStr[endPoint] = '\0'; free(uniqueIds); StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr)); sth->execute(sth); IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM); ResultRow *row; while ((row = sth->fetchRow(sth))) { IDType trId = row->getLongLongAt(row,0); IDType exId = row->getLongLongAt(row,1); int rank = row->getIntAt(row,2); if (! 
IDHash_contains(exTrHash, exId)) { Vector *vec = Vector_new(); Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free); IDHash_add(exTrHash, exId, vec); } Vector *exVec = IDHash_getValue(exTrHash, exId); PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank); Vector_addElement(exVec, trp); } IDHash_free(trHash, NULL); sth->finish(sth); PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba); Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice); // move exons onto transcript slice, and add them to transcripts for (i=0; i<Vector_getNumElement(exons); i++) { PredictionExon *ex = Vector_getElementAt(exons, i); // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue; PredictionExon *newEx; if (slice != extSlice) { newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice); if (newEx == NULL) { fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n"); exit(1); } } else { newEx = ex; } Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx)); int j; for (j=0; j<Vector_getNumElement(exVec); j++) { PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j); PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank); } } IDHash_free(exTrHash, Vector_free); free(qStr); return transcripts; }