/* =head2 _objs_from_sth Arg [1] : DBI:st $sth An executed DBI statement handle Arg [2] : (optional) Bio::EnsEMBL::Mapper $mapper An mapper to be used to convert contig coordinates to assembly coordinates. Arg [3] : (optional) Bio::EnsEMBL::Slice $slice A slice to map the prediction transcript to. Example : $p_transcripts = $self->_objs_from_sth($sth); Description: Creates a list of Prediction transcripts from an executed DBI statement handle. The columns retrieved via the statement handle must be in the same order as the columns defined by the _columns method. If the slice argument is provided then the the prediction transcripts will be in returned in the coordinate system of the $slice argument. Otherwise the prediction transcripts will be returned in the RawContig coordinate system. Returntype : reference to a list of Bio::EnsEMBL::PredictionTranscripts Exceptions : none Caller : superclass generic_fetch Status : Stable =cut */ Vector *PredictionTranscriptAdaptor_objectsFromStatementHandle(PredictionTranscriptAdaptor *pta, StatementHandle *sth, AssemblyMapper *assMapper, Slice *destSlice) { SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba); AnalysisAdaptor *aa = DBAdaptor_getAnalysisAdaptor(pta->dba); Vector *pTranscripts = Vector_new(); IDHash *sliceHash = IDHash_new(IDHASH_SMALL); long destSliceStart; long destSliceEnd; int destSliceStrand; long destSliceLength; char * destSliceSrName; IDType destSliceSrId = 0; if (destSlice) { destSliceStart = Slice_getStart(destSlice); destSliceEnd = Slice_getEnd(destSlice); destSliceStrand = Slice_getStrand(destSlice); destSliceLength = Slice_getLength(destSlice); destSliceSrName = Slice_getSeqRegionName(destSlice); destSliceSrId = Slice_getSeqRegionId(destSlice); } ResultRow *row; while ((row = sth->fetchRow(sth))) { IDType predictionTranscriptId = row->getLongLongAt(row,0); IDType seqRegionId = row->getLongLongAt(row,1); long seqRegionStart = row->getLongAt(row,2); long seqRegionEnd = row->getLongAt(row,3); int 
seqRegionStrand = row->getIntAt(row,4); IDType analysisId = row->getLongLongAt(row,5); char *displayLabel = row->getStringAt(row,6); // get the analysis object Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId); if (! IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } Slice *slice = IDHash_getValue(sliceHash, seqRegionId); Slice *ptSlice = slice; char *srName = Slice_getSeqRegionName(slice); CoordSystem *srCs = Slice_getCoordSystem(slice); // // remap the feature coordinates to another coord system // if a mapper was provided // if (assMapper != NULL) { MapperRangeSet *mrs; // Slightly suspicious about need for this if statement so left in perl statements for now if (destSlice != NULL && assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) { mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice); } else { mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL); } // skip features that map to gaps or coord system boundaries if (MapperRangeSet_getNumRange(mrs) == 0) { continue; } MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0); if (range->rangeType == MAPPERRANGE_GAP) { fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n"); exit(1); } else { MapperCoordinate *mc = (MapperCoordinate *)range; seqRegionId = mc->id; seqRegionStart = mc->start; seqRegionEnd = mc->end; seqRegionStrand = mc->strand; } MapperRangeSet_free(mrs); if (! 
IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } ptSlice = IDHash_getValue(sliceHash, seqRegionId); } // // If a destination slice was provided convert the coords // If the dest_slice starts at 1 and is foward strand, nothing needs doing // if (destSlice != NULL) { if (destSliceStart != 1 || destSliceStrand != 1) { if (destSliceStrand == 1) { seqRegionStart = seqRegionStart - destSliceStart + 1; seqRegionEnd = seqRegionEnd - destSliceStart + 1; } else { long tmpSeqRegionStart = seqRegionStart; seqRegionStart = destSliceEnd - seqRegionEnd + 1; seqRegionEnd = destSliceEnd - tmpSeqRegionStart + 1; seqRegionStrand = -seqRegionStrand; } } // throw away features off the end of the requested slice if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) { continue; } ptSlice = destSlice; } // Finally, create the new PredictionTranscript. PredictionTranscript *pt = PredictionTranscript_new(); PredictionTranscript_setStart (pt, seqRegionStart); PredictionTranscript_setEnd (pt, seqRegionEnd); PredictionTranscript_setStrand (pt, seqRegionStrand); PredictionTranscript_setSlice (pt, ptSlice); PredictionTranscript_setAnalysis (pt, analysis); PredictionTranscript_setAdaptor (pt, (BaseAdaptor *)pta); PredictionTranscript_setDbID (pt, predictionTranscriptId); PredictionTranscript_setDisplayLabel(pt, displayLabel); Vector_addElement(pTranscripts, pt); } IDHash_free(sliceHash, NULL); return pTranscripts; }
Vector *IntronSupportingEvidenceAdaptor_objectsFromStatementHandle(IntronSupportingEvidenceAdaptor *isea, StatementHandle *sth, AssemblyMapper *assMapper, Slice *destSlice) { SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(isea->dba); AnalysisAdaptor *aa = DBAdaptor_getAnalysisAdaptor(isea->dba); Vector *features = Vector_new(); IDHash *sliceHash = IDHash_new(IDHASH_SMALL); /* Unneccesary my %analysis_hash; my %sr_name_hash; my %sr_cs_hash; */ /* Unused my $asm_cs; my $cmp_cs; my $asm_cs_vers; my $asm_cs_name; my $cmp_cs_vers; my $cmp_cs_name; if($mapper) { $asm_cs = $mapper->assembled_CoordSystem(); $cmp_cs = $mapper->component_CoordSystem(); $asm_cs_name = $asm_cs->name(); $asm_cs_vers = $asm_cs->version(); $cmp_cs_name = $cmp_cs->name(); $cmp_cs_vers = $cmp_cs->version(); } */ long destSliceStart; long destSliceEnd; int destSliceStrand; long destSliceLength; //CoordSystem *destSliceCs; char * destSliceSrName; IDType destSliceSrId = 0; //AssemblyMapperAdaptor *asma; if (destSlice) { destSliceStart = Slice_getStart(destSlice); destSliceEnd = Slice_getEnd(destSlice); destSliceStrand = Slice_getStrand(destSlice); destSliceLength = Slice_getLength(destSlice); //??destSliceCs = Slice_getCoordSystem(destSlice); destSliceSrName = Slice_getSeqRegionName(destSlice); destSliceSrId = Slice_getSeqRegionId(destSlice); //??asma = DBAdaptor_getAssemblyMapperAdaptor(ea->dba); } ResultRow *row; while ((row = sth->fetchRow(sth))) { IDType id = row->getLongLongAt(row,0); IDType analysisId = row->getLongLongAt(row,1); IDType seqRegionId = row->getLongLongAt(row,2); long seqRegionStart = row->getLongAt(row,3); long seqRegionEnd = row->getLongAt(row,4); int seqRegionStrand = row->getIntAt(row,5); char *hitName = row->getStringAt(row,6); double score = row->getDoubleAt(row,7); char *scoreType = row->getStringAt(row,8); int spliceCanonical = row->getIntAt(row,9); // get the analysis object Analysis *analysis = AnalysisAdaptor_fetchByDbID(aa, analysisId); /* // need to get the 
internal_seq_region, if present $seq_region_id = $self->get_seq_region_id_internal($seq_region_id); #get the slice object my $slice = $slice_hash{"ID:".$seq_region_id}; if(!$slice) { $slice = $sa->fetch_by_seq_region_id($seq_region_id); $slice_hash{"ID:".$seq_region_id} = $slice; $sr_name_hash{$seq_region_id} = $slice->seq_region_name(); $sr_cs_hash{$seq_region_id} = $slice->coord_system(); } my $sr_name = $sr_name_hash{$seq_region_id}; my $sr_cs = $sr_cs_hash{$seq_region_id}; */ if (! IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } Slice *slice = IDHash_getValue(sliceHash, seqRegionId); Slice *iseSlice = slice; char *srName = Slice_getSeqRegionName(slice); CoordSystem *srCs = Slice_getCoordSystem(slice); // // remap the feature coordinates to another coord system // if a mapper was provided // if (assMapper != NULL) { MapperRangeSet *mrs; // Slightly suspicious about need for this if statement so left in perl statements for now if (destSlice != NULL && assMapper->objectType == CLASS_CHAINEDASSEMBLYMAPPER) { mrs = ChainedAssemblyMapper_map(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, 1, destSlice); } else { mrs = AssemblyMapper_fastMap(assMapper, srName, seqRegionStart, seqRegionEnd, seqRegionStrand, srCs, NULL); } // skip features that map to gaps or coord system boundaries //next FEATURE if (!defined($seq_region_id)); if (MapperRangeSet_getNumRange(mrs) == 0) { continue; } MapperRange *range = MapperRangeSet_getRangeAt(mrs, 0); if (range->rangeType == MAPPERRANGE_GAP) { fprintf(stderr,"Got a mapper gap in gene obj_from_sth - not sure if this is allowed\n"); exit(1); } else { MapperCoordinate *mc = (MapperCoordinate *)range; seqRegionId = mc->id; seqRegionStart = mc->start; seqRegionEnd = mc->end; seqRegionStrand = mc->strand; } MapperRangeSet_free(mrs); /* Was - but identical if and else so why test??? 
#get a slice in the coord system we just mapped to if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) { $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id); } else { $slice = $slice_hash{"ID:".$seq_region_id} ||= $sa->fetch_by_seq_region_id($seq_region_id); } */ // Instead... if (! IDHash_contains(sliceHash, seqRegionId)) { IDHash_add(sliceHash, seqRegionId, SliceAdaptor_fetchBySeqRegionId(sa, seqRegionId, POS_UNDEF, POS_UNDEF, STRAND_UNDEF)); } iseSlice = IDHash_getValue(sliceHash, seqRegionId); } // // If a destination slice was provided convert the coords // If the dest_slice starts at 1 and is foward strand, nothing needs doing // if (destSlice != NULL) { if (destSliceStart != 1 || destSliceStrand != 1) { if (destSliceStrand == 1) { seqRegionStart = seqRegionStart - destSliceStart + 1; seqRegionEnd = seqRegionEnd - destSliceStart + 1; } else { long tmpSeqRegionStart = seqRegionStart; seqRegionStart = destSliceEnd - seqRegionEnd + 1; seqRegionEnd = destSliceEnd - tmpSeqRegionStart + 1; seqRegionStrand = -seqRegionStrand; } } // throw away features off the end of the requested slice if (seqRegionEnd < 1 || seqRegionStart > destSliceLength || (destSliceSrId != seqRegionId)) { continue; } iseSlice = destSlice; } IntronSupportingEvidence *ise = IntronSupportingEvidence_new(); IntronSupportingEvidence_setStart (ise, seqRegionStart); IntronSupportingEvidence_setEnd (ise, seqRegionEnd); IntronSupportingEvidence_setStrand (ise, seqRegionStrand); IntronSupportingEvidence_setSlice (ise, iseSlice); IntronSupportingEvidence_setAnalysis (ise, analysis); IntronSupportingEvidence_setAdaptor (ise, (BaseAdaptor *)isea); IntronSupportingEvidence_setDbID (ise, id); IntronSupportingEvidence_setHitName (ise, hitName); IntronSupportingEvidence_setScore (ise, score); IntronSupportingEvidence_setScoreType (ise, scoreType); IntronSupportingEvidence_setIsSpliceCanonical(ise, spliceCanonical); Vector_addElement(features, ise); } 
return features; }
/*
 * Fetch all prediction transcripts on a slice, optionally preloading their exons.
 *
 *   pta        the prediction transcript adaptor.
 *   slice      slice to fetch transcripts from.
 *   logicName  passed through unchanged to BaseFeatureAdaptor_fetchAllBySlice.
 *   loadExons  when zero, or when fewer than two transcripts are found, the
 *              transcripts are returned as fetched (exons left to lazy loading).
 *
 * Otherwise all exons for the transcripts are loaded with one
 * prediction_exon query plus one exon fetch over the region spanned by the
 * transcripts, and attached via PredictionTranscript_addExon.
 *
 * Returns the Vector produced by BaseFeatureAdaptor_fetchAllBySlice.
 * If the query string cannot be allocated or built, the transcripts are
 * returned without preloaded exons. Calls exit(1) if an exon cannot be
 * transferred onto the requested slice.
 */
Vector *PredictionTranscriptAdaptor_fetchAllBySlice(PredictionTranscriptAdaptor *pta, Slice *slice, char *logicName, int loadExons) {
  // Equivalent of the Perl SUPER::fetch_all_by_Slice($slice, $logic_name)
  Vector *transcripts = BaseFeatureAdaptor_fetchAllBySlice((BaseFeatureAdaptor *)pta, slice, logicName);

  // if there are 0 or 1 transcripts still do lazy-loading
  if ( ! loadExons || Vector_getNumElement(transcripts) < 2 ) {
    return transcripts;
  }

  // preload all of the exons now, instead of lazy loading later
  // faster than 1 query per transcript

  // get extent of region spanned by transcripts; seeded from the first
  // transcript (at least two exist here) rather than from magic sentinels
  int i;
  int nTranscript = Vector_getNumElement(transcripts);

  PredictionTranscript *firstT = Vector_getElementAt(transcripts, 0);
  long minStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)firstT);
  long maxEnd   = PredictionTranscript_getSeqRegionEnd((SeqFeature*)firstT);

  for (i=1; i<nTranscript; i++) {
    PredictionTranscript *t = Vector_getElementAt(transcripts, i);
    long tStart = PredictionTranscript_getSeqRegionStart((SeqFeature*)t);
    long tEnd   = PredictionTranscript_getSeqRegionEnd((SeqFeature*)t);

    if (tStart < minStart) minStart = tStart;
    if (tEnd   > maxEnd)   maxEnd   = tEnd;
  }

  // Transcripts may overhang the requested slice; if so fetch exons from a
  // slice covering the whole extent.
  Slice *extSlice;
  if (minStart >= Slice_getStart(slice) && maxEnd <= Slice_getEnd(slice)) {
    extSlice = slice;
  } else {
    SliceAdaptor *sa = DBAdaptor_getSliceAdaptor(pta->dba);
    extSlice = SliceAdaptor_fetchByRegion(sa, Slice_getCoordSystemName(slice), Slice_getSeqRegionName(slice),
                                          minStart, maxEnd, Slice_getStrand(slice),
                                          CoordSystem_getVersion(Slice_getCoordSystem(slice)), 0);
  }

  // associate exon identifiers with transcripts
  IDHash *trHash = IDHash_new(IDHASH_MEDIUM);
  for (i=0; i<nTranscript; i++) {
    PredictionTranscript *t = Vector_getElementAt(transcripts, i);
    if ( ! IDHash_contains(trHash, PredictionTranscript_getDbID(t))) {
      IDHash_add(trHash, PredictionTranscript_getDbID(t), t);
    }
  }

  IDType *uniqueIds = IDHash_getKeys(trHash);
  int     nIds      = IDHash_getNumValues(trHash);

  static const char queryHead[] =
    "SELECT prediction_transcript_id, prediction_exon_id, exon_rank FROM prediction_exon WHERE prediction_transcript_id IN (";

  // Size the query from the number of ids instead of using a fixed buffer:
  // 3 chars per byte of IDType over-estimates its decimal width, +2 for a
  // sign/slack, +2 for the ", " separator; +2 at the end for ")" and NUL.
  size_t perIdMax = 3 * sizeof(IDType) + 2 + 2;
  size_t qCap     = sizeof(queryHead) + (size_t)nIds * perIdMax + 2;

  char *qStr = NULL;
  if ((qStr = (char *)calloc(qCap, sizeof(char))) == NULL) {
    fprintf(stderr,"Failed allocating qStr\n");
    // Fall back to lazy loading, but do not leak the temporaries.
    free(uniqueIds);
    IDHash_free(trHash, NULL);
    return transcripts;
  }

  size_t endPoint = sizeof(queryHead) - 1;
  memcpy(qStr, queryHead, endPoint);

  for (i=0; i<nIds; i++) {
    if (i!=0) {
      qStr[endPoint++] = ',';
      qStr[endPoint++] = ' ';
    }
    size_t remaining = qCap - endPoint;
    int lenNum = snprintf(&(qStr[endPoint]), remaining, IDFMTSTR, uniqueIds[i]);
    // Require room for the id plus at least two more bytes (separator or ")\0").
    if (lenNum < 0 || (size_t)lenNum + 2 > remaining) {
      fprintf(stderr,"Failed building prediction exon query\n");
      free(qStr);
      free(uniqueIds);
      IDHash_free(trHash, NULL);
      return transcripts;
    }
    endPoint += (size_t)lenNum;
  }
  qStr[endPoint++] = ')';
  qStr[endPoint] = '\0';

  free(uniqueIds);

  StatementHandle *sth = pta->prepare((BaseAdaptor *)pta,qStr,strlen(qStr));
  sth->execute(sth);

  // Map exon id -> list of (transcript, rank) pairs that use that exon.
  IDHash *exTrHash = IDHash_new(IDHASH_MEDIUM);
  ResultRow *row;
  while ((row = sth->fetchRow(sth))) {
    IDType trId = row->getLongLongAt(row,0);
    IDType exId = row->getLongLongAt(row,1);
    int    rank = row->getIntAt(row,2);

    if (! IDHash_contains(exTrHash, exId)) {
      Vector *vec = Vector_new();
      Vector_setFreeFunc(vec, PredictionTranscriptRankPair_free);
      IDHash_add(exTrHash, exId, vec);
    }
    Vector *exVec = IDHash_getValue(exTrHash, exId);
    PredictionTranscriptRankPair *trp = PredictionTranscriptRankPair_new(IDHash_getValue(trHash, trId), rank);
    Vector_addElement(exVec, trp);
  }

  IDHash_free(trHash, NULL);

  sth->finish(sth);

  PredictionExonAdaptor *pea = DBAdaptor_getPredictionExonAdaptor(pta->dba);
  Vector *exons = PredictionExonAdaptor_fetchAllBySlice(pea, extSlice);
  // NOTE(review): ownership of the 'exons' vector (and of a freshly fetched
  // extSlice) is not visible here; neither is released by this function.

  // move exons onto transcript slice, and add them to transcripts
  int nExon = Vector_getNumElement(exons);
  for (i=0; i<nExon; i++) {
    PredictionExon *ex = Vector_getElementAt(exons, i);

    // Perl didn't have this line - it was in GeneAdaptor version so I think I'm going to keep it
    if (!IDHash_contains(exTrHash, PredictionExon_getDbID(ex))) continue;

    PredictionExon *newEx;
    if (slice != extSlice) {
      newEx = (PredictionExon*)PredictionExon_transfer((SeqFeature*)ex, slice);
      if (newEx == NULL) {
        fprintf(stderr, "Unexpected. Exon could not be transferred onto PredictionTranscript slice.\n");
        exit(1);
      }
    } else {
      newEx = ex;
    }

    Vector *exVec = IDHash_getValue(exTrHash, PredictionExon_getDbID(newEx));
    int j;
    for (j=0; j<Vector_getNumElement(exVec); j++) {
      PredictionTranscriptRankPair *trp = Vector_getElementAt(exVec, j);
      PredictionTranscript_addExon(trp->transcript, newEx, &trp->rank);
    }
  }

  // Frees each per-exon vector (and, via its free func, the rank pairs).
  IDHash_free(exTrHash, Vector_free);
  free(qStr);

  return transcripts;
}