/*
 * Map a genomic interval into the cDNA coordinates of a prediction transcript.
 *
 * trans              - transcript whose cDNA coordinate mapper is used
 * start, end, strand - the interval to map
 * contig             - used (cast to IDType) as the id handed to the mapper;
 *                      when NULL, the slice of the first translateable exon
 *                      is used instead
 *
 * Returns the result of Mapper_mapCoordinates() for type "genomic". When
 * contig is NULL and the transcript has no translateable exons, a freshly
 * created MapperRangeSet is returned without consulting the mapper.
 */
MapperRangeSet *PredictionTranscript_genomic2cDNA(PredictionTranscript *trans, int start, int end, int strand, BaseContig *contig) {
  Mapper *mapper;

  // "ids" in mapper are contigs of exons, so use the same contig that should
  // be attached to all of the exons...
  if (!contig) {
    Vector *translateable = PredictionTranscript_getAllTranslateableExons(trans);
    PredictionExon *firstExon;

    if (!Vector_getNumElement(translateable)) {
      // Release the temporary vector on this path too, exactly as the
      // non-empty path below does; otherwise it is leaked.
      Vector_free(translateable);
      return MapperRangeSet_new();
    }

    firstExon = Vector_getElementAt(translateable, 0);
    contig = (BaseContig*)PredictionExon_getSlice(firstExon);
    Vector_free(translateable);
  }

  mapper = PredictionTranscript_getcDNACoordMapper(trans);

  return Mapper_mapCoordinates(mapper, (IDType)contig, start, end, strand, "genomic");
}
// NIY: May need some reworking to handle mapInsert because I'd changed the way it returns data // Change back to returning MapperRangeSet MapperRangeSet *Mapper_fastMap(Mapper *m, IDType id, long start, long end, int strand, char *type) { MapperPairSet *pairs; int i; IDHash *hash; int from, to; CoordSystem *cs; if(end+1 == start) { return Mapper_mapInsert(m, id, start, end, strand, type, 1); } if(!Mapper_compareType(type, Mapper_getTo(m))) { from = MAPPER_TO_IND; to = MAPPER_FROM_IND; cs = Mapper_getFromCoordSystem(m); } else { from = MAPPER_FROM_IND; to = MAPPER_TO_IND; cs = Mapper_getToCoordSystem(m); } hash = Mapper_getPairHash(m, from); if (!hash) { fprintf(stderr,"ERROR: Type %s is neither to or from coordinate system\n",type); exit(1); } if (Mapper_getIsSorted(m) == 0) { Mapper_sort(m); } if (!IDHash_contains(hash, id)) { fprintf(stderr,"ERROR: Fastmap expects to be able to find an id. It couldnt for " IDFMTSTR "\n",id); exit(1); } pairs = IDHash_getValue(hash,id); MapperRangeSet *retSet = MapperRangeSet_new(); for (i=0;i<MapperPairSet_getNumPair(pairs);i++) { MapperPair *pair = MapperPairSet_getPairAt(pairs,i); MapperUnit *selfCoord = MapperPair_getUnit(pair, from); MapperUnit *targetCoord = MapperPair_getUnit(pair, to); // only super easy mapping is done if(start < selfCoord->start || end > selfCoord->end ) { continue; } if (pair->ori == 1) { MapperCoordinate *retRange = MapperCoordinate_new(targetCoord->id, targetCoord->start + start - selfCoord->start, targetCoord->start + end - selfCoord->start, strand, cs, 0); // Perl didn't set rank, so use 0 /* retRange->id = targetCoord->id; retRange->start = targetCoord->start + start - selfCoord->start; retRange->end = targetCoord->start + end - selfCoord->start; retRange->strand = strand; retRange->coordSystem = cs; */ MapperRangeSet_addRange(retSet, (MapperRange *)retRange); break; } else { MapperCoordinate *retRange = MapperCoordinate_new(targetCoord->id, targetCoord->end - (end - selfCoord->start), 
targetCoord->end - (start - selfCoord->start), -strand, cs, 0); // Perl didn't set rank, so use 0 /* retRange->id = targetCoord->id; retRange->start = targetCoord->end - (end - selfCoord->start); retRange->end = targetCoord->end - (start - selfCoord->start); retRange->strand = -strand; retRange->coordSystem = cs; */ MapperRangeSet_addRange(retSet, (MapperRange *)retRange); break; } } // NIY: Here we return empty set, in mapInsert it returns NULL for empty fastmap - need to work out which is right return retSet; }
/*
 * Map an insert, i.e. a zero-length interval given as start == end + 1.
 *
 * The start and end are swapped to form a 2bp interval, that interval is
 * mapped with Mapper_mapCoordinates(), and the mapped result is converted
 * back into insert form.
 *
 * m, id, strand, type - passed through to Mapper_mapCoordinates()
 * start, end          - the insert position (start == end + 1 on entry)
 * fastmap             - non-zero when called on behalf of Mapper_fastMap()
 *
 * Returns a new MapperRangeSet containing newly created MapperCoordinate
 * objects. In fastmap mode the result is only returned when it consists of
 * exactly one coordinate; otherwise NULL is returned.
 *
 * Exits the program when the 2bp mapping yields neither one nor two ranges,
 * or (fastmap mode) when the single mapped range is not a coordinate.
 */
MapperRangeSet *Mapper_mapInsert(Mapper *m, IDType id, long start, long end, int strand, char *type, int fastmap) {
  // swap start/end and map the resultant 2bp coordinate
  long tmp;
  tmp   = start;
  start = end;
  end   = tmp;

  MapperRangeSet *coords = Mapper_mapCoordinates(m, id, start, end, strand, type);
  MapperRangeSet *retSet = MapperRangeSet_new();

  if (MapperRangeSet_getNumRange(coords) == 1) {
    MapperRange *only = MapperRangeSet_getRangeAt(coords, 0);

    // Type check - not sure what we're supposed to have here so belt and braces seem appropriate.
    // Done before the cast below so a non-coordinate is never read as a coordinate in fastmap mode.
    if (fastmap && only->rangeType != MAPPERRANGE_COORD) {
      fprintf(stderr, "Expected a Coordinate range, got a %d range (look up in MapperRange.h)\n", only->rangeType);
      exit(1);
    }

    // Note assuming its a mapper coordinate - not sure if this is always true
    // NOTE(review): outside fastmap mode a non-coordinate range (e.g. a gap) would be
    // read through this cast as if it were a coordinate - confirm whether that can happen.
    MapperCoordinate *c = (MapperCoordinate *)only;

    // swap start and end to convert back into insert
    MapperCoordinate *newC = MapperCoordinate_new(c->id, c->end, c->start, c->strand, c->coordSystem, 0); // Perl didn't set rank so use 0

    MapperRangeSet_addRange(retSet, (MapperRange *)newC);
  } else {
    if (MapperRangeSet_getNumRange(coords) != 2) {
      fprintf(stderr, "Unexpected: Got %d expected 2.\n", MapperRangeSet_getNumRange(coords));
      exit(1);
    }

    // adjust coordinates, remove gaps
    MapperRange *c1, *c2;

    if (strand == -1) {
      c1 = MapperRangeSet_getRangeAt(coords,1);
      c2 = MapperRangeSet_getRangeAt(coords,0);
    } else {
      c1 = MapperRangeSet_getRangeAt(coords,0);
      c2 = MapperRangeSet_getRangeAt(coords,1);
    }

    // Was ref($c1) eq 'Bio::EnsEMBL::Mapper::Coordinate' so think this WON'T include MAPPERRANGE_INDEL
    MapperCoordinate *newC1 = NULL;
    if (c1->rangeType == MAPPERRANGE_COORD) {
      MapperCoordinate *mcC1 = (MapperCoordinate *)c1;

      // insert is after first coord
      if (mcC1->strand * strand == -1) {
        mcC1->end--;
      } else {
        mcC1->start++;
      }
      newC1 = MapperCoordinate_new(mcC1->id, mcC1->start, mcC1->end, mcC1->strand, mcC1->coordSystem, 0); // Perl didn't set rank, so use 0
    }

    // (see above for note on this condition if (ref($c2) eq 'Bio::EnsEMBL::Mapper::Coordinate')
    MapperCoordinate *newC2 = NULL;
    if (c2->rangeType == MAPPERRANGE_COORD) {
      MapperCoordinate *mcC2 = (MapperCoordinate *)c2;

      // insert is before second coord
      if (mcC2->strand * strand == -1) {
        mcC2->start++;
      } else {
        mcC2->end--;
      }
      newC2 = MapperCoordinate_new(mcC2->id, mcC2->start, mcC2->end, mcC2->strand, mcC2->coordSystem, 0); // Perl didn't set rank, so use 0
    }

    if (strand == -1) { // Add in 2, 1 order
      if (newC2) MapperRangeSet_addRange(retSet, (MapperRange *)newC2);
      if (newC1) MapperRangeSet_addRange(retSet, (MapperRange *)newC1);
    } else { // Add in 1, 2 order
      if (newC1) MapperRangeSet_addRange(retSet, (MapperRange *)newC1);
      if (newC2) MapperRangeSet_addRange(retSet, (MapperRange *)newC2);
    }
  }

  // Fastmap wants exactly one coordinate. Judge that on the adjusted result
  // set (retSet), not on the raw mapping: checking the raw mapping used to
  // append an unswapped duplicate of a single coordinate and to discard a
  // two-range mapping that had been reduced to one coordinate.
  if (fastmap && MapperRangeSet_getNumRange(retSet) != 1) {
    MapperRangeSet_free(retSet);
    retSet = NULL;
  }

  MapperRangeSet_free(coords);

  return retSet;
}
/*
 * Map the interval [start, end] on `id` from one side of the mapper to the
 * other.
 *
 * m                  - the mapper (sorted here first if not already sorted)
 * id                 - id looked up in the pair hash of the source side
 * start, end, strand - interval to map; start == end + 1 is an insert and is
 *                      handed to Mapper_mapInsert()
 * type               - must compare equal (via Mapper_compareType) to the
 *                      mapper's "to" or "from" type; selects the direction
 *
 * Returns a new MapperRangeSet of gaps, coordinates and indel coordinates
 * covering the requested interval; the set is reversed when strand == -1.
 * An id that is not in the hash gives a single gap spanning [start, end].
 *
 * Exits the program when start > end + 1, when type matches neither side,
 * or when no pair hash exists for the chosen side.
 */
MapperRangeSet *Mapper_mapCoordinates(Mapper *m, IDType id, long start, long end, int strand, char *type) {
  // special case for handling inserts:
  if (start == end + 1) {
    return Mapper_mapInsert(m, id, start, end, strand, type, 0 /*fastmap flag */);
  }
  if (start > end + 1) {
    fprintf(stderr,"ERROR: Start is greater than end for id " IDFMTSTR ", start %ld, end %ld\n",id,start,end);
    exit(1);
  }

  if (Mapper_getIsSorted(m) == 0) {
    Mapper_sort(m);
  }

  // Work out which side of each pair is the source and which the target.
  int srcInd, dstInd;
  CoordSystem *destCs;

  if (!Mapper_compareType(type, Mapper_getTo(m))) {
    srcInd = MAPPER_TO_IND;
    dstInd = MAPPER_FROM_IND;
    destCs = Mapper_getFromCoordSystem(m);
  } else if (!Mapper_compareType(type, Mapper_getFrom(m))) {
    srcInd = MAPPER_FROM_IND;
    dstInd = MAPPER_TO_IND;
    destCs = Mapper_getToCoordSystem(m);
  } else {
    fprintf(stderr, "Invalid type [%s] in mapper (not from [%s] or to [%s])\n", type, Mapper_getFrom(m), Mapper_getTo(m));
    exit(1);
  }

  IDHash *pairHash = Mapper_getPairHash(m, srcInd);
  if (!pairHash) {
    fprintf(stderr,"ERROR: Type %s is neither to or from coordinate system\n",type);
    exit(1);
  }

  MapperRangeSet *results = MapperRangeSet_new();

  // Was upcasing the id - its a number in C, I haven't found a case yet where its a string
  if (!IDHash_contains(pairHash, id)) {
    // one big gap!
    MapperRange *wholeGap = (MapperRange *)MapperGap_new(start, end, 0); // Perl didn't set rank so use 0
    MapperRangeSet_addRange(results, wholeGap);
    return results;
  }

  MapperPairSet *pairs = IDHash_getValue(pairHash, id);
  int numPairs = MapperPairSet_getNumPair(pairs);

  // binary search the relevant pairs
  // helps if the list is big
  int lo = 0;
  int hi = numPairs - 1;

  while ((hi - lo) > 1) {
    int mid = (lo + hi) >> 1;
    MapperPair *probePair = MapperPairSet_getPairAt(pairs, mid);
    MapperUnit *probeUnit = MapperPair_getUnit(probePair, srcInd);

    if (probeUnit->end < start) {
      lo = mid;
    } else {
      hi = mid;
    }
  }

  int rank = 0;
  long origStart = start;
  IDType lastTargetId;            // only read once haveLastTargetId is set
  int haveLastTargetId = 0;
  MapperPair *lastUsedPair = NULL;
  int idx;

  for (idx = lo; idx < numPairs; idx++) {
    MapperPair *pair = MapperPairSet_getPairAt(pairs, idx);
    MapperUnit *src  = MapperPair_getUnit(pair, srcInd);
    MapperUnit *dst  = MapperPair_getUnit(pair, dstInd);

    //
    // But not the case for haplotypes!! need to test for this case???
    // so removing this till a better solution is found
    //
    //
    //     if($self_coord->{'start'} < $start){
    //       $start = $orig_start;
    //       $rank++;
    //     }
    if (haveLastTargetId && dst->id != lastTargetId) {
      if (src->start < start) {
        // i.e. the same bit is being mapped to another assembled bit
        start = origStart;
      }
    } else {
      lastTargetId = dst->id;
      haveLastTargetId = 1;
    }

    // if we haven't even reached the start, move on
    if (src->end < origStart) {
      continue;
    }

    // if we have over run, break
    if (src->start > end) {
      break;
    }

    // Check is start not origStart
    if (start < src->start) {
      // gap detected
      MapperRange *leadGap = (MapperRange *)MapperGap_new(start, src->start - 1, rank);
      MapperRangeSet_addRange(results, leadGap);
      start = leadGap->end + 1;
    }

    MapperRange *res;

    if (MapperPair_isIndel(pair)) {
      // When next pair is an IndelPair and not a Coordinate, create the
      // new mapping Coordinate, the IndelCoordinate.
      long gapEnd = (src->end < end) ? src->end : end;

      // create a Gap object
      MapperGap *indelGap = MapperGap_new(start, gapEnd, 0); // Perl didn't set rank - don't know if need to

      // create the Coordinate object
      MapperCoordinate *indelCoord = MapperCoordinate_new(dst->id, dst->start, dst->end, pair->ori * strand, destCs, 0); // Perl didn't set rank - don't know if need to

      // and finally, the IndelCoordinate object built from both
      res = (MapperRange *)IndelCoordinate_new(indelGap, indelCoord);
    } else {
      long targetStart, targetEnd;

      // Either we are enveloping this map or not. If yes, then the end
      // point (self perspective) is determined solely by the target. If
      // not, it has to be adjusted to the requested end.
      int enveloped = (end > src->end);

      if (pair->ori == 1) {
        // start is somewhere inside the region
        targetStart = dst->start + (start - src->start);
        if (enveloped) {
          targetEnd = dst->end;
        } else {
          targetEnd = dst->start + (end - src->start);
        }
      } else {
        // start is somewhere inside the region, measured from the target end
        targetEnd = dst->end - (start - src->start);
        if (enveloped) {
          targetStart = dst->start;
        } else {
          targetStart = dst->end - (end - src->start);
        }
      }

      res = (MapperRange *)MapperCoordinate_new(dst->id, targetStart, targetEnd, pair->ori * strand, destCs, rank);
    }

    MapperRangeSet_addRange(results, res);

    lastUsedPair = pair;
    start = src->end + 1;
  }

  if (lastUsedPair == NULL) {
    // Nothing overlapped: whatever is left of the interval is a gap.
    MapperRange *restGap = (MapperRange *)MapperGap_new(start, end, 0); // Perl doesn't set rank, so use 0
    MapperRangeSet_addRange(results, restGap);
  } else {
    MapperUnit *lastSrc = MapperPair_getUnit(lastUsedPair, srcInd);

    if (lastSrc->end < end) {
      // gap at the end
      MapperRange *tailGap = (MapperRange *)MapperGap_new(lastSrc->end + 1, end, 0); // Perl didn't set rank so use 0
      MapperRangeSet_addRange(results, tailGap);
    }
  }

  if (strand == -1) {
    MapperRangeSet_reverse(results);
  }

  return results;
}