bool capsAreAdjacent(Cap *cap1, Cap *cap2, int64_t *separationDistance) {
    if (cap_getName(cap2) != cap_getName(cap1) && cap_getCoordinate(cap1) != cap_getCoordinate(cap2)) { //This can happen if end1 == end2
        if (sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))) {
            assert(strcmp(event_getHeader(cap_getEvent(cap1)), event_getHeader(
                                    cap_getEvent(cap2))) == 0);
            assert(cap_getPositiveOrientation(cap1)
                    != cap_getPositiveOrientation(cap2));
            assert(cap_getName(cap1) != cap_getName(cap2));
            assert(sequence_getMetaSequence(cap_getSequence(cap1))
                                == sequence_getMetaSequence(cap_getSequence(cap2)));

            if (!cap_getStrand(cap1)) {
                cap1 = cap_getReverse(cap1);
            }
            if (!cap_getStrand(cap2)) {
                cap2 = cap_getReverse(cap2);
            }
            assert(cap_getStrand(cap1));
            assert(cap_getStrand(cap2));
            if (cap_getCoordinate(cap1) < cap_getCoordinate(cap2)) {
                if (!cap_getSide(cap1) && cap_getSide(cap2)) {
                    *separationDistance = cap_getCoordinate(cap2) - cap_getCoordinate(cap1) - 1; //The minus 1, to give the length of the sequence between the two caps.
                    return 1;
                }
            } else {
                if (cap_getSide(cap1) && !cap_getSide(cap2)) {
                    *separationDistance = cap_getCoordinate(cap1) - cap_getCoordinate(cap2) - 1;
                    return 1;
                }
            }
        }
    }
    return 0;
}
Пример #2
0
bool linked(Segment *segmentX, Segment *segmentY, int64_t difference,
        const char *eventString, bool *aligned) {
    assert(segment_getStrand(segmentX));
    assert(segment_getStrand(segmentY));
    *aligned = 0;
    if (segment_getStart(segmentX) < segment_getStart(segmentY)) {
        Block *blockX = segment_getBlock(segmentX);
        Block *blockY = segment_getBlock(segmentY);
        Block_InstanceIterator *instanceItX = block_getInstanceIterator(blockX);
        Segment *segmentX2;
        while ((segmentX2 = block_getNext(instanceItX)) != NULL) {
            if (strcmp(event_getHeader(segment_getEvent(segmentX2)),
                    eventString) == 0) {
                Block_InstanceIterator *instanceItY =
                        block_getInstanceIterator(blockY);
                Segment *segmentY2;
                while ((segmentY2 = block_getNext(instanceItY)) != NULL) {
                    if (strcmp(event_getHeader(segment_getEvent(segmentY2)),
                            eventString) == 0) {
                        *aligned = 1;
                        if (sequence_getMetaSequence(
                                segment_getSequence(segmentX2))
                                == sequence_getMetaSequence(
                                        segment_getSequence(segmentY2))) { //Have the same assembly sequence
                            //Now check if the two segments are connected by a path of adjacency from the 3' end of segmentX to the 5' end of segmentY.
                            int64_t separationDistance;
                            if (capsAreAdjacent(segment_get3Cap(segmentX2),
                                    segment_get5Cap(segmentY2),
                                    &separationDistance)) {
                                //if(difference < 10000 || (separationDistance <=  difference * 1.5 && difference <= separationDistance * 1.5)) {
                                block_destructInstanceIterator(instanceItX);
                                block_destructInstanceIterator(instanceItY);
                                return 1;
                                //}
                            }
                        }
                    }
                }
                block_destructInstanceIterator(instanceItY);
            }
        }
        block_destructInstanceIterator(instanceItX);
    } else {
        assert(segmentX == segmentY);
        if(hasCapInEvent(block_get5End(segment_getBlock(segmentX)),
                eventString)) {
            *aligned = 1;
            return 1;
        }
    }
    return 0;
}
Пример #3
0
static stList *getSubstringsForFlowerSegments(stList *flowers) {
    /*
     * Get the set of substrings representing the strings in the segments of the given flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *blockIt = flower_getBlockIterator(flower);
        Block *block;
        while ((block = flower_getNextBlock(blockIt)) != NULL) {
            Block_InstanceIterator *instanceIt = block_getInstanceIterator(block);
            Segment *segment;
            while ((segment = block_getNext(instanceIt)) != NULL) {
                Sequence *sequence;
                if ((sequence = segment_getSequence(segment)) != NULL) {
                    segment = segment_getStrand(segment) ? segment : segment_getReverse(segment);
                    assert(segment_getLength(segment) > 0);
                    stList_append(substrings,
                            substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                    segment_getStart(segment) - sequence_getStart(sequence),
                                    segment_getLength(segment)));
                }
            }
            block_destructInstanceIterator(instanceIt);
        }
        flower_destructBlockIterator(blockIt);
    }
    return substrings;
}
Пример #4
0
static void getMetaSequencesForEventsP(stSortedSet *metaSequences,
        Flower *flower, stList *eventStrings) {
    //Iterate over the sequences in the flower.
    Flower_SequenceIterator *seqIt = flower_getSequenceIterator(flower);
    Sequence *sequence;
    while ((sequence = flower_getNextSequence(seqIt)) != NULL) {
        MetaSequence *metaSequence = sequence_getMetaSequence(sequence);
        if (stringIsInList(event_getHeader(sequence_getEvent(sequence)),
                eventStrings) == 0) {
            if (stSortedSet_search(metaSequences, metaSequence) == NULL) {
                stSortedSet_insert(metaSequences, metaSequence);
            }
        }
    }
    flower_destructSequenceIterator(seqIt);
    //Recurse over the flowers
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIt)) != NULL) {
        if (group_getNestedFlower(group) != NULL) {
            getMetaSequencesForEventsP(metaSequences,
                    group_getNestedFlower(group), eventStrings);
        }
    }
    flower_destructGroupIterator(groupIt);
}
Пример #5
0
static bool duplicated(Segment *segment) {
    Sequence *sequence = segment_getSequence(segment);
    assert(sequence != NULL);
    MetaSequence *metaSequence = sequence_getMetaSequence(sequence);
    Block *block = segment_getBlock(segment);
    Block_InstanceIterator *it = block_getInstanceIterator(block);
    Segment *segment2;
    while((segment2 = block_getNext(it)) != NULL) {
       if(segment != segment2) {
           assert(segment != segment_getReverse(segment2));
           Sequence *sequence2 = segment_getSequence(segment2);
           if(sequence2 != NULL && sequence_getMetaSequence(sequence2) == metaSequence) {
               block_destructInstanceIterator(it);
               return 1;
           }
       }
    }
    block_destructInstanceIterator(it);
    return 0;
}
bool endsAreConnected(End *end1, End *end2, stList *eventStrings) {
    if (end_getName(end1) == end_getName(end2)) { //Then the ends are the same and are part of the same chromosome by definition.
        End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
        Cap *cap1;
        while ((cap1 = end_getNext(instanceIterator)) != NULL) {
            if (capHasGivenEvents(cap1, eventStrings)) {
                end_destructInstanceIterator(instanceIterator);
                return 1;
            }
        }
        return 0;
    }
    End_InstanceIterator *instanceIterator = end_getInstanceIterator(end1);
    Cap *cap1;
    while ((cap1 = end_getNext(instanceIterator)) != NULL) {
        if (capHasGivenEvents(cap1, eventStrings)) {
            End_InstanceIterator *instanceIterator2 = end_getInstanceIterator(end2);
            Cap *cap2;
            while ((cap2 = end_getNext(instanceIterator2)) != NULL) {
                assert(cap_getName(cap2) != cap_getName(cap1)); //This could only happen if end1 == end2
                if (sequence_getMetaSequence(cap_getSequence(cap1)) == sequence_getMetaSequence(cap_getSequence(cap2))) {
                    assert(strcmp(event_getHeader(cap_getEvent(cap1)),
                                    event_getHeader(cap_getEvent(cap2))) == 0);
                    assert(cap_getPositiveOrientation(cap1)
                            != cap_getPositiveOrientation(cap2));
                    assert(cap_getName(cap1) != cap_getName(cap2));
                    //they could have the same coordinate if they represent two ends of a block of length 1.

                    end_destructInstanceIterator(instanceIterator);
                    end_destructInstanceIterator(instanceIterator2);
                    return 1;
                }
            }
            end_destructInstanceIterator(instanceIterator2);
        }
    }
    end_destructInstanceIterator(instanceIterator);
    return 0;
}
Пример #7
0
int main(int argc, char *argv[])
{
    char *cactusDiskString = NULL;
    stKVDatabaseConf *kvDatabaseConf;
    CactusDisk *cactusDisk;
    Flower *flower;
    Flower_SequenceIterator *flowerIt;
    Sequence *sequence;
    struct option longopts[] = { {"cactusDisk", required_argument, NULL, 'c' },
                                 {0, 0, 0, 0} };
    int flag;
    while((flag = getopt_long(argc, argv, "", longopts, NULL)) != -1) {
        switch(flag) {
        case 'c':
            cactusDiskString = stString_copy(optarg);
            break;
        case '?':
        default:
            usage();
            return 1;
        }
    }
    if (cactusDiskString == NULL) {
        st_errAbort("--cactusDisk option must be provided");
    }
    kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskString);
    cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    // Get top-level flower.
    flower = cactusDisk_getFlower(cactusDisk, 0);
    flowerIt = flower_getSequenceIterator(flower);
    while((sequence = flower_getNextSequence(flowerIt)) != NULL) {
        MetaSequence *metaSequence = sequence_getMetaSequence(sequence);
        const char *header;
        char *firstToken, *newHeader;
        stList *tokens;
        // Strip the ID token from the header (should be the first
        // |-separated token) and complain if there isn't one.
        header = metaSequence_getHeader(metaSequence);
        tokens = fastaDecodeHeader(header);
        assert(stList_length(tokens) > 1);
        firstToken = stList_removeFirst(tokens);
        assert(!strncmp(firstToken, "id=", 3));
        free(firstToken);
        newHeader = fastaEncodeHeader(tokens);
        metaSequence_setHeader(metaSequence, newHeader);
    }
    cactusDisk_write(cactusDisk);
}
Пример #8
0
static Segment *getSegment(stSortedSet *sortedSegments, int64_t x, MetaSequence *metaSequence) {
    segmentCompareFn_coordinate = x;
    segmentCompareFn_metaSequence = metaSequence_getName(metaSequence);
    Segment *segment = stSortedSet_searchLessThanOrEqual(sortedSegments,
            &segmentCompareFn_coordinate);
    assert((void *) segment != &segmentCompareFn_coordinate);


    if (segment != NULL) {
        if(sequence_getMetaSequence(segment_getSequence(segment)) == metaSequence) {
            assert(segment_getStart(segment) <= x);
            if (x < segment_getStart(segment) + segment_getLength(segment)) {
                return segment;
            }
        }
    }
    return NULL;
}
Пример #9
0
static void getReferenceSequences(FILE *fileHandle, Flower *flower, char *referenceEventString){
   //get names of all the sequences in 'flower' for event with name 'referenceEventString'
   Sequence *sequence;
   Flower_SequenceIterator * seqIterator = flower_getSequenceIterator(flower);
   while((sequence = flower_getNextSequence(seqIterator)) != NULL)
   {
      Event* event = sequence_getEvent(sequence);
      const char* eventName = event_getHeader(event);
      if (strcmp(eventName, referenceEventString) == 0 &&
          sequence_getLength(sequence) > 0 &&
          !metaSequence_isTrivialSequence(sequence_getMetaSequence(sequence))) {
         const char *sequenceHeader = formatSequenceHeader(sequence);
         st_logInfo("Sequence %s\n", sequenceHeader);
         char *string = sequence_getString(sequence, sequence_getStart(sequence), sequence_getLength(sequence), 1);
         fastaWrite(string, (char *)sequenceHeader, fileHandle);
         free(string);
      }
   }
   flower_destructSequenceIterator(seqIterator);
   return;
}
Пример #10
0
static stList *getSubstringsForFlowers(stList *flowers) {
    /*
     * Get the set of substrings for sequence intervals in the given set of flowers.
     */
    stList *substrings = stList_construct3(0, (void (*)(void *)) substring_destruct);
    for (int64_t i = 0; i < stList_length(flowers); i++) {
        Flower *flower = stList_get(flowers, i);
        Flower_EndIterator *endIt = flower_getEndIterator(flower);
        End *end;
        while ((end = flower_getNextEnd(endIt)) != NULL) {
            if (end_isStubEnd(end)) {
                End_InstanceIterator *instanceIt = end_getInstanceIterator(end);
                Cap *cap;
                while ((cap = end_getNext(instanceIt)) != NULL) {
                    Sequence *sequence;
                    if ((sequence = cap_getSequence(cap)) != NULL) {
                        cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
                        if (!cap_getSide(cap)) { //We have a sequence interval of interest
                            Cap *adjacentCap = cap_getAdjacency(cap);
                            assert(adjacentCap != NULL);
                            int64_t length = cap_getCoordinate(adjacentCap) - cap_getCoordinate(cap) - 1;
                            assert(length >= 0);
                            if (length > 0) {
                                stList_append(substrings,
                                        substring_construct(sequence_getMetaSequence(sequence)->stringName,
                                                cap_getCoordinate(cap) + 1 - sequence_getStart(sequence), length));
                            }
                        }
                    }
                }
                end_destructInstanceIterator(instanceIt);
            }
        }
        flower_destructEndIterator(endIt);
    }
    return substrings;
}
Пример #11
0
void topDown(Flower *flower, Name referenceEventName) {
    /*
     * Run on each flower, top down. Sets the coordinates of each reference cap to the correct
     * sequence, and sets the bases of the reference sequence to be consensus bases.
     */
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIt)) != NULL) {
        Cap *cap = getCapForReferenceEvent(end, referenceEventName); //The cap in the reference
        if (cap != NULL) {
            cap = cap_getStrand(cap) ? cap : cap_getReverse(cap);
            if (!cap_getSide(cap)) {
                assert(cap_getCoordinate(cap) != INT64_MAX);
                Sequence *sequence = cap_getSequence(cap);
                assert(sequence != NULL);
                Group *group = end_getGroup(end);
                if (!group_isLeaf(group)) {
                    Flower *nestedFlower = group_getNestedFlower(group);
                    Cap *nestedCap = flower_getCap(nestedFlower, cap_getName(cap));
                    assert(nestedCap != NULL);
                    nestedCap = cap_getStrand(nestedCap) ? nestedCap : cap_getReverse(nestedCap);
                    assert(cap_getStrand(nestedCap));
                    assert(!cap_getSide(nestedCap));
                    int64_t endCoordinate = setCoordinates(nestedFlower, sequence_getMetaSequence(sequence),
                                                           nestedCap, cap_getCoordinate(cap));
                    (void) endCoordinate;
                    assert(endCoordinate == cap_getCoordinate(cap_getAdjacency(cap)));
                    assert(endCoordinate
                           == cap_getCoordinate(
                               flower_getCap(nestedFlower, cap_getName(cap_getAdjacency(cap)))));
                }
            }
        }
    }
    flower_destructEndIterator(endIt);
}
Пример #12
0
static int segmentCompareFn(const void *segment1, const void *segment2) {
    Name name1 = segment1 == &segmentCompareFn_coordinate ? segmentCompareFn_metaSequence : metaSequence_getName(sequence_getMetaSequence(segment_getSequence((Segment *)segment1)));

    Name name2 = segment2 == &segmentCompareFn_coordinate ? segmentCompareFn_metaSequence : metaSequence_getName(sequence_getMetaSequence(segment_getSequence((Segment *)segment2)));
    int i = cactusMisc_nameCompare(name1, name2);
    if(i == 0) {
        int64_t
                x =
                        segment1 == &segmentCompareFn_coordinate ? segmentCompareFn_coordinate
                                : (int64_t) segment_getStart((void *) segment1);
        int64_t
                y =
                        segment2 == &segmentCompareFn_coordinate ? segmentCompareFn_coordinate
                                : (int64_t) segment_getStart((void *) segment2);
        assert(
                segment1 == &segmentCompareFn_coordinate || segment_getStrand(
                        (void *) segment1));
        assert(
                segment2 == &segmentCompareFn_coordinate || segment_getStrand(
                        (void *) segment2));
        return x > y ? 1 : (x < y ? -1 : 0); //i > 0 ? 1 : i < 0 ? -1 : 0; //This was because of an overflow
    }
    return i;
}