Beispiel #1
0
/*
 * Recursive worker: walks the sequences of 'flower' and of every flower nested
 * beneath it, adding to 'metaSequences' the meta-sequence of each sequence for
 * which stringIsInList(eventHeader, eventStrings) returns 0. A meta-sequence
 * already present in the set is not inserted a second time.
 */
static void getMetaSequencesForEventsP(stSortedSet *metaSequences,
        Flower *flower, stList *eventStrings) {
    //First handle the sequences held directly by this flower.
    Sequence *seq;
    Flower_SequenceIterator *sequenceIterator = flower_getSequenceIterator(flower);
    while ((seq = flower_getNextSequence(sequenceIterator)) != NULL) {
        MetaSequence *candidate = sequence_getMetaSequence(seq);
        if (stringIsInList(event_getHeader(sequence_getEvent(seq)),
                eventStrings) != 0) {
            continue; //Event not selected by the list check.
        }
        if (stSortedSet_search(metaSequences, candidate) != NULL) {
            continue; //Already collected.
        }
        stSortedSet_insert(metaSequences, candidate);
    }
    flower_destructSequenceIterator(sequenceIterator);

    //Then descend into the nested flower of every group that has one.
    Group *childGroup;
    Flower_GroupIterator *groupIterator = flower_getGroupIterator(flower);
    while ((childGroup = flower_getNextGroup(groupIterator)) != NULL) {
        Flower *nestedFlower = group_getNestedFlower(childGroup);
        if (nestedFlower == NULL) {
            continue;
        }
        getMetaSequencesForEventsP(metaSequences, nestedFlower, eventStrings);
    }
    flower_destructGroupIterator(groupIterator);
}
Beispiel #2
0
/*
 * Runs the consistency checks for a flower and for everything it contains:
 * the event tree, groups, chains, ends (both orientations), faces, blocks
 * (both orientations) and sequences.
 *
 * Faces and blocks are only iterated when the flower reports them as built;
 * otherwise the flower is checked to hold no faces / to be a terminal leaf.
 */
void flower_check(Flower *flower) {
    eventTree_check(flower_getEventTree(flower));

    Flower_GroupIterator *groupIterator = flower_getGroupIterator(flower);
    Group *group;
    while ((group = flower_getNextGroup(groupIterator)) != NULL) {
        group_check(group);
    }
    flower_destructGroupIterator(groupIterator);

    Flower_ChainIterator *chainIterator = flower_getChainIterator(flower);
    Chain *chain;
    while ((chain = flower_getNextChain(chainIterator)) != NULL) {
        chain_check(chain);
    }
    //A chain iterator must be released with the chain destructor (it was
    //previously handed to the cap iterator destructor).
    flower_destructChainIterator(chainIterator);

    //We check built trees in here.
    Flower_EndIterator *endIterator = flower_getEndIterator(flower);
    End *end;
    while ((end = flower_getNextEnd(endIterator)) != NULL) {
        end_check(end);
        end_check(end_getReverse(end)); //We will test everything backwards also.
    }
    flower_destructEndIterator(endIterator);

    if (flower_builtFaces(flower)) {
        Flower_FaceIterator *faceIterator = flower_getFaceIterator(flower);
        Face *face;
        while ((face = flower_getNextFace(faceIterator)) != NULL) {
            face_check(face);
        }
        flower_destructFaceIterator(faceIterator);
        face_checkFaces(flower);
    } else {
        //Without built faces the flower must not contain any.
        cactusCheck(flower_getFaceNumber(flower) == 0);
    }

    if (flower_builtBlocks(flower)) { //Note that a flower for which the blocks are not yet built must be a leaf.
        Flower_BlockIterator *blockIterator = flower_getBlockIterator(flower);
        Block *block;
        while ((block = flower_getNextBlock(blockIterator)) != NULL) {
            block_check(block);
            block_check(block_getReverse(block)); //We will test everything backwards also.
        }
        flower_destructBlockIterator(blockIterator);
    } else {
        cactusCheck(flower_isLeaf(flower)); //Defensive
        cactusCheck(flower_isTerminal(flower)); //Checks that a flower without built blocks is a leaf and does not
        //contain any blocks.
    }

    Flower_SequenceIterator *sequenceIterator = flower_getSequenceIterator(flower);
    Sequence *sequence;
    while ((sequence = flower_getNextSequence(sequenceIterator)) != NULL) {
        sequence_check(sequence);
    }
    flower_destructSequenceIterator(sequenceIterator);
}
Beispiel #3
0
/*
 * Serialises 'flower' through 'writeFn'.
 *
 * Layout, in order: the CODE_FLOWER element type, the flower name, the
 * built-blocks / built-trees / built-faces flags, the parent flower name, the
 * event tree (only if the flower has one), then every sequence, end, block,
 * group and chain, and finally a closing CODE_FLOWER element type.
 */
void flower_writeBinaryRepresentation(Flower *flower, void(*writeFn)(const void * ptr, size_t size, size_t count)) {
    //Header fields.
    binaryRepresentation_writeElementType(CODE_FLOWER, writeFn);
    binaryRepresentation_writeName(flower_getName(flower), writeFn);
    binaryRepresentation_writeBool(flower_builtBlocks(flower), writeFn);
    binaryRepresentation_writeBool(flower_builtTrees(flower), writeFn);
    binaryRepresentation_writeBool(flower_builtFaces(flower), writeFn);
    binaryRepresentation_writeName(flower->parentFlowerName, writeFn);

    //Optional event tree.
    if (flower_getEventTree(flower) != NULL) {
        eventTree_writeBinaryRepresentation(flower_getEventTree(flower), writeFn);
    }

    //Sequences.
    Flower_SequenceIterator *seqIt = flower_getSequenceIterator(flower);
    for (Sequence *seq = flower_getNextSequence(seqIt); seq != NULL;
            seq = flower_getNextSequence(seqIt)) {
        sequence_writeBinaryRepresentation(seq, writeFn);
    }
    flower_destructSequenceIterator(seqIt);

    //Ends.
    Flower_EndIterator *endIt = flower_getEndIterator(flower);
    for (End *e = flower_getNextEnd(endIt); e != NULL;
            e = flower_getNextEnd(endIt)) {
        end_writeBinaryRepresentation(e, writeFn);
    }
    flower_destructEndIterator(endIt);

    //Blocks.
    Flower_BlockIterator *blockIt = flower_getBlockIterator(flower);
    for (Block *b = flower_getNextBlock(blockIt); b != NULL;
            b = flower_getNextBlock(blockIt)) {
        block_writeBinaryRepresentation(b, writeFn);
    }
    flower_destructBlockIterator(blockIt);

    //Groups.
    Flower_GroupIterator *groupIt = flower_getGroupIterator(flower);
    for (Group *g = flower_getNextGroup(groupIt); g != NULL;
            g = flower_getNextGroup(groupIt)) {
        group_writeBinaryRepresentation(g, writeFn);
    }
    flower_destructGroupIterator(groupIt);

    //Chains.
    Flower_ChainIterator *chainIt = flower_getChainIterator(flower);
    for (Chain *c = flower_getNextChain(chainIt); c != NULL;
            c = flower_getNextChain(chainIt)) {
        chain_writeBinaryRepresentation(c, writeFn);
    }
    flower_destructChainIterator(chainIt);

    //Closing marker, so that a reader does not misinterpret what follows.
    binaryRepresentation_writeElementType(CODE_FLOWER, writeFn);
}
/*
 * Returns the first sequence of 'flower' (in iteration order) whose event
 * header equals 'referenceEventString', or NULL if no sequence matches.
 */
static Sequence *getSequenceMatchesEvent(Flower *flower, char *referenceEventString){
    Sequence *match = NULL;
    Sequence *candidate;
    Flower_SequenceIterator *sequenceIterator = flower_getSequenceIterator(flower);
    //Stop advancing the iterator as soon as a match has been recorded.
    while (match == NULL
            && (candidate = flower_getNextSequence(sequenceIterator)) != NULL) {
        if (strcmp(event_getHeader(sequence_getEvent(candidate)),
                referenceEventString) == 0) {
            match = candidate;
        }
    }
    flower_destructSequenceIterator(sequenceIterator);
    return match;
}
Beispiel #5
0
/*
 * Command-line entry point: opens the cactus disk named by --cactusDisk,
 * strips the leading "id=" token from the header of every sequence in the
 * top-level flower (flower 0), and writes the disk back.
 *
 * Returns 0 on success and 1 on a usage error. A missing --cactusDisk option
 * or a header without a leading "id=" token followed by at least one more
 * token aborts via st_errAbort.
 */
int main(int argc, char *argv[])
{
    char *cactusDiskString = NULL;
    stKVDatabaseConf *kvDatabaseConf;
    CactusDisk *cactusDisk;
    Flower *flower;
    Flower_SequenceIterator *flowerIt;
    Sequence *sequence;
    struct option longopts[] = { {"cactusDisk", required_argument, NULL, 'c' },
                                 {0, 0, 0, 0} };
    int flag;
    while((flag = getopt_long(argc, argv, "", longopts, NULL)) != -1) {
        switch(flag) {
        case 'c':
            cactusDiskString = stString_copy(optarg);
            break;
        case '?':
        default:
            usage();
            return 1;
        }
    }
    if (cactusDiskString == NULL) {
        st_errAbort("--cactusDisk option must be provided");
    }
    kvDatabaseConf = stKVDatabaseConf_constructFromString(cactusDiskString);
    cactusDisk = cactusDisk_construct(kvDatabaseConf, 0);
    // Get top-level flower.
    flower = cactusDisk_getFlower(cactusDisk, 0);
    flowerIt = flower_getSequenceIterator(flower);
    while((sequence = flower_getNextSequence(flowerIt)) != NULL) {
        MetaSequence *metaSequence = sequence_getMetaSequence(sequence);
        const char *header;
        char *firstToken, *newHeader;
        stList *tokens;
        // Strip the ID token from the header (should be the first
        // |-separated token) and complain if there isn't one.
        header = metaSequence_getHeader(metaSequence);
        tokens = fastaDecodeHeader(header);
        // Validate at runtime rather than with assert(), so the checks are
        // still performed when the program is built with NDEBUG.
        if (stList_length(tokens) <= 1) {
            st_errAbort("Sequence header does not contain an ID token followed by further tokens");
        }
        firstToken = stList_removeFirst(tokens);
        if (strncmp(firstToken, "id=", 3) != 0) {
            st_errAbort("First token of sequence header does not start with 'id='");
        }
        free(firstToken);
        newHeader = fastaEncodeHeader(tokens);
        // NOTE(review): 'tokens' is not released here and 'newHeader' is
        // handed to metaSequence_setHeader without being freed; confirm the
        // ownership rules of fastaDecodeHeader / metaSequence_setHeader.
        metaSequence_setHeader(metaSequence, newHeader);
    }
    // Release the iterator before persisting the modified headers.
    flower_destructSequenceIterator(flowerIt);
    cactusDisk_write(cactusDisk);
    return 0;
}
/*
 * Writes to 'fileHandle', in FASTA format, every sequence of 'flower' whose
 * event header equals 'referenceEventString', whose length is greater than
 * zero, and whose meta-sequence is not reported as trivial. Each written
 * sequence is also logged at info level.
 */
static void getReferenceSequences(FILE *fileHandle, Flower *flower, char *referenceEventString){
   Flower_SequenceIterator *it = flower_getSequenceIterator(flower);
   Sequence *seq;
   while((seq = flower_getNextSequence(it)) != NULL)
   {
      //Filters are applied in the same order as a short-circuit '&&' chain.
      if (strcmp(event_getHeader(sequence_getEvent(seq)), referenceEventString) != 0) {
         continue;
      }
      if (!(sequence_getLength(seq) > 0)) {
         continue;
      }
      if (metaSequence_isTrivialSequence(sequence_getMetaSequence(seq))) {
         continue;
      }

      const char *fastaHeader = formatSequenceHeader(seq);
      st_logInfo("Sequence %s\n", fastaHeader);
      //Fetch the whole sequence, from its start, over its full length.
      char *bases = sequence_getString(seq, sequence_getStart(seq), sequence_getLength(seq), 1);
      fastaWrite(bases, (char *)fastaHeader, fileHandle);
      free(bases);
   }
   flower_destructSequenceIterator(it);
}
/*
 * Entry point of the coverageStats tool.
 *
 * After parsing the arguments it picks, for the reference event and for the
 * other-reference event, the longest matching sequence in the flower (ties go
 * to the sequence seen last), then writes a <coverageStats> XML report to
 * 'outputFile' containing:
 *   - one entry per event whose header is non-NULL, not "ROOT" and not "",
 *   - an "average" entry built from the accumulated totals,
 *   - an "all" entry,
 *   - a "minusOtherReference" entry computed with ignoreOtherReferenceBlocks set.
 *
 * The function communicates with getMAFs / getMAFBlock2 / printStatsForSample
 * through variables declared outside this function (sampleEventString,
 * baseCoverages, referenceBases, otherReferenceBases, the total* counters and
 * ignoreOtherReferenceBlocks), so the order of the assignments below matters.
 *
 * Returns 0 on success and 1 if the output file cannot be opened.
 */
int main(int argc, char *argv[]) {
    //////////////////////////////////////////////
    //Parse the inputs
    //////////////////////////////////////////////

    parseBasicArguments(argc, argv, "coverageStats");
    assert(referenceEventString != NULL);
    assert(otherReferenceEventString != NULL);
    assert(outgroupEventString != NULL);

    ///////////////////////////////////////////////////////////////////////////
    // Calculate and print to file a crap load of numbers.
    ///////////////////////////////////////////////////////////////////////////

    //Find the longest sequence of the reference event and of the other
    //reference event ('>=' means the last of several equally long ones wins).
    Sequence *referenceSequence = NULL;
    Sequence *otherReferenceSequence = NULL;
    Flower_SequenceIterator *sequenceIt = flower_getSequenceIterator(flower);
    Sequence *sequence;
    while ((sequence = flower_getNextSequence(sequenceIt)) != NULL) {
        const char *eventHeader = event_getHeader(sequence_getEvent(sequence));
        if (eventHeader != NULL && strcmp(eventHeader, referenceEventString)
                == 0) {
            if (referenceSequence == NULL || sequence_getLength(sequence)
                    >= sequence_getLength(referenceSequence)) {
                referenceSequence = sequence;
            }
        }
        if (eventHeader != NULL && strcmp(eventHeader,
                otherReferenceEventString) == 0) {
            if (otherReferenceSequence == NULL || sequence_getLength(sequence)
                    >= sequence_getLength(otherReferenceSequence)) {
                otherReferenceSequence = sequence;
            }
        }
    }
    flower_destructSequenceIterator(sequenceIt);
    assert(referenceSequence != NULL);
    assert(otherReferenceSequence != NULL);

    FILE *fileHandle = fopen(outputFile, "w");
    if (fileHandle == NULL) {
        //Without this check every fprintf below would be handed a NULL stream.
        fprintf(stderr, "Could not open output file %s for writing\n", outputFile);
        return 1;
    }
    fprintf(
            fileHandle,
            "<coverageStats referenceSequenceLength=\"%i\" otherReferenceSequenceLength=\"%i\">\n",
            sequence_getLength(referenceSequence),
            sequence_getLength(otherReferenceSequence));
    EventTree_Iterator *eventIt = eventTree_getIterator(
            flower_getEventTree(flower));
    eventNumber = eventTree_getEventNumber(flower_getEventTree(flower));
    Event * event;
    totalBaseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1);
    totalReferenceBases = 0;
    totalOtherReferenceBases = 0;
    int32_t totalSamples = 0;
    ignoreOtherReferenceBlocks = 0;
    //Per-sample stats: one pass over the MAF blocks for each named event.
    while ((event = eventTree_getNext(eventIt)) != NULL) {
        sampleEventString = event_getHeader(event);
        if (sampleEventString != NULL && strcmp(sampleEventString, "ROOT")
                != 0 && strcmp(sampleEventString, "") != 0) {

            baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1);

            //Slot 0 is seeded with the total adjacency length of the sample,
            //except when the sample is the reference itself.
            baseCoverages[0] = strcmp(sampleEventString, referenceEventString) != 0 ? getTotalLengthOfAdjacencies(flower,
                    sampleEventString) : 0;

            referenceBases = 0;
            otherReferenceBases = 0;

            getMAFs(flower, fileHandle, getMAFBlock2);

            if(strcmp(sampleEventString, referenceEventString) == 0) {
                //For the reference sample, shift entries 2..eventNumber down
                //by one slot (overwriting slot 1) and clear the last slot.
                for(int32_t i=2; i<eventNumber + 1; i++) {
                    baseCoverages[i-1] = baseCoverages[i];
                }
                baseCoverages[eventNumber] = 0;
            }

            //First argument is 1 only for samples that are neither the
            //reference nor the outgroup.
            printStatsForSample(
                    strcmp(sampleEventString, referenceEventString) != 0 && strcmp(sampleEventString, outgroupEventString) != 0,
                    fileHandle, 1);

            free(baseCoverages);

            //Only samples that are neither reference nor outgroup are counted.
            totalSamples += (strcmp(sampleEventString, referenceEventString)
                    != 0 && strcmp(sampleEventString, outgroupEventString) != 0) ? 1 : 0;
        }
    }
    eventTree_destructIterator(eventIt);

    //Do average base coverages..
    sampleEventString = "average";
    baseCoverages = totalBaseCoverages;
    referenceBases = totalReferenceBases;
    otherReferenceBases = totalOtherReferenceBases;
    printStatsForSample(0, fileHandle, totalSamples);

    //Do all..
    sampleEventString = referenceEventString;
    baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1);
    baseCoverages[0] = totalBaseCoverages[0];
    referenceBases = 0;
    getMAFs(flower, fileHandle, getMAFBlock2);
    for(int32_t i=2; i<eventNumber + 1; i++) {
        baseCoverages[i-1] = baseCoverages[i];
    }
    baseCoverages[eventNumber] = 0;
    otherReferenceBases = sequence_getLength(otherReferenceSequence);
    sampleEventString = "all";
    printStatsForSample(0, fileHandle, 1);
    free(baseCoverages);

    //Do blocks without other reference
    ignoreOtherReferenceBlocks = 1;
    sampleEventString = referenceEventString;
    baseCoverages = st_calloc(sizeof(int32_t), eventNumber + 1);
    baseCoverages[0] = totalBaseCoverages[0] - getTotalLengthOfAdjacencies(flower,
            otherReferenceEventString);
    referenceBases = 0;
    otherReferenceBases = 0;
    getMAFs(flower, fileHandle, getMAFBlock2);
    for(int32_t i=2; i<eventNumber + 1; i++) {
        baseCoverages[i-1] = baseCoverages[i];
    }
    baseCoverages[eventNumber] = 0;
    sampleEventString = "minusOtherReference";
    printStatsForSample(0, fileHandle, 1);
    free(baseCoverages);

    //The accumulated totals are no longer read past this point.
    free(totalBaseCoverages);

    fprintf(fileHandle, "</coverageStats>\n");

    st_logInfo("Finished writing out the stats.\n");
    fclose(fileHandle);

    return 0;
}