Exemple #1
0
/*
 * Copies a FASTA file: reads every record from the file named by argv[1] with
 * fastaRead and writes each record, in order, to the file named by argv[2]
 * with fastaWrite.
 *
 * Returns 0 on success, or 1 if the argument count is wrong or either file
 * cannot be opened / the output cannot be closed cleanly.
 */
int main(int argc, char *argv[]) {
    FILE *fileHandle;
    struct List *seqs;
    struct List *seqLengths;
    struct List *seqNames;
    int32_t i;

    /* An assert would vanish under NDEBUG and let argv[1]/argv[2] be read past the end. */
    if (argc != 3) {
        fprintf(stderr, "Usage: %s inputFastaFile outputFastaFile\n", argc > 0 ? argv[0] : "program");
        return 1;
    }

    seqs = constructEmptyList(0, free);
    seqLengths = constructEmptyList(0, (void (*)(void *))destructInt);
    seqNames = constructEmptyList(0, free);

    fileHandle = fopen(argv[1], "r");
    if (fileHandle == NULL) {
        fprintf(stderr, "Could not open '%s' for reading\n", argv[1]);
        return 1;
    }
    fastaRead(fileHandle, seqs, seqLengths, seqNames);
    fclose(fileHandle);

    fileHandle = fopen(argv[2], "w");
    if (fileHandle == NULL) {
        fprintf(stderr, "Could not open '%s' for writing\n", argv[2]);
        return 1;
    }
    for(i=0; i < seqs->length; i++) {
        /* Sanity check: the recorded length must agree with the string actually read. */
        assert(strlen(seqs->list[i]) == (size_t) *((int32_t *)seqLengths->list[i]));
        fastaWrite(seqs->list[i], seqNames->list[i], fileHandle);
    }
    /* fclose flushes buffered output, so a failure here means the copy is incomplete. */
    if (fclose(fileHandle) != 0) {
        fprintf(stderr, "Error while closing '%s'\n", argv[2]);
        return 1;
    }

    return 0;
}
Exemple #2
0
// Returns a hash mapping from sequence header to sequence data.
//
// Each key is a fresh copy of the first space-delimited token of a FASTA
// header; each value is the sequence string produced by fastaRead for that
// record. The sequence strings are handed over to the hash rather than copied,
// which is why the temporary "seqs" list is built with a NULL element
// destructor (presumably so destructList does not free them -- confirm against
// the List implementation).
//
// Aborts through st_errnoAbort if the file cannot be opened.
static stHash *readFastaFile(char *filename) {
    FILE *fasta = fopen(filename, "r");
    if (fasta == NULL) {
        st_errnoAbort("Could not open fasta file %s", filename);
    }
    stHash *headerToData = stHash_construct3(stHash_stringKey,
                                             stHash_stringEqualKey,
                                             free,
                                             free);
    struct List *seqs = constructEmptyList(0, NULL);
    struct List *seqLengths = constructEmptyList(0, free);
    struct List *headers = constructEmptyList(0, free);
    fastaRead(fasta, seqs, seqLengths, headers);
    // The handle is not used again below; close it so it is not leaked on every call.
    fclose(fasta);

    for (int64_t i = 0; i < seqs->length; i++) {
        char *fullHeader = headers->list[i];
        // Only the text before the first space is used as the lookup key.
        stList *headerTokens = stString_splitByString(fullHeader, " ");
        char *usableHeader = stString_copy(stList_get(headerTokens, 0));
        stHash_insert(headerToData, usableHeader, seqs->list[i]);
        stList_destruct(headerTokens);
    }
    destructList(seqs);
    destructList(seqLengths);
    destructList(headers);

    return headerToData;
}
/*
 * Returns the closest event that lies on the parent chain of both arguments,
 * where an event counts as being on its own chain.
 *
 * Both events must be non-NULL and belong to the same event tree (asserted).
 * If the two parent chains never meet, the function asserts and, when asserts
 * are compiled out, returns NULL.
 */
Event *eventTree_getCommonAncestor(Event *event, Event *event2) {
	assert(event != NULL);
	assert(event2 != NULL);
	assert(event_getEventTree(event) == event_getEventTree(event2));

	/* Record "event" and every ancestor above it; the list does not own its elements. */
	struct List *lineage = constructEmptyList(0, NULL);
	for(Event *cursor = event; cursor != NULL; cursor = event_getParent(cursor)) {
		if(cursor == event2) {
			/* event2 is itself on event's chain, so it is the answer. */
			destructList(lineage);
			return event2;
		}
		listAppend(lineage, cursor);
	}

	/* Climb from event2's parent; the first event already recorded is the common ancestor. */
	for(Event *cursor = event_getParent(event2); cursor != NULL; cursor = event_getParent(cursor)) {
		if(listContains(lineage, cursor)) {
			destructList(lineage);
			return cursor;
		}
	}

	destructList(lineage);
	assert(FALSE);
	return NULL;
}
/*
 * Builds a list of randomly generated pairwise alignments for testing.
 *
 * The number of alignments comes from st_randomInt(0, 10). Each alignment gets
 * contig names "<i>" and "<i * 10>", random start coordinates and strands, and
 * a random run of alignment operations; the end coordinates are derived from
 * the starts by walking over those operations. The returned list is built with
 * destructPairwiseAlignment as its element destructor.
 */
static stList *getRandomPairwiseAlignments() {
    stList *alignments = stList_construct3(0, (void(*)(void *)) destructPairwiseAlignment);
    int64_t alignmentCount = st_randomInt(0, 10);

    for (int64_t n = 0; n < alignmentCount; n++) {
        char *nameX = stString_print("%" PRIi64 "", n);
        char *nameY = stString_print("%" PRIi64 "", n * 10);
        int64_t startX = st_randomInt(100000, 1000000);
        int64_t startY = st_randomInt(100000, 1000000);
        int64_t strandX = st_random() > 0.5;
        int64_t strandY = st_random() > 0.5;
        /* Coordinates grow on a true strand and shrink on a false one. */
        int64_t directionX = strandX ? 1 : -1;
        int64_t directionY = strandY ? 1 : -1;
        int64_t endX = startX;
        int64_t endY = startY;
        struct List *ops = constructEmptyList(0, NULL);

        /* Keep adding operations until a draw fails the "> 0.1" test. */
        for (;;) {
            if (!(st_random() > 0.1)) {
                break;
            }
            int64_t opLength = st_randomInt(0, 10);
            int64_t opType = st_randomInt(0, 3);
            assert(opType < 3);
            listAppend(ops, constructAlignmentOperation(opType, opLength, 0));
            /* Every operation except an indel in Y advances the X coordinate, and vice versa. */
            if (opType != PAIRWISE_INDEL_Y) {
                endX += directionX * opLength;
            }
            if (opType != PAIRWISE_INDEL_X) {
                endY += directionY * opLength;
            }
        }

        struct PairwiseAlignment *alignment = constructPairwiseAlignment(nameX, startX, endX, strandX,
                nameY, startY, endY, strandY, 0.0, ops);
        stList_append(alignments, alignment);
        free(nameX);
        free(nameY);
    }
    return alignments;
}
Exemple #5
0
/*
 * Parses a file of species trios, one whitespace-separated triple per line.
 *
 * For every non-empty line the three names are lower-cased, sorted with
 * cStr_compare and stored in a newly allocated TrioNames appended to the
 * result list. The list is constructed with freeTrioNames as its element
 * destructor, so the caller releases everything by destroying the list.
 *
 * Exits with status 1 if the file cannot be opened or a line does not contain
 * exactly three names.
 */
struct List *parseTrioFile(char *trioFile) {
    FILE *fileHandle = fopen(trioFile, "r");
    int bytesRead;
    int nBytes = 100;
    char *cA;
    int j;

    char *species[3];

    /* Without this check a missing file would pass a NULL stream to benLine/fclose. */
    if (fileHandle == NULL) {
        fprintf(stderr, "Could not open trio file '%s'\n", trioFile);
        exit(1);
    }

    struct List *speciesList = NULL;
    speciesList = constructEmptyList(0, freeTrioNames);

    cA = st_malloc(nBytes + 1);
    bytesRead = benLine(&cA, &nBytes, fileHandle);

    /* Read lines until benLine reports -1. */
    while(bytesRead != -1) {
        if (bytesRead > 0) {
            /* A single name cannot be longer than the whole line, so bytesRead + 1 bytes suffice. */
            species[0] = st_malloc(sizeof(char) * (1 + (bytesRead)));
            species[1] = st_malloc(sizeof(char) * (1 + (bytesRead)));
            species[2] = st_malloc(sizeof(char) * (1 + (bytesRead)));
            j = sscanf(cA, "%s\t%s\t%s", species[0], species[1], species[2]);
            if (j != 3) {
                fprintf(stderr, "Invalid triple line '%s' in '%s'\n", cA, trioFile);
                exit(1);
            }

            /* Normalise case and order so the same trio always yields the same record. */
            cStr_lowerCase(species[0]);
            cStr_lowerCase(species[1]);
            cStr_lowerCase(species[2]);
            qsort(species, 3, sizeof(char *), cStr_compare);

            TrioNames *trio = st_malloc(sizeof(TrioNames));
            trio->speciesA = species[0];
            trio->speciesB = species[1];
            trio->speciesC = species[2];

            listAppend(speciesList, trio);
        }
        bytesRead = benLine(&cA, &nBytes, fileHandle);
    }
    fclose(fileHandle);

    free(cA);

    return speciesList;
}
Exemple #6
0
/*
 * Allocates a new event, links it beneath parentEvent (when one is given) and
 * registers it with eventTree.
 *
 * name         - identifier of the event; must not already be present in eventTree (asserted).
 * header       - label to copy into the event; NULL is stored as the empty string.
 * branchLength - branch length; negative values are stored as 0.
 * parentEvent  - parent to attach the new event to, or NULL for no parent.
 * eventTree    - tree the event is added to.
 *
 * Returns the newly constructed event.
 */
Event *event_construct(Name name, const char *header, float branchLength, Event *parentEvent,
        EventTree *eventTree) {
    /* Each name may appear only once per tree. */
    assert(eventTree_getEvent(eventTree, name) == NULL);

    Event *created = st_malloc(sizeof(Event));
    created->name = name;
    created->parent = parentEvent;
    created->children = constructEmptyList(0, NULL);
    if (header == NULL) {
        created->header = stString_copy("");
    } else {
        created->header = stString_copy(header);
    }
    if (branchLength < 0.0) {
        created->branchLength = 0.0;
    } else {
        created->branchLength = branchLength;
    }
    created->isOutgroup = 0;
    created->eventTree = eventTree;

    if (parentEvent != NULL) {
        listAppend(parentEvent->children, created);
    }
    eventTree_addEvent(eventTree, created);
    return created;
}
Exemple #7
0
// copied from cPecanRealign
/*
 * Converts a list of aligned (x, y) coordinate tuples into a PairwiseAlignment
 * spanning [0, length1) on seqName1 and [0, length2) on seqName2, both with
 * strand 1 and the given score.
 *
 * Runs of consecutive pairs become PAIRWISE_MATCH operations; a jump of more
 * than one position in x or y becomes a PAIRWISE_INDEL_X or PAIRWISE_INDEL_Y
 * operation of the skipped length. alignedPairs is extended with a temporary
 * sentinel pair during the call and restored before returning.
 */
struct PairwiseAlignment *convertAlignedPairsToPairwiseAlignment(char *seqName1, char *seqName2, double score,
        int64_t length1, int64_t length2, stList *alignedPairs) {
    struct List *operations = constructEmptyList(0, (void (*)(void *)) destructAlignmentOperation);

    // Sentinel pair just past both sequence ends, so that trailing indels are emitted.
    stList_append(alignedPairs, stIntTuple_construct2(length1, length2));

    int64_t prevX = -1;
    int64_t prevY = -1;
    int64_t matchRun = 0;
    for (int64_t k = 0; k < stList_length(alignedPairs); k++) {
        stIntTuple *pair = stList_get(alignedPairs, k);
        int64_t x = stIntTuple_get(pair, 0);
        int64_t y = stIntTuple_get(pair, 1);
        int64_t stepX = x - prevX;
        int64_t stepY = y - prevY;
        assert(stepX > 0);
        assert(stepY > 0);
        // With asserts compiled out, pairs that do not advance in both sequences are skipped.
        if (stepX <= 0 || stepY <= 0) {
            continue;
        }
        if (stepX > 1) { // positions were skipped in x: close any open match run, then emit the indel
            if (matchRun > 0) {
                listAppend(operations, constructAlignmentOperation(PAIRWISE_MATCH, matchRun, 0));
                matchRun = 0;
            }
            listAppend(operations, constructAlignmentOperation(PAIRWISE_INDEL_X, stepX - 1, 0));
        }
        if (stepY > 1) { // same treatment for positions skipped in y
            if (matchRun > 0) {
                listAppend(operations, constructAlignmentOperation(PAIRWISE_MATCH, matchRun, 0));
                matchRun = 0;
            }
            listAppend(operations, constructAlignmentOperation(PAIRWISE_INDEL_Y, stepY - 1, 0));
        }
        matchRun++;
        prevX = x;
        prevY = y;
    }

    // The sentinel was counted as a match; emit the trailing run without it.
    if (matchRun > 1) {
        listAppend(operations, constructAlignmentOperation(PAIRWISE_MATCH, matchRun - 1, 0));
    }
    // Remove the sentinel again so the caller's list is left as it was passed in.
    stIntTuple_destruct(stList_pop(alignedPairs));

    return constructPairwiseAlignment(seqName1, 0, length1, 1, seqName2, 0, length2, 1, score, operations);
}
Exemple #8
0
/*
 * Returns a newly allocated copy of the first whitespace-delimited word of
 * header, or an empty string when header contains no such word. The caller
 * frees the result.
 */
static char *firstWordOfHeader(const char *header) {
    char *word = st_malloc(sizeof(char) * (1 + strlen(header)));
    word[0] = '\0'; // sscanf stores nothing when the header is blank
    sscanf(header, "%s", word);
    return word;
}

/*
 * Converts a multi-FASTA alignment (--mfaFile) into a single-block MAF file
 * (--outputFile). When --treeFile is given, the tree is written on the "a"
 * line and the rows are emitted in the order of the tree's leaves, each leaf
 * being matched to the sequence whose first header word equals the leaf label;
 * otherwise rows follow the order of the input file.
 *
 * Returns 0 on success. Exits with status 1 on missing arguments, unopenable
 * files, or a tree leaf without a matching sequence.
 */
int main(int argc, char *argv[]) {
    /*
     * Arguments/options
     */
    char *logLevelString = NULL;
    char *mfaFile = NULL;
    char *outputFile = NULL;
    char *treeFile = NULL;

    ///////////////////////////////////////////////////////////////////////////
    // (0) Parse the inputs handed by genomeCactus.py / setup stuff.
    ///////////////////////////////////////////////////////////////////////////

    while (1) {
        static struct option long_options[] = {
            { "logLevel", required_argument, 0, 'a' },
            { "mfaFile", required_argument, 0, 'b' },
            { "outputFile", required_argument, 0, 'd' },
            { "treeFile", optional_argument, 0, 't' },
            { "help", no_argument, 0, 'h' },
            { 0, 0, 0, 0 }
        };

        int option_index = 0;

        int key = getopt_long(argc, argv, "a:b:d:t:h", long_options, &option_index);

        if (key == -1) {
            break;
        }

        switch (key) {
            case 'a':
                logLevelString = stString_copy(optarg);
                break;
            case 'b':
                mfaFile = stString_copy(optarg);
                break;
            case 'd':
                outputFile = stString_copy(optarg);
                break;
            case 't':
                treeFile = stString_copy(optarg);
                break;
            case 'h':
                usage();
                return 0;
            default:
                usage();
                return 1;
        }
    }

    ///////////////////////////////////////////////////////////////////////////
    // (0) Check the inputs.
    ///////////////////////////////////////////////////////////////////////////

    // Both files are mandatory; an assert alone would disappear under NDEBUG
    // and let NULL reach fopen and the log calls.
    if (argc == 1 || mfaFile == NULL || outputFile == NULL) {
        usage();
        exit(1);
    }

    //////////////////////////////////////////////
    //Set up logging
    //////////////////////////////////////////////

    st_setLogLevelFromString(logLevelString);

    //////////////////////////////////////////////
    //Log (some of) the inputs
    //////////////////////////////////////////////

    st_logInfo("MFA file  name : %s\n", mfaFile);
    st_logInfo("Output MAF file : %s\n", outputFile);
    st_logInfo("Tree file name: %s\n", treeFile == NULL ? "null" : treeFile);

    //////////////////////////////////////////////
    //Get the MFA alignment
    //////////////////////////////////////////////

    //get the alignment
    struct List *sequences = constructEmptyList(0, free);
    struct List *seqLengths = constructEmptyList(0, (void (*)(void *))destructInt);
    struct List *fastaNames = constructEmptyList(0, free);
    FILE *fileHandle = fopen(mfaFile, "r");
    if (fileHandle == NULL) {
        usage();
        exit(1);
    }
    fastaRead(fileHandle, sequences, seqLengths, fastaNames);
    fclose(fileHandle);

    //////////////////////////////////////////////
    //Get the tree alignment
    //////////////////////////////////////////////

    stTree *tree = NULL;
    LeafPtrArray *leafArray = NULL;

    int32_t leafCount = 0;
    if (treeFile != NULL) {
        tree = eTreeX_getTreeFromFile(treeFile);

        eTreeX_postOrderTraversal(tree, eTreeX_countLeaves, &leafCount);

        leafArray = eTreeX_constructLeafPtrArray(leafCount);
        eTreeX_postOrderTraversal(tree, eTreeX_getLeafArray, (void *) leafArray);
    }

    //////////////////////////////////////////////
    //Write the MFA alignment.
    //////////////////////////////////////////////

    fileHandle = fopen(outputFile, "w");
    if (fileHandle == NULL) {
        fprintf(stderr, "Could not open output file '%s' for writing\n", outputFile);
        exit(1);
    }
    //write the header.
    fprintf(fileHandle, "##maf version=1 scoring=NULL\n");
    fprintf(fileHandle, "# converted_from_MFA\n\n");

    //write the score line
    char *treeString = NULL;
    if (treeFile != NULL) {
        treeString = stTree_getNewickTreeString(tree);
        fprintf(fileHandle, "a score=0 tree=\"%s\"\n", treeString);
    }
    else {
        fprintf(fileHandle, "a score=0\n");
        leafCount = sequences->length;
    }

    //write the alignment
    int32_t i, j;
    int32_t ii;
    const char *label;
    for (ii=0; ii<leafCount; ii++) {
        if (treeFile != NULL) {
            label = stTree_getLabel((stTree *) leafArray->ptrArray[ii]);

            /* Do a brute force search to find the appropriate sequence that matches "label" */
            for (i=0; i<sequences->length; i++) {
                //take the sequence name to be the first word of the sequence.
                char *candidateName = firstWordOfHeader(fastaNames->list[i]);
                int isMatch = strcmp(label, candidateName) == 0;
                free(candidateName);
                if (isMatch) {
                    break;
                }
            }
            // Falling off the end of the search means no sequence carries this
            // label; indexing the lists with i would read out of bounds.
            if (i >= sequences->length) {
                fprintf(stderr, "No sequence in '%s' matches tree leaf '%s'\n", mfaFile, label);
                exit(1);
            }
        }
        else {
            i = ii;
        }

        char *sequence = sequences->list[i];
        int32_t seqLength = *((int32_t *)seqLengths->list[i]);
        int32_t sequenceChars = (int32_t)strlen(sequence); // measured once instead of on every loop test
        assert(seqLength == sequenceChars);
        (void) seqLength; // only read by the assert above
        char *sequenceName = firstWordOfHeader(fastaNames->list[i]);
        // The MAF size field counts residues only, so gap characters are excluded.
        int32_t length = 0;
        for (j=0; j<sequenceChars; j++) {
            if (sequence[j] != '-') {
                length++;
            }
        }
        fprintf(fileHandle, "s\t%s\t%i\t%i\t%s\t%i\t%s\n", sequenceName, 0, length, "+", length, sequence);
        free(sequenceName);
    }

    // fclose flushes buffered output; a failure means the MAF file is incomplete.
    if (fclose(fileHandle) != 0) {
        fprintf(stderr, "Error while closing output file '%s'\n", outputFile);
        exit(1);
    }

    //////////////////////////////////////////////
    //Clean up.
    //////////////////////////////////////////////

    // Release the tree resources before freeing treeFile, whose value guards them.
    if (treeFile != NULL) {
        stTree_destruct(tree);
        free(treeString);
        eTreeX_destructLeafPtrArray(leafArray);
    }

    free(mfaFile);
    free(outputFile);
    free(treeFile);

    destructList(sequences);
    destructList(seqLengths);
    destructList(fastaNames);

    return 0;
}