Exemplo n.º 1
0
/*
 * checkBlock
 * Read through each line of a mafBlock and filter duplicates, reporting the
 * top scoring duplication only.
 *
 * Sequence ('s') lines are grouped by species into a linked list of
 * duplicate_t records, each carrying a list of the maf lines seen for that
 * species. If no species occurs more than once the block is handed to
 * reportBlock() as is. Otherwise a consensus is built from all of the
 * block's sequences, passed to findBestDupes() together with the species
 * list, and the block is emitted through reportBlockWithDuplicates().
 *
 * block: the alignment block to examine. The scored-line records store
 *        pointers to the block's own lines; the lines are not copied.
 */
void checkBlock(mafBlock_t *block) {
    mafLine_t *ml = maf_mafBlock_getHeadLine(block);
    unsigned n = maf_mafLine_getNumberOfSequences(ml);
    // private copies of every sequence field; input to buildConsensus()
    char **sequences = (char **) de_malloc(sizeof(char *) * n);
    unsigned index = 0;
    bool containsDuplicates = false;
    // d tracks the last node of the species list so appends are O(1)
    duplicate_t *d = NULL, *dupSpeciesHead = NULL;
    for (; ml != NULL; ml = maf_mafLine_getNext(ml)) {
        if (maf_mafLine_getType(ml) != 's') {
            // skip non-sequence lines
            continue;
        }
        sequences[index] = de_strdup(maf_mafLine_getSequence(ml));
        ++index;
        duplicate_t *thisDup = findDuplicate(dupSpeciesHead, maf_mafLine_getSpecies(ml));
        if (thisDup == NULL) {
            // first instance of species, add to list
            if (dupSpeciesHead == NULL) {
                dupSpeciesHead = newDuplicate();
                d = dupSpeciesHead;
            } else {
                d->next = newDuplicate();
                d = d->next;
            }
            d->species = de_strdup(maf_mafLine_getSpecies(ml));
            // create the mafline linked list; head and tail coincide for now
            d->headScoredMaf = newScoredMafLine();
            d->headScoredMaf->mafLine = ml;
            d->tailScoredMaf = d->headScoredMaf;
        } else {
            // this sequence is a duplicate, extend the duplicate list.
            // numSequences is only bumped for repeat occurrences; its
            // starting value comes from newDuplicate() (not visible here).
            containsDuplicates = true;
            ++(thisDup->numSequences);
            scoredMafLine_t *sml = newScoredMafLine();
            sml->mafLine = ml;
            thisDup->tailScoredMaf->next = sml;
            thisDup->tailScoredMaf = sml;
        }
    }
    if (!containsDuplicates) {
        reportBlock(block);
    } else {
        // this block contains duplicates
        char *consensus = (char *) de_malloc(longestLine(block) + 1);
        consensus[0] = '\0';
        buildConsensus(consensus, sequences, n,
                       maf_mafLine_getLineNumber(maf_mafBlock_getHeadLine(block))); // lineno used for error reporting
        findBestDupes(dupSpeciesHead, consensus);
        reportBlockWithDuplicates(block, dupSpeciesHead);
        free(consensus);
    }
    // clean up: one shared tail for both outcomes
    destroyStringArray(sequences, n);
    destroyDuplicates(dupSpeciesHead);
}
Exemplo n.º 2
0
/*
 * checkBlock
 * Read through each line of a mafBlock and filter duplicates, reporting the
 * top scoring duplication only.
 *
 * Sequence ('s') lines are grouped by species into a linked list of
 * duplicate_t records, each carrying a list of the maf lines seen for that
 * species. If no species occurs more than once the block is handed to
 * reportBlock() as is. Otherwise a consensus is built from all of the
 * block's sequences, passed to findBestDupes() together with the species
 * list, and the block is emitted through reportBlockWithDuplicates().
 *
 * block: the alignment block to examine. The scored-line records store
 *        pointers to the block's own lines; the lines are not copied.
 */
void checkBlock(mafBlock_t *block) {
    unsigned n = maf_numberOfSequencesMafLineList(block->headLine);
    // private copies of every sequence field; input to buildConsensus()
    char **sequences = (char **) de_malloc(sizeof(char *) * n);
    unsigned index = 0;
    bool containsDuplicates = false;
    // d tracks the last node of the species list so appends are O(1)
    duplicate_t *d = NULL, *dupSpeciesHead = NULL;
    for (mafLine_t *m = block->headLine; m != NULL; m = m->next) {
        if (m->type != 's') {
            // skip non-sequence lines
            continue;
        }
        sequences[index] = (char *) de_malloc(strlen(m->sequence) + 1);
        strcpy(sequences[index], m->sequence);
        ++index;
        duplicate_t *thisDup = findDuplicate(dupSpeciesHead, m->species);
        if (thisDup == NULL) {
            // add new duplicate species
            debug("adding new species %s\n", m->species);
            if (dupSpeciesHead == NULL) {
                dupSpeciesHead = newDuplicate();
                d = dupSpeciesHead;
            } else {
                d->next = newDuplicate();
                d = d->next;
            }
            // buffer is sized to the actual name so strcpy cannot overrun it
            d->species = (char *) de_malloc(strlen(m->species) + 1);
            strcpy(d->species, m->species);
            // create the mafline linked list
            d->headScoredMaf = newScoredMafLine();
            d->headScoredMaf->mafLine = m;
        } else {
            // this sequence is a duplicate, extend the duplicate list.
            debug("extending duplicate on species %s\n", m->species);
            containsDuplicates = true;
            // walk to the last record of this species' list and append there
            scoredMafLine_t *ml = thisDup->headScoredMaf;
            while (ml->next != NULL) {
                ml = ml->next;
            }
            ml->next = newScoredMafLine();
            ml->next->mafLine = m;
        }
    }
    if (!containsDuplicates) {
        reportBlock(block);
    } else {
        // this block contains duplicates
        char *consensus = (char *) de_malloc(longestLine(block) + 1);
        consensus[0] = '\0';
        // the head line's number is passed along with the sequences
        buildConsensus(consensus, sequences, n, block->headLine->lineNumber);
        findBestDupes(dupSpeciesHead, consensus);
        reportBlockWithDuplicates(block, dupSpeciesHead);
        free(consensus);
    }
    // clean up: one shared tail for both outcomes
    destroyStringArray(sequences, n);
    destroyDuplicates(dupSpeciesHead);
}