/*
 * Read through each line of a maf block, group its sequence ('s') lines by
 * species, and report the block.
 *
 * If no species occurs more than once the block is passed to reportBlock()
 * as is. Otherwise a consensus is built from every sequence line, handed to
 * findBestDupes() together with the per-species lists, and the block is
 * emitted through reportBlockWithDuplicates(). Per the original author's
 * note the intent is to report only the top scoring duplicate; the scoring
 * itself lives in findBestDupes(), which is not visible here.
 *
 * block: block to inspect. The temporary per-species lists store pointers to
 *        the block's own lines; only the sequence strings are copied.
 */
void checkBlock(mafBlock_t *block) {
    mafLine_t *ml = maf_mafBlock_getHeadLine(block);
    unsigned n = maf_mafLine_getNumberOfSequences(ml);
    // Private copies of every sequence string, consumed by buildConsensus().
    // NOTE(review): assumes n equals the number of 's' lines walked below,
    // otherwise index could run past the array -- confirm against the helper.
    char **sequences = (char **) de_malloc(sizeof(char *) * n);
    int index = 0;
    bool containsDuplicates = false;
    duplicate_t *d = NULL; // last node appended to the species list
    duplicate_t *dupSpeciesHead = NULL;

    for (; ml != NULL; ml = maf_mafLine_getNext(ml)) {
        if (maf_mafLine_getType(ml) != 's') {
            // skip non-sequence lines
            continue;
        }
        sequences[index] = de_strdup(maf_mafLine_getSequence(ml));
        duplicate_t *thisDup = findDuplicate(dupSpeciesHead, maf_mafLine_getSpecies(ml));
        if (thisDup == NULL) {
            // first instance of species, add to list
            if (dupSpeciesHead == NULL) {
                dupSpeciesHead = newDuplicate();
                d = dupSpeciesHead;
            } else {
                d->next = newDuplicate();
                d = d->next;
            }
            d->species = de_strdup(maf_mafLine_getSpecies(ml));
            // create the mafline linked list; head and tail start as the same node
            d->headScoredMaf = newScoredMafLine();
            d->headScoredMaf->mafLine = ml;
            d->tailScoredMaf = d->headScoredMaf;
        } else {
            // this sequence is a duplicate, extend the duplicate list.
            containsDuplicates = true;
            ++(thisDup->numSequences);
            // append at the cached tail so no list walk is needed
            scoredMafLine_t *sml = thisDup->tailScoredMaf;
            sml->next = newScoredMafLine();
            sml = sml->next;
            sml->mafLine = ml;
            thisDup->tailScoredMaf = sml;
        }
        ++index;
    }

    if (containsDuplicates) {
        // this block contains duplicates
        char *consensus = (char *) de_malloc(longestLine(block) + 1);
        consensus[0] = '\0';
        // the head line's line number is passed along for error reporting
        buildConsensus(consensus, sequences, n,
                       maf_mafLine_getLineNumber(maf_mafBlock_getHeadLine(block)));
        findBestDupes(dupSpeciesHead, consensus);
        reportBlockWithDuplicates(block, dupSpeciesHead);
        free(consensus);
    } else {
        reportBlock(block);
    }

    // clean up (shared by both outcomes)
    destroyStringArray(sequences, n);
    destroyDuplicates(dupSpeciesHead);
}
/*
 * Read through each line of a maf block, group its sequence ('s') lines by
 * species, and report the block.
 *
 * If no species occurs more than once the block is passed to reportBlock()
 * as is. Otherwise a consensus is built from every sequence line, handed to
 * findBestDupes() together with the per-species lists, and the block is
 * emitted through reportBlockWithDuplicates(). Per the original author's
 * note the intent is to report only the top scoring duplicate; the scoring
 * itself lives in findBestDupes(), which is not visible here.
 *
 * block: block to inspect. The temporary per-species lists store pointers to
 *        the block's own lines; only the species names and sequence strings
 *        are copied.
 */
void checkBlock(mafBlock_t *block) {
    unsigned n = maf_numberOfSequencesMafLineList(block->headLine);
    // Private copies of every sequence string, consumed by buildConsensus().
    // NOTE(review): assumes n equals the number of 's' lines walked below,
    // otherwise index could run past the array -- confirm against the helper.
    char **sequences = (char **) de_malloc(sizeof(char *) * n);
    int index = 0;
    bool containsDuplicates = false;
    duplicate_t *d = NULL; // last node appended to the species list
    duplicate_t *dupSpeciesHead = NULL;

    for (mafLine_t *m = block->headLine; m != NULL; m = m->next) {
        if (m->type != 's') {
            // skip non-sequence lines
            continue;
        }
        sequences[index] = (char *) de_malloc(strlen(m->sequence) + 1);
        strcpy(sequences[index], m->sequence);
        duplicate_t *thisDup = findDuplicate(dupSpeciesHead, m->species);
        if (thisDup == NULL) {
            // add new duplicate species
            debug("adding new species %s\n", m->species);
            if (dupSpeciesHead == NULL) {
                dupSpeciesHead = newDuplicate();
                d = dupSpeciesHead;
            } else {
                d->next = newDuplicate();
                d = d->next;
            }
            // Size the copy from the actual name: a fixed-size buffer would
            // be overrun by strcpy() whenever the name is too long for it.
            d->species = (char *) de_malloc(strlen(m->species) + 1);
            strcpy(d->species, m->species);
            // create the mafline linked list
            d->headScoredMaf = newScoredMafLine();
            d->headScoredMaf->mafLine = m;
        } else {
            // this sequence is a duplicate, extend the duplicate list.
            debug("extending duplicate on species %s\n", m->species);
            containsDuplicates = true;
            // no tail pointer is kept here, so walk to the end of the list
            scoredMafLine_t *ml = thisDup->headScoredMaf;
            while (ml->next != NULL) {
                ml = ml->next;
            }
            ml->next = newScoredMafLine();
            ml = ml->next;
            ml->mafLine = m;
        }
        ++index;
    }

    if (containsDuplicates) {
        // this block contains duplicates
        char *consensus = (char *) de_malloc(longestLine(block) + 1);
        consensus[0] = '\0';
        buildConsensus(consensus, sequences, n, block->headLine->lineNumber);
        findBestDupes(dupSpeciesHead, consensus);
        reportBlockWithDuplicates(block, dupSpeciesHead);
        free(consensus);
    } else {
        reportBlock(block);
    }

    // clean up (shared by both outcomes)
    destroyStringArray(sequences, n);
    destroyDuplicates(dupSpeciesHead);
}