static hash_element_t *HASH_get_no_complaining(const hash_t *hash, const char *raw_key, int i){ const char *key = STRING_get_utf8_chars(raw_key); unsigned int index = oat_hash(key,i) % hash->elements_size; hash_element_t *element=hash->elements[index]; //fprintf(stderr,"start searching\n"); while(element!=NULL && (element->i!=i || strcmp(key,element->key))) { //fprintf(stderr,"key: -%s- (-%s-), i: %d (%d)\n",element->key,key,element->i,i); element=element->next; } //fprintf(stderr,"end searching\n"); return element; }
/*
 * Link `element` into `hash` under (key, i).
 *
 * The element is pushed onto the front of the bucket chosen by
 * oat_hash(key, i) modulo hash->elements_size, and hash->num_elements is
 * incremented. The key pointer itself is stored in the element (no copy is
 * made here).
 *
 * In non-RELEASE builds an assertion fires if (key, i) is already present,
 * except for keys equal to NOTUSED_EFFECT_NAME.
 */
static void put2(hash_t *hash, const char *key, int i, hash_element_t *element){
#if !defined(RELEASE)
  // The pd and faust instruments create effect names called "NOTUSED" for
  // unused effects, so duplicates of that name are expected and not checked.
  if (strcmp(NOTUSED_EFFECT_NAME, key) != 0) {
    // NOTE. Hitting this one is not necessarily a bug. But since it's so
    // seldom that we overwrite hash table elements, it seems like a good idea
    // to have this check here.
    R_ASSERT(!HASH_has_key_at(hash, key, i));
  }
#endif

  unsigned int bucket = oat_hash(key,i) % hash->elements_size;

  element->key = key;
  element->i = i;

  // Push onto the front of the bucket's chain.
  element->next = hash->elements[bucket];
  hash->elements[bucket] = element;

  hash->num_elements++;
}
/*!
 * Insert a key/value pair into the open-addressed hash table.
 *
 * The home slot is oat_hash(key) % MAX_ARRAY_SIZE. A slot whose key starts
 * with '\0' is treated as empty. If a slot already holds `key`, its value is
 * overwritten with strcpy() (no bounds check is performed here).
 *
 * Returns HT_MEM_ERROR when the fill ratio array_size / MAX_ARRAY_SIZE
 * exceeds MAXFILL, HT_SUCCESS when the pair was stored or updated, and
 * HT_FAILURE when the probe loop runs out without finding a place.
 */
static int hashtab_insert( pdata *pd, const char *key, const char *value){
    unsigned int probe, offset, slot, hkey;

    /* Refuse to insert once the table is fuller than MAXFILL allows */
    if( (double) pd->array_size / (double) MAX_ARRAY_SIZE > MAXFILL ){
        return HT_MEM_ERROR;
    }

    /* Hash the key; probing starts at its home slot */
    hkey = oat_hash( key) % MAX_ARRAY_SIZE;
    probe = hkey;

    /* Home slot is empty: store directly */
    if( pd->array[probe].key[0] == '\0'){
        ht_add( pd, probe, hkey, key, value);
        return HT_SUCCESS;
    }

    /* Home slot already holds this key: replace the value */
    if( strcmp( pd->array[probe].key, key) == 0){
        strcpy( pd->array[probe].value, value);
        return HT_SUCCESS;
    }

    /* Home slot holds another key with the same hash: advance at least one
     * slot, stopping at the next slot that is empty or carries this hash */
    if( pd->array[probe].hkey == hkey){
        do{
            probe = (probe+1)%MAX_ARRAY_SIZE;
        }while( pd->array[probe].hkey != hkey && pd->array[probe].key[0] != '\0' );
    }

    for( offset=0; offset<pd->array_size+1; offset++){
        slot = (probe+offset)%MAX_ARRAY_SIZE;

        /* Empty slot: store here */
        if( pd->array[slot].key[0] == '\0'){
            ht_add( pd, slot, hkey, key, value);
            return HT_SUCCESS;
        }

        /* Same key: replace the value */
        if( strcmp( pd->array[slot].key, key) == 0){
            strcpy( pd->array[slot].value, value);
            return HT_SUCCESS;
        }

        /* Slot belongs to a different hash: call ht_shift_down() on it
         * (presumably this frees the slot -- confirm against ht_shift_down)
         * and store the new entry there */
        if( pd->array[slot].hkey != hkey){
            ht_shift_down( pd, slot);
            ht_add( pd, slot, hkey, key, value);
            return HT_SUCCESS;
        }
    }

    return HT_FAILURE;
}
/*!
 * Look up `key` in the hash table and copy its value into `value`.
 *
 * Probing starts at oat_hash(key) % MAX_ARRAY_SIZE and moves forward one slot
 * at a time (wrapping around) until either the key is found or an empty slot
 * (key starting with '\0') is reached. The loop has no other exit, so it
 * relies on the table containing the key or at least one empty slot.
 *
 * `value` is filled with strcpy(); the caller must supply a buffer large
 * enough for the stored value.
 *
 * Returns HT_SUCCESS when the key was found, HT_FAILURE otherwise.
 */
static int hashtab_find( pdata *pd, const char *key, char *value){
    unsigned int slot = oat_hash( key) % MAX_ARRAY_SIZE;

    for(;;){
        /* An empty slot ends the probe sequence: the key is not stored */
        if( pd->array[slot].key[0] == '\0'){
            return HT_FAILURE;
        }

        if( strcmp( pd->array[slot].key, key) == 0){
            break;
        }

        slot = (slot+1)%MAX_ARRAY_SIZE;
    }

    strcpy( value, pd->array[slot].value);
    return HT_SUCCESS;
}
/*
 * Unlink the element stored under (raw_key, i) from `hash`.
 *
 * The raw key is converted with STRING_get_utf8_chars(), the bucket is
 * oat_hash(key, i) modulo hash->elements_size, and the first element in that
 * bucket's chain with the same `i` and an equal key string is removed from
 * the chain. The element is only unlinked; nothing is released here.
 *
 * Returns true if an element was removed (hash->num_elements is then
 * decremented), false if no matching element exists.
 */
bool HASH_remove_at(hash_t *hash, const char *raw_key, int i){
  const char *key = STRING_get_utf8_chars(raw_key);
  unsigned int bucket = oat_hash(key,i) % hash->elements_size;

  // `link` always points at the pointer that refers to the current element:
  // first the bucket head, afterwards the `next` field of the predecessor.
  hash_element_t **link = &hash->elements[bucket];

  while (*link != NULL) {
    if ((*link)->i == i && strcmp(key, (*link)->key) == 0)
      break;
    link = &(*link)->next;
  }

  if (*link == NULL)
    return false;

  *link = (*link)->next;
  hash->num_elements--;

  return true;
}
/*
 * Compute the relative Robinson-Foulds (RF) distance between one large
 * reference tree and each tree of a set of smaller, possibly multi-furcating
 * trees, and write one "<index> <rf>" line per analyzed small tree to an
 * output file.
 *
 * Outline:
 *   1. Parse the reference tree from tree_file and hash all of its
 *      bipartitions into `h`.
 *   2. For every small tree read from bootStrapFile that has at least one
 *      split: build a bit mask of the taxa it contains, re-hash the masked
 *      reference bipartitions into a fresh table, count how many of the small
 *      tree's bipartitions are found there, and derive the relative RF.
 *   3. Print summary statistics and release all resources.
 *
 * The function reads the file-scope names workdir, run_id, tree_file,
 * bootStrapFile and masterTime, which are defined outside this block.
 *
 * tr   - tree structure that receives the reference tree
 * adef - analysis settings; adef->mode must be PLAUSIBILITY_CHECKER
 */
void plausibilityChecker(tree *tr, analdef *adef)
{
  FILE
    *treeFile,
    *rfFile;

  tree
    *smallTree = (tree *)rax_malloc(sizeof(tree));

  char
    rfFileName[1024];

  /* init hash table for big reference tree */
  hashtable
    *h = initHashTable(tr->mxtips * 2 * 2);

  /* init the bit vectors we need for computing and storing bipartitions
     during the tree traversal */
  unsigned int
    vLength,
    **bitVectors = initBitVector(tr, &vLength);

  int
    numberOfTreesAnalyzed = 0,
    branchCounter = 0,
    nameLength,
    i;

  double
    avgRF = 0.0;

  /* set up the output file name; the write is bounded by the buffer size so
     that a long workdir/run_id cannot overflow rfFileName, and truncation is
     treated as a fatal error */
  nameLength = snprintf(rfFileName, sizeof(rfFileName), "%sRAxML_RF-Distances.%s", workdir, run_id);

  assert(nameLength >= 0 && (size_t)nameLength < sizeof(rfFileName));

  rfFile = myfopen(rfFileName, "wb");

  assert(adef->mode == PLAUSIBILITY_CHECKER);

  /* open the big reference tree file and parse it */
  treeFile = myfopen(tree_file, "r");

  printBothOpen("Parsing reference tree %s\n", tree_file);

  treeReadLen(treeFile, tr, FALSE, TRUE, TRUE, adef, TRUE, FALSE);

  assert(tr->mxtips == tr->ntips);

  printBothOpen("The reference tree has %d tips\n", tr->ntips);

  fclose(treeFile);

  /* extract all induced bipartitions from the big tree and store them in the
     hash table */
  bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, 0, BIPARTITIONS_RF, (branchInfo *)NULL, &branchCounter, 1, FALSE, FALSE);

  assert(branchCounter == tr->mxtips - 3);

  /* now see how many small trees we have */
  treeFile = getNumberOfTrees(tr, bootStrapFile, adef);

  checkTreeNumber(tr->numberOfTrees, bootStrapFile);

  /* allocate a data structure for parsing the potentially multi-furcating
     tree */
  allocateMultifurcations(tr, smallTree);

  /* loop over all small trees */
  for(i = 0; i < tr->numberOfTrees; i++)
    {
      int
        numberOfSplits = readMultifurcatingTree(treeFile, smallTree, adef, TRUE);

      /* trees without any split are skipped and only counted in the summary */
      if(numberOfSplits > 0)
        {
          unsigned int
            entryCount = 0,
            k,
            j,
            *masked = (unsigned int *)rax_calloc(vLength, sizeof(unsigned int)),
            *smallTreeMask = (unsigned int *)rax_calloc(vLength, sizeof(unsigned int));

          hashtable
            *rehash = initHashTable(tr->mxtips * 2 * 2);

          double
            rf,
            maxRF;

          int
            bCounter = 0,
            bips,
            firstTaxon,
            taxa = 0;

          /* progress output is limited to every 100th analyzed tree */
          if(numberOfTreesAnalyzed % 100 == 0)
            printBothOpen("Small tree %d has %d tips and %d bipartitions\n", i, smallTree->ntips, numberOfSplits);

          /* compute the maximum RF distance for computing the relative RF
             distance later on.

             The RF distance is not normalized by 2 * (n-3) here, because the
             multi-furcating small tree may contain fewer bipartitions. The
             normalization factor is therefore 2 * numberOfSplits, where
             numberOfSplits is the number of bipartitions in the small tree. */
          maxRF = (double)(2 * numberOfSplits);

          /* set up a bit mask in which only the bits of those taxa are set
             that are actually present in the small tree we just read; both
             directions from the start node are covered */
          setupMask(smallTreeMask, smallTree->start, smallTree->mxtips);
          setupMask(smallTreeMask, smallTree->start->back, smallTree->mxtips);

          /* index of the first taxon of the small tree; it is used to store
             the bipartitions unambiguously */
          firstTaxon = smallTree->start->number;

          /* make sure that the mask contains exactly as many set bits as
             there are taxa in this small tree */
          for(j = 0; j < vLength; j++)
            taxa += BIT_COUNT(smallTreeMask[j]);
          assert(taxa == smallTree->ntips);

          /* now re-hash the big tree by applying the above bit mask:
             loop over the hash table of the reference tree */
          for(k = 0, entryCount = 0; k < h->tableSize; k++)
            {
              if(h->table[k] != NULL)
                {
                  entry
                    *e = h->table[k];

                  /* collisions are resolved by chaining, hence the loop */
                  do
                    {
                      unsigned int
                        *bitVector = e->bitVector;

                      hashNumberType
                        position;

                      int
                        count = 0;

                      /* double check that our tree mask contains the first
                         taxon of the small tree */
                      assert(smallTreeMask[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH]);

                      /* if the first taxon is set, re-hash the bit-wise
                         complement of the bit vector instead; count collects
                         the number of set bits of the masked vector */
                      if(bitVector[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH])
                        {
                          /* hash complement */
                          for(j = 0; j < vLength; j++)
                            {
                              masked[j] = (~bitVector[j]) & smallTreeMask[j];
                              count += BIT_COUNT(masked[j]);
                            }
                        }
                      else
                        {
                          /* hash this vector */
                          for(j = 0; j < vLength; j++)
                            {
                              masked[j] = bitVector[j] & smallTreeMask[j];
                              count += BIT_COUNT(masked[j]);
                            }
                        }

                      /* padding the last bits is not required because they
                         are cleared by the AND with smallTreeMask */

                      /* make sure that we re-hash the canonical
                         representation of the bipartition, in which the bit
                         for firstTaxon is 0 */
                      assert(!(masked[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH]));

                      /* only re-hash the masked bipartition of the large tree
                         if it is non-trivial (two or more bits set) */
                      if(count > 1)
                        {
                          /* compute hash */
                          position = oat_hash((unsigned char *)masked, sizeof(unsigned int) * vLength);
                          position = position % rehash->tableSize;

                          /* insert into the table that holds the bipartitions
                             of the large tree pruned down to the taxa of the
                             small tree */
                          insertHashPlausibility(masked, rehash, vLength, position);
                        }

                      entryCount++;

                      e = e->next;
                    }
                  while(e != NULL);
                }
            }

          /* make sure that we tried to re-hash all bipartitions of the
             original tree */
          assert(entryCount == (unsigned int)(tr->mxtips - 3));

          /* traverse the small tree and count how many bipartitions it shares
             with the corresponding induced tree from the large tree */
          bips = bitVectorTraversePlausibility(bitVectors, smallTree->start->back, smallTree->mxtips, vLength, rehash, &bCounter, firstTaxon, smallTree, TRUE);

          /* compute the relative RF */
          rf = (double)(2 * (numberOfSplits - bips)) / maxRF;

          assert(numberOfSplits >= bips);

          assert(rf <= 1.0);

          avgRF += rf;

          if(numberOfTreesAnalyzed % 100 == 0)
            printBothOpen("Relative RF tree %d: %f\n\n", i, rf);

          fprintf(rfFile, "%d %f\n", i, rf);

          /* make sure that all non-trivial splits of the multi-furcating tree
             were checked; there can be fewer than n - 3 of them */
          assert(bCounter == numberOfSplits);

          /* free masks and hash table for this iteration */
          rax_free(smallTreeMask);
          rax_free(masked);
          freeHashTable(rehash);
          rax_free(rehash);
          numberOfTreesAnalyzed++;
        }
    }

  printBothOpen("Number of small trees skipped: %d\n\n", tr->numberOfTrees - numberOfTreesAnalyzed);

  /* NOTE(review): if no tree was analyzed this divides 0.0 by 0 -- confirm
     that the resulting output is acceptable */
  printBothOpen("Average RF distance %f\n\n", avgRF / (double)numberOfTreesAnalyzed);

  printBothOpen("Total execution time: %f secs\n\n", gettime() - masterTime);

  printBothOpen("\nFile containing all %d pair-wise RF distances written to file %s\n\n", numberOfTreesAnalyzed, rfFileName);

  fclose(treeFile);
  fclose(rfFile);

  /* free the data structure used for parsing the potentially multi-furcating
     tree */
  freeMultifurcations(smallTree);
  rax_free(smallTree);

  freeBitVectors(bitVectors, 2 * tr->mxtips);
  rax_free(bitVectors);

  freeHashTable(h);
  rax_free(h);
}
/*
 * Recursively traverse the subtree below p, compute the bipartition bit
 * vector of every inner node, and count how many of the non-trivial
 * bipartitions are present in hash table h.
 *
 * bitVectors     - per-node bit vectors, indexed by node number
 * p              - node at which the traversal of this subtree starts
 * numsp          - value passed to isTip() to decide whether a node is a tip
 * vectorLength   - number of unsigned ints per bit vector
 * h              - hash table in which the bipartitions are looked up
 * countBranches  - incremented once for every inner branch that is looked up
 * firstTaxon     - taxon whose bit must be 0 in every looked-up vector
 * tr             - only passed through to the recursive calls
 * multifurcating - selects newviewBipartitionsMultifurcating() over
 *                  newviewBipartitions()
 *
 * Returns the sum of the findHash() results over this subtree.
 */
static int bitVectorTraversePlausibility(unsigned int **bitVectors, nodeptr p, int numsp, unsigned int vectorLength, hashtable *h, int *countBranches, int firstTaxon, tree *tr, boolean multifurcating)
{
  int
    shared = 0;

  nodeptr
    child;

  /* a tip only induces a trivial bipartition */
  if(isTip(p->number, numsp))
    return 0;

  /* descend into all subtrees first, following the ring of nodes around p */
  child = p->next;

  do
    {
      shared += bitVectorTraversePlausibility(bitVectors, child->back, numsp, vectorLength, h, countBranches, firstTaxon, tr, multifurcating);
      child = child->next;
    }
  while(child != p);

  /* compute the bipartition induced by the branch p, p->back; the function
     used depends on whether the tree is multi-furcating or bifurcating */
  if(multifurcating)
    newviewBipartitionsMultifurcating(bitVectors, p, numsp, vectorLength);
  else
    newviewBipartitions(bitVectors, p, numsp, vectorLength);

  assert(p->x);

  /* a branch leading to a tip is not an inner branch, so there is nothing to
     look up */
  if(isTip(p->back->number, numsp))
    return shared;

  {
    /* bit vector of the bipartition induced by this inner branch */
    unsigned int
      *bipartition = bitVectors[p->number];

    /* hash number of that bit vector */
    hashNumberType
      slot = oat_hash((unsigned char *)bipartition, sizeof(unsigned int) * vectorLength) % h->tableSize;

    /* of the two complementary forms of a bipartition, the one that does not
       contain the first taxon of the small tree is the canonical one */
    assert(!(bipartition[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH]));

    /* count the branch so the caller can verify that all inner branches were
       traversed */
    (*countBranches)++;

    /* add the lookup result to the number of shared bipartitions */
    return shared + findHash(bipartition, h, vectorLength, slot);
  }
}