/*
 * Small driver for the string-keyed HashTable API: inserts seven entries,
 * removes four of them, attempts a second put on an existing key, then prints
 * the lookup result for every key followed by the table size.
 *
 * Lookups are printed through a NULL guard: four of the keys were removed
 * just before, and handing a null pointer to printf's %s conversion is
 * undefined behaviour (and crashes outright when the compiler lowers
 * printf("%s\n", p) to puts(p)).
 * NOTE(review): assumes HashTable_get reports a missing key with NULL --
 * confirm against its definition.
 */
int main(int argc, char *argv[])
{
    /* Keys queried at the end, in the order they were originally printed. */
    char *lookupKeys[] = { "1", "2", "3", "4", "5", "6", "7" };
    size_t k;
    HashTable ht = newHashTable(65535);

    (void)argc;
    (void)argv;

    HashTable_put(ht, "1", "two");
    HashTable_put(ht, "2", "three");
    HashTable_put(ht, "3", "five");
    HashTable_put(ht, "4", "seven");
    HashTable_put(ht, "5", "eleven");
    HashTable_put(ht, "6", "thirteen");
    HashTable_put(ht, "7", "seventeen");

    HashTable_remove(ht, "1");
    HashTable_remove(ht, "2");
    HashTable_remove(ht, "6");
    HashTable_remove(ht, "7");

    /* Second put on a key that is still present; the value text suggests the
     * table is expected to reject it -- TODO confirm in HashTable_put. */
    HashTable_put(ht, "3", "not allowed");

    for (k = 0; k < sizeof lookupKeys / sizeof lookupKeys[0]; k++) {
        const char *value = HashTable_get(ht, lookupKeys[k]);
        printf("%s\n", value != NULL ? value : "(null)");
    }

    printf("size: %d\n", HashTable_size(ht));

    freeHashTable(&ht);
    return 0;
}
int main(int argc, char **argv) { int i; struct Hashtable hash_table; //Allocate memory for a new hash table newHashTable(&hash_table, HASHTABLE_LENGTH); printf("New hash table containing no values\n"); //Print the hash table containing no values for(i=0;i<HASHTABLE_LENGTH;i++){ print_ll(&(hash_table.table[i])); } putchar('\n'); //Insert several values into the hash table insert(&hash_table, "Hello World"); insert(&hash_table, "Hello World"); insert(&hash_table, "Hel World"); insert(&hash_table, "GoodBye"); insert(&hash_table, "GoodBye"); insert(&hash_table, "wangbang"); insert(&hash_table, "Holachola"); insert(&hash_table, "antidis"); printf("Test values inserted printing full hash_table...\n"); //Print the list again for(i=0;i<HASHTABLE_LENGTH;i++){ print_ll(&(hash_table.table[i])); } //Deallocate memory freeHashTable(&hash_table); }
/*
 * Release the thread-label table, if one exists.
 * Each stored value is handed to stgFree via freeHashTable, and the global
 * pointer is cleared afterwards so a later call is a no-op.
 */
void freeThreadLabelTable(void)
{
    if (threadLabels == NULL) {
        return;   /* nothing was ever allocated */
    }

    freeHashTable(threadLabels, stgFree);
    threadLabels = NULL;
}
/*
 * Tear down the static pointer table.
 * Does nothing when the table (spt) was never created; otherwise frees it
 * with freeSptEntry as the per-entry destructor, clears the global pointer
 * and, in THREADED_RTS builds, closes the lock that guards the table.
 */
void exitStaticPtrTable()
{
    if (!spt) {
        return;
    }

    freeHashTable(spt, freeSptEntry);
    spt = NULL;
#ifdef THREADED_RTS
    closeMutex(&spt_lock);
#endif
}
/*
 * Free a hash table whose keys are owned strings.
 * Walks every bucket chain and releases each node's key with tlFree, then
 * passes the table itself to freeHashTable.
 */
void freeStringHashTable(HashTable *t)
{
    int bucket;

    for (bucket = 0; bucket < t->cap; bucket++) {
        LinkedList *node;

        /* Only the keys are released here; the nodes are left for
         * freeHashTable below. */
        for (node = t->a[bucket]; node; node = node->next) {
            tlFree(node->key);
        }
    }

    freeHashTable(t);
}
/*
 * Release the thread-label table under threadLabels_mutex.
 * When a table exists its values are freed with stgFree and the global
 * pointer is reset to NULL; the lock is held for the whole operation.
 */
void freeThreadLabelTable(void)
{
    ACQUIRE_LOCK(&threadLabels_mutex);

    if (threadLabels) {
        freeHashTable(threadLabels, stgFree);
        threadLabels = NULL;
    }

    RELEASE_LOCK(&threadLabels_mutex);
}
/*
 * Shut down the stable pointer machinery.
 * Frees the address->stable-name hash (no per-value destructor) and the
 * stable pointer table array when they exist, leaves both globals NULL,
 * resets SPT_size to 0 and, in THREADED_RTS builds, closes stable_mutex.
 */
void exitStablePtrTable(void)
{
    if (addrToStableHash != NULL) {
        freeHashTable(addrToStableHash, NULL);
        addrToStableHash = NULL;
    }

    if (stable_ptr_table != NULL) {
        stgFree(stable_ptr_table);
        stable_ptr_table = NULL;
    }

    SPT_size = 0;

#ifdef THREADED_RTS
    closeMutex(&stable_mutex);
#endif
}
/*
 * Look up `key` in `t` and print the node that was found, or a miss message
 * when the lookup returns a null node.
 */
static void printLookup(HashTablePtr t, int key)
{
    HashNodePtr node = getFromHashTable(t, key);

    if (node) {
        printf("key %d values %d\n", node->key, node->value);
    } else {
        printf("cann't find key %d\n", key);
    }
}

/*
 * Exercise the integer hash table: insert, look up present and absent keys,
 * remove, re-insert keys 2, 13, 24 and 35 (equal modulo the capacity of 11,
 * presumably to force collisions -- depends on hashInt), printing the table
 * between steps.
 *
 * Every lookup goes through printLookup, so a failed lookup is reported
 * instead of dereferencing a null node (the lookups of 13 and 24 used to be
 * dereferenced unchecked).
 */
int main()
{
    int capacity = 11, i;
    HashTablePtr t = newHashTable(capacity, hashInt, compare);

    putIntoHashTable(t, 1, 1);
    putIntoHashTable(t, 2, 2);
    putIntoHashTable(t, 3, 3);
    putIntoHashTable(t, 4, 4);
    putIntoHashTable(t, 5, 5);
    printHashTable(t);

    /* Keys 1..5 were inserted above; 0 was not. */
    for (i = 0; i <= 5; i++) {
        printLookup(t, i);
    }

    /* None of 11..16 has been inserted at this point. */
    for (i = 11; i <= 16; i++) {
        printLookup(t, i);
    }

    removeFromHashTable(t, 2);
    removeFromHashTable(t, 3);
    printHashTable(t);

    putIntoHashTable(t, 2, 2);
    putIntoHashTable(t, 13, 13);
    putIntoHashTable(t, 24, 24);
    printHashTable(t);

    removeFromHashTable(t, 2);
    printHashTable(t);

    /* 13 and 24 should still be reachable after 2 was removed. */
    printLookup(t, 13);
    printLookup(t, 24);

    putIntoHashTable(t, 35, 35);
    printHashTable(t);

    freeHashTable(t);
    return 0;
}
/*
 * Grow hash table `h` in place by h->incrementSize buckets.
 *
 * A temporary table of the larger size is allocated, every element of `h` is
 * relinked into it (bucket = element value modulo the new size), and then the
 * bucket arrays and sizes of the two tables are swapped so that `h` itself --
 * and therefore every pointer to it -- stays valid.  The temporary table,
 * which now owns the old (emptied) bucket array, is freed at the end.
 *
 * Does nothing if `h` is already flagged outOfMemory; sets that flag when the
 * temporary table cannot be allocated.
 */
void rehash(HashTable* h)
{
    HashTable *grown;
    HashElement **oldArray;
    unsigned long newSize, slot, bucket;

    if (h->outOfMemory != 0) {
        return;
    }

    grown = newHashTable(h->size+h->incrementSize);
    if (grown == NULL) {
        h->outOfMemory = 1;
        return;
    }

    /* Use the size the new table reports, and carry the increment over. */
    newSize = grown->size;
    grown->incrementSize = h->incrementSize;

    for (bucket = 0; bucket < h->size; bucket++) {
        HashElement *rest = h->hashArray[bucket];

        /* Detach the chain from the old table before redistributing it. */
        h->hashArray[bucket] = NULL;

        while (rest != NULL) {
            HashElement *cur = rest;
            rest = rest->next;

            /* Push the element onto the front of its new bucket chain. */
            slot = ((unsigned long) cur->value) % newSize;
            cur->next = grown->hashArray[slot];
            grown->hashArray[slot] = cur;
        }
    }

    /* Swap bucket arrays and sizes so `h` becomes the enlarged table. */
    oldArray = h->hashArray;
    h->hashArray = grown->hashArray;
    grown->hashArray = oldArray;
    grown->size = h->size;
    h->size = newSize;

    /* Next rehash is triggered at 90% of the new size. */
    h->rehashAt = h->size-(h->size/10);

    /* The temporary table now holds the old array; dispose of both. */
    freeHashTable(grown);
}
//initialize hash table void initHashTable(HT hashTable, char *dictName, int (*hashFun)(ElemType *word, int size)) { FILE *fr; fr = fopen(dictName, "rt"); if (fr == NULL) { printf("Can't open file\n"); return; } Node *table; int tableSize = TSIZE; int i; char word[keySize]; int r; while (1) { table = (Node *)malloc(tableSize * sizeof(Node)); hashTable->theList = table; for(i = tableSize - 1; i >= 0; i--){ hashTable->theList[i].key = malloc(keySize * sizeof(ElemType)); hashTable->theList[i].key[0] = '\0'; //initialize key element? hashTable->theList[i].next = NULL; } while((r = readAWord(fr, word)) != 0) { tableInsert(hashTable, word, hashFun); if ((float)(hashTable->nonEmpBucket) / (float)(hashTable->tableSize) > 0.75) { freeHashTable(hashTable); tableSize *= 2; hashTable->tableSize = tableSize; hashTable->numEntry = 0; hashTable->nonEmpBucket = 0; hashTable->maxListLeng = 0; hashTable->avgListLeng = 0; break; } } if (r == 0) { break; } } }
/* Called at the end of execution, to write out the Hpc *.tix file * for this exection. Safe to call, even if coverage is not used. */ void exitHpc(void) { debugTrace(DEBUG_hpc,"exitHpc"); if (hpc_inited == 0) { return; } // Only write the tix file if you are the original process. // Any sub-process from use of fork from inside Haskell will // not clober the .tix file. if (hpc_pid == getpid()) { FILE *f = fopen(tixFilename,"w"); writeTix(f); } freeHashTable(moduleHash, (void (*)(void *))freeHpcModuleInfo); moduleHash = NULL; stgFree(tixFilename); tixFilename = NULL; }
/*
 * Bring the address -> stable-name hash (addrToStableHash) back in sync with
 * stable_ptr_table.
 *
 * full: when true and a hash exists, the hash is discarded and rebuilt from
 *       scratch; otherwise only entries whose address differs from the
 *       recorded old address are re-hashed.
 *
 * The hash maps an entry's address to its index in stable_ptr_table.
 */
void updateStablePtrTable(rtsBool full)
{
    snEntry *entry;
    snEntry *table_end;

    if (full && addrToStableHash != NULL) {
        freeHashTable(addrToStableHash,NULL);
        addrToStableHash = allocHashTable();
    }

    table_end = &stable_ptr_table[SPT_size];

    /* Index 0 is unused, so the scan starts at index 1. */
    for (entry = stable_ptr_table + 1; entry < table_end; entry++) {

        if (entry->addr == NULL) {
            /* Target has been garbage collected: forget its old address. */
            if (entry->old != NULL) {
                removeHashTable(addrToStableHash, (W_)entry->old, NULL);
                entry->old = NULL;
            }
            continue;
        }

        /* An address that points into the table itself is not a live
         * target (presumably a free-list link -- confirm); skip it. */
        if (entry->addr >= (P_)stable_ptr_table && entry->addr < (P_)table_end) {
            continue;
        }

        /* Target still alive: re-hash this stable name. */
        if (full) {
            insertHashTable(addrToStableHash, (W_)entry->addr,
                            (void *)(entry - stable_ptr_table));
        } else if (entry->addr != entry->old) {
            removeHashTable(addrToStableHash, (W_)entry->old, NULL);
            insertHashTable(addrToStableHash, (W_)entry->addr,
                            (void *)(entry - stable_ptr_table));
        }
    }
}
/*
 * Plausibility checker: compares one large reference tree against a set of
 * smaller (possibly multifurcating) trees.
 *
 * The reference tree is read from tree_file and its bipartitions are stored
 * in hash table h.  For every small tree read from bootStrapFile that yields
 * at least one split, the reference bipartitions are masked down to the taxa
 * of the small tree and re-hashed; the small tree is then traversed to count
 * the bipartitions it shares with that induced tree.  The resulting relative
 * RF value is written, one "<index> <rf>" line per analysed tree, to
 * <workdir>RAxML_RF-Distances.<run_id>, and an average is printed at the end.
 *
 * tr:   tree structure that receives the reference tree.
 * adef: analysis definition; adef->mode must be PLAUSIBILITY_CHECKER.
 *
 * NOTE(review): rfFileName is assembled with unbounded strcpy/strcat into a
 * 1024-byte buffer; workdir + run_id are assumed to fit.
 * NOTE(review): if no small tree yields splits, numberOfTreesAnalyzed is 0
 * when the average is computed at the end.
 */
void plausibilityChecker(tree *tr, analdef *adef)
{
  FILE
    *treeFile,
    *rfFile;

  tree
    *smallTree = (tree *)rax_malloc(sizeof(tree));

  char
    rfFileName[1024];

  /* init hash table for big reference tree */
  hashtable
    *h = initHashTable(tr->mxtips * 2 * 2);

  /* init the bit vectors we need for computing and storing bipartitions
     during the tree traversal */
  unsigned int
    vLength,
    **bitVectors = initBitVector(tr, &vLength);

  int
    numberOfTreesAnalyzed = 0,
    branchCounter = 0,
    i;

  double
    avgRF = 0.0;

  /* set up an output file name */
  strcpy(rfFileName, workdir);
  strcat(rfFileName, "RAxML_RF-Distances.");
  strcat(rfFileName, run_id);

  rfFile = myfopen(rfFileName, "wb");

  assert(adef->mode == PLAUSIBILITY_CHECKER);

  /* open the big reference tree file and parse it */
  treeFile = myfopen(tree_file, "r");

  printBothOpen("Parsing reference tree %s\n", tree_file);

  treeReadLen(treeFile, tr, FALSE, TRUE, TRUE, adef, TRUE, FALSE);

  /* the reference tree is required to contain every taxon */
  assert(tr->mxtips == tr->ntips);

  printBothOpen("The reference tree has %d tips\n", tr->ntips);

  fclose(treeFile);

  /* extract all induced bipartitions from the big tree and store them in
     the hash table */
  bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, 0, BIPARTITIONS_RF, (branchInfo *)NULL, &branchCounter, 1, FALSE, FALSE);

  /* exactly mxtips - 3 bipartitions are expected from the reference tree */
  assert(branchCounter == tr->mxtips - 3);

  /* now see how many small trees we have; treeFile is re-used for the file
     of small trees from here on */
  treeFile = getNumberOfTrees(tr, bootStrapFile, adef);

  checkTreeNumber(tr->numberOfTrees, bootStrapFile);

  /* allocate a data structure for parsing the potentially multifurcating
     tree */
  allocateMultifurcations(tr, smallTree);

  /* loop over all small trees */
  for(i = 0; i < tr->numberOfTrees; i++)
    {
      int
        numberOfSplits = readMultifurcatingTree(treeFile, smallTree, adef, TRUE);

      /* trees that yield no splits are skipped (and counted as skipped in
         the summary below) */
      if(numberOfSplits > 0)
        {
          /* masked and smallTreeMask are zero-initialised bit vectors of
             vLength words; rehash is a per-tree table for the induced
             bipartitions of the reference tree */
          unsigned int
            entryCount = 0,
            k,
            j,
            *masked = (unsigned int *)rax_calloc(vLength, sizeof(unsigned int)),
            *smallTreeMask = (unsigned int *)rax_calloc(vLength, sizeof(unsigned int));

          hashtable
            *rehash = initHashTable(tr->mxtips * 2 * 2);

          double
            rf,
            maxRF;

          int
            bCounter = 0,
            bips,
            firstTaxon,
            taxa = 0;

          /* progress output only for every 100th analysed tree */
          if(numberOfTreesAnalyzed % 100 == 0)
            printBothOpen("Small tree %d has %d tips and %d bipartitions\n", i, smallTree->ntips, numberOfSplits);

          /* compute the maximum RF distance for computing the relative RF
             distance later-on */

          /* note that here we need to pay attention, since the RF distance
             is not normalized by 2 * (n-3) but we need to account for the
             fact that the multifurcating small tree will potentially contain
             fewer bipartitions.  Hence the normalization factor is obtained
             as 2 * numberOfSplits, where numberOfSplits is the number of
             bipartitions in the small tree. */
          maxRF = (double)(2 * numberOfSplits);

          /* now set up a bit mask where only the bits are set to one for
             those taxa that are actually present in the small tree we just
             read */

          /* note that I had to apply some small changes to this function to
             make it work for multi-furcating trees ! */
          setupMask(smallTreeMask, smallTree->start, smallTree->mxtips);
          setupMask(smallTreeMask, smallTree->start->back, smallTree->mxtips);

          /* now get the index of the first taxon of the small tree.
             we will use this to unambiguously store the bipartitions */
          firstTaxon = smallTree->start->number;

          /* make sure that this bit vector is set up correctly, i.e., that
             it contains as many non-zero bits as there are taxa in this
             small tree */
          for(j = 0; j < vLength; j++)
            taxa += BIT_COUNT(smallTreeMask[j]);

          assert(taxa == smallTree->ntips);

          /* now re-hash the big tree by applying the above bit mask */

          /* loop over hash table */
          for(k = 0, entryCount = 0; k < h->tableSize; k++)
            {
              if(h->table[k] != NULL)
                {
                  entry *e = h->table[k];

                  /* we resolve collisions by chaining, hence the loop here */
                  do
                    {
                      unsigned int
                        *bitVector = e->bitVector;

                      hashNumberType
                        position;

                      int
                        count = 0;

                      /* double check that our tree mask contains the first
                         taxon of the small tree */
                      assert(smallTreeMask[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH]);

                      /* if the first taxon is set then we will re-hash the
                         bit-wise complement of the bit vector.  The count
                         variable is used for a small optimization */
                      if(bitVector[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH])
                        {
                          /* hash complement */
                          for(j = 0; j < vLength; j++)
                            {
                              masked[j] = (~bitVector[j]) & smallTreeMask[j];
                              count += BIT_COUNT(masked[j]);
                            }
                        }
                      else
                        {
                          /* hash this vector */
                          for(j = 0; j < vLength; j++)
                            {
                              masked[j] = bitVector[j] & smallTreeMask[j];
                              count += BIT_COUNT(masked[j]);
                            }
                        }

                      /* note that padding the last bits is not required
                         because they are set to 0 automatically by
                         smallTreeMask */

                      /* make sure that we will re-hash the canonic
                         representation of the bipartition where the bit for
                         firstTaxon is set to 0! */
                      assert(!(masked[(firstTaxon - 1) / MASK_LENGTH] & mask32[(firstTaxon - 1) % MASK_LENGTH]));

                      /* only if the masked bipartition of the large tree is
                         a non-trivial bipartition (two or more bits set to
                         1) will we re-hash it */
                      if(count > 1)
                        {
                          /* compute hash */
                          position = oat_hash((unsigned char *)masked, sizeof(unsigned int) * vLength);
                          position = position % rehash->tableSize;

                          /* re-hash to the new hash table that contains the
                             bips of the large tree, pruned down to the taxa
                             contained in the small tree */
                          insertHashPlausibility(masked, rehash, vLength, position);
                        }

                      /* counted whether or not the entry was re-hashed */
                      entryCount++;

                      e = e->next;
                    }
                  while(e != NULL);
                }
            }

          /* make sure that we tried to re-hash all bipartitions of the
             original tree */
          assert(entryCount == (unsigned int)(tr->mxtips - 3));

          /* now traverse the small tree and count how many bipartitions it
             shares with the corresponding induced tree from the large tree */

          /* the following function also had to be modified to account for
             multi-furcating trees ! */
          bips = bitVectorTraversePlausibility(bitVectors, smallTree->start->back, smallTree->mxtips, vLength, rehash, &bCounter, firstTaxon, smallTree, TRUE);

          /* compute the relative RF */
          rf = (double)(2 * (numberOfSplits - bips)) / maxRF;

          assert(numberOfSplits >= bips);

          assert(rf <= 1.0);

          avgRF += rf;

          if(numberOfTreesAnalyzed % 100 == 0)
            printBothOpen("Relative RF tree %d: %f\n\n", i, rf);

          fprintf(rfFile, "%d %f\n", i, rf);

          /* I also modified this assertion, we need to make sure here that
             we checked all non-trivial splits/bipartitions in the
             multi-furcating tree which can be less than n - 3 ! */
          assert(bCounter == numberOfSplits);

          /* free masks and hash table for this iteration; the table struct
             itself is released separately after freeHashTable */
          rax_free(smallTreeMask);
          rax_free(masked);
          freeHashTable(rehash);
          rax_free(rehash);

          numberOfTreesAnalyzed++;
        }
    }

  printBothOpen("Number of small trees skipped: %d\n\n", tr->numberOfTrees - numberOfTreesAnalyzed);

  printBothOpen("Average RF distance %f\n\n", avgRF / (double)numberOfTreesAnalyzed);

  printBothOpen("Total execution time: %f secs\n\n", gettime() - masterTime);

  printBothOpen("\nFile containing all %d pair-wise RF distances written to file %s\n\n", numberOfTreesAnalyzed, rfFileName);

  fclose(treeFile);
  fclose(rfFile);

  /* free the data structure used for parsing the potentially
     multi-furcating tree */
  freeMultifurcations(smallTree);
  rax_free(smallTree);

  freeBitVectors(bitVectors, 2 * tr->mxtips);
  rax_free(bitVectors);

  freeHashTable(h);
  rax_free(h);
}
//
// Check whether we can unload any object code.  This is called at the
// appropriate point during a GC, where all the heap data is nice and
// packed together and we have a linked list of the static objects.
//
// The check involves a complete heap traversal, but you only pay for
// this (a) when you have called unloadObj(), and (b) at a major GC,
// which is much more expensive than the traversal we're doing here.
//
// static_objects: head of the static object list to scan, terminated by
//                 END_OF_STATIC_OBJECT_LIST.
//
// Outline: every object on unloaded_objects is first marked unreferenced;
// then checkAddress / searchHeapBlocks / searchCostCentres are run over
// the static objects, the revertible CAFs, all heap blocks of every
// generation and (when PROFILING) the cost centres; finally every object
// whose `referenced` flag is still clear is unlinked from unloaded_objects
// and passed to freeObjectCode.  Presumably the check functions set
// `referenced` on the owning object -- their definitions are not shown here.
//
// NOTE(review): the early-out reads unloaded_objects before
// linker_unloaded_mutex is acquired.
//
void checkUnload (StgClosure *static_objects)
{
  nat g, n;
  HashTable *addrs;
  StgClosure* p;
  const StgInfoTable *info;
  ObjectCode *oc, *prev, *next;
  gen_workspace *ws;
  StgClosure* link;

  // Nothing is pending unload: skip the traversal entirely.
  if (unloaded_objects == NULL) return;

  ACQUIRE_LOCK(&linker_unloaded_mutex);

  // Mark every unloadable object as unreferenced initially
  for (oc = unloaded_objects; oc; oc = oc->next) {
      IF_DEBUG(linker, debugBelch("Checking whether to unload %" PATH_FMT "\n",
                                  oc->fileName));
      oc->referenced = rtsFalse;
  }

  // Scratch table handed to every check below; freed at the end.
  addrs = allocHashTable();

  // Walk the static object list.  Each list pointer is untagged before
  // use, and the next link is read through the object's info table.
  for (p = static_objects; p != END_OF_STATIC_OBJECT_LIST; p = link) {
      p = UNTAG_STATIC_LIST_PTR(p);
      checkAddress(addrs, p);
      info = get_itbl(p);
      link = *STATIC_LINK(info, p);
  }

  // CAFs on revertible_caf_list are not on static_objects
  for (p = (StgClosure*)revertible_caf_list;
       p != END_OF_CAF_LIST;
       p = ((StgIndStatic *)p)->static_link) {
      p = UNTAG_STATIC_LIST_PTR(p);
      checkAddress(addrs, p);
  }

  // Scan the heap: each generation's ordinary and large-object blocks,
  // plus the todo / partial / scavenged block lists of every capability's
  // workspace for that generation.
  for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
      searchHeapBlocks (addrs, generations[g].blocks);
      searchHeapBlocks (addrs, generations[g].large_objects);

      for (n = 0; n < n_capabilities; n++) {
          ws = &gc_threads[n]->gens[g];
          searchHeapBlocks(addrs, ws->todo_bd);
          searchHeapBlocks(addrs, ws->part_list);
          searchHeapBlocks(addrs, ws->scavd_list);
      }
  }

#ifdef PROFILING
  /* Traverse the cost centre tree, calling checkAddress on each CCS/CC */
  searchCostCentres(addrs, CCS_MAIN);

  /* Also check each cost centre in the CC_LIST */
  CostCentre *cc;
  for (cc = CC_LIST; cc != NULL; cc = cc->link) {
      checkAddress(addrs, cc);
  }
#endif /* PROFILING */

  // Look through the unloadable objects, and any object that is still
  // marked as unreferenced can be physically unloaded, because we
  // have no references to it.
  prev = NULL;
  for (oc = unloaded_objects; oc; oc = next) {
      // Save the successor first: oc may be freed below.
      next = oc->next;
      if (oc->referenced == 0) {
          // Unlink oc from the list; prev stays where it is.
          if (prev == NULL) {
              unloaded_objects = oc->next;
          } else {
              prev->next = oc->next;
          }
          IF_DEBUG(linker, debugBelch("Unloading object file %" PATH_FMT "\n",
                                      oc->fileName));
          freeObjectCode(oc);
      } else {
          IF_DEBUG(linker, debugBelch("Object file still in use: %"
                                      PATH_FMT "\n", oc->fileName));
          prev = oc;
      }
  }

  // No per-value destructor is supplied for the scratch table.
  freeHashTable(addrs, NULL);

  RELEASE_LOCK(&linker_unloaded_mutex);
}
/*
 * Combined rapid-bootstrap and ML-search driver.
 *
 * Phase 1 runs up to adef->multipleRuns rapid bootstrap replicates (split
 * across MPI processes when _WAYNE_MPI is defined), saving each resulting
 * topology in the list rl.  When adef->bootStopping is set, a bootstopping
 * test may end the loop early.  If adef->allInOne is not set the function
 * exits the program after the bootstrap phase.
 *
 * Phase 2 is the ML search: a "fast" optimization of every fastEvery-th
 * bootstrap topology, a "slow" thorough optimization of the best ones (at
 * most 10 before division among processes), and a final thorough
 * optimization of the single best tree, which is written to
 * <workdir>RAxML_bestTree.<run_id>.  Bootstrap support values are then drawn
 * onto that tree via calcBipartitions.
 *
 * tr:   tree/alignment state the searches operate on.
 * adef: analysis definition controlling the run.
 *
 * This function does not return: every path ends in exit(0) (after
 * MPI_Finalize in MPI builds).
 *
 * NOTE(review): bestTreeFileName is built with unbounded strcpy/strcat into
 * a 1024-byte buffer.
 * NOTE(review): originalRateCategories and originalInvariant are not freed
 * here; the process exits at the end of the function.
 */
void doAllInOne(tree *tr, analdef *adef)
{
  int i, n, bestIndex, bootstrapsPerformed;

#ifdef _WAYNE_MPI
  int
    bootStopTests = 1,
    j,
    bootStrapsPerProcess = 0;
#endif

  double loopTime;
  int *originalRateCategories;
  int *originalInvariant;
#ifdef _WAYNE_MPI
  int slowSearches, fastEvery;
#else
  int slowSearches, fastEvery = 5;
#endif
  int treeVectorLength = -1;
  topolRELL_LIST *rl;
  double bestLH, mlTime, overallTime;
  long radiusSeed = adef->rapidBoot;
  FILE *f;
  char bestTreeFileName[1024];
  hashtable *h = (hashtable*)NULL;
  unsigned int **bitVectors = (unsigned int**)NULL;
  boolean bootStopIt = FALSE;
  double pearsonAverage = 0.0;
  pInfo *catParams = allocParams(tr);
  pInfo *gammaParams = allocParams(tr);
  unsigned int vLength;

  n = adef->multipleRuns;

#ifdef _WAYNE_MPI
  /* round the replicate count up to a multiple of the process count */
  if(n % processes != 0)
    n = processes * ((n / processes) + 1);
#endif

  /* the hash table and bit vectors are only needed for bootstopping */
  if(adef->bootStopping)
    {
      h = initHashTable(tr->mxtips * 100);

      treeVectorLength = adef->multipleRuns;

      bitVectors = initBitVector(tr, &vLength);
    }

  /* topology list that stores one tree per replicate */
  rl = (topolRELL_LIST *)rax_malloc(sizeof(topolRELL_LIST));
  initTL(rl, tr, n);

  originalRateCategories = (int*)rax_malloc(tr->cdta->endsite * sizeof(int));
  originalInvariant = (int*)rax_malloc(tr->cdta->endsite * sizeof(int));

  initModel(tr, tr->rdta, tr->cdta, adef);

  if(adef->grouping)
    printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the constraint tree specified in %s\n", tree_file);
  if(adef->constraint)
    printBothOpen("\n\nThe topologies of all Bootstrap and ML trees will adhere to the bifurcating backbone constraint tree specified in %s\n", tree_file);

#ifdef _WAYNE_MPI
  long parsimonySeed0 = adef->parsimonySeed;
  long replicateSeed0 = adef->rapidBoot;
  /* from here on n is the number of replicates per process */
  n = n / processes;
#endif

  /* ---- bootstrap phase: one replicate per iteration ---- */
  for(i = 0; i < n && !bootStopIt; i++)
    {
#ifdef _WAYNE_MPI
      /* j is the global replicate index of this process's i-th replicate */
      j = i + n * processID;
      tr->treeID = j;
#else
      tr->treeID = i;
#endif

      tr->checkPointCounter = 0;

      loopTime = gettime();

#ifdef _WAYNE_MPI
      /* give every process its own seeds on its first replicate */
      if(i == 0)
        {
          if(parsimonySeed0 != 0)
            adef->parsimonySeed = parsimonySeed0 + 10000 * processID;
          adef->rapidBoot = replicateSeed0 + 10000 * processID;
          radiusSeed = adef->rapidBoot;
        }
#endif

      /* every 10th replicate starts from a fresh tree: either the
         constraint/grouping tree from tree_file or a parsimony tree */
      if(i % 10 == 0)
        {
          if(i > 0)
            reductionCleanup(tr, originalRateCategories, originalInvariant);

          if(adef->grouping || adef->constraint)
            {
              /* this f shadows the function-level f */
              FILE *f = myfopen(tree_file, "rb");

              assert(adef->restart);
              if (! treeReadLenMULT(f, tr, adef))
                exit(-1);

              fclose(f);
            }
          else
            makeParsimonyTree(tr, adef);

          tr->likelihood = unlikely;

          /* model parameters are optimized once, on the first replicate,
             and the rate categories / invariant array are remembered */
          if(i == 0)
            {
              double t;

              onlyInitrav(tr, tr->start);
              treeEvaluate(tr, 1);

              t = gettime();

              modOpt(tr, adef, FALSE, 5.0);
#ifdef _WAYNE_MPI
              printBothOpen("\nTime for BS model parameter optimization on Process %d: %f seconds\n", processID, gettime() - t);
#else
              printBothOpen("\nTime for BS model parameter optimization %f\n", gettime() - t);
#endif

              memcpy(originalRateCategories, tr->cdta->rateCategory, sizeof(int) * tr->cdta->endsite);
              memcpy(originalInvariant, tr->invariant, sizeof(int) * tr->cdta->endsite);

              if(adef->bootstrapBranchLengths)
                {
                  if(tr->rateHetModel == CAT)
                    {
                      /* keep a copy of the CAT parameters, optimize under
                         GAMMA, keep those too, then switch back to CAT */
                      copyParams(tr->NumberOfModels, catParams, tr->partitionData, tr);
                      assert(tr->cdta->endsite == tr->originalCrunchedLength);
                      catToGamma(tr, adef);
                      modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
                      copyParams(tr->NumberOfModels, gammaParams, tr->partitionData, tr);
                      gammaToCat(tr);
                      copyParams(tr->NumberOfModels, tr->partitionData, catParams, tr);
                    }
                  else
                    {
                      assert(tr->cdta->endsite == tr->originalCrunchedLength);
                    }
                }
            }
        }

      computeNextReplicate(tr, &adef->rapidBoot, originalRateCategories, originalInvariant, TRUE, TRUE);
      resetBranches(tr);

      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 1);

      computeBOOTRAPID(tr, adef, &radiusSeed);
#ifdef _WAYNE_MPI
      saveTL(rl, tr, j);
#else
      saveTL(rl, tr, i);
#endif

      /* optionally re-estimate branch lengths (under GAMMA when the search
         model is CAT); the likelihood from the search is restored after */
      if(adef->bootstrapBranchLengths)
        {
          double
            lh = tr->likelihood;

          if(tr->rateHetModel == CAT)
            {
              copyParams(tr->NumberOfModels, tr->partitionData, gammaParams, tr);

              catToGamma(tr, adef);

              resetBranches(tr);
              onlyInitrav(tr, tr->start);
              treeEvaluate(tr, 2.0);

              gammaToCat(tr);

              copyParams(tr->NumberOfModels, tr->partitionData, catParams, tr);
              tr->likelihood = lh;
            }
          else
            {
              treeEvaluate(tr, 2.0);
              tr->likelihood = lh;
            }
        }

      printBootstrapResult(tr, adef, TRUE);

      loopTime = gettime() - loopTime;
      writeInfoFile(adef, tr, loopTime);

      /* bootstopping test: in MPI builds it runs only when the global
         replicate count crosses the next FC_SPACING boundary; otherwise it
         runs after every replicate */
      if(adef->bootStopping)
#ifdef _WAYNE_MPI
        {
          int
            nn = (i + 1) * processes;

          if((nn > START_BSTOP_TEST) &&
             (i * processes < FC_SPACING * bootStopTests) &&
             ((i + 1) * processes >= FC_SPACING * bootStopTests)
             )
            {
              MPI_Barrier(MPI_COMM_WORLD);

              concatenateBSFiles(processes, bootstrapFileName);

              MPI_Barrier(MPI_COMM_WORLD);

              bootStopIt = computeBootStopMPI(tr, bootstrapFileName, adef, &pearsonAverage);
              bootStopTests++;
            }
        }
#else
        bootStopIt = bootStop(tr, h, i, &pearsonAverage, bitVectors, treeVectorLength, vLength, adef);
#endif
    }

#ifdef _WAYNE_MPI
  MPI_Barrier(MPI_COMM_WORLD);

  bootstrapsPerformed = i * processes;
  bootStrapsPerProcess = i;

  concatenateBSFiles(processes, bootstrapFileName);
  removeBSFiles(processes, bootstrapFileName);

  MPI_Barrier(MPI_COMM_WORLD);
#else
  bootstrapsPerformed = i;
#endif

  rax_freeParams(tr->NumberOfModels, catParams);
  rax_free(catParams);

  rax_freeParams(tr->NumberOfModels, gammaParams);
  rax_free(gammaParams);

  /* release the bootstopping data structures; the table struct itself is
     released separately after freeHashTable */
  if(adef->bootStopping)
    {
      freeBitVectors(bitVectors, 2 * tr->mxtips);
      rax_free(bitVectors);
      freeHashTable(h);
      rax_free(h);
    }

  /* ---- report on the bootstrap phase ---- */
  {
    double t;

    printBothOpenMPI("\n\n");

    if(adef->bootStopping)
      {
        if(bootStopIt)
          {
            switch(tr->bootStopCriterion)
              {
              case FREQUENCY_STOP:
                printBothOpenMPI("Stopped Rapid BS search after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage);
                break;
              case MR_STOP:
                printBothOpenMPI("Stopped Rapid BS search after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
                break;
              case MRE_STOP:
                printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
                break;
              case MRE_IGN_STOP:
                printBothOpenMPI("Stopped Rapid BS search after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
                break;
              default:
                assert(0);
              }
          }
        else
          {
            switch(tr->bootStopCriterion)
              {
              case FREQUENCY_STOP:
                printBothOpenMPI("Rapid BS search did not converge after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage);
                break;
              case MR_STOP:
                printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
                break;
              case MRE_STOP:
                printBothOpenMPI("Rapid BS search did not converge after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
                break;
              case MRE_IGN_STOP:
                printBothOpenMPI("Rapid BS search did not converge after %d replicates with MR_IGN-based Bootstopping criterion\n", bootstrapsPerformed);
                printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
                break;
              default:
                assert(0);
              }
          }
      }

    t = gettime() - masterTime;

    printBothOpenMPI("Overall Time for %d Rapid Bootstraps %f seconds\n", bootstrapsPerformed, t);
    printBothOpenMPI("Average Time per Rapid Bootstrap %f seconds\n", (double)(t/((double)bootstrapsPerformed)));

    /* bootstrap-only run: finish here */
    if(!adef->allInOne)
      {
        printBothOpenMPI("All %d bootstrapped trees written to: %s\n", bootstrapsPerformed, bootstrapFileName);

#ifdef _WAYNE_MPI
        MPI_Finalize();
#endif
        exit(0);
      }
  }

  /* ML-search */

  mlTime = gettime();
  double t = mlTime;

  printBothOpenMPI("\nStarting ML Search ...\n\n");

  /***CLEAN UP reduction stuff */

  reductionCleanup(tr, originalRateCategories, originalInvariant);

  /****/

#ifdef _WAYNE_MPI
  restoreTL(rl, tr, n * processID);
#else
  restoreTL(rl, tr, 0);
#endif

  resetBranches(tr);

  evaluateGenericInitrav(tr, tr->start);

  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);

  /* ---- fast ML searches: every fastEvery-th saved topology is optimized
     with optimizeRAPID and saved back; all others keep likelihood
     `unlikely` ---- */
#ifdef _WAYNE_MPI

  if(bootstrapsPerformed <= 100)
    fastEvery = 5;
  else
    fastEvery = bootstrapsPerformed / 20;

  for(i = 0; i < bootstrapsPerformed; i++)
    rl->t[i]->likelihood = unlikely;

  for(i = 0; i < bootStrapsPerProcess; i++)
    {
      j = i + n * processID;

      if(i % fastEvery == 0)
        {
          restoreTL(rl, tr, j);

          resetBranches(tr);

          evaluateGenericInitrav(tr, tr->start);

          treeEvaluate(tr, 1);

          optimizeRAPID(tr, adef);

          saveTL(rl, tr, j);
        }
    }
#else
  for(i = 0; i < bootstrapsPerformed; i++)
    {
      rl->t[i]->likelihood = unlikely;

      if(i % fastEvery == 0)
        {
          restoreTL(rl, tr, i);

          resetBranches(tr);

          evaluateGenericInitrav(tr, tr->start);

          treeEvaluate(tr, 1);

          optimizeRAPID(tr, adef);

          saveTL(rl, tr, i);
        }
    }
#endif

  printBothOpenMPI("Fast ML optimization finished\n\n");
  t = gettime() - t;

  /* order the saved topologies with compareTopolRell and restore the first */
#ifdef _WAYNE_MPI
  printBothOpen("Fast ML search on Process %d: Time %f seconds\n\n", processID, t);
  j = n * processID;

  qsort(&(rl->t[j]), n, sizeof(topolRELL*), compareTopolRell);

  restoreTL(rl, tr, j);
#else
  printBothOpen("Fast ML search Time: %f seconds\n\n", t);
  qsort(&(rl->t[0]), bootstrapsPerformed, sizeof(topolRELL*), compareTopolRell);

  restoreTL(rl, tr, 0);
#endif
  t = gettime();

  resetBranches(tr);

  evaluateGenericInitrav(tr, tr->start);

  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);

  /* ---- slow ML searches: ceil(bootstrapsPerformed / 5), capped at 10 ---- */
  slowSearches = bootstrapsPerformed / 5;
  if(bootstrapsPerformed % 5 != 0)
    slowSearches++;

  slowSearches = MIN(slowSearches, 10);

#ifdef _WAYNE_MPI
  /* divide the slow searches among the processes, rounding up */
  if(processes > 1)
    {
      if(slowSearches % processes == 0)
        slowSearches = slowSearches / processes;
      else
        slowSearches = (slowSearches / processes) + 1;
    }

  for(i = 0; i < slowSearches; i++)
    {
      j = i + n * processID;
      restoreTL(rl, tr, j);
      rl->t[j]->likelihood = unlikely;

      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 1.0);

      thoroughOptimization(tr, adef, rl, j);
    }
#else
  for(i = 0; i < slowSearches; i++)
    {
      restoreTL(rl, tr, i);
      rl->t[i]->likelihood = unlikely;

      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 1.0);

      thoroughOptimization(tr, adef, rl, i);
    }
#endif

  /*************************************************************************************************************/

  /* the remaining evaluation is done under GAMMA */
  if(tr->rateHetModel == CAT)
    {
      catToGamma(tr, adef);
      modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
    }

  bestIndex = -1;
  bestLH = unlikely;

  /* re-evaluate each slow-search result and remember the best one */
#ifdef _WAYNE_MPI
  for(i = 0; i < slowSearches; i++)
    {
      j = i + n * processID;
      restoreTL(rl, tr, j);
      resetBranches(tr);

      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 2);

      printBothOpen("Slow ML Search %d Likelihood: %f\n", j, tr->likelihood);

      if(tr->likelihood > bestLH)
        {
          bestLH = tr->likelihood;
          bestIndex = j;
        }
    }
  /*printf("processID = %d, bestIndex = %d; bestLH = %f\n", processID, bestIndex, bestLH);*/
#else
  for(i = 0; i < slowSearches; i++)
    {
      restoreTL(rl, tr, i);
      resetBranches(tr);

      evaluateGenericInitrav(tr, tr->start);

      treeEvaluate(tr, 2);

      printBothOpen("Slow ML Search %d Likelihood: %f\n", i, tr->likelihood);

      if(tr->likelihood > bestLH)
        {
          bestLH = tr->likelihood;
          bestIndex = i;
        }
    }
#endif

  printBothOpenMPI("Slow ML optimization finished\n\n");

  t = gettime() - t;

#ifdef _WAYNE_MPI
  printBothOpen("Slow ML search on Process %d: Time %f seconds\n", processID, t);
#else
  printBothOpen("Slow ML search Time: %f seconds\n", t);
#endif

  t = gettime();

  /* ---- final thorough optimization of the best tree ---- */
  restoreTL(rl, tr, bestIndex);
  resetBranches(tr);

  evaluateGenericInitrav(tr, tr->start);

  treeEvaluate(tr, 2);

  Thorough = 1;
  tr->doCutoff = FALSE;

  treeOptimizeThorough(tr, 1, 10);
  evaluateGenericInitrav(tr, tr->start);

  modOpt(tr, adef, TRUE, adef->likelihoodEpsilon);
  t = gettime() - t;

#ifdef _WAYNE_MPI
  printBothOpen("Thorough ML search on Process %d: Time %f seconds\n", processID, t);
#else
  printBothOpen("Thorough ML search Time: %f seconds\n", t);
#endif

#ifdef _WAYNE_MPI
  bestLH = tr->likelihood;

  printf("\nprocessID = %d, bestLH = %f\n", processID, bestLH);

  /* share every process's best likelihood; only the process holding the
     overall best continues, all others finalize and exit */
  if(processes > 1)
    {
      double *buffer;
      int bestProcess;

      buffer = (double *)rax_malloc(sizeof(double) * processes);
      for(i = 0; i < processes; i++)
        buffer[i] = unlikely;
      buffer[processID] = bestLH;
      for(i = 0; i < processes; i++)
        MPI_Bcast(&buffer[i], 1, MPI_DOUBLE, i, MPI_COMM_WORLD);
      bestLH = buffer[0];
      bestProcess = 0;
      for(i = 1; i < processes; i++)
        if(buffer[i] > bestLH)
          {
            bestLH = buffer[i];
            bestProcess = i;
          }
      rax_free(buffer);

      if(processID != bestProcess)
        {
          MPI_Finalize();
          exit(0);
        }
    }
#endif

  /* ---- output ---- */
  printBothOpen("\nFinal ML Optimization Likelihood: %f\n", tr->likelihood);
  printBothOpen("\nModel Information:\n\n");

  printModelParams(tr, adef);

  strcpy(bestTreeFileName, workdir);
  strcat(bestTreeFileName, "RAxML_bestTree.");
  strcat(bestTreeFileName, run_id);

  Tree2String(tr->tree_string, tr, tr->start->back, TRUE, TRUE, FALSE, FALSE, TRUE, adef, SUMMARIZE_LH, FALSE, FALSE, FALSE, FALSE);

  f = myfopen(bestTreeFileName, "wb");
  fprintf(f, "%s", tr->tree_string);
  fclose(f);

  if(adef->perGeneBranchLengths)
    printTreePerGene(tr, adef, bestTreeFileName, "w");

  overallTime = gettime() - masterTime;
  mlTime = gettime() - mlTime;
  printBothOpen("\nML search took %f secs or %f hours\n", mlTime, mlTime / 3600.0);
  printBothOpen("\nCombined Bootstrap and ML search took %f secs or %f hours\n", overallTime, overallTime / 3600.0);
  printBothOpen("\nDrawing Bootstrap Support Values on best-scoring ML tree ...\n\n");

  freeTL(rl);
  rax_free(rl);

  calcBipartitions(tr, adef, bestTreeFileName, bootstrapFileName);

  overallTime = gettime() - masterTime;

  printBothOpen("Program execution info written to %s\n", infoFileName);
  printBothOpen("All %d bootstrapped trees written to: %s\n\n", bootstrapsPerformed, bootstrapFileName);
  printBothOpen("Best-scoring ML tree written to: %s\n\n", bestTreeFileName);
  if(adef->perGeneBranchLengths && tr->NumberOfModels > 1)
    printBothOpen("Per-Partition branch lengths of best-scoring ML tree written to %s.PARTITION.0 to %s.PARTITION.%d\n\n", bestTreeFileName, bestTreeFileName, tr->NumberOfModels - 1);
  printBothOpen("Best-scoring ML tree with support values written to: %s\n\n", bipartitionsFileName);
  printBothOpen("Best-scoring ML tree with support values as branch labels written to: %s\n\n", bipartitionsFileNameBranchLabels);
  printBothOpen("Overall execution time for full ML analysis: %f secs or %f hours or %f days\n\n", overallTime, overallTime/3600.0, overallTime/86400.0);

#ifdef _WAYNE_MPI
  MPI_Finalize();
#endif

  exit(0);
}
/*
 * Run the standard (non-rapid) bootstrap replicates.
 *
 * tr, adef, rdta, cdta are passed through unchanged to singleBootstrap().
 * Up to adef->multipleRuns replicates are executed; when adef->bootStopping
 * is set the loop may end early as soon as the bootstopping test reports
 * convergence.  On return adef->multipleRuns holds the number of replicates
 * that were actually performed.
 *
 * With _WAYNE_MPI defined the replicate count is rounded up to a multiple of
 * the number of processes, each process runs n / processes replicates with
 * its own seeds, and the per-process bootstrap files are concatenated.
 */
void doBootstrap(tree *tr, analdef *adef, rawdata *rdta, cruncheddata *cdta)
{
  int
    bootstrapsPerformed,
    i,
    n,
    treeVectorLength = -1;

  unsigned int
    vLength = 0;

#ifdef _WAYNE_MPI
  int
    j,
    bootStopTests = 1;
#endif

  /* pearsonAverage is printed in the final report even when no bootstopping
     test has been executed (e.g. too few replicates to reach the first test),
     so it must start from a defined value. */
  double
    loopTime,
    pearsonAverage = 0.0;

  hashtable *h = (hashtable*)NULL;
  unsigned int **bitVectors = (unsigned int **)NULL;
  boolean bootStopIt = FALSE;

  n = adef->multipleRuns;

#ifdef _WAYNE_MPI
  /* round the number of replicates up to a multiple of the process count */
  if(n % processes != 0)
    n = processes * ((n / processes) + 1);
  adef->multipleRuns = n;
#endif

  if(adef->bootStopping)
    {
      h = initHashTable(tr->mxtips * 100);
      bitVectors = initBitVector(tr, &vLength);
      treeVectorLength = adef->multipleRuns;
    }

#ifdef _WAYNE_MPI
  long parsimonySeed0 = adef->parsimonySeed;
  long replicateSeed0 = adef->rapidBoot;
  long bootstrapSeed0 = adef->boot;

  /* from here on n is the number of replicates per process */
  n = n / processes;
#endif

  for(i = 0; i < n && !bootStopIt; i++)
    {
      loopTime = gettime();

#ifdef _WAYNE_MPI
      if(i == 0)
        {
          /* give every process its own seeds, offset by the process id */
          if(parsimonySeed0 != 0)
            adef->parsimonySeed = parsimonySeed0 + 10000 * processID;
          adef->rapidBoot = replicateSeed0 + 10000 * processID;
          adef->boot = bootstrapSeed0 + 10000 * processID;
        }

      /* global replicate index of this process' i-th replicate */
      j = i + n*processID;
      singleBootstrap(tr, j, adef, rdta, cdta);
#else
      singleBootstrap(tr, i, adef, rdta, cdta);
#endif

      loopTime = gettime() - loopTime;
      writeInfoFile(adef, tr, loopTime);

      if(adef->bootStopping)
#ifdef _WAYNE_MPI
        {
          int nn = (i + 1) * processes;

          /* run a test only once more than START_BSTOP_TEST replicates exist
             overall and this iteration crosses the next multiple of
             FC_SPACING */
          if((nn > START_BSTOP_TEST) &&
             (i * processes < FC_SPACING * bootStopTests) &&
             ((i + 1) * processes >= FC_SPACING * bootStopTests))
            {
              MPI_Barrier(MPI_COMM_WORLD);
              concatenateBSFiles(processes, bootstrapFileName);
              MPI_Barrier(MPI_COMM_WORLD);

              bootStopIt = computeBootStopMPI(tr, bootstrapFileName, adef, &pearsonAverage);
              bootStopTests++;
            }
        }
#else
        bootStopIt = bootStop(tr, h, i, &pearsonAverage, bitVectors, treeVectorLength, vLength, adef);
#endif
    }

#ifdef _WAYNE_MPI
  MPI_Barrier(MPI_COMM_WORLD);

  bootstrapsPerformed = i * processes;

  if(processID == 0)
    {
      /* with bootstopping the files were already concatenated by the tests */
      if(!adef->bootStopping)
        concatenateBSFiles(processes, bootstrapFileName);
      removeBSFiles(processes, bootstrapFileName);
    }

  MPI_Barrier(MPI_COMM_WORLD);
#else
  bootstrapsPerformed = i;
#endif

  adef->multipleRuns = bootstrapsPerformed;

  if(adef->bootStopping)
    {
      freeBitVectors(bitVectors, 2 * tr->mxtips);
      rax_free(bitVectors);
      freeHashTable(h);
      rax_free(h);

      if(bootStopIt)
        {
          switch(tr->bootStopCriterion)
            {
            case FREQUENCY_STOP:
              printBothOpenMPI("Stopped Standard BS search after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage);
              break;
            case MR_STOP:
              printBothOpenMPI("Stopped Standard BS search after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
              break;
            case MRE_STOP:
              printBothOpenMPI("Stopped Standard BS search after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
              break;
            case MRE_IGN_STOP:
              printBothOpenMPI("Stopped Standard BS search after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
              break;
            default:
              assert(0);
            }
        }
      else
        {
          switch(tr->bootStopCriterion)
            {
            case FREQUENCY_STOP:
              printBothOpenMPI("Standard BS search did not converge after %d replicates with FC Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("Pearson Average of %d random splits: %f\n",BOOTSTOP_PERMUTATIONS , pearsonAverage);
              break;
            case MR_STOP:
              printBothOpenMPI("Standard BS search did not converge after %d replicates with MR-based Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
              break;
            case MRE_STOP:
              printBothOpenMPI("Standard BS search did not converge after %d replicates with MRE-based Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
              break;
            case MRE_IGN_STOP:
              /* criterion name aligned with the "Stopped ..." message above */
              printBothOpenMPI("Standard BS search did not converge after %d replicates with MRE_IGN-based Bootstopping criterion\n", bootstrapsPerformed);
              printBothOpenMPI("WRF Average of %d random splits: %f\n", BOOTSTOP_PERMUTATIONS, pearsonAverage);
              break;
            default:
              assert(0);
            }
        }
    }
}
// // Check whether we can unload any object code. This is called at the // appropriate point during a GC, where all the heap data is nice and // packed together and we have a linked list of the static objects. // // The check involves a complete heap traversal, but you only pay for // this (a) when you have called unloadObj(), and (b) at a major GC, // which is much more expensive than the traversal we're doing here. // void checkUnload (StgClosure *static_objects) { nat g, n; HashTable *addrs; StgClosure* p; const StgInfoTable *info; ObjectCode *oc, *prev, *next; gen_workspace *ws; StgClosure* link; if (unloaded_objects == NULL) return; // Mark every unloadable object as unreferenced initially for (oc = unloaded_objects; oc; oc = oc->next) { IF_DEBUG(linker, debugBelch("Checking whether to unload %" PATH_FMT "\n", oc->fileName)); oc->referenced = rtsFalse; } addrs = allocHashTable(); for (p = static_objects; p != END_OF_STATIC_LIST; p = link) { checkAddress(addrs, p); info = get_itbl(p); link = *STATIC_LINK(info, p); } for (g = 0; g < RtsFlags.GcFlags.generations; g++) { searchHeapBlocks (addrs, generations[g].blocks); searchHeapBlocks (addrs, generations[g].large_objects); for (n = 0; n < n_capabilities; n++) { ws = &gc_threads[n]->gens[g]; searchHeapBlocks(addrs, ws->todo_bd); searchHeapBlocks(addrs, ws->part_list); searchHeapBlocks(addrs, ws->scavd_list); } } // Look through the unloadable objects, and any object that is still // marked as unreferenced can be physically unloaded, because we // have no references to it. prev = NULL; for (oc = unloaded_objects; oc; prev = oc, oc = next) { next = oc->next; if (oc->referenced == 0) { if (prev == NULL) { unloaded_objects = oc->next; } else { prev->next = oc->next; } IF_DEBUG(linker, debugBelch("Unloading object file %" PATH_FMT "\n", oc->fileName)); freeObjectCode(oc); } else { IF_DEBUG(linker, debugBelch("Object file still in use: %" PATH_FMT "\n", oc->fileName)); } } freeHashTable(addrs, NULL); }
/*
 * Release both identifier lookup tables.
 *
 * idToIntMap is released through freeStringHashTable() and intToIdMap
 * through plain freeHashTable().
 * NOTE(review): presumably only idToIntMap owns its key strings, which is
 * why it takes the string-aware path -- confirm against the code that
 * fills the two maps.
 */
void cleanupIdMap()
{
  freeStringHashTable(idToIntMap);
  freeHashTable(intToIdMap);
}
/*
 * Two-phase SPR-based tree search on tr.
 *
 * Phase 1 ("fast", Thorough = 0) repeats treeOptimizeRapid() with a fixed
 * rearrangement setting (bestTrav) for as long as a candidate tree improves
 * the likelihood by more than epsilon.  Phase 2 ("thorough", Thorough = 1)
 * does the same, but widens the rearrangement window by adef->stepwidth
 * whenever a round brings no improvement, and stops once the window exceeds
 * adef->max_rearrange.
 *
 * When tr->searchConvergenceCriterion is set, either phase additionally
 * stops as soon as convergenceCriterion() reports a relative RF distance
 * of at most 1% between the best trees of consecutive cycles.
 *
 * estimateModel selects whether model parameters are (re-)optimised via
 * modOpt() / a binary model file, or whether only treeEvaluate() is run.
 *
 * With _TERRACES defined, candidate trees of the thorough phase are
 * collected and those whose likelihood matches the final tree's likelihood
 * are written to a per-search output file.
 */
void computeBIGRAPID (tree *tr, analdef *adef, boolean estimateModel)
{
  unsigned int
    vLength = 0;

  int
    i,
    impr,
    bestTrav,
    rearrangementsMax = 0,
    rearrangementsMin = 0,
    thoroughIterations = 0,
    fastIterations = 0;

  double
    lh,
    previousLh,
    difference,
    epsilon;

  /* bestT keeps the single best tree, bt the candidates of one round */
  bestlist
    *bestT,
    *bt;

#ifdef _TERRACES
  /* store the 20 best trees found in a dedicated list */
  bestlist
    *terrace;

  /* output file names */
  char
    terraceFileName[1024],
    buf[64];
#endif

  hashtable *h = (hashtable*)NULL;
  unsigned int **bitVectors = (unsigned int**)NULL;

  /* bipartition bookkeeping is only needed for the convergence criterion */
  if(tr->searchConvergenceCriterion)
    {
      bitVectors = initBitVector(tr, &vLength);
      h = initHashTable(tr->mxtips * 4);
    }

  bestT = (bestlist *) rax_malloc(sizeof(bestlist));
  bestT->ninit = 0;
  initBestTree(bestT, 1, tr->mxtips);

  bt = (bestlist *) rax_malloc(sizeof(bestlist));
  bt->ninit = 0;
  initBestTree(bt, 20, tr->mxtips);

#ifdef _TERRACES
  /* initialize the tree list and the output file name for the current tree search/replicate */
  terrace = (bestlist *) rax_malloc(sizeof(bestlist));
  terrace->ninit = 0;
  initBestTree(terrace, 20, tr->mxtips);

  /* file name: <workdir>RAxML_terrace.<run_id>.BS.<bCount> */
  sprintf(buf, "%d", bCount);
  strcpy(terraceFileName, workdir);
  strcat(terraceFileName, "RAxML_terrace.");
  strcat(terraceFileName, run_id);
  strcat(terraceFileName, ".BS.");
  strcat(terraceFileName, buf);
  printf("%s\n", terraceFileName);
#endif

  initInfoList(50);

  difference = 10.0;
  epsilon = 0.01;

  /* fast phase first */
  Thorough = 0;

  if(estimateModel)
    {
      if(adef->useBinaryModelFile)
        {
          readBinaryModel(tr);
          evaluateGenericInitrav(tr, tr->start);
          treeEvaluate(tr, 2);
        }
      else
        {
          evaluateGenericInitrav(tr, tr->start);
          modOpt(tr, adef, FALSE, 10.0);
        }
    }
  else
    treeEvaluate(tr, 2);

  printLog(tr, adef, FALSE);

  saveBestTree(bestT, tr);

  /* rearrangement setting: determined automatically unless supplied in adef */
  if(!adef->initialSet)
    bestTrav = adef->bestTrav = determineRearrangementSetting(tr, adef, bestT, bt);
  else
    bestTrav = adef->bestTrav = adef->initial;

  if(estimateModel)
    {
      if(adef->useBinaryModelFile)
        treeEvaluate(tr, 2);
      else
        {
          evaluateGenericInitrav(tr, tr->start);
          modOpt(tr, adef, FALSE, 5.0);
        }
    }
  else
    treeEvaluate(tr, 1);

  saveBestTree(bestT, tr);
  impr = 1;
  if(tr->doCutoff)
    tr->itCount = 0;

  /* ---- fast phase: loop while the previous round improved the tree ---- */
  while(impr)
    {
      recallBestTree(bestT, 1, tr);

      if(tr->searchConvergenceCriterion)
        {
          int bCounter = 0;

          /* the parity of the cycle counter is handed to the hash table
             helpers; presumably it selects which of two tree slots gets
             cleared and refilled -- TODO confirm */
          if(fastIterations > 1)
            cleanupHashTable(h, (fastIterations % 2));

          bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, fastIterations % 2, BIPARTITIONS_RF, (branchInfo *)NULL,
                                  &bCounter, 1, FALSE, FALSE);

          assert(bCounter == tr->mxtips - 3);

          /* a comparison needs at least one earlier cycle */
          if(fastIterations > 0)
            {
              double rrf = convergenceCriterion(h, tr->mxtips);

              if(rrf <= 0.01) /* 1% cutoff */
                {
                  printBothOpen("ML fast search converged at fast SPR cycle %d with stopping criterion\n", fastIterations);
                  printBothOpen("Relative Robinson-Foulds (RF) distance between respective best trees after one succseful SPR cycle: %f%s\n", rrf, "%");
                  /* the thorough phase reuses h, so both slots are cleared
                     before jumping past the regular post-loop cleanup */
                  cleanupHashTable(h, 0);
                  cleanupHashTable(h, 1);
                  goto cleanup_fast;
                }
              else
                printBothOpen("ML search convergence criterion fast cycle %d->%d Relative Robinson-Foulds %f\n", fastIterations - 1, fastIterations, rrf);
            }
        }

      fastIterations++;

      treeEvaluate(tr, 1.0);

      saveBestTree(bestT, tr);
      printLog(tr, adef, FALSE);
      printResult(tr, adef, FALSE);
      lh = previousLh = tr->likelihood;

      treeOptimizeRapid(tr, 1, bestTrav, adef, bt);

      impr = 0;

      /* re-evaluate every candidate and keep the best one that beats the
         current likelihood by more than epsilon */
      for(i = 1; i <= bt->nvalid; i++)
        {
          recallBestTree(bt, i, tr);

          treeEvaluate(tr, 0.25);

          /* absolute likelihood difference to the round's starting tree */
          difference = ((tr->likelihood > previousLh)?
                        tr->likelihood - previousLh:
                        previousLh - tr->likelihood);
          if(tr->likelihood > lh && difference > epsilon)
            {
              impr = 1;
              lh = tr->likelihood;
              saveBestTree(bestT, tr);
            }
        }
    }

  /* regular exit of the fast phase: clear both hash table slots */
  if(tr->searchConvergenceCriterion)
    {
      cleanupHashTable(h, 0);
      cleanupHashTable(h, 1);
    }

 cleanup_fast:

  /* ---- thorough phase ---- */
  Thorough = 1;
  impr = 1;

  recallBestTree(bestT, 1, tr);

  if(estimateModel)
    {
      if(adef->useBinaryModelFile)
        treeEvaluate(tr, 2);
      else
        {
          evaluateGenericInitrav(tr, tr->start);
          modOpt(tr, adef, FALSE, 1.0);
        }
    }
  else
    treeEvaluate(tr, 1.0);

  /* left only via "goto cleanup" */
  while(1)
    {
      recallBestTree(bestT, 1, tr);

      if(impr)
        {
          /* last round improved the tree: reset the rearrangement window */
          printResult(tr, adef, FALSE);
          rearrangementsMin = 1;
          rearrangementsMax = adef->stepwidth;

          if(tr->searchConvergenceCriterion)
            {
              int bCounter = 0;

              if(thoroughIterations > 1)
                cleanupHashTable(h, (thoroughIterations % 2));

              bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, thoroughIterations % 2, BIPARTITIONS_RF, (branchInfo *)NULL,
                                      &bCounter, 1, FALSE, FALSE);

              assert(bCounter == tr->mxtips - 3);

              if(thoroughIterations > 0)
                {
                  double rrf = convergenceCriterion(h, tr->mxtips);

                  if(rrf <= 0.01) /* 1% cutoff */
                    {
                      printBothOpen("ML search converged at thorough SPR cycle %d with stopping criterion\n", thoroughIterations);
                      printBothOpen("Relative Robinson-Foulds (RF) distance between respective best trees after one succseful SPR cycle: %f%s\n", rrf, "%");
                      goto cleanup;
                    }
                  else
                    printBothOpen("ML search convergence criterion thorough cycle %d->%d Relative Robinson-Foulds %f\n", thoroughIterations - 1, thoroughIterations, rrf);
                }
            }

          thoroughIterations++;
        }
      else
        {
          /* no improvement: shift the window outwards, give up once it
             exceeds the configured maximum */
          rearrangementsMax += adef->stepwidth;
          rearrangementsMin += adef->stepwidth;
          if(rearrangementsMax > adef->max_rearrange)
            goto cleanup;
        }

      treeEvaluate(tr, 1.0);

      previousLh = lh = tr->likelihood;
      saveBestTree(bestT, tr);
      printLog(tr, adef, FALSE);
      treeOptimizeRapid(tr, rearrangementsMin, rearrangementsMax, adef, bt);

      impr = 0;

      for(i = 1; i <= bt->nvalid; i++)
        {
          recallBestTree(bt, i, tr);

          treeEvaluate(tr, 0.25);

#ifdef _TERRACES
          /* save all 20 best trees in the terrace tree list */
          saveBestTree(terrace, tr);
#endif

          difference = ((tr->likelihood > previousLh)?
                        tr->likelihood - previousLh:
                        previousLh - tr->likelihood);
          if(tr->likelihood > lh && difference > epsilon)
            {
              impr = 1;
              lh = tr->likelihood;
              saveBestTree(bestT, tr);
            }
        }
    }

 cleanup:

#ifdef _TERRACES
  {
    double
      bestLH = tr->likelihood;
    FILE
      *f = myfopen(terraceFileName, "w");

    /* print out likelihood of best tree found */
    printf("best tree: %f\n", tr->likelihood);

    /* print out likelihoods of 20 best trees found during the tree search */
    for(i = 1; i <= terrace->nvalid; i++)
      {
        recallBestTree(terrace, i, tr);

        /* if the likelihood scores are smaller than some epsilon 0.000001 print the tree to file */
        if(ABS(bestLH - tr->likelihood) < 0.000001)
          {
            printf("%d %f\n", i, tr->likelihood);
            Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE);
            fprintf(f, "%s\n", tr->tree_string);
          }
      }

    fclose(f);

    /* increment tree search counter */
    bCount++;
  }
#endif

  /* release everything allocated at the top of the function */
  if(tr->searchConvergenceCriterion)
    {
      freeBitVectors(bitVectors, 2 * tr->mxtips);
      rax_free(bitVectors);
      freeHashTable(h);
      rax_free(h);
    }

  freeBestTree(bestT);
  rax_free(bestT);
  freeBestTree(bt);
  rax_free(bt);

#ifdef _TERRACES
  /* free terrace tree list */
  freeBestTree(terrace);
  rax_free(terrace);
#endif

  freeInfoList();
  printLog(tr, adef, FALSE);
  printResult(tr, adef, FALSE);
}
/*
 * Tear down the two hash tables used by the file-locking code.
 *
 * obj_hash is freed with freeLock as the second argument, fd_hash with NULL.
 * NOTE(review): presumably the second argument is a per-entry destructor,
 * i.e. obj_hash owns its entries and fd_hash does not -- confirm against
 * freeHashTable() and the code that fills both tables.
 */
void freeFileLocking(void)
{
    freeHashTable(obj_hash, freeLock);
    freeHashTable(fd_hash, NULL);
}
/* ========================================================================== */ int main(int argc, char* argv[]) { int filenum=1; int initdepth=1; // check command line arguments if (argcheck(argc,argv) == 1){ exit(1); } char *starturl = argv[1]; char *targetdir = argv[2]; int depth = atoi(argv[3]); //initialize our hashtables and url list HashTable *myhashtable; List *mylist; myhashtable=initializeHashTable(); mylist=initializelist(); // init curl curl_global_init(CURL_GLOBAL_ALL); // setup seed page WebPage *startpage = createWebPage(starturl,0); // get seed webpage. If the url is invalid, quit and send an error message. if ( GetWebPage(startpage) == 0 ){ printf("The url that you entered was invalid. Please try again."); free(startpage->html); free(startpage); exit(1); } // write seed file createfile(startpage,targetdir,filenum); filenum++; // add seed page to hashtable insertHashTable(myhashtable,startpage->url); // extract urls from seed page if( depth > 0 ){ parseHTML(startpage,mylist,myhashtable,initdepth); } // while there are urls to crawl while ( mylist->head != NULL ){ // get next url from list WebPage *nextpage = listpop(mylist); int currdepth = nextpage->depth; // get webpage for url // If the url is invalid, quit and free the memory if (GetWebPage(nextpage) != 0 ){ createfile(nextpage,targetdir,filenum); filenum++; // extract urls from webpage if ( currdepth < depth ){ parseHTML(nextpage, mylist, myhashtable, currdepth+1); } } free(nextpage->html); free(nextpage->url); free(nextpage); sleep(SLEEPTIME); } // cleanup curl free(startpage->html); free(startpage); freeHashTable(myhashtable); freelist(mylist); curl_global_cleanup(); return 0; }
/*
 * Compute the relative Robinson-Foulds (RF) distance between one large
 * reference tree and every tree of a collection of smaller trees.
 *
 * The reference tree is read from tree_file into tr, the small trees from
 * bootStrapFile into a locally allocated tree.  For each small tree with at
 * least one bipartition, the subtree of the reference tree induced by the
 * small tree's taxa is reconstructed from a preorder labelling, an Euler
 * tour and range-minimum queries (lowest common ancestors); its
 * bipartitions are then compared with those of the small tree.
 *
 * One "<tree index> <relative RF>" line per analysed tree is written to
 * <workdir>RAxML_RF-Distances.<run_id>; summary statistics go through
 * printBothOpen().
 *
 * Requires adef->mode == PLAUSIBILITY_CHECKER (asserted).
 *
 * NOTE(review): if no small tree is analysed, numberOfTreesAnalyzed is 0 and
 * the two averages printed at the end are the result of a floating-point
 * division by zero.
 */
void plausibilityChecker(tree *tr, analdef *adef)
{
  FILE
    *treeFile,
    *rfFile;

  tree
    *smallTree = (tree *)rax_malloc(sizeof(tree));

  char
    rfFileName[1024];

  int
    numberOfTreesAnalyzed = 0,
    i;

  double
    avgRF = 0.0,
    sumEffectivetime = 0.0;

  /* set up an output file name */
  strcpy(rfFileName, workdir);
  strcat(rfFileName, "RAxML_RF-Distances.");
  strcat(rfFileName, run_id);

  rfFile = myfopen(rfFileName, "wb");

  assert(adef->mode == PLAUSIBILITY_CHECKER);

  /* open the big reference tree file and parse it */
  treeFile = myfopen(tree_file, "r");

  printBothOpen("Parsing reference tree %s\n", tree_file);

  treeReadLen(treeFile, tr, FALSE, TRUE, TRUE, adef, TRUE, FALSE);

  assert(tr->mxtips == tr->ntips);

  /*************************************************************************************/
  /* Preprocessing Step */

  double
    preprocesstime = gettime();

  /* taxonToLabel[2*tr->mxtips - 2];
     Array storing all 2n-2 labels from the preorder traversal:
     (Taxonnumber - 1) -> (Preorderlabel) */
  int
    *taxonToLabel = (int *)rax_malloc((2*tr->mxtips - 2) * sizeof(int)),

    /* taxonHasDeg[2*tr->mxtips - 2]
       Array used to store the degree of every taxon, is needed to extract
       bipartitions from multifurcating trees:
       (Taxonnumber - 1) -> (degree of node(Taxonnumber)) */
    *taxonHasDeg = (int *)rax_calloc((2*tr->mxtips - 2),sizeof(int)),

    /* taxonToReduction[2*tr->mxtips - 2];
       Array used for reducing the bitvector and speeding up extraction:
       (Taxonnumber - 1) -> (0..1 (increment count of taxa appearing in small tree))
       (Taxonnumber - 1) -> (0..1 (increment count of inner nodes appearing in small tree)) */
    *taxonToReduction = (int *)rax_malloc((2*tr->mxtips - 2) * sizeof(int));

  /* newcount is a shared cursor that is reset to 0 before each traversal
     or fill pass below */
  int
    newcount = 0; //counter used for correct traversals

  /* labelToTaxon[2*tr->mxtips - 2];
     is used to translate between preorder label and p->number:
     (Preorderlabel) -> (Taxonnumber) */
  int
    *labelToTaxon = (int *)rax_malloc((2*tr->mxtips - 2) * sizeof(int));

  /* Preorder traversal of the large tree */
  preOrderTraversal(tr->start->back,tr->mxtips, tr->start->number, taxonToLabel, labelToTaxon, &newcount);

  newcount = 0; //counter set to 0 to be now used for the Euler traversal

  /* eulerIndexToLabel[4*tr->mxtips - 5];
     Array storing all 4n-5 preorder labels created during the Euler tour:
     (Eulerindex) -> (Preorderlabel) */
  int*
    eulerIndexToLabel = (int *)rax_malloc((4*tr->mxtips - 5) * sizeof(int));

  /* taxonToEulerIndex[tr->mxtips];
     Stores all indices of the first appearance of a taxon in the Euler tour:
     (Taxonnumber - 1) -> (Index of the Euler tour where Taxonnumber first appears)
     is used for efficient computation of the lowest common ancestor during
     the reconstruction step */
  int*
    taxonToEulerIndex = (int *)rax_malloc((tr->mxtips) * sizeof(int));

  /* Init taxonToEulerIndex and taxonToReduction */
  int
    ix;

  for(ix = 0; ix < tr->mxtips; ++ix)
    taxonToEulerIndex[ix] = -1;

  for(ix = 0; ix < (2*tr->mxtips - 2); ++ix)
    taxonToReduction[ix] = -1;

  /* Euler traversal of the large tree */
  unrootedEulerTour(tr->start->back,tr->mxtips, eulerIndexToLabel, taxonToLabel, &newcount, taxonToEulerIndex);

  /* Creating RMQ data structure for efficient retrieval of LCAs, using
     Johannes Fischer's library rewritten in C.
     Following files: rmq.h,rmqs.c,rmqs.h are included in Makefile.RMQ.gcc */
  RMQ_succinct(eulerIndexToLabel,4*tr->mxtips - 5);

  double
    preprocessendtime = gettime() - preprocesstime;

  /* Preprocessing Step End */
  /*************************************************************************************/

  printBothOpen("The reference tree has %d tips\n", tr->ntips);

  fclose(treeFile);

  /* now see how many small trees we have */
  treeFile = getNumberOfTrees(tr, bootStrapFile, adef);

  checkTreeNumber(tr->numberOfTrees, bootStrapFile);

  /* allocate a data structure for parsing the potentially multi-furcating tree */
  allocateMultifurcations(tr, smallTree);

  /* loop over all small trees */
  for(i = 0; i < tr->numberOfTrees; i++)
    {
      int
        numberOfSplits = readMultifurcatingTree(treeFile, smallTree, adef, TRUE);

      /* trees for which no splits are reported are skipped and only show
         up in the "skipped" count at the end */
      if(numberOfSplits > 0)
        {
          int
            firstTaxon;

          double
            rec_rf,
            maxRF;

          /* progress output for every 100th analysed tree only */
          if(numberOfTreesAnalyzed % 100 == 0)
            printBothOpen("Small tree %d has %d tips and %d bipartitions\n", i, smallTree->ntips, numberOfSplits);

          /* compute the maximum RF distance for computing the relative RF distance later-on */

          /* note that here we need to pay attention, since the RF distance is not normalized
             by 2 * (n-3) but we need to account for the fact that the multifurcating small tree
             will potentially contain less bipartitions.
             Hence the normalization factor is obtained as n-3 + numberOfSplits, where n-3 is the number
             of bipartitions of the pruned down large reference tree for which we know that it is
             bifurcating/strictly binary */
          /* NOTE(review): the code below normalises by 2 * numberOfSplits,
             not by the (n-3) + numberOfSplits described above -- confirm
             which one is intended */
          maxRF = (double)(2 * numberOfSplits);

          /* now get the index of the first taxon of the small tree.
             we will use this to unambiguously store the bipartitions */
          firstTaxon = smallTree->start->number;

          /***********************************************************************************/
          /* Reconstruction Step */

          double
            time_start = gettime();

          /* Init hashtable to store bipartitions of the induced subtree */
          /* using smallTree->ntips instead of smallTree->mxtips yields faster code
             e.g. 120 versus 128 seconds for 20,000 small trees on my laptop */
          hashtable
            *s_hash = initHashTable(smallTree->ntips * 4);

          /* smallTreeTaxa[smallTree->ntips];
             Stores all taxa numbers from smallTree into an array called smallTreeTaxa:
             (Index) -> (Taxonnumber) */
          int*
            smallTreeTaxa = (int *)rax_malloc((smallTree->ntips) * sizeof(int));

          /* counter is set to 0 for correctly extracting taxa of the small tree */
          newcount = 0;

          int
            newcount2 = 0;

          /* seq2[2*smallTree->ntips - 2];
             stores the preorder sequence of the reference small tree:
             (Preorderindex) -> (Taxonnumber) */
          int*
            seq2 = (int *)rax_malloc((2*smallTree->ntips - 2) * sizeof(int));

          /* used to store the vectorLength of the bitvector */
          unsigned int
            vectorLength;

          /* extract all taxa of the small tree and store them into an array,
             also store all counts of taxa and nontaxa in taxonToReduction */
          rec_extractTaxa(smallTreeTaxa, taxonToReduction, smallTree->start, smallTree->mxtips, &newcount, &newcount2);

          rec_extractTaxa(smallTreeTaxa, taxonToReduction, smallTree->start->back, smallTree->mxtips, &newcount, &newcount2);

          /* counter is set to 0 to correctly preorder traverse the small tree */
          newcount = 0;

          /* Preorder traversal of the small tree and save its sequence into seq2 for later
             extracting the bipartitions, it also stores information about the degree of every node */
          rec_preOrderTraversalMulti(smallTree->start->back,smallTree->mxtips, smallTree->start->number, seq2, taxonHasDeg, &newcount);

          /* calculate the bitvector length: ntips / MASK_LENGTH, rounded up */
          if(smallTree->ntips % MASK_LENGTH == 0)
            vectorLength = smallTree->ntips / MASK_LENGTH;
          else
            vectorLength = 1 + (smallTree->ntips / MASK_LENGTH);

          unsigned int
            **bitVectors = rec_initBitVector(smallTree, vectorLength);

          /* store all non trivial bitvectors using a subtree approach for the induced subtree
             and store them into a hashtable, this method was changed for multifurcation */
          rec_extractBipartitionsMulti(bitVectors, seq2, newcount,tr->mxtips, vectorLength, smallTree->ntips,
                                       firstTaxon, s_hash, taxonToReduction, taxonHasDeg, numberOfSplits);

          /* counter is set to 0 to be used for correctly storing all Euler indices */
          newcount = 0;

          /* smallTreeTaxonToEulerIndex[smallTree->ntips];
             Saves all first Euler indices for all taxa appearing in the small tree:
             (Index) -> (Index of the Euler tour where the taxonnumber of the small tree first appears) */
          int*
            smallTreeTaxonToEulerIndex = (int *)rax_malloc((smallTree->ntips) * sizeof(int));

          /* seq[(smallTree->ntips*2) - 1]
             Stores the preorder sequence of the induced small tree */
          int*
            seq = (int *)rax_malloc((2*smallTree->ntips - 1) * sizeof(int));

          /* iterate through all small tree taxa */
          for(ix = 0; ix < smallTree->ntips; ix++)
            {
              int
                taxanumber = smallTreeTaxa[ix];

              /* To create smallTreeTaxonToEulerIndex we filter taxonToEulerIndex for taxa in the small tree */
              smallTreeTaxonToEulerIndex[newcount] = taxonToEulerIndex[taxanumber-1];

              /* Saves all preorder labels of the small tree taxa in seq */
              seq[newcount] = taxonToLabel[taxanumber-1];

              newcount++;
            }

          /* sort the Euler indices to correctly calculate LCAs */
          //quicksort(smallTreeTaxonToEulerIndex,0,newcount - 1);

          qsort(smallTreeTaxonToEulerIndex, newcount, sizeof(int), sortIntegers);

          //printf("newcount2 %i \n", newcount2);

          /* Iterate through all small tree taxa; the LCA labels are appended
             to seq behind the newcount tip labels stored above */
          for(ix = 1; ix < newcount; ix++)
            {
              /* query LCAs using the RMQ data structure */
              seq[newcount - 1 + ix] = eulerIndexToLabel[query(smallTreeTaxonToEulerIndex[ix - 1],smallTreeTaxonToEulerIndex[ix])];

              /* Used for dynamic programming. We save an index for every inner node:
                 For example the reference tree has 3 inner nodes which we save as 0,1,2.
                 Now we calculate for example 5 LCAs to construct the induced subtree, which are also inner nodes.
                 Therefore we mark them as 3,4,5,6,7 */
              taxonToReduction[labelToTaxon[seq[newcount - 1 + ix]] - 1] = newcount2;

              newcount2 += 1;
            }

          /* sort to construct the preorder sequence of the induced subtree;
             the element count equals the 2*ntips - 1 entries allocated for seq */
          //quicksort(seq,0,(2*smallTree->ntips - 2));

          qsort(seq, (2 * smallTree->ntips - 2) + 1, sizeof(int), sortIntegers);

          /* calculates all bipartitions of the reference small tree and counts how many
             bipartitions it shares with the induced small tree */
          int
            rec_bips = rec_findBipartitions(bitVectors, seq,(2*smallTree->ntips - 1), labelToTaxon, tr->mxtips,
                                            vectorLength, smallTree->ntips, firstTaxon, s_hash, taxonToReduction);

          /* Reconstruction Step End */
          /***********************************************************************************/

          double
            effectivetime = gettime() - time_start;

          /*
            if(numberOfTreesAnalyzed % 100 == 0)
              printBothOpen("Reconstruction time: %.10f secs\n\n", effectivetime);
          */

          /* compute the relative RF */
          rec_rf = (double)(2 * (numberOfSplits - rec_bips)) / maxRF;

          assert(numberOfSplits >= rec_bips);

          avgRF += rec_rf;
          sumEffectivetime += effectivetime;

          if(numberOfTreesAnalyzed % 100 == 0)
            printBothOpen("Relative RF tree %d: %f\n\n", i, rec_rf);

          fprintf(rfFile, "%d %f\n", i, rec_rf);

          /* free masks and hash table for this iteration */
          rec_freeBitVector(smallTree, bitVectors);
          rax_free(bitVectors);

          freeHashTable(s_hash);
          rax_free(s_hash);

          rax_free(smallTreeTaxa);
          rax_free(seq);
          rax_free(seq2);
          rax_free(smallTreeTaxonToEulerIndex);

          numberOfTreesAnalyzed++;
        }
    }

  printBothOpen("Number of small trees skipped: %d\n\n", tr->numberOfTrees - numberOfTreesAnalyzed);

  printBothOpen("Average RF distance %f\n\n", avgRF / (double)numberOfTreesAnalyzed);

  printBothOpen("Large Tree: %i, Number of SmallTrees analyzed: %i \n\n", tr->mxtips, numberOfTreesAnalyzed);

  printBothOpen("Total execution time: %f secs\n\n", gettime() - masterTime);

  printBothOpen("File containing all %d pair-wise RF distances written to file %s\n\n", numberOfTreesAnalyzed, rfFileName);

  printBothOpen("execution stats:\n\n");
  printBothOpen("Accumulated time Effective algorithm: %.5f sec \n", sumEffectivetime);
  printBothOpen("Average time for effective: %.10f sec \n",sumEffectivetime / (double)numberOfTreesAnalyzed);
  printBothOpen("Preprocessingtime: %0.5f sec \n\n", preprocessendtime);

  fclose(treeFile);
  fclose(rfFile);

  /* free the data structure used for parsing the potentially multi-furcating tree */
  freeMultifurcations(smallTree);
  rax_free(smallTree);

  /* free the lookup tables built during preprocessing */
  rax_free(taxonToLabel);
  rax_free(taxonToEulerIndex);
  rax_free(labelToTaxon);
  rax_free(eulerIndexToLabel);
  rax_free(taxonToReduction);
  rax_free(taxonHasDeg);
}