void process_data_samples(uint64_t time) { if(!data_events) return; struct data_ev *event = pqueue_peek(data_events); while(event && event->rdt <= time) { /*printf("%s:%lu:%p:%s\n", event->type==MALLOC?"malloc":"free", event->rdt, (void*)event->free.begin, event->type==MALLOC?event->malloc.info:"");*/ if(event->type==MALLOC) { void * data = rbtree_lookup(active_data, (void*)event->free.begin, pointer_cmp_reverse); if(data) { //printf("#Variable inserted twice ?!\n"); ((struct data_ev *)data)->malloc.end = event->malloc.end; data_fail++; } else { rbtree_insert(active_data, (void*)event->malloc.begin, event, pointer_cmp_reverse); data_success++; } } else if(event->type==FREE) { void * data = rbtree_lookup(active_data, (void*)event->free.begin, pointer_cmp_reverse); if(!data) { //printf("#Free of unknown pointer!\n"); data_fail++; } else { rbtree_delete(active_data, (void*)event->free.begin, pointer_cmp_reverse); data_success++; } } processed_data_samples++; pqueue_pop(data_events); event = pqueue_peek(data_events); } }
static bool _predicate (void) { int i; KeyValuePair_t n; struct rbtree tree; KeyValuePair_t *node; struct rbtree_node *result; rbtree_init (&tree, _compareFn, 0); for (i = 0; i < TreeSize; i++) { node = malloc (sizeof (KeyValuePair_t)); node->key = i; node->val = TreeSize + i; rbtree_insert ((struct rbtree_node *) &node->node, &tree); } // Lookup the nodes. for (i = 0; i < TreeSize; i++) { KeyValuePair_t *kvResult; n.key = i; kvResult = rbtree_container_of (rbtree_lookup ((struct rbtree_node *) &n.node, &tree), KeyValuePair_t, node); if (kvResult->key != i || kvResult->val != TreeSize + i) { return false; } } // This lookup should fail. n.key = TreeSize; result = rbtree_lookup ((struct rbtree_node *) &n.node, &tree); if (result != NULL) { return false; } //iterate (rbtree_first(&tree), iterateFn); result = rbtree_first(&tree); while (result) { KeyValuePair_t *kvResult = rbtree_container_of (result, KeyValuePair_t, node); struct rbtree_node *n = result; result = rbtree_next (result); rbtree_remove (n, &tree); free (kvResult); } // This lookup should fail because we just cleared the tree. n.key = TreeSize; n.key = 0; result = rbtree_lookup ((struct rbtree_node *) &n.node, &tree); if (result != NULL) { return false; } return true; }
/************************************************************************** * Callback invoked when matching an opening pattern tag for a CISML file * of a secondary motif database. It checks that the motif should be scored, * clears out the list of sequence matches and stores the current motif. **************************************************************************/ void motif_secondary(void *ctx, char *accession, char *name, char *db, char *lsId, double *pvalue, double *score) { SECONDARY_LOADER_T *loader = (SECONDARY_LOADER_T*)ctx; SECONDARY_KEY_T key; RBNODE_T *node; PSSM_T *pssm; int i, seq_count; key.db_id = loader->db_id; key.motif_id = accession; node = rbtree_lookup(loader->secondary_motifs, &key, FALSE, NULL); if (node != NULL) { loader->secondary_motif = (SECONDARY_MOTIF_T*)rbtree_value(node); if (!(loader->secondary_motif->loaded)) { seq_count = rbtree_size(loader->sequences); for (i = 0; i < seq_count; ++i) loader->secondary_matches[i] = 0; if (loader->score_threshold_or_multiplier < 0 && loader->score_threshold_or_multiplier >= -1) { pssm = build_motif_pssm(loader->secondary_motif->motif, loader->background, loader->background, NULL, 0, PSSM_RANGE, 0, FALSE); loader->calculated_score_threshold = pssm_best_match_score(pssm) * (-loader->score_threshold_or_multiplier); free_pssm(pssm); } } else { die("Already seen CISML data for this motif!"); } } else { loader->secondary_motif = NULL; } }
/*
 * Smoke test: fills a tree with SIZE pseudo-random keys in [0, 999] and,
 * for every insert that reports success, checks that looking the key up
 * returns the element that was just stored. The tree is then handed to
 * print_tree() and to tree2dot() with the file name "tree.dot".
 *
 * srand() is never called, so rand() produces the same sequence on every
 * run.
 *
 * Returns 0 on completion, -1 when the tree or the key array cannot be
 * allocated.
 */
int main()
{
	int ret = 0;
	int i;
	ULL *array;
	struct rbtree *tree = rbtree_init(compare);

	if (!tree) {
		fprintf(stderr,"malloc tree failed\n");
		return -1;
	}

	array = malloc(SIZE*sizeof(ULL ));
	if (!array) {
		fprintf(stderr,"malloc failed\n");
		return -1;
	}

	i = 0;
	while (i < SIZE) {
		void *data;

		array[i] = rand()%1000;
		/* -1: node allocation failed, -2: a node with this key exists */
		ret = rbtree_insert(tree,&array[i],&array[i]);
		data = rbtree_lookup(tree,&array[i]);
		/* Only a fresh insert is guaranteed to map to this very slot. */
		if (ret == 0)
			assert(data == &array[i]);
		i++;
	}

	print_tree(tree);
	tree2dot(tree,"tree.dot");
	return 0;
}
/************************************************************************************************* * PARALLEL ADD / SEARCH FUNCTIONS *************************************************************************************************/ void vOperationStream_parallel(int *iId) { int i = 0; int j; int iAmountOpsPerThread; int iThreadId = (intptr_t) iId; iAmountOpsPerThread = iOperations / iNumThreads; while(i < iAmountOpsPerThread) { for(j = 0; j < iNumInsert; j++) { iValue = rand(); iKey = rand(); pthread_rwlock_wrlock(&lock); // lock tree - exclusive lock rbtree_insert(RBTree, (void*)iKey, (void*)iValue, int_compare); pthread_rwlock_unlock(&lock); // unlock tree - exclusive lock i++; } for(j = 0; j < iNumSearch; j++) { iKey = rand(); pthread_rwlock_rdlock(&lock); // lock - shared lock rbtree_lookup(RBTree, (void*)iKey, int_compare); pthread_rwlock_unlock(&lock); // unlock - shared lock i++; } } }
/*
 * Returns the thread stored under TID in the thread table of the process
 * returned by find_process(PID). A TID that is not in the table trips the
 * oassert below.
 */
thread *find_thread (DWORD PID, DWORD TID)
{
    process *owner=find_process(PID);
    void *key=(void*)TID;
    thread *t;

    t=(thread*)rbtree_lookup (owner->threads, key);
    oassert (t!=NULL && "TID not found in threads table");

    return t;
}
/*
 * Registers symbol `name` at address `a` in the symbol table of the module
 * given by params->m.
 *
 * Before the symbol is stored:
 *   - for PE exports located in an executable section, "module!name" is
 *     matched against one_time_int3_bp_re and, on a match, a one-time INT3
 *     breakpoint is requested at `a`;
 *   - when dump_seh is set and the name ends with "security_cookie", the
 *     address is recorded as the module's security cookie address;
 *   - the symbol is logged when "module!name" matches dump_all_symbols_re,
 *     or, if that expression is not set, when dump_all_symbols is set.
 *
 * Symbols sharing an address form a list: the new symbol is linked in front
 * of the one currently stored under `a` and then stored under `a` itself.
 */
void add_symbol (address a, char *name, add_symbol_params *params)
{
    module *mod=params->m;
    rbtree *symtbl=mod->symbols;
    oassert(symtbl && "symbols=NULL in module");
    MemoryCache *mc=params->mc;

    if (one_time_int3_bp_re && params->t==SYM_TYPE_PE_EXPORT && module_adr_in_executable_section (mod, a))
    {
        strbuf qualified=STRBUF_INIT;

        strbuf_addstr (&qualified, get_module_name(mod));
        strbuf_addc (&qualified, '!');
        strbuf_addstr (&qualified, name);

        if (regexec (one_time_int3_bp_re, qualified.buf, 0, NULL, 0)==0)
            set_onetime_INT3_BP(a, params->p, mod, name, mc);

        strbuf_deinit (&qualified);
    }

    if (dump_seh && string_is_ends_with (name, "security_cookie"))
    {
        mod->security_cookie_adr=a;
        mod->security_cookie_adr_known=true;
        if (symbol_c_debug)
            L ("%s() got address of security_cookie (0x" PRI_REG_HEX ") for %s!%s\n", __FUNCTION__, a, get_module_name(mod), name);
    }

    /* With a filter expression only matching symbols are reported;
       without one the global dump_all_symbols switch decides. */
    bool report;
    if (dump_all_symbols_re)
    {
        strbuf qualified=STRBUF_INIT;

        strbuf_addstr (&qualified, get_module_name(mod));
        strbuf_addc (&qualified, '!');
        strbuf_addstr (&qualified, name);
        report=(regexec (dump_all_symbols_re, qualified.buf, 0, NULL, 0)==0);
        strbuf_deinit (&qualified);
    }
    else
        report=dump_all_symbols;

    if (report)
    {
        dump_PID_if_need(params->p);
        L("New symbol. Module=[%s], address=[0x" PRI_ADR_HEX "], name=[%s]\n", get_module_name(mod), a, name);
    }

    symbol *fresh=create_symbol(params->t, name);
    symbol *head=(symbol*)rbtree_lookup(symtbl, (void*)a);

    if (head)
        fresh->next=head; // insert at beginning of list

    rbtree_insert(symtbl, (void*)a, (void*)fresh);
}
/*
 * Puts `run` on the free list, merging it with an adjacent free run when
 * one is found.
 *
 * The neighbouring addresses are formed with char pointer arithmetic, so
 * the full pointer width is kept on targets where a pointer is wider than
 * an int.
 */
static void mmfreerun_add(char *run)
{
	char *buddy;
	int run_size = mmrun_get_size(run);
	int free_size = run_size;

	/* See if the run can be appended to an adjacent run on the free list */
	buddy = rbtree_lookup(run - 1, free_runs);
	if (buddy) {
		/* Increase the size of the run on the free list */
		mmrun_set_largesize(mmrun_get_largesize(buddy) + run_size, buddy);
		return;
	}

	/*
	 * See if the run can be expanded with an adjacent run
	 * from the free list
	 */
	buddy = rbtree_lookup(run + run_size + 1, free_runs);
	if (buddy) {
		/*
		 * Remove the old run and add its size to the new run.
		 * The size is read before the run is unlinked.
		 */
		free_size += mmrun_get_largesize(buddy);
		rbtree_remove(buddy, &free_runs);
	}

	/* Add the (possibly enlarged) run to the free list */
	mmrun_init(0, 0, run);
	mmrun_set_largesize(free_size, run);
	rbtree_insert(run, &free_runs);
}
static char *mm_findnodetree(char *run, char **out_tree, int *out_bin_index) { char *node; /* See if the run belongs to any bin */ int i; for (i = 0; i < BIN_COUNT; ++i) { if (bins[i] != NULL) { node = rbtree_lookup(run, bins[i]); if (node) { if (out_tree) { *out_tree = bins[i]; } if (out_bin_index) { *out_bin_index = i; } return node; } } } /* See if the run is a large allocation */ if (large_allocations && (node = rbtree_lookup(run, large_allocations))) { if (out_tree) { *out_tree = large_allocations; } if (out_bin_index) { *out_bin_index = -1; } return node; } /* The run doesn't belong to any tree */ if (out_tree) { *out_tree = NULL; } if (out_bin_index) { *out_bin_index = -1; } return NULL; }
void migrate_parse(struct s* s) { if(!s->ibs_dc_phys) return; void *addr = (void*)((s->ibs_dc_linear / PAGE_SIZE / CLUSTER ) * (PAGE_SIZE * CLUSTER)); struct page* v = rbtree_lookup(migrate_tree, addr, pointer_cmp); if(!v) { v = calloc(1, sizeof(*v)); v->addr = addr; v->accesses = calloc(1, sizeof(*v->accesses)*max_node); rbtree_insert(migrate_tree, addr, v, pointer_cmp); } v->accesses[cpu_to_node(s->cpu)]++; pid = get_pid(s); }
/************************************************************************** * Callback invoked when matching an opening scanned_sequence tag in the * CISML file for the primary motif. Checks if the sequence is one we are * scoring and if so records it as the current sequence as well as clearing * the hits list. **************************************************************************/ void sequence_primary(void *ctx, char *accession, char *name, char *db, char *lsId, double *score, double *pvalue, long *length) { PRIMARY_LOADER_T *loader = (PRIMARY_LOADER_T*)ctx; if (!(loader->in_motif)) { loader->current_sequence = NULL; } else { RBNODE_T *node = rbtree_lookup(loader->sequences, name, FALSE, NULL); if (node) { loader->current_sequence = rbtree_value(node); if (loader->current_sequence->primary_match) die("Already seen this sequence! We can't process this information " "because the scoring information from the previous sighting has already been discarded.\n"); loader->current_score = 0; // reset the current score loader->hit_count = 0; //reset the hit count } else { loader->current_sequence = NULL; } } }
/*
 * Creates a thread record for TID and stores it in the thread table of
 * process `p`. The record keeps the thread handle, the start address and
 * the TIB address. A TID that is already in the table trips the oassert
 * below. Entry and exit are logged when thread_c_debug is set.
 */
void add_thread (process *p, DWORD TID, HANDLE THDL, address start, address TIB)
{
    thread *rec=DCALLOC (thread, 1, "thread");

    if (thread_c_debug)
        L ("%s() begin\n", __func__);

    rec->start=start;
    rec->TIB=TIB;
    rec->THDL=THDL;
    rec->TID=TID;

    oassert (rbtree_lookup(p->threads, (void*)TID)==NULL && "this TID is already in table");
    rbtree_insert (p->threads, (void*)TID, rec);

    if (thread_c_debug)
        L ("%s() end\n", __func__);
}
void top_obj_parse(struct s* s) { struct symbol *sym = get_function(s); if(strstr(sym->function_name, "plt")) { nb_plt++; } else { nb_non_plt++; struct symbol *ob = get_object(s); struct dyn_lib* ob3 = sample_to_mmap(s); char *obj = NULL; if(ob) obj = ob->object_name; if(!obj && strstr(sym->function_name, "@plt")) obj = sym->function_name; if(!obj && !strcmp(sym->function_name, "[vdso]")) obj = sym->function_name; if(!obj && ob3) obj = ob3->name; struct value *value = rbtree_lookup(r, obj, cmp); if(!value) { value = calloc(1, sizeof(*value)); value->from_accesses = calloc(max_node, sizeof(*value->from_accesses)); value->to_accesses = calloc(max_node, sizeof(*value->to_accesses)); rbtree_insert(r, obj, value, cmp); } value->accesses++; value->dist_accesses += is_distant(s); value->from_accesses[cpu_to_node(s->cpu)]++; value->to_accesses[get_addr_node(s)]++; if(ob) { value->dist_by_allocator += (is_distant(s) && (get_tid(s) == ob->allocator_tid)); value->dist_by_allocator_remote_cpu += (is_distant(s) && (get_tid(s) == ob->allocator_tid) && (ob->allocator_cpu != s->cpu)); value->dist_by_allocator_alloc_cpu += (is_distant(s) && (get_tid(s) == ob->allocator_tid) && (ob->allocator_cpu == s->cpu)); value->dist_for_obj += (is_distant(s)); value->by_allocator += ((get_tid(s) == ob->allocator_tid)); value->by_everybody += ((get_tid(s) != ob->allocator_tid)); value->by_allocator_before_everybody += (value->by_everybody == 0); value->uid = ob->uid; } nb_total_access++; } }
/************************************************************************** * Callback invoked when matching an opening scanned_sequence tag for a * CISML file of a secondary motif database. It calcualtes and caches the * left and right bounds of the primary motif and stores the current * sequence. **************************************************************************/ void sequence_secondary(void *ctx, char *accession, char *name, char *db, char *lsId, double *score, double *pvalue, long *length) { SECONDARY_LOADER_T *loader = (SECONDARY_LOADER_T*)ctx; RBNODE_T *node; int pmatch; if (loader->secondary_motif == NULL) return; node = rbtree_lookup(loader->sequences, accession, FALSE, NULL); if (node != NULL) { loader->current_sequence = (SEQUENCE_T*)rbtree_value(node); pmatch = loader->current_sequence->primary_match; loader->primary_lpos = (pmatch < 0 ? -pmatch : pmatch); loader->primary_rpos = loader->primary_lpos + get_motif_length(loader->primary_motif) - 1; if (loader->secondary_matches[loader->current_sequence->index] != 0) { die("Already seen this sequence!"); } loader->secondary_score = 0; loader->hit_count = 0; } else { loader->current_sequence = NULL; } }
/*
 * Detects parsing loops.
 *
 * A parsing step has to consume input or move to a state that has not been
 * used at the current position. The states used since input was last
 * consumed are kept in jsonrd->prior_states; the set is emptied whenever a
 * step consumes input.
 *
 * Returns true when a step consumed nothing and ended in a state that was
 * already used at this position, false otherwise.
 */
static bool loop_check(JSONRD_T *jsonrd, PS_EN prior_state, int consumed) {
  RBTREE_T *seen = jsonrd->prior_states;
  PS_EN new_state;
  bool is_new_state;

  if (consumed != 0) {
    // progress was made, so every state may be used again
    rbtree_clear(seen);
    return false;
  }

  new_state = jsonrd->state;
  if (rbtree_size(seen) != 0) {
    // the lookup adds the state when it has not been recorded yet
    rbtree_lookup(seen, &new_state, true, &is_new_state);
    return !is_new_state;
  }

  // first step without progress: both states involved get recorded
  if (prior_state == new_state) return true;
  rbtree_put(seen, &prior_state, NULL);
  rbtree_put(seen, &new_state, NULL);
  return false;
}
/*
 * Session cache retrieve callback: returns a copy of the value stored
 * under `key` in the tree passed as `data`.
 *
 * The copy is allocated with gnutls_malloc(); presumably the caller is
 * responsible for releasing it -- confirm against the GnuTLS callback
 * contract.
 *
 * Returns { NULL, 0 } when the key is unknown or the copy cannot be
 * allocated, so the size never describes a buffer that does not exist.
 */
static gnutls_datum_t session_cache_retrieve (void *data, gnutls_datum_t key)
{
	rbtree tree = data;
	gnutls_datum_t res = { NULL, 0 };
	struct session_cache *cache = rbtree_lookup(tree, &key);

	if (cache == NULL)
		return res;

	res.data = gnutls_malloc (cache->value.size);
	if (res.data == NULL)
		return res;

	memcpy(res.data, cache->value.data, cache->value.size);
	/* The size is published only once the buffer is known to exist. */
	res.size = cache->value.size;

	return res;
}
// Remember a pointer, if necessary. static void _maybeRememberPointer(EtnEncoder *e, EtnValue v, bool rememberPointer) { if (rememberPointer == true && v.data != e->topLevelPointer) { AddrToIndex query; query.ptr = v.data; struct rbtree_node *n = rbtree_lookup(&query.node, &e->addrToIndex); if (n == NULL) { AddrToIndex *node = malloc (sizeof (AddrToIndex)); ASSERT (node); node->ptr = v.data; node->index = e->index; rbtree_insert(&node->node, &e->addrToIndex); debugXPrint(typesDebug, "Remember pointer/index: $[pointer]/$[uint]\n", node->ptr, node->index); } } }
// Encode a pointer. static Status _encodePtr(EtnEncoder *e, EtnValue v, bool rememberPointer, EtnLength *length) { ASSERT (e); ASSERT (v.type); ASSERT (v.data); ASSERT (length); Status status; EtnLength index = 0; _maybeRememberPointer(e, v, rememberPointer); e->index++; if (*(void **)v.data == 0) { // NULL. status = encoderMap[EtnKindUint8].encode (e, EtnToValue(&PtrEncodingType, &pnil), NoRememberPointer, length); debugXPrint(typesDebug, "Encoded nil pointer\n"); return status; } // See elsewhere for discussion of e->topLevelPointer. // Extra logic is due to a performance optimization. if (*(void **)v.data != e->topLevelPointer) { AddrToIndex query = { .ptr = *(void **)v.data }; struct rbtree_node *tr = rbtree_lookup(&query.node, &e->addrToIndex); if (tr) { index = rbtree_container_of(tr, AddrToIndex, node)->index; } } if ((*(void **)v.data == e->topLevelPointer) || index) // In former case, index is zero. // Otherwise index == 0 implies not yet seen. { // Previously seen; encode index. status = encoderMap[EtnKindUint8].encode (e, EtnToValue(&PtrEncodingType, &pidx), NoRememberPointer, length); if (StatusOk != status) { return status; } status = _encodeLength(e, index); if (StatusOk != status) { return status; } *length += sizeof (EtnLength); debugXPrint(typesDebug, "Encoded pointer $[pointer] by index $[uint]\n", *(void **) v.data, index); } else { // New; encode inline. 
debugXPrint(typesDebug, "Encoding pointer $[pointer] by value\n", *(void **) v.data); status = encoderMap[EtnKindUint8].encode (e, EtnToValue(&PtrEncodingType, &pval), RememberPointer, length); if (StatusOk != status) { return status; } EtnIndirectType *c = (EtnIndirectType *)v.type; if (c->elem->kind >= EtnKindInvalid) { return StatusFail; } EtnLength _length; status = encoderMap[c->elem->kind].encode (e, EtnToValue(c->elem, *(void **)v.data), RememberPointer, &_length); if (StatusOk != status) { return status; } *length += _length; debugXPrint(typesDebug, "Encoded pointer $[pointer] by value\n", *(void **) v.data); } return status; }
int main() { int i; rbtree t = NULL; t = rbtree_create(); for(i=0; i<20; i++) { int x = rand() % 10000; int y = rand() % 10000; #ifdef TRACE print_tree(t); printf("Inserting %d -> %d\n\n", x, y); #endif rbtree_insert(t, (void*)x, (void*)y, compare_int); assert(rbtree_lookup(t, (void*)x, compare_int) == (void*)y); } print_tree(t); puts(""); // TODO: memory leak! free(t); t = rbtree_create(); for(i=0; i<20; i++) { int x = i; int y = i; #ifdef TRACE print_tree(t); printf("Inserting %d -> %d\n\n", x, y); #endif rbtree_insert(t, (void*)x, (void*)y, compare_int); assert(rbtree_lookup(t, (void*)x, compare_int) == (void*)y); } print_tree(t); puts(""); // TODO: memory leak! free(t); t = rbtree_create(); for(i=0; i<20; i++) { int x = 19 - i; int y = 19 - i; #ifdef TRACE print_tree(t); printf("Inserting %d -> %d\n\n", x, y); #endif rbtree_insert(t, (void*)x, (void*)y, compare_int); assert(rbtree_lookup(t, (void*)x, compare_int) == (void*)y); } print_tree(t); puts(""); #if 0 for(i=0; i<60000; i++) { int x = rand() % 10000; #ifdef TRACE print_tree(t); printf("Deleting key %d\n\n", x); #endif rbtree_delete(t, (void*)x, compare_int); } #endif return 0; }
/*
 * Looks `desc` up in the tree rooted at group->head, using
 * wres_event_compare to order the entries, and returns whatever the tree
 * lookup yields.
 */
static inline wres_event_t *wres_event_lookup(wres_event_group_t *group, wres_event_desc_t *desc)
{
	void *key = (void *)desc;
	wres_event_t *event = rbtree_lookup(group->head, key, wres_event_compare);

	return event;
}
/************************************************************************* * Entry point for ama *************************************************************************/ int main(int argc, char *argv[]) { int max_seq_length = MAX_SEQ; STRING_LIST_T* selected_motifs = NULL; double pseudocount = 0.01; int output_format = CISML_FORMAT; program_name = "ama"; int scoring = AVG_ODDS; BOOLEAN_T pvalues = FALSE; BOOLEAN_T normalize_scores = FALSE; BOOLEAN_T combine_duplicates = FALSE; int num_gc_bins = 1; int sdbg_order = -1; // don't use sequence background BOOLEAN_T scan_both_strands = TRUE; ARRAY_T* pos_bg_freqs = NULL; ARRAY_T* rev_bg_freqs = NULL; clock_t c0, c1; /* measuring cpu_time */ CISML_T *cisml; char * out_dir = NULL; BOOLEAN_T clobber = FALSE; int i; int last = 0; ALPH_T alph = INVALID_ALPH; /********************************************** * COMMAND LINE PROCESSING **********************************************/ const int num_options = 16; cmdoption const motif_scan_options[] = { { "max-seq-length", REQUIRED_VALUE }, { "motif", REQUIRED_VALUE }, { "motif-pseudo", REQUIRED_VALUE }, { "rma", NO_VALUE }, { "pvalues", NO_VALUE }, { "sdbg", REQUIRED_VALUE }, { "norc", NO_VALUE }, { "cs", NO_VALUE }, { "o-format", REQUIRED_VALUE }, { "o", REQUIRED_VALUE }, { "oc", REQUIRED_VALUE }, { "scoring", REQUIRED_VALUE }, { "verbosity", REQUIRED_VALUE }, { "gcbins", REQUIRED_VALUE }, { "last", REQUIRED_VALUE }, { "version", NO_VALUE } }; int option_index = 0; // Define the usage message. 
char usage[] = "USAGE: ama [options] <motif file> <sequence file> [<background file>]\n" "\n" " Options:\n" " --sdbg <order>\t\t\tUse Markov background model of\n" " \t\t\t\t\torder <order> derived from the sequence\n" " \t\t\t\t\tto compute its likelihood ratios.\n" " \t\t\t\t\tOverrides --pvalues, --gcbins and --rma;\n" " \t\t\t\t\t<background file> is required unless\n" " \t\t\t\t\t--sdbg is given.\n" " --motif <id>\t\t\tUse only the motif identified by <id>.\n" " \t\t\t\t\tThis option may be repeated.\n" " --motif-pseudo <float>\t\tThe value <float> times the background\n" " \t\t\t\t\tfrequency is added to the count of each\n" " \t\t\t\t\tletter when creating the likelihood \n" " \t\t\t\t\tratio matrix (default: %g).\n" " --norc\t\t\t\tDisables the scanning of the reverse\n" " \t\t\t\t\tcomplement strand.\n" " --scoring [avg-odds|max-odds]\tIndicates whether the average or \n" " \t\t\t\t\tthe maximum odds should be calculated\n" " \t\t\t\t\t(default: avg-odds)\n" " --rma\t\t\t\tScale motif scores to the range 0-1.\n" " \t\t\t\t\t(Relative Motif Affinity).\n" " \t\t\t\t\tMotif scores are scaled by the maximum\n" " \t\t\t\t\tscore achievable by that PWM. (default:\n" " \t\t\t\t\tmotif scores are not normalized)\n" " --pvalues\t\t\t\tPrint p-value of avg-odds score in cisml\n" " \t\t\t\t\toutput. Ignored for max-odds scoring.\n" " \t\t\t\t\t(default: p-values are not printed)\n" " --gcbins <bins>\t\t\tCompensate p-values for GC content of\n" " \t\t\t\t\teach sequence using given number of \n" " \t\t\t\t\tGC range bins. 
Recommended bins: 41.\n" " \t\t\t\t\t(default: p-values are based on\n" " \t\t\t\t\tfrequencies in background file)\n" " --cs\t\t\t\tEnable combining sequences with same\n" " \t\t\t\t\tidentifier by taking the average score\n" " \t\t\t\t\tand the Sidac corrected p-value.\n" " --o-format [gff|cisml]\t\tOutput file format (default: cisml)\n" " \t\t\t\t\tignored if --o or --oc option used\n" " --o <directory>\t\t\tOutput all available formats to\n" " \t\t\t\t\t<directory>; give up if <directory>\n" " \t\t\t\t\texists\n" " --oc <directory>\t\t\tOutput all available formats to\n" " \t\t\t\t\t<directory>; if <directory> exists\n" " \t\t\t\t\toverwrite contents\n" " --verbosity [1|2|3|4]\t\tControls amount of screen output\n" " \t\t\t\t\t(default: %d)\n" " --max-seq-length <int>\t\tSet the maximum length allowed for \n" " \t\t\t\t\tinput sequences. (default: %d)\n" " --last <int>\t\t\tUse only scores of (up to) last <n>\n" " \t\t\t\t\tsequence positions to compute AMA.\n" " --version \t\t\tPrint version and exit.\n" "\n"; // Parse the command line. if (simple_setopt(argc, argv, num_options, motif_scan_options) != NO_ERROR) { die("Error processing command line options: option name too long.\n"); } BOOLEAN_T setoutputformat = FALSE; BOOLEAN_T setoutputdirectory = FALSE; while (TRUE) { int c = 0; char* option_name = NULL; char* option_value = NULL; const char * message = NULL; // Read the next option, and break if we're done. 
c = simple_getopt(&option_name, &option_value, &option_index); if (c == 0) { break; } else if (c < 0) { (void) simple_getopterror(&message); die("Error processing command line options (%s).\n", message); } else if (strcmp(option_name, "max-seq-length") == 0) { max_seq_length = atoi(option_value); } else if (strcmp(option_name, "norc") == 0) { scan_both_strands = FALSE; } else if (strcmp(option_name, "cs") == 0) { combine_duplicates = TRUE; } else if (strcmp(option_name, "motif") == 0) { if (selected_motifs == NULL) { selected_motifs = new_string_list(); } add_string(option_value, selected_motifs); } else if (strcmp(option_name, "motif-pseudo") == 0) { pseudocount = atof(option_value); } else if (strcmp(option_name, "o-format") == 0) { if (setoutputdirectory) { if (verbosity >= NORMAL_VERBOSE) fprintf(stderr, "output directory specified, ignoring --o-format\n"); } else { setoutputformat = TRUE; if (strcmp(option_value, "gff") == 0) output_format = GFF_FORMAT; else if (strcmp(option_value, "cisml") == 0) output_format = CISML_FORMAT; else { if (verbosity >= NORMAL_VERBOSE) fprintf(stderr, "Output format not known. 
Using standard instead (cisML).\n"); output_format = CISML_FORMAT; } } } else if (strcmp(option_name, "o") == 0 || strcmp(option_name, "oc") == 0) { setoutputdirectory = TRUE; if (setoutputformat) { if (verbosity >= NORMAL_VERBOSE) fprintf(stderr, "output directory specified, ignoring --o-format\n"); } clobber = strcmp(option_name, "oc") == 0; out_dir = (char*) malloc (sizeof(char)*(strlen(option_value)+1)); strcpy(out_dir, option_value); output_format = DIRECTORY_FORMAT; } else if (strcmp(option_name, "verbosity") == 0) { verbosity = atoi(option_value); } else if (strcmp(option_name, "scoring") == 0) { if (strcmp(option_value, "max-odds") == 0) scoring = MAX_ODDS; else if (strcmp(option_value, "avg-odds") == 0) scoring = AVG_ODDS; else if (strcmp(option_value, "sum-odds") == 0) scoring = SUM_ODDS; else die("Specified scoring scheme not known.\n", message); } else if (strcmp(option_name, "pvalues") == 0) { pvalues = TRUE; } else if (strcmp(option_name, "rma") == 0) { normalize_scores = TRUE; fprintf(stderr, "Normalizing motif scores using RMA method.\n"); } else if (strcmp(option_name, "gcbins") == 0) { num_gc_bins = atoi(option_value); pvalues = TRUE; if (num_gc_bins <= 1) die("Number of bins in --gcbins must be greater than 1.\n", message); } else if (strcmp(option_name, "sdbg") == 0) { sdbg_order = atoi(option_value); // >=0 means use sequence bkg } else if (strcmp(option_name, "last") == 0) { int i = 0; if (option_value[0] == '-') ++i; while (option_value[i] != '\0') { if (!isdigit(option_value[i])) { die("Specified parameter 'last' contains non-numeric characters.\n"); } ++i; } last = atoi(option_value); if (errno != 0) { die("Specified parameter 'last' could not be parsed as a number as:\n%s\n",strerror(errno)); } if (last < 0) { die("Specified parameter 'last' had negative value (%d) when only postive or zero values are allowed \n", last); } } else if (strcmp(option_name, "version") == 0) { fprintf(stdout, VERSION "\n"); exit(EXIT_SUCCESS); } } // --sdbg 
overrides --pvalues and --gcbins and --rma int req_args = 3; if (sdbg_order >= 0) { pvalues = FALSE; normalize_scores = FALSE; num_gc_bins = 1; req_args = 2; } // Check all required arguments given if (sdbg_order >= 0 && argc > option_index + req_args) { die("<background file> cannot be given together with --sdbg.\n"); } else if (argc != option_index + req_args) { fprintf(stderr, usage, pseudocount, verbosity, max_seq_length); exit(EXIT_FAILURE); } // Get required arguments. char* motif_filename = argv[option_index]; option_index++; char* fasta_filename = argv[option_index]; option_index++; char* bg_filename; if (req_args == 3) { // required unless --sdbg given bg_filename = argv[option_index]; option_index++; } else { bg_filename = "--uniform--"; // So PSSMs will use uniform background; // we can multiply them out later. } // measure time c0 = clock(); // Set up hash tables for computing reverse complement if doing --sdbg if (sdbg_order >= 0) setup_hash_alph(DNAB); // Create cisml data structure for recording results cisml = allocate_cisml(program_name, motif_filename, fasta_filename); set_cisml_background_file(cisml, bg_filename); /********************************************** * Read the motifs and background model. 
**********************************************/ int num_motifs = 0; MREAD_T *mread; ARRAYLST_T *motifs; PSSM_PAIR_T** pssm_pairs; // note pssm_pairs is an array of pointers //this reads any meme file, xml, txt and html mread = mread_create(motif_filename, OPEN_MFILE); mread_set_bg_source(mread, bg_filename); mread_set_pseudocount(mread, pseudocount); motifs = mread_load(mread, NULL); alph = mread_get_alphabet(mread); pos_bg_freqs = mread_get_background(mread); mread_destroy(mread); num_motifs = arraylst_size(motifs); // allocate memory for PSSM pairs pssm_pairs = (PSSM_PAIR_T**)mm_malloc(sizeof(PSSM_PAIR_T*) * num_motifs); if (verbosity >= NORMAL_VERBOSE) fprintf(stderr, "Number of motifs in file %d.\n", num_motifs); // make a CISML pattern to hold scores for each motif PATTERN_T** patterns = NULL; Resize(patterns, num_motifs, PATTERN_T*); int motif_index; for (motif_index = 0; motif_index < num_motifs; motif_index++) { MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs); patterns[motif_index] = allocate_pattern(get_motif_id(motif), ""); add_cisml_pattern(cisml, patterns[motif_index]); } // make reverse complement motifs and background frequencies. if (scan_both_strands == TRUE) { add_reverse_complements(motifs); assert(arraylst_size(motifs) == (2 * num_motifs)); rev_bg_freqs = allocate_array(get_array_length(pos_bg_freqs)); complement_dna_freqs(pos_bg_freqs, rev_bg_freqs); } /************************************************************** * Convert motif matrices into log-odds matrices. * Scale them. * Compute the lookup tables for the PDF of scaled log-odds scores. **************************************************************/ int ns = scan_both_strands ? 
2 : 1; // number of strands for (motif_index = 0; motif_index < num_motifs; motif_index++) { MOTIF_T *motif, *motif_rc; motif = (MOTIF_T*)arraylst_get(motif_index*ns, motifs); if (scan_both_strands) motif_rc = (MOTIF_T*)arraylst_get(motif_index*ns + 1, motifs); else motif_rc = NULL; /* * Note: If scanning both strands, we complement the motif frequencies * but not the background frequencies so the motif looks the same. * However, the given frequencies are used in computing the p-values * since they represent the frequencies on the negative strands. * (If we instead were to complement the input sequence, keeping the * the motif fixed, we would need to use the complemented frequencies * in computing the p-values. Is that any clearer?) */ double range = 300; // 100 is not very good; 1000 is great but too slow PSSM_T* pos_pssm = build_motif_pssm( motif, pos_bg_freqs, pos_bg_freqs, NULL, // Priors not used 0.0L, // alpha not used range, num_gc_bins, TRUE ); PSSM_T* neg_pssm = (scan_both_strands ? build_motif_pssm( motif_rc, rev_bg_freqs, pos_bg_freqs, NULL, // Priors not used 0.0L, // alpha not used range, num_gc_bins, TRUE ) : NULL ); pssm_pairs[motif_index] = create_pssm_pair(pos_pssm, neg_pssm); } // Open the FASTA file for reading. 
FILE* fasta_file = NULL; if (open_file(fasta_filename, "r", FALSE, "FASTA", "sequences", &fasta_file) == 0) { die("Couldn't open the file %s.\n", fasta_filename); } if (verbosity >= NORMAL_VERBOSE) { if (last == 0) { fprintf(stderr, "Using entire sequence\n"); } else { fprintf(stderr, "Limiting sequence to last %d positions.\n", last); } } /************************************************************** * Read in all sequences and score with all motifs **************************************************************/ int seq_loading_num = 0; // keeps track on the number of sequences read in total int seq_counter = 0; // holds the index to the seq in the pattern int unique_seqs = 0; // keeps track on the number of unique sequences BOOLEAN_T need_postprocessing = FALSE; SEQ_T* sequence = NULL; RBTREE_T* seq_ids = rbtree_create(rbtree_strcasecmp,NULL,free,rbtree_intcpy,free); RBNODE_T* seq_node; BOOLEAN_T created; while (read_one_fasta(alph, fasta_file, max_seq_length, &sequence)) { ++seq_loading_num; created = FALSE; char* seq_name = get_seq_name(sequence); int seq_len = get_seq_length(sequence); int scan_len; if (last != 0) { scan_len = last; } else { scan_len = seq_len; } // red-black trees are only required if duplicates should be combined if (combine_duplicates){ //lookup seq id and create new entry if required, return sequence index char *tmp_id = mm_malloc(strlen(seq_name)+1); // required copy for rb-tree strncpy(tmp_id,seq_name,strlen(seq_name)+1); seq_node = rbtree_lookup(seq_ids, tmp_id, TRUE, &created); if (created) {// assign it a loading number rbtree_set(seq_ids, seq_node, &unique_seqs); seq_counter = unique_seqs; ++unique_seqs; } else { seq_counter = *((int*)rbnode_get(seq_node)); } } // // Set up sequence-dependent background model and compute // log cumulative probability of sequence. // double *logcumback = NULL; // array of log cumulative probs. 
if (sdbg_order >= 0) { Resize(logcumback, seq_len+1, double); char* raw_seq = get_raw_sequence(sequence); BOOLEAN rc = FALSE; double *a_cp = get_markov_from_sequence(raw_seq, alph_string(alph), rc, sdbg_order, 0); log_cum_back(raw_seq, a_cp, sdbg_order, logcumback); myfree(a_cp); } // Get the GC content of the sequence if binning p-values by GC // and store it in the sequence object. if (num_gc_bins > 1) { ARRAY_T *freqs = get_sequence_freqs(sequence, alph); set_total_gc_sequence(sequence, get_array_item(1,freqs) + get_array_item(2,freqs)); // f(C) + f(G) free_array(freqs); // clean up } else { set_total_gc_sequence(sequence, -1); // flag ignore } /************************************************************** * Process all motifs. **************************************************************/ int ns = scan_both_strands ? 2 : 1; for (motif_index = 0; motif_index < num_motifs; motif_index++) { PATTERN_T *pattern = patterns[motif_index]; MOTIF_T* motif = (MOTIF_T*)arraylst_get(ns*motif_index, motifs); char* motif_id = (scan_both_strands ? get_motif_st_id(motif) : get_motif_id(motif)); if (verbosity >= HIGH_VERBOSE) { fprintf(stderr, "Using motif %s of width %d.\n", motif_id, get_motif_length(motif)); } if ((selected_motifs == NULL) || (have_string(get_motif_id(motif), selected_motifs) == TRUE)) { if (verbosity >= HIGHER_VERBOSE) { fprintf(stderr, "Scanning %s sequence with length %d " "abbreviated to %d with motif %s with length %d.\n", seq_name, seq_len, scan_len, motif_id, get_motif_length(motif)); } SCANNED_SEQUENCE_T* scanned_seq = NULL; if (!combine_duplicates || get_pattern_num_scanned_sequences(pattern) <= seq_counter){ // Create a scanned_sequence record and save it in the pattern. 
scanned_seq = allocate_scanned_sequence(seq_name, seq_name, pattern); set_scanned_sequence_length(scanned_seq, scan_len); } else { // get existing sequence record scanned_seq = get_pattern_scanned_sequences(pattern)[seq_counter]; set_scanned_sequence_length(scanned_seq, max(scan_len, get_scanned_sequence_length(scanned_seq))); } // check if scanned component of sequence has sufficient length for the motif if (scan_len < get_motif_length(motif)) { // set score to zero and p-value to 1 if not set yet if(!has_scanned_sequence_score(scanned_seq)){ set_scanned_sequence_score(scanned_seq, 0.0); } if(pvalues && !has_scanned_sequence_pvalue(scanned_seq)){ set_scanned_sequence_pvalue(scanned_seq, 1.0); } add_scanned_sequence_scanned_position(scanned_seq); if (get_scanned_sequence_num_scanned_positions(scanned_seq) > 0L) need_postprocessing = TRUE; if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "%s too short for motif %s. Score set to 0!\n", seq_name, motif_id); } else { // scan the sequence using average/maximum motif affinity ama_sequence_scan(alph, sequence, logcumback, pssm_pairs[motif_index], scoring, pvalues, last, scanned_seq, &need_postprocessing); } } else { if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "Skipping motif %s.\n", motif_id); } } // All motifs parsed free_seq(sequence); if (sdbg_order >= 0) myfree(logcumback); } // read sequences
/* * Change the size of an allocation and copy data from the old pointer. * * If ptr is NULL mm_reallloc is the same as mm_malloc. * If size is 0 mm_realloc is the same as mm_free. * * Returns a pointer to the newly allocated memory. */ void *mm_realloc(void *ptr, size_t size) { /* If size is 0 free the pointer */ if (size == 0) { mm_free(ptr); return NULL; } /* If ptr is NULL just allocate */ if (ptr == NULL) { return mm_malloc(size); } /* Find the run the old pointer belongs to */ char *old_run = mm_findnodetree((char *)ptr, NULL, NULL); /* Find out the size of the old pointer */ int old_size; if (mmrun_get_slotcount(old_run)) { old_size = mmrun_get_slotsize(old_run); } else { old_size = mmrun_get_largesize(old_run); } /* See if ptr can be expanded */ if (mmrun_get_slotcount(old_run) == 0) { /* Return if the run is already large enough */ if ((old_size - RUN_HEADER_SIZE) >= size) { return ptr; } /* See if there is a free run after the old run */ if (free_runs) { char *run = rbtree_lookup(old_run + old_size + 1, free_runs); /* Check if it the expanded run can contain the new size */ if (run && (mmrun_get_size(run) + old_size) >= size) { /* Remove the free run from the free list */ rbtree_remove(run, &free_runs); int run_size = mmrun_get_largesize(run); /* Merge it with the old run */ mmrun_init(0, 0, old_run); mmrun_set_largesize(old_size + run_size, old_run); /* Split off any excess */ mmrun_split(size + RUN_HEADER_SIZE, old_run); /* Return the expanded run */ return old_run + RUN_HEADER_SIZE; } } } /* If ptr can't be expanded just allocate a new run and copy */ void *new_ptr = mm_malloc(size); if (new_ptr) { /* Copy data from the old pointer to the new one */ int min_size = (size > old_size) ? old_size : size; memcpy(new_ptr, ptr, min_size); /* Free the old pointer */ mm_free(ptr); } return new_ptr; }
/************************************************************************* * Entry point for ama *************************************************************************/ int main(int argc, char **argv) { AMA_OPTIONS_T options; ARRAYLST_T *motifs; clock_t c0, c1; // measuring cpu_time MOTIF_AND_PSSM_T *combo; CISML_T *cisml; PATTERN_T** patterns; PATTERN_T *pattern; FILE *fasta_file, *text_output, *cisml_output; int i, seq_loading_num, seq_counter, unique_seqs, seq_len, scan_len, x1, x2, y1, y2; char *seq_name, *path; bool need_postprocessing, created; SEQ_T *sequence; RBTREE_T *seq_ids; RBNODE_T *seq_node; double *logcumback; ALPH_T *alph; // process the command process_command_line(argc, argv, &options); // load DNA motifs motifs = load_motifs(&options); // get the alphabet if (arraylst_size(motifs) > 0) { combo = (MOTIF_AND_PSSM_T*)arraylst_get(0, motifs); alph = alph_hold(get_motif_alph(combo->motif)); } else { alph = alph_dna(); } // pick columns for GC operations x1 = -1; x2 = -1; y1 = -1; y2 = -1; if (alph_size_core(alph) == 4 && alph_size_pairs(alph) == 2) { x1 = 0; // A x2 = alph_complement(alph, x1); // T y1 = (x2 == 1 ? 2 : 1); // C y2 = alph_complement(alph, y1); // G assert(x1 != x2 && y1 != y2 && x1 != y1 && x2 != y2 && x1 != y2 && x2 != y1); } // record starting time c0 = clock(); // Create cisml data structure for recording results cisml = allocate_cisml(PROGRAM_NAME, options.command_line, options.motif_filename, options.fasta_filename); set_cisml_background_file(cisml, options.bg_filename); // make a CISML pattern to hold scores for each motif for (i = 0; i < arraylst_size(motifs); i++) { combo = (MOTIF_AND_PSSM_T*)arraylst_get(i, motifs); add_cisml_pattern(cisml, allocate_pattern(get_motif_id(combo->motif), "")); } // Open the FASTA file for reading. 
fasta_file = NULL; if (!open_file(options.fasta_filename, "r", false, "FASTA", "sequences", &fasta_file)) { die("Couldn't open the file %s.\n", options.fasta_filename); } if (verbosity >= NORMAL_VERBOSE) { if (options.last == 0) { fprintf(stderr, "Using entire sequence\n"); } else { fprintf(stderr, "Limiting sequence to last %d positions.\n", options.last); } } // // Read in all sequences and score with all motifs // seq_loading_num = 0; // keeps track on the number of sequences read in total seq_counter = 0; // holds the index to the seq in the pattern unique_seqs = 0; // keeps track on the number of unique sequences need_postprocessing = false; sequence = NULL; logcumback = NULL; seq_ids = rbtree_create(rbtree_strcasecmp,rbtree_strcpy,free,rbtree_intcpy,free); while (read_one_fasta(alph, fasta_file, options.max_seq_length, &sequence)) { ++seq_loading_num; seq_name = get_seq_name(sequence); seq_len = get_seq_length(sequence); scan_len = (options.last != 0 ? options.last : seq_len); // red-black trees are only required if duplicates should be combined if (options.combine_duplicates){ //lookup seq id and create new entry if required, return sequence index seq_node = rbtree_lookup(seq_ids, get_seq_name(sequence), true, &created); if (created) { // assign it a loading number rbtree_set(seq_ids, seq_node, &unique_seqs); seq_counter = unique_seqs; ++unique_seqs; } else { seq_counter = *((int*)rbnode_get(seq_node)); } } // // Set up sequence-dependent background model and compute // log cumulative probability of sequence. // This needs the sequence in raw format. // if (options.sdbg_order >= 0) logcumback = log_cumulative_background(alph, options.sdbg_order, sequence); // Index the sequence, throwing away the raw format and ambiguous characters index_sequence(sequence, alph, SEQ_NOAMBIG); // Get the GC content of the sequence if binning p-values by GC // and store it in the sequence object. 
if (options.num_gc_bins > 1) { ARRAY_T *freqs = get_sequence_freqs(sequence, alph); set_total_gc_sequence(sequence, get_array_item(y1, freqs) + get_array_item(y2, freqs)); // f(C) + f(G) free_array(freqs); // clean up } else { set_total_gc_sequence(sequence, -1); // flag ignore } // Scan with motifs. for (i = 0; i < arraylst_size(motifs); i++) { pattern = get_cisml_patterns(cisml)[i]; combo = (MOTIF_AND_PSSM_T*)arraylst_get(i, motifs); if (verbosity >= HIGHER_VERBOSE) { fprintf(stderr, "Scanning %s sequence with length %d " "abbreviated to %d with motif %s with length %d.\n", seq_name, seq_len, scan_len, get_motif_id(combo->motif), get_motif_length(combo->motif)); } SCANNED_SEQUENCE_T* scanned_seq = NULL; if (!options.combine_duplicates || get_pattern_num_scanned_sequences(pattern) <= seq_counter) { // Create a scanned_sequence record and save it in the pattern. scanned_seq = allocate_scanned_sequence(seq_name, seq_name, pattern); set_scanned_sequence_length(scanned_seq, scan_len); } else { // get existing sequence record scanned_seq = get_pattern_scanned_sequences(pattern)[seq_counter]; set_scanned_sequence_length(scanned_seq, max(scan_len, get_scanned_sequence_length(scanned_seq))); } // check if scanned component of sequence has sufficient length for the motif if (scan_len < get_motif_length(combo->motif)) { // set score to zero and p-value to 1 if not set yet if(!has_scanned_sequence_score(scanned_seq)){ set_scanned_sequence_score(scanned_seq, 0.0); } if(options.pvalues && !has_scanned_sequence_pvalue(scanned_seq)){ set_scanned_sequence_pvalue(scanned_seq, 1.0); } add_scanned_sequence_scanned_position(scanned_seq); if (get_scanned_sequence_num_scanned_positions(scanned_seq) > 0L) { need_postprocessing = true; } if (verbosity >= HIGH_VERBOSE) { fprintf(stderr, "%s too short for motif %s. 
Score set to 0.\n", seq_name, get_motif_id(combo->motif)); } } else { // scan the sequence using average/maximum motif affinity ama_sequence_scan(alph, sequence, logcumback, combo->pssm_pair, options.scoring, options.pvalues, options.last, scanned_seq, &need_postprocessing); } } // All motifs scanned free_seq(sequence); if (options.sdbg_order >= 0) myfree(logcumback); } // read sequences fclose(fasta_file); if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "(%d) sequences read in.\n", seq_loading_num); if (verbosity >= NORMAL_VERBOSE) fprintf(stderr, "Finished \n"); // if any sequence identifier was multiple times in the sequence set then // postprocess of the data is required if (need_postprocessing || options.normalize_scores) { post_process(cisml, motifs, options.normalize_scores); } // output results if (options.output_format == DIRECTORY_FORMAT) { if (create_output_directory(options.out_dir, options.clobber, verbosity > QUIET_VERBOSE)) { // only warn in higher verbose modes fprintf(stderr, "failed to create output directory `%s' or already exists\n", options.out_dir); exit(1); } path = make_path_to_file(options.out_dir, text_filename); //FIXME check for errors: MEME doesn't either and we at least know we have a good directory text_output = fopen(path, "w"); free(path); path = make_path_to_file(options.out_dir, cisml_filename); //FIXME check for errors cisml_output = fopen(path, "w"); free(path); print_cisml(cisml_output, cisml, true, NULL, false); print_score(cisml, text_output); fclose(cisml_output); fclose(text_output); } else if (options.output_format == GFF_FORMAT) { print_score(cisml, stdout); } else if (options.output_format == CISML_FORMAT) { print_cisml(stdout, cisml, true, NULL, false); } else { die("Output format invalid!\n"); } // // Clean up. 
// rbtree_destroy(seq_ids); arraylst_destroy(motif_and_pssm_destroy, motifs); free_cisml(cisml); rbtree_destroy(options.selected_motifs); alph_release(alph); // measure time if (verbosity >= NORMAL_VERBOSE) { // starting time c1 = clock(); fprintf(stderr, "cycles (CPU); %ld cycles\n", (long) c1); fprintf(stderr, "elapsed CPU time: %f seconds\n", (float) (c1-c0) / CLOCKS_PER_SEC); } return 0; }
/** * Expects a file like: **/ void read_data_events(char *mmaped_file) { /** Header **/ FILE *data = open_file(mmaped_file); if(!data) { printf("#Warning: data file %s not found\n", mmaped_file); return; } if(!data_events) data_events = pqueue_init(10, cmp_pri, get_pri, set_pri, get_pos, set_pos); rbtree metadata = rbtree_create(); char line[512]; struct data_ev *event; int nb_lines = 0; uint64_t type; while(fgets(line, sizeof(line), data)) { nb_lines++; event = malloc(sizeof(*event)); if(sscanf(line, "%lu %lu %lu %lu %d %u", &event->rdt, &event->malloc.begin, &event->malloc.end, &type, &event->cpu, &event->tid) != 6) { goto test_info; } if(type == 0) { // free event->type = FREE; } else if(type == 2) { // munmap event->type = FREE; //munmap is not handled correctly yet => fake free } else { // malloc / mmap event->type = MALLOC; event->malloc.end = event->malloc.begin + event->malloc.end; if(type == 1) { char * val = rbtree_lookup(metadata, (void*)event->rdt, pointer_cmp); if(val) event->malloc.info = val; else asprintf(&event->malloc.info, "datasize%lu-%d", event->malloc.end - event->malloc.begin, nb_lines); } else { /*#define MAP_SHARED 0x01 #define MAP_PRIVATE 0x02*/ if(event->malloc.end - event->malloc.begin == 8392704) { /* All stacks seem to be of that size */ asprintf(&event->malloc.info, "thread-stack-%d", nb_lines); } else if(type & 0x01) { asprintf(&event->malloc.info, "mmap-shared%lu-%d", event->malloc.end - event->malloc.begin, nb_lines); } else if(type & 0x02) { asprintf(&event->malloc.info, "mmap-priv%lu-%d", event->malloc.end - event->malloc.begin, nb_lines); } else { asprintf(&event->malloc.info, "mmap-??%lu-%d", event->malloc.end - event->malloc.begin, nb_lines); } } } pqueue_insert(data_events, event); total_data_samples++; continue; test_info:; uint64_t time, loc; int read; if(sscanf(line, "#%lu 0x%lx %n\n", &time, &loc, &read) != 2) { //printf("fail %s %d\n", line, read); goto fail; } char *met_value = strdup(line + read); int met_len = 
strlen(met_value)-1; if(met_len < 5) // malloc probably not correctly resolved asprintf(&met_value, "%lu", time); else met_value[met_len] = '\0'; rbtree_insert(metadata, (void*)time, met_value, pointer_cmp); fail: //printf("#Unrecognized line: %s", line); free(event); continue; } if(!active_data) active_data = rbtree_create(); if(verbose) printf("#All data events added successfully ; now processing samples\n"); }