コード例 #1
0
ファイル: data.bis.c プロジェクト: Memprof/parser
/*
 * Consume every queued data event whose timestamp (`rdt`) is not later
 * than `time`.
 *
 * - MALLOC: if the start address is already present in `active_data`, only
 *   the tracked entry's `malloc.end` is overwritten and the event counts as
 *   a failure; otherwise the event itself is inserted and counts as a
 *   success.
 * - FREE: removes the tracked entry (success), or counts a failure when the
 *   address is unknown.
 *
 * `processed_data_samples` is bumped for every event taken off the queue,
 * including events of any other type. Does nothing when `data_events` is
 * not set.
 */
void process_data_samples(uint64_t time) {
   if(!data_events)
      return;

   for(;;) {
      struct data_ev *cur = pqueue_peek(data_events);
      if(!cur || cur->rdt > time)
         break;

      if(cur->type==MALLOC) {
         /* NOTE(review): the key is read through `free.begin` here but
            through `malloc.begin` on insertion below; presumably both
            name the same storage -- confirm against struct data_ev. */
         void *hit = rbtree_lookup(active_data, (void*)cur->free.begin, pointer_cmp_reverse);
         if(!hit) {
            rbtree_insert(active_data, (void*)cur->malloc.begin, cur, pointer_cmp_reverse);
            data_success++;
         } else {
            /* Address already tracked ("variable inserted twice"). */
            ((struct data_ev *)hit)->malloc.end = cur->malloc.end;
            data_fail++;
         }
      } else if(cur->type==FREE) {
         void *hit = rbtree_lookup(active_data, (void*)cur->free.begin, pointer_cmp_reverse);
         if(hit) {
            rbtree_delete(active_data, (void*)cur->free.begin, pointer_cmp_reverse);
            data_success++;
         } else {
            /* Free of a pointer that was never recorded. */
            data_fail++;
         }
      }

      processed_data_samples++;
      pqueue_pop(data_events);
   }
}
コード例 #2
0
ファイル: testRedBlackTree.c プロジェクト: ema8490/etn
static bool
_predicate (void)
{
	int i;
	KeyValuePair_t n;
	struct rbtree tree;
	KeyValuePair_t *node;
	struct rbtree_node *result;

	rbtree_init (&tree, _compareFn, 0);

	for (i = 0; i < TreeSize; i++) {
		node = malloc (sizeof (KeyValuePair_t));

		node->key = i;
		node->val = TreeSize + i;

		rbtree_insert ((struct rbtree_node *) &node->node, &tree);
	}

	// Lookup the nodes.
	for (i = 0; i < TreeSize; i++) {
		KeyValuePair_t *kvResult;
		n.key = i;
		kvResult = rbtree_container_of (rbtree_lookup ((struct rbtree_node *) &n.node, &tree), KeyValuePair_t, node);
		if (kvResult->key != i || kvResult->val != TreeSize + i) {
			return false;
		}
	}

	// This lookup should fail.
	n.key = TreeSize;
	result = rbtree_lookup ((struct rbtree_node *) &n.node, &tree);
	if (result != NULL) {
		return false;
	}

	//iterate (rbtree_first(&tree), iterateFn);
	result = rbtree_first(&tree);
	while (result) {
		KeyValuePair_t *kvResult = rbtree_container_of (result, KeyValuePair_t, node);
		struct rbtree_node *n = result;
		result = rbtree_next (result);
		rbtree_remove (n, &tree);
		free (kvResult);
	}

	// This lookup should fail because we just cleared the tree.
	n.key = TreeSize;
	n.key = 0;
	result = rbtree_lookup ((struct rbtree_node *) &n.node, &tree);
	if (result != NULL) {
		return false;
	}

	return true;
}
コード例 #3
0
ファイル: spamo-cisml.c プロジェクト: brsaran/FuzzyApp
/**************************************************************************
 * CISML callback for the opening pattern tag of a secondary motif
 * database file.
 *
 * Looks the motif up by (loader->db_id, accession). When it is unknown the
 * current secondary motif is cleared. When it is known and not yet loaded,
 * the per-sequence match table is zeroed and, if the configured
 * threshold-or-multiplier lies in [-1, 0), a score threshold is derived
 * from the best match score of the motif's PSSM. A motif that is already
 * marked as loaded is reported through die().
 **************************************************************************/
void motif_secondary(void *ctx, char *accession, char *name, char *db, char *lsId, double *pvalue, double *score) {
  SECONDARY_LOADER_T *loader = (SECONDARY_LOADER_T*)ctx;
  SECONDARY_KEY_T key;
  RBNODE_T *found;
  PSSM_T *pssm;
  int i, seq_count;

  key.db_id = loader->db_id;
  key.motif_id = accession;
  found = rbtree_lookup(loader->secondary_motifs, &key, FALSE, NULL);
  if (found == NULL) {
    // not a motif we are scoring
    loader->secondary_motif = NULL;
    return;
  }

  loader->secondary_motif = (SECONDARY_MOTIF_T*)rbtree_value(found);
  if (loader->secondary_motif->loaded) {
    die("Already seen CISML data for this motif!");
    return;
  }

  // forget the matches recorded for the previous motif
  seq_count = rbtree_size(loader->sequences);
  i = 0;
  while (i < seq_count) {
    loader->secondary_matches[i] = 0;
    ++i;
  }

  // a value in [-1, 0) is a multiplier applied to the best possible score
  if (-1 <= loader->score_threshold_or_multiplier && loader->score_threshold_or_multiplier < 0) {
    pssm = build_motif_pssm(loader->secondary_motif->motif, loader->background, loader->background, NULL, 0, PSSM_RANGE, 0, FALSE);
    loader->calculated_score_threshold = pssm_best_match_score(pssm) * (-loader->score_threshold_or_multiplier);
    free_pssm(pssm);
  }
}
コード例 #4
0
ファイル: rb_test.c プロジェクト: SylvanHuang/rbtree
int main()
{
    struct rbtree* tree = rbtree_init(compare);
    int ret = 0;
    if(tree == NULL)
    {
        fprintf(stderr,"malloc tree failed\n");
        return -1;
    }

    int i = 0;
    ULL  * array = malloc(SIZE*sizeof(ULL ));
    if(array == NULL)
    {
        fprintf(stderr,"malloc failed\n");
        return -1;
    }
 //   srand(time(NULL));
    for(i = 0;i<SIZE;i++)
    {
        array[i] = rand()%1000;
        ret  = rbtree_insert(tree,&array[i],&array[i]);//-1 mean alloc node failed, 
                                                     //-2 mean existed node with same key
        void * data = rbtree_lookup(tree,&array[i]);
        if(ret == 0)
            assert(data == &array[i]);
    }

    print_tree(tree);
    tree2dot(tree,"tree.dot");
    return 0;
}
コード例 #5
0
ファイル: rbtreemain.c プロジェクト: Kayleezee/APC
/*************************************************************************************************
* PARALLEL ADD / SEARCH FUNCTIONS
*
* Per-thread workload: performs roughly iOperations / iNumThreads tree
* operations in rounds of iNumInsert inserts followed by iNumSearch lookups.
* Inserts take the rwlock exclusively, lookups take it shared. A round is
* always completed once started, so the final count may exceed the
* per-thread budget by up to one round.
*
* iId carries the thread id packed into the pointer value; it is not used
* by the workload.
*
* NOTE(review): iKey / iValue are not declared in this function and look
* like variables shared between threads, and rand() is not required to be
* thread-safe -- confirm whether that is acceptable for this benchmark.
*************************************************************************************************/
void vOperationStream_parallel(int *iId) {
    int i = 0;
    int j;
    int iAmountOpsPerThread;

    (void) iId;

    iAmountOpsPerThread = iOperations / iNumThreads;

    // With neither inserts nor searches per round, i would never advance
    // and the loop below would spin forever.
    if(iNumInsert <= 0 && iNumSearch <= 0) {
        return;
    }

    while(i < iAmountOpsPerThread) {
        for(j = 0; j < iNumInsert; j++) {
            iValue = rand();
            iKey = rand();
            pthread_rwlock_wrlock(&lock); // lock tree - exclusive lock
            rbtree_insert(RBTree, (void*)iKey, (void*)iValue, int_compare);
            pthread_rwlock_unlock(&lock); // unlock tree - exclusive lock
            i++;
        }

        for(j = 0; j < iNumSearch; j++) {
            iKey = rand();
            pthread_rwlock_rdlock(&lock); // lock - shared lock
            rbtree_lookup(RBTree, (void*)iKey, int_compare);
            pthread_rwlock_unlock(&lock); // unlock - shared lock
            i++;
        }
    }
}
コード例 #6
0
ファイル: thread.c プロジェクト: ohio813/tracer
/*
 * Return the thread record stored under TID in the thread table of the
 * process found for PID. A missing TID trips the assertion.
 */
thread *find_thread (DWORD PID, DWORD TID)
{
    process *owner=find_process(PID);
    thread *t;

    t=(thread*)rbtree_lookup (owner->threads, (void*)TID);
    oassert (t!=NULL && "TID not found in threads table");
    return t;
}
コード例 #7
0
ファイル: symbol.c プロジェクト: ohio813/tracer
/*
 * Record symbol `name` at address `a` in the symbol table of params->m.
 *
 * Side tasks performed on the way:
 *  - sets a one-time INT3 breakpoint when the "module!name" string matches
 *    one_time_int3_bp_re (PE exports in executable sections only);
 *  - remembers the address of a symbol ending in "security_cookie" when
 *    dump_seh is set;
 *  - logs the new symbol when it matches dump_all_symbols_re, or, if no
 *    such regex is configured, when dump_all_symbols is set.
 *
 * If a symbol already exists at `a`, the new one is linked in front of it
 * through ->next before being inserted under the same key.
 */
void add_symbol (address a, char *name, add_symbol_params *params)
{
    module *m=params->m;
    rbtree *symtbl=m->symbols;
    oassert(symtbl && "symbols=NULL in module");
    MemoryCache *mc=params->mc;
    bool wants_dump;

    if (one_time_int3_bp_re && params->t==SYM_TYPE_PE_EXPORT && module_adr_in_executable_section (m, a))
    {
        // match against the fully qualified "module!name" form
        strbuf qualified=STRBUF_INIT;
        strbuf_addstr (&qualified, get_module_name(m));
        strbuf_addc (&qualified, '!');
        strbuf_addstr (&qualified, name);

        if (regexec (one_time_int3_bp_re, qualified.buf, 0, NULL, 0)==0)
            set_onetime_INT3_BP(a, params->p, m, name, mc);

        strbuf_deinit (&qualified);
    }

    if (dump_seh && string_is_ends_with (name, "security_cookie"))
    {
        m->security_cookie_adr=a;
        m->security_cookie_adr_known=true;
        if (symbol_c_debug)
            L ("%s() got address of security_cookie (0x" PRI_REG_HEX ") for %s!%s\n", __FUNCTION__, a, get_module_name(m), name);
    }

    if (dump_all_symbols_re==NULL)
    {
        // no filter configured: the global switch alone decides
        wants_dump=dump_all_symbols ? true : false;
    }
    else
    {
        strbuf qualified=STRBUF_INIT;
        strbuf_addstr (&qualified, get_module_name(m));
        strbuf_addc (&qualified, '!');
        strbuf_addstr (&qualified, name);

        wants_dump=(regexec (dump_all_symbols_re, qualified.buf, 0, NULL, 0)==0);

        strbuf_deinit (&qualified);
    }

    if (wants_dump)
    {
        dump_PID_if_need(params->p);
        L("New symbol. Module=[%s], address=[0x" PRI_ADR_HEX "], name=[%s]\n", get_module_name(m), a, name);
    }

    symbol *fresh=create_symbol(params->t, name);
    symbol *head=(symbol*)rbtree_lookup(symtbl, (void*)a);

    if (head!=NULL)
        fresh->next=head; // new symbol becomes the head of the per-address list

    rbtree_insert(symtbl, (void*)a, (void*)fresh);
}
コード例 #8
0
/*
 * Put `run` on the free list, merging it with a neighbouring free run when
 * one is found.
 *
 * The free list is probed with the address just below `run` and, failing
 * that, with the address `run + run_size + 1`. A hit below grows that
 * buddy in place; a hit above replaces the buddy by `run` with the
 * combined size. Without a hit, `run` is inserted on its own.
 *
 * The probe addresses are computed with char-pointer arithmetic rather
 * than by casting the pointer to int, which would truncate the address on
 * platforms where int is narrower than a pointer.
 */
static void mmfreerun_add(char *run)
{
    char *buddy;
    int run_size = mmrun_get_size(run);
    
    /* See if the run can be appended to an adjacent run on the free list */
    buddy = rbtree_lookup(run - 1, free_runs);
    if (buddy) {
        int buddy_size = mmrun_get_largesize(buddy);
        
        /* Increase the size of the run on the free list */
        mmrun_set_largesize(buddy_size + run_size, buddy);
        
        return;
    }
    
    /*
     * See if the run can be expanded with an adjacent run
     * from the free list
     */
    buddy = rbtree_lookup(run + run_size + 1, free_runs);
    if (buddy) {
        int buddy_size = mmrun_get_largesize(buddy);
        
        /*
         * Remove the old run and add its size to the new run.
         * Then add the new run to the free list.
         */
        rbtree_remove(buddy, &free_runs);
        mmrun_init(0, 0, run);
        mmrun_set_largesize(buddy_size + run_size, run);
        rbtree_insert(run, &free_runs);
        
        return;
    }
    
    /* The run can't be merged so add it to the free list */
    mmrun_init(0, 0, run);
    mmrun_set_largesize(run_size, run);
        
    rbtree_insert(run, &free_runs);
}
コード例 #9
0
static char *mm_findnodetree(char *run, char **out_tree, int *out_bin_index)
{    
    char *node;
    
    /* See if the run belongs to any bin */
    int i;
    for (i = 0; i < BIN_COUNT; ++i) {
        if (bins[i] != NULL) {
            node = rbtree_lookup(run, bins[i]);
            if (node) {
                if (out_tree) {
                    *out_tree = bins[i];
                }
                if (out_bin_index) {
                    *out_bin_index = i;
                }
                return node;
            }
        }
    }
    
    /* See if the run is a large allocation */
    if (large_allocations && (node = rbtree_lookup(run, large_allocations))) {
        if (out_tree) {
            *out_tree = large_allocations;
        }    
        if (out_bin_index) {
            *out_bin_index = -1;
        }
        return node;
    }
    
    /* The run doesn't belong to any tree */
    if (out_tree) {
        *out_tree = NULL;
    }    
    if (out_bin_index) {
        *out_bin_index = -1;
    }
    return NULL;
}
コード例 #10
0
ファイル: builtin-migrate.c プロジェクト: Memprof/parser
/*
 * Account one sample against the page cluster it touches.
 *
 * Samples without a physical address (ibs_dc_phys == 0) are ignored. The
 * linear address is rounded down to a multiple of PAGE_SIZE * CLUSTER and
 * used as the key into migrate_tree; the per-node access counter of the
 * sampling CPU's node is then incremented. The global `pid` is updated
 * from the sample.
 *
 * Allocation failure aborts the process: the original code dereferenced
 * the unchecked calloc() result, so there is no recoverable path to keep.
 */
void migrate_parse(struct s* s) {
   if(!s->ibs_dc_phys)
      return;

   void *addr = (void*)((s->ibs_dc_linear / PAGE_SIZE / CLUSTER ) * (PAGE_SIZE * CLUSTER));
   struct page* v = rbtree_lookup(migrate_tree, addr, pointer_cmp);
   if(!v) {
      /* First sample for this cluster: create its record */
      v = calloc(1, sizeof(*v));
      if(!v)
         abort();
      v->addr = addr;
      /* calloc(n, size) lets the library check n * size for overflow */
      v->accesses = calloc(max_node, sizeof(*v->accesses));
      if(!v->accesses)
         abort();
      rbtree_insert(migrate_tree, addr, v, pointer_cmp);
   }
   v->accesses[cpu_to_node(s->cpu)]++;
   pid = get_pid(s);
}
コード例 #11
0
ファイル: spamo-cisml.c プロジェクト: brsaran/FuzzyApp
/**************************************************************************
 * CISML callback for the opening scanned_sequence tag of the primary
 * motif file.
 *
 * Outside of a motif, or when `name` is not among the loader's sequences,
 * the current sequence is cleared. Otherwise the sequence becomes current,
 * a sequence that already has a primary match is reported through die(),
 * and the running score and hit count are reset.
 **************************************************************************/
void sequence_primary(void *ctx, char *accession, char *name, char *db, char *lsId, double *score, double *pvalue, long *length) {
  PRIMARY_LOADER_T *loader = (PRIMARY_LOADER_T*)ctx;
  RBNODE_T *entry = NULL;

  // the tree is only consulted while inside a motif
  if (loader->in_motif) {
    entry = rbtree_lookup(loader->sequences, name, FALSE, NULL);
  }
  if (entry == NULL) {
    loader->current_sequence = NULL;
    return;
  }

  loader->current_sequence = rbtree_value(entry);
  if (loader->current_sequence->primary_match) {
    die("Already seen this sequence! We can't process this information "
        "because the scoring information from the previous sighting has already been discarded.\n");
  }
  loader->current_score = 0; // start scoring this sequence from scratch
  loader->hit_count = 0; // no hits recorded yet
}
コード例 #12
0
ファイル: thread.c プロジェクト: ohio813/tracer
/*
 * Create a thread record for TID and store it in the thread table of
 * process `p`, keyed by TID. The record keeps the thread handle, start
 * address and TIB address. Asserts that TID is not in the table yet.
 */
void add_thread (process *p, DWORD TID, HANDLE THDL, address start, address TIB)
{
    thread *rec=DCALLOC (thread, 1, "thread");

    if (thread_c_debug)
    {
        L ("%s() begin\n", __func__);
    }

    rec->start=start;
    rec->TIB=TIB;
    rec->THDL=THDL;
    rec->TID=TID;

    oassert (rbtree_lookup(p->threads, (void*)TID)==NULL && "this TID is already in table");
    rbtree_insert (p->threads, (void*)TID, rec);

    if (thread_c_debug)
    {
        L ("%s() end\n", __func__);
    }
}
コード例 #13
0
ファイル: builtin-top-obj.c プロジェクト: Memprof/parser
/*
 * Attribute one sample to the object it accesses and update that object's
 * statistics in tree `r`.
 *
 * Samples whose function name contains "plt" are only counted in nb_plt.
 * For every other sample the object name is chosen in this order: the
 * name of the object returned by get_object(), the function name when it
 * is exactly "[vdso]", the name of the mapping returned by
 * sample_to_mmap(). The per-object record is created on first use.
 *
 * Changes from the original: the "@plt" fallback was dropped because it
 * could never match in the branch where "plt" was already absent from the
 * function name, and allocation failures now abort instead of being
 * dereferenced.
 */
void top_obj_parse(struct s* s) {
   struct symbol *sym = get_function(s);
   if(strstr(sym->function_name, "plt")) {
      nb_plt++;
   }
   else
   {
      nb_non_plt++;
      struct symbol *ob = get_object(s);
      struct dyn_lib* ob3 = sample_to_mmap(s);
      char *obj = NULL;
      if(ob)
         obj = ob->object_name;
      if(!obj && !strcmp(sym->function_name, "[vdso]"))
         obj = sym->function_name;
      if(!obj && ob3) 
         obj = ob3->name;
      /* NOTE(review): obj can still be NULL here; whether `cmp` accepts a
         NULL key is not visible from this function -- confirm. */
      struct value *value = rbtree_lookup(r, obj, cmp);
      if(!value) {
         value = calloc(1, sizeof(*value));
         if(!value)
            abort();
         value->from_accesses = calloc(max_node, sizeof(*value->from_accesses));
         value->to_accesses = calloc(max_node, sizeof(*value->to_accesses));
         if(!value->from_accesses || !value->to_accesses)
            abort();
         rbtree_insert(r, obj, value, cmp);
      }
      value->accesses++;
      value->dist_accesses += is_distant(s);
      value->from_accesses[cpu_to_node(s->cpu)]++;
      value->to_accesses[get_addr_node(s)]++;
      if(ob) {
         /* Break the access down by who performed it (allocating thread or not) */
         value->dist_by_allocator += (is_distant(s) && (get_tid(s) == ob->allocator_tid));
         value->dist_by_allocator_remote_cpu += (is_distant(s) && (get_tid(s) == ob->allocator_tid) && (ob->allocator_cpu != s->cpu));
         value->dist_by_allocator_alloc_cpu += (is_distant(s) && (get_tid(s) == ob->allocator_tid) && (ob->allocator_cpu == s->cpu));
         value->dist_for_obj += (is_distant(s));
   
         value->by_allocator += ((get_tid(s) == ob->allocator_tid));
         value->by_everybody += ((get_tid(s) != ob->allocator_tid));

         /* Counts accesses seen while no other thread has touched the object yet */
         value->by_allocator_before_everybody += (value->by_everybody == 0);
         value->uid = ob->uid;
      }
      nb_total_access++;
   }
}
コード例 #14
0
ファイル: spamo-cisml.c プロジェクト: brsaran/FuzzyApp
/**************************************************************************
 * CISML callback for the opening scanned_sequence tag of a secondary
 * motif database file.
 *
 * Ignored while no secondary motif is current. The sequence is looked up
 * by accession; when unknown, the current sequence is cleared. Otherwise
 * it becomes current, the left and right bounds of the primary motif are
 * calculated from the absolute value of the sequence's primary match and
 * the primary motif length, a sequence already recorded for this motif is
 * reported through die(), and the score and hit count are reset.
 **************************************************************************/
void sequence_secondary(void *ctx, char *accession, char *name, char *db, char *lsId, double *score, double *pvalue, long *length) {
  SECONDARY_LOADER_T *loader = (SECONDARY_LOADER_T*)ctx;
  RBNODE_T *entry;
  int anchor;

  if (loader->secondary_motif == NULL) return;

  entry = rbtree_lookup(loader->sequences, accession, FALSE, NULL);
  if (entry == NULL) {
    loader->current_sequence = NULL;
    return;
  }

  loader->current_sequence = (SEQUENCE_T*)rbtree_value(entry);

  // the left bound is the magnitude of the primary match value
  anchor = loader->current_sequence->primary_match;
  if (anchor < 0) {
    anchor = -anchor;
  }
  loader->primary_lpos = anchor;
  loader->primary_rpos = loader->primary_lpos + get_motif_length(loader->primary_motif) - 1;

  if (loader->secondary_matches[loader->current_sequence->index] != 0) {
    die("Already seen this sequence!");
  }
  loader->secondary_score = 0;
  loader->hit_count = 0;
}
コード例 #15
0
ファイル: json-reader.c プロジェクト: rreja/CRM_discovery
/*
 * Infinite-loop detector for the parser's state machine.
 *
 * A parsing step must either consume input or move to a state that has
 * not yet been visited at the current position. Consuming input clears
 * the set of visited states. A step that consumed nothing records the
 * state it ended in; ending in a state that is already recorded (or, on
 * the first such step, staying in the same state) is reported as a loop.
 *
 * Returns true when a loop is detected, false otherwise.
 */
static bool loop_check(JSONRD_T *jsonrd, PS_EN prior_state, int consumed) {
  RBTREE_T *visited;
  PS_EN current;
  bool created;

  visited = jsonrd->prior_states;

  /* progress was made: start over with an empty history */
  if (consumed != 0) {
    rbtree_clear(visited);
    return false;
  }

  current = jsonrd->state;

  if (rbtree_size(visited) != 0) {
    /* presumably inserts `current` when absent and reports that through
       `created` -- confirm against rbtree_lookup */
    rbtree_lookup(visited, &current, true, &created);
    return !created;
  }

  /* first step without progress at this position */
  if (prior_state == current) return true;
  rbtree_put(visited, &prior_state, NULL);
  rbtree_put(visited, &current, NULL);
  return false;
}
コード例 #16
0
ファイル: ebb.c プロジェクト: bakins/libebb
/*
 * Session-cache retrieve callback.
 *
 * data  the session cache tree
 * key   the key to look up
 *
 * Returns a datum holding a freshly gnutls_malloc()ed copy of the cached
 * value. When the key is unknown or the copy cannot be allocated, the
 * returned datum is { NULL, 0 }: the size is only filled in once the
 * buffer exists, so a failed allocation never yields a NULL buffer paired
 * with a non-zero size.
 */
static gnutls_datum_t
session_cache_retrieve (void *data, gnutls_datum_t key)
{
  rbtree tree = data;
  gnutls_datum_t res = { NULL, 0 };
  struct session_cache *cache = rbtree_lookup(tree, &key);

  if(cache == NULL)
    return res;

  res.data = gnutls_malloc (cache->value.size);
  if(res.data == NULL)
    return res;

  res.size = cache->value.size;
  memcpy(res.data, cache->value.data, res.size);

  return res;
}
コード例 #17
0
ファイル: encoder.c プロジェクト: ema8490/etn
// Record the address v.data together with the encoder's current index,
// unless remembering was not requested, the address is the top-level
// pointer, or the address is already present in e->addrToIndex.
static void
_maybeRememberPointer(EtnEncoder *e, EtnValue v, bool rememberPointer)
{
    AddrToIndex query;

    if (rememberPointer != true || v.data == e->topLevelPointer)
	{
	    return;
	}

    // Already known? Then the first recorded index stays in place.
    query.ptr = v.data;
    if (rbtree_lookup(&query.node, &e->addrToIndex) != NULL)
	{
	    return;
	}

    AddrToIndex *node = malloc (sizeof (AddrToIndex));
    ASSERT (node);

    node->ptr = v.data;
    node->index = e->index;

    rbtree_insert(&node->node, &e->addrToIndex);

    debugXPrint(typesDebug, "Remember pointer/index: $[pointer]/$[uint]\n", node->ptr, node->index);
}
コード例 #18
0
ファイル: encoder.c プロジェクト: ema8490/etn
// Encode a pointer.
//
// v.data is the address of the pointer being encoded; the pointer value
// itself is *(void **)v.data. One of three forms is written:
//   - pnil: the pointer is NULL;
//   - pidx followed by an index: the pointee is e->topLevelPointer
//     (index 0) or is already present in e->addrToIndex;
//   - pval followed by the pointee encoded inline via the encoder
//     registered for the element kind.
//
// Returns the status of the last encode step, or StatusFail when the
// element kind is not a valid kind.
//
// NOTE(review): `length` is handed to the tag encoder and then increased
// with `+=` by the size of what follows; presumably the tag encoder stores
// the tag's own size there -- confirm against the encoderMap contract.
static Status
_encodePtr(EtnEncoder *e, EtnValue v, bool rememberPointer, EtnLength *length)
{
	ASSERT (e);
	ASSERT (v.type);
	ASSERT (v.data);
	ASSERT (length);

	Status status;
	EtnLength index = 0;

	// The location of the pointer itself may be remembered; the running
	// index advances for every pointer encoded, remembered or not.
	_maybeRememberPointer(e, v, rememberPointer);
	e->index++;

	if (*(void **)v.data == 0)
	    {
		// NULL.
		status = encoderMap[EtnKindUint8].encode (e, EtnToValue(&PtrEncodingType, &pnil), NoRememberPointer, length);
		debugXPrint(typesDebug, "Encoded nil pointer\n");
		return status;
	    }

	// See elsewhere for discussion of e->topLevelPointer.
	// Extra logic is due to a performance optimization.
	if (*(void **)v.data != e->topLevelPointer)
	    {
		// Look the pointee address up among the remembered addresses.
		AddrToIndex query = { .ptr = *(void **)v.data };
		struct rbtree_node *tr = rbtree_lookup(&query.node, &e->addrToIndex);
		if (tr)
		    {
			index = rbtree_container_of(tr, AddrToIndex, node)->index;
		    }
	    }

	if ((*(void **)v.data == e->topLevelPointer) || index)
	     // In former case, index is zero.
	     // Otherwise index == 0 implies not yet seen.
	    {
		// Previously seen; encode index.
		status = encoderMap[EtnKindUint8].encode (e, EtnToValue(&PtrEncodingType, &pidx), NoRememberPointer, length);
		if (StatusOk != status)
		    {
			return status;
		    }
		
		status = _encodeLength(e, index);
		if (StatusOk != status)
		    {
			return status;
		    }
		// Account for the index that was just written.
		*length += sizeof (EtnLength);
		debugXPrint(typesDebug, "Encoded pointer $[pointer] by index $[uint]\n", *(void **) v.data, index);
	    }
	else 
	    {
		// New; encode inline.
		debugXPrint(typesDebug, "Encoding pointer $[pointer] by value\n", *(void **) v.data);
		status = encoderMap[EtnKindUint8].encode (e, EtnToValue(&PtrEncodingType, &pval), RememberPointer, length);
		if (StatusOk != status)
		    {
			return status;
		    }

		// Reject element kinds that have no slot in encoderMap.
		EtnIndirectType *c = (EtnIndirectType *)v.type;
		if (c->elem->kind >= EtnKindInvalid)
		    {
			return StatusFail;
		    }

		// _length is not initialised here; presumably the element
		// encoder stores the encoded size into it -- confirm.
		EtnLength _length;
		status = encoderMap[c->elem->kind].encode (e, EtnToValue(c->elem, *(void **)v.data), RememberPointer, &_length);
		if (StatusOk != status)
		    {
			return status;
		    }
		*length += _length;
		debugXPrint(typesDebug, "Encoded pointer $[pointer] by value\n", *(void **) v.data);
	    }

	return status;
}
コード例 #19
0
ファイル: rbtree_test.c プロジェクト: FBergemann/ASF-RBT
/*
 * Exercises insertion and lookup in three phases, each on a fresh tree of
 * 20 entries:
 *   phase 0: pseudo-random keys and values in [0, 10000)
 *   phase 1: keys 0..19 in ascending order, value equal to key
 *   phase 2: keys 19..0 in descending order, value equal to key
 * After every insert the key must look up to the value just stored. Each
 * tree is printed once its phase is done.
 */
int main() {
    int i;
    int phase;
    rbtree t = NULL;

    for(phase=0; phase<3; phase++) {
        if(phase > 0) {
            // TODO: memory leak! (only the tree handle of the previous phase is released)
            free(t);
        }
        t = rbtree_create();

        for(i=0; i<20; i++) {
            int x;
            int y;

            switch(phase) {
            case 0:
                x = rand() % 10000;
                y = rand() % 10000;
                break;
            case 1:
                x = i;
                y = i;
                break;
            default:
                x = 19 - i;
                y = 19 - i;
                break;
            }
#ifdef TRACE
            print_tree(t);
            printf("Inserting %d -> %d\n\n", x, y);
#endif
            rbtree_insert(t, (void*)x, (void*)y, compare_int);
            assert(rbtree_lookup(t, (void*)x, compare_int) == (void*)y);
        }

        print_tree(t);
        puts("");
    }

#if 0
    for(i=0; i<60000; i++) {
        int x = rand() % 10000;
#ifdef TRACE
        print_tree(t);
        printf("Deleting key %d\n\n", x);
#endif
        rbtree_delete(t, (void*)x, compare_int);
    }
#endif
    return 0;
}
コード例 #20
0
ファイル: event.c プロジェクト: sitian/wing
/*
 * Look up the event matching `desc` in the tree at group->head, using
 * wres_event_compare as the comparator. Returns whatever rbtree_lookup
 * yields for that key.
 */
static inline wres_event_t *wres_event_lookup(wres_event_group_t *group, wres_event_desc_t *desc)
{
	wres_event_t *found = rbtree_lookup(group->head, (void *)desc, wres_event_compare);

	return found;
}
コード例 #21
0
ファイル: ama.c プロジェクト: a1aks/Haystack
/*************************************************************************
 * Entry point for ama
 *************************************************************************/
int main(int argc, char *argv[]) {
  int max_seq_length = MAX_SEQ;
  STRING_LIST_T* selected_motifs = NULL;
  double pseudocount = 0.01;
  int output_format = CISML_FORMAT;
  program_name = "ama";
  int scoring = AVG_ODDS;
  BOOLEAN_T pvalues = FALSE;
  BOOLEAN_T normalize_scores = FALSE;
  BOOLEAN_T combine_duplicates = FALSE;
  int num_gc_bins = 1;
  int sdbg_order = -1;				// don't use sequence background
  BOOLEAN_T scan_both_strands = TRUE;
  ARRAY_T* pos_bg_freqs = NULL;
  ARRAY_T* rev_bg_freqs = NULL;
  clock_t c0, c1; /* measuring cpu_time */
  CISML_T *cisml;
  char * out_dir = NULL;
  BOOLEAN_T clobber = FALSE;
  int i;
  int last = 0;
  ALPH_T alph = INVALID_ALPH;

  /**********************************************
   * COMMAND LINE PROCESSING
   **********************************************/

  const int num_options = 16;
  cmdoption const motif_scan_options[] = {
    { "max-seq-length", REQUIRED_VALUE },
    { "motif", REQUIRED_VALUE },
    { "motif-pseudo", REQUIRED_VALUE },
    { "rma", NO_VALUE },
    { "pvalues", NO_VALUE },
    { "sdbg", REQUIRED_VALUE },
    { "norc", NO_VALUE },
    { "cs", NO_VALUE },
    { "o-format", REQUIRED_VALUE },
    { "o", REQUIRED_VALUE },
    { "oc", REQUIRED_VALUE },
    { "scoring", REQUIRED_VALUE },
    { "verbosity", REQUIRED_VALUE },
    { "gcbins", REQUIRED_VALUE },
    { "last", REQUIRED_VALUE },
    { "version", NO_VALUE }
  };

  int option_index = 0;

  // Define the usage message.
  char usage[] = "USAGE: ama [options] <motif file> <sequence file> [<background file>]\n"
    "\n"
    "   Options:\n"
    "     --sdbg <order>\t\t\tUse Markov background model of\n"
    "       \t\t\t\t\torder <order> derived from the sequence\n"
    "       \t\t\t\t\tto compute its likelihood ratios.\n"
    "       \t\t\t\t\tOverrides --pvalues, --gcbins and --rma;\n"
    "       \t\t\t\t\t<background file> is required unless\n"
    "       \t\t\t\t\t--sdbg is given.\n"
    "     --motif <id>\t\t\tUse only the motif identified by <id>.\n"
    "       \t\t\t\t\tThis option may be repeated.\n"
    "     --motif-pseudo <float>\t\tThe value <float> times the background\n"
    "       \t\t\t\t\tfrequency is added to the count of each\n"
    "       \t\t\t\t\tletter when creating the likelihood \n"
    "       \t\t\t\t\tratio matrix (default: %g).\n"
    "     --norc\t\t\t\tDisables the scanning of the reverse\n"
    "       \t\t\t\t\tcomplement strand.\n"
    "     --scoring [avg-odds|max-odds]\tIndicates whether the average or \n"
    "       \t\t\t\t\tthe maximum odds should be calculated\n"
    "       \t\t\t\t\t(default: avg-odds)\n"
    "     --rma\t\t\t\tScale motif scores to the range 0-1.\n"
    "       \t\t\t\t\t(Relative Motif Affinity).\n"
    "       \t\t\t\t\tMotif scores are scaled by the maximum\n"
    "       \t\t\t\t\tscore achievable by that PWM. (default:\n"
    "       \t\t\t\t\tmotif scores are not normalized)\n"
    "     --pvalues\t\t\t\tPrint p-value of avg-odds score in cisml\n"
    "       \t\t\t\t\toutput. Ignored for max-odds scoring.\n"
    "       \t\t\t\t\t(default: p-values are not printed)\n"
    "     --gcbins <bins>\t\t\tCompensate p-values for GC content of\n"
    "       \t\t\t\t\teach sequence using given number of \n"
    "       \t\t\t\t\tGC range bins. Recommended bins: 41.\n"
    "       \t\t\t\t\t(default: p-values are based on\n"
    "       \t\t\t\t\tfrequencies in background file)\n"
    "     --cs\t\t\t\tEnable combining sequences with same\n"
    "       \t\t\t\t\tidentifier by taking the average score\n"
    "       \t\t\t\t\tand the Sidac corrected p-value.\n"
    "     --o-format [gff|cisml]\t\tOutput file format (default: cisml)\n"
    "       \t\t\t\t\tignored if --o or --oc option used\n"
    "     --o <directory>\t\t\tOutput all available formats to\n"
    "       \t\t\t\t\t<directory>; give up if <directory>\n"
    "       \t\t\t\t\texists\n"
    "     --oc <directory>\t\t\tOutput all available formats to\n"
    "       \t\t\t\t\t<directory>; if <directory> exists\n"
    "       \t\t\t\t\toverwrite contents\n"
    "     --verbosity [1|2|3|4]\t\tControls amount of screen output\n"
    "       \t\t\t\t\t(default: %d)\n"
    "     --max-seq-length <int>\t\tSet the maximum length allowed for \n"
    "       \t\t\t\t\tinput sequences. (default: %d)\n"
    "     --last <int>\t\t\tUse only scores of (up to) last <n>\n"
    "       \t\t\t\t\tsequence positions to compute AMA.\n"
    "     --version   \t\t\tPrint version and exit.\n"
    "\n";

  // Parse the command line.
  if (simple_setopt(argc, argv, num_options, motif_scan_options) != NO_ERROR) {
    die("Error processing command line options: option name too long.\n");
  }
    
    BOOLEAN_T setoutputformat = FALSE;
    BOOLEAN_T setoutputdirectory = FALSE;

  while (TRUE) {
    int c = 0;
    char* option_name = NULL;
    char* option_value = NULL;
    const char * message = NULL;

    // Read the next option, and break if we're done.
    c = simple_getopt(&option_name, &option_value, &option_index);
    if (c == 0) {
      break;
    } else if (c < 0) {
      (void) simple_getopterror(&message);
      die("Error processing command line options (%s).\n", message);
    } else if (strcmp(option_name, "max-seq-length") == 0) {
	max_seq_length = atoi(option_value);
    } else if (strcmp(option_name, "norc") == 0) {
	scan_both_strands = FALSE;
    } else if (strcmp(option_name, "cs") == 0) {
		combine_duplicates = TRUE;
    } else if (strcmp(option_name, "motif") == 0) {
	if (selected_motifs == NULL) {
	  selected_motifs = new_string_list();
	}
	add_string(option_value, selected_motifs);
    } else if (strcmp(option_name, "motif-pseudo") == 0) {
	pseudocount = atof(option_value);
    } else if (strcmp(option_name, "o-format") == 0) {
        if (setoutputdirectory) {
            if (verbosity >= NORMAL_VERBOSE)
                fprintf(stderr, "output directory specified, ignoring --o-format\n");
        } else {
            setoutputformat = TRUE;
            if (strcmp(option_value, "gff") == 0)
                output_format = GFF_FORMAT;
            else if (strcmp(option_value, "cisml") == 0)
                output_format = CISML_FORMAT;
            else {
                if (verbosity >= NORMAL_VERBOSE)
                  fprintf(stderr, "Output format not known. Using standard instead (cisML).\n");
                  output_format = CISML_FORMAT;
            }
        }
    } else if (strcmp(option_name, "o") == 0 || strcmp(option_name, "oc") == 0) {
        setoutputdirectory = TRUE;
        if (setoutputformat) {
            if (verbosity >= NORMAL_VERBOSE)
                fprintf(stderr, "output directory specified, ignoring --o-format\n");
        }
        clobber = strcmp(option_name, "oc") == 0;
        out_dir = (char*) malloc (sizeof(char)*(strlen(option_value)+1));
        strcpy(out_dir, option_value);
        output_format = DIRECTORY_FORMAT;
    } else if (strcmp(option_name, "verbosity") == 0) {
	verbosity = atoi(option_value);
    } else if (strcmp(option_name, "scoring") == 0) {
      if (strcmp(option_value, "max-odds") == 0)
	scoring = MAX_ODDS;
      else if (strcmp(option_value, "avg-odds") == 0)
	scoring = AVG_ODDS;
      else if (strcmp(option_value, "sum-odds") == 0)
	scoring = SUM_ODDS;
	  else
	die("Specified scoring scheme not known.\n", message);
    } else if (strcmp(option_name, "pvalues") == 0) {
      pvalues = TRUE;
    } else if (strcmp(option_name, "rma") == 0) {
      normalize_scores = TRUE;
      fprintf(stderr, "Normalizing motif scores using RMA method.\n");
    } else if (strcmp(option_name, "gcbins") == 0) {
      num_gc_bins = atoi(option_value);
      pvalues = TRUE;
      if (num_gc_bins <= 1) die("Number of bins in --gcbins must be greater than 1.\n", message);
    } else if (strcmp(option_name, "sdbg") == 0) {
      sdbg_order = atoi(option_value);			// >=0 means use sequence bkg
    }
    else if (strcmp(option_name, "last") == 0) {
      int i = 0;
      if (option_value[0] == '-') ++i;
      while (option_value[i] != '\0') {
        if (!isdigit(option_value[i])) {
          die("Specified parameter 'last' contains non-numeric characters.\n");
        }
        ++i;
      }
      last = atoi(option_value);
      if (errno != 0) {
        die("Specified parameter 'last' could not be parsed as a number as:\n%s\n",strerror(errno));
      }
      if (last < 0) {
        die("Specified parameter 'last' had negative value (%d) when only postive or zero values are allowed \n", last);
      }
    }
    else if (strcmp(option_name, "version") == 0) {
      fprintf(stdout, VERSION "\n");
      exit(EXIT_SUCCESS);
    }
  }

  // --sdbg overrides --pvalues and --gcbins and --rma
  int req_args = 3;
  if (sdbg_order >= 0) {
    pvalues = FALSE;
    normalize_scores = FALSE;
    num_gc_bins = 1;
    req_args = 2;
  }

  // Check all required arguments given
  if (sdbg_order >= 0 && argc > option_index + req_args) {
    die("<background file> cannot be given together with --sdbg.\n");
  } else if (argc != option_index + req_args) {
    fprintf(stderr, usage, pseudocount, verbosity, max_seq_length);
    exit(EXIT_FAILURE);
  }

  // Get required arguments. 
  char* motif_filename = argv[option_index];
  option_index++;
  char* fasta_filename = argv[option_index];
  option_index++;
  char* bg_filename;
  if (req_args == 3) {			// required unless --sdbg given
    bg_filename = argv[option_index];
    option_index++;
  } else {
    bg_filename = "--uniform--";	// So PSSMs will use uniform background;
					// we can multiply them out later.
  }

  // measure time
  c0 = clock();

  // Set up hash tables for computing reverse complement if doing --sdbg
  if (sdbg_order >= 0) setup_hash_alph(DNAB);

  // Create cisml data structure for recording results
  cisml = allocate_cisml(program_name, motif_filename, fasta_filename);
  set_cisml_background_file(cisml, bg_filename);

  /**********************************************
   * Read the motifs and background model.
   **********************************************/
  int num_motifs = 0;
  MREAD_T *mread;
  ARRAYLST_T *motifs;
  PSSM_PAIR_T** pssm_pairs;	// note pssm_pairs is an array of pointers

  //this reads any meme file, xml, txt and html
  mread = mread_create(motif_filename, OPEN_MFILE);
  mread_set_bg_source(mread, bg_filename);
  mread_set_pseudocount(mread, pseudocount);

  motifs = mread_load(mread, NULL);
  alph = mread_get_alphabet(mread);
  pos_bg_freqs = mread_get_background(mread);

  mread_destroy(mread);

  num_motifs = arraylst_size(motifs);

  // allocate memory for PSSM pairs
  pssm_pairs = (PSSM_PAIR_T**)mm_malloc(sizeof(PSSM_PAIR_T*) * num_motifs);

  if (verbosity >= NORMAL_VERBOSE) 
    fprintf(stderr, "Number of motifs in file %d.\n", num_motifs);

  // make a CISML pattern to hold scores for each motif
  PATTERN_T** patterns = NULL;
  Resize(patterns, num_motifs, PATTERN_T*);
  int motif_index;
  for (motif_index = 0; motif_index < num_motifs; motif_index++) {
    MOTIF_T* motif = (MOTIF_T*)arraylst_get(motif_index, motifs);
    patterns[motif_index] = allocate_pattern(get_motif_id(motif), "");
    add_cisml_pattern(cisml, patterns[motif_index]);
  }

  // make reverse complement motifs and background frequencies.
  if (scan_both_strands == TRUE) {
    add_reverse_complements(motifs);
    assert(arraylst_size(motifs) == (2 * num_motifs));
    rev_bg_freqs = allocate_array(get_array_length(pos_bg_freqs));
    complement_dna_freqs(pos_bg_freqs, rev_bg_freqs);
  }

  /**************************************************************
   * Convert motif matrices into log-odds matrices.
   * Scale them.
   * Compute the lookup tables for the PDF of scaled log-odds scores.
   **************************************************************/
  int ns = scan_both_strands ? 2 : 1;	// number of strands
  for (motif_index = 0; motif_index < num_motifs; motif_index++) {
    MOTIF_T *motif, *motif_rc;
    motif = (MOTIF_T*)arraylst_get(motif_index*ns, motifs);
    if (scan_both_strands)
      motif_rc = (MOTIF_T*)arraylst_get(motif_index*ns + 1, motifs);
    else
      motif_rc = NULL;
    /*
     *  Note: If scanning both strands, we complement the motif frequencies
     *  but not the background frequencies so the motif looks the same.
     *  However, the given frequencies are used in computing the p-values
     *  since they represent the frequencies on the negative strands.
     *  (If we instead were to complement the input sequence, keeping the
     *  the motif fixed, we would need to use the complemented frequencies
     *  in computing the p-values.  Is that any clearer?)
    */
    double range = 300;		// 100 is not very good; 1000 is great but too slow
    PSSM_T* pos_pssm =
      build_motif_pssm(
        motif, 
        pos_bg_freqs, 
        pos_bg_freqs, 
        NULL, // Priors not used
        0.0L, // alpha not used
        range, 
        num_gc_bins, 
        TRUE
      );
    PSSM_T* neg_pssm = (scan_both_strands ?
      build_motif_pssm(
        motif_rc, 
        rev_bg_freqs, 
        pos_bg_freqs, 
        NULL, // Priors not used
        0.0L, // alpha not used
        range, 
        num_gc_bins, 
        TRUE
      )
      : NULL
    );
    pssm_pairs[motif_index] = create_pssm_pair(pos_pssm, neg_pssm);
  }

  // Open the FASTA file for reading.
  FILE* fasta_file = NULL;
  if (open_file(fasta_filename, "r", FALSE, "FASTA", "sequences", &fasta_file) == 0) {
    die("Couldn't open the file %s.\n", fasta_filename);
  }
  if (verbosity >= NORMAL_VERBOSE) {
    if (last == 0) {
      fprintf(stderr, "Using entire sequence\n");
    } else {
      fprintf(stderr, "Limiting sequence to last %d positions.\n", last);
    }
  }

  /**************************************************************
   * Read in all sequences and score with all motifs
   **************************************************************/
  int seq_loading_num = 0;  // keeps track on the number of sequences read in total
  int seq_counter = 0;		// holds the index to the seq in the pattern
  int unique_seqs = 0;      // keeps track on the number of unique sequences
  BOOLEAN_T need_postprocessing = FALSE;
  SEQ_T* sequence = NULL;
  RBTREE_T* seq_ids = rbtree_create(rbtree_strcasecmp,NULL,free,rbtree_intcpy,free);
  RBNODE_T* seq_node;
  BOOLEAN_T created;
  while (read_one_fasta(alph, fasta_file, max_seq_length, &sequence)) {
    ++seq_loading_num;
	created = FALSE;
    char* seq_name = get_seq_name(sequence);
    int seq_len = get_seq_length(sequence);
    int scan_len;
    if (last != 0) {
      scan_len = last;
    } else {
      scan_len = seq_len;
    }
	  
	// red-black trees are only required if duplicates should be combined
	if (combine_duplicates){
		//lookup seq id and create new entry if required, return sequence index
		char *tmp_id = mm_malloc(strlen(seq_name)+1); // required copy for rb-tree
		strncpy(tmp_id,seq_name,strlen(seq_name)+1);
		seq_node = rbtree_lookup(seq_ids, tmp_id, TRUE, &created);
		if (created) {// assign it a loading number
			rbtree_set(seq_ids, seq_node, &unique_seqs);
			seq_counter = unique_seqs;
			++unique_seqs;
		} else {
			seq_counter = *((int*)rbnode_get(seq_node));
		}
	}
	  
    //
    // Set up sequence-dependent background model and compute
    // log cumulative probability of sequence.
    //
    double *logcumback = NULL;                    // array of log cumulative probs.
    if (sdbg_order >= 0) {
      Resize(logcumback, seq_len+1, double);
      char* raw_seq = get_raw_sequence(sequence);
      BOOLEAN rc = FALSE;
      double *a_cp = get_markov_from_sequence(raw_seq, alph_string(alph), rc, sdbg_order, 0);
      log_cum_back(raw_seq, a_cp, sdbg_order, logcumback);
      myfree(a_cp);
    }

    // Get the GC content of the sequence if binning p-values by GC
    // and store it in the sequence object.
    if (num_gc_bins > 1) {
      ARRAY_T *freqs = get_sequence_freqs(sequence, alph);
      set_total_gc_sequence(sequence,
        get_array_item(1,freqs) + get_array_item(2,freqs));	// f(C) + f(G)
      free_array(freqs);			// clean up
    } else {
      set_total_gc_sequence(sequence, -1);	// flag ignore
    }

    /**************************************************************
     * Process all motifs.
     **************************************************************/
    int ns = scan_both_strands ? 2 : 1;
    for (motif_index = 0; motif_index < num_motifs; motif_index++) {
      PATTERN_T *pattern = patterns[motif_index];
      MOTIF_T* motif = (MOTIF_T*)arraylst_get(ns*motif_index, motifs);
      char* motif_id = (scan_both_strands ? get_motif_st_id(motif) : get_motif_id(motif));
      if (verbosity >= HIGH_VERBOSE) {
        fprintf(stderr, "Using motif %s of width %d.\n", motif_id, get_motif_length(motif));
      }
      if ((selected_motifs == NULL) || (have_string(get_motif_id(motif), selected_motifs) == TRUE)) {
        if (verbosity >= HIGHER_VERBOSE) {
          fprintf(stderr, "Scanning %s sequence with length %d "
              "abbreviated to %d with motif %s with length %d.\n",
              seq_name, seq_len, scan_len, motif_id, get_motif_length(motif));
        }
		SCANNED_SEQUENCE_T* scanned_seq = NULL;

		
		if (!combine_duplicates || get_pattern_num_scanned_sequences(pattern) <= seq_counter){
			// Create a scanned_sequence record and save it in the pattern.
			scanned_seq = allocate_scanned_sequence(seq_name, seq_name, pattern);
			set_scanned_sequence_length(scanned_seq, scan_len);
		} else {
			// get existing sequence record
			scanned_seq = get_pattern_scanned_sequences(pattern)[seq_counter];
			set_scanned_sequence_length(scanned_seq, max(scan_len, get_scanned_sequence_length(scanned_seq)));
		}
		
		// check if scanned component of sequence has sufficient length for the motif
		if (scan_len < get_motif_length(motif)) {
			// set score to zero and p-value to 1 if not set yet
			if(!has_scanned_sequence_score(scanned_seq)){
				set_scanned_sequence_score(scanned_seq, 0.0);
			}
			if(pvalues && !has_scanned_sequence_pvalue(scanned_seq)){
				set_scanned_sequence_pvalue(scanned_seq, 1.0);
			} 
			add_scanned_sequence_scanned_position(scanned_seq); 
			if (get_scanned_sequence_num_scanned_positions(scanned_seq) > 0L) need_postprocessing = TRUE;
			if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "%s too short for motif %s. Score set to 0!\n", seq_name, motif_id);
		} else {  
			// scan the sequence using average/maximum motif affinity
			ama_sequence_scan(alph, sequence, logcumback, pssm_pairs[motif_index], scoring, 
							  pvalues, last, scanned_seq, &need_postprocessing);
		}

      } else {
        if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "Skipping motif %s.\n", motif_id);
      }
    } // All motifs parsed

    free_seq(sequence);
    if (sdbg_order >= 0) myfree(logcumback);

  } // read sequences
コード例 #22
0
/*
 * Change the size of an allocation and copy data from the old pointer.
 *
 * If ptr is NULL mm_realloc is the same as mm_malloc.
 * If size is 0 mm_realloc is the same as mm_free.
 *
 * A "large" run (one whose slot count is 0) is kept as-is when its payload
 * is already big enough, or grown in place by absorbing the free run that
 * follows it. In every other case a new block is obtained from mm_malloc,
 * the old contents are copied over and the old pointer is released.
 *
 * Returns a pointer to the resized allocation (ptr itself when it was kept
 * or grown in place), or NULL when size is 0 or when mm_malloc returned
 * NULL. In the latter case the old allocation is left untouched.
 */
void *mm_realloc(void *ptr, size_t size)
{
    /* If size is 0 free the pointer */
    if (size == 0) {
        mm_free(ptr);
        return NULL;
    }

    /* If ptr is NULL just allocate */
    if (ptr == NULL) {
        return mm_malloc(size);
    }

    /* Find the run the old pointer belongs to */
    char *old_run = mm_findnodetree((char *)ptr, NULL, NULL);

    /* A slot count of 0 marks a "large" run holding a single allocation */
    int is_large = (mmrun_get_slotcount(old_run) == 0);

    /* Raw size of the old allocation: whole run for large runs, one slot otherwise */
    int old_size = is_large ? mmrun_get_largesize(old_run)
                            : mmrun_get_slotsize(old_run);

    /*
     * Number of payload bytes available behind ptr. The size of a large run
     * includes its header, so the header has to be subtracted; a slot is
     * taken to be payload only.
     */
    size_t old_capacity = (size_t)old_size;
    if (is_large) {
        old_capacity -= RUN_HEADER_SIZE;
    }

    /* See if ptr can be expanded */
    if (is_large) {
        /* Return if the run is already large enough */
        if (old_capacity >= size) {
            return ptr;
        }

        /* See if there is a free run after the old run */
        if (free_runs) {
            /* NOTE(review): the "+ 1" is kept from the original; confirm it
             * matches how rbtree_lookup locates the neighbouring run. */
            char *run = rbtree_lookup(old_run + old_size + 1, free_runs);

            /* Check if the expanded run can contain the new size.
             * NOTE(review): this is only sufficient if mmrun_get_size()
             * excludes the run header -- confirm. */
            if (run && (mmrun_get_size(run) + old_size) >= size) {
                /* Remove the free run from the free list */
                rbtree_remove(run, &free_runs);
                int run_size = mmrun_get_largesize(run);

                /* Merge it with the old run */
                mmrun_init(0, 0, old_run);
                mmrun_set_largesize(old_size + run_size, old_run);

                /* Split off any excess */
                mmrun_split(size + RUN_HEADER_SIZE, old_run);

                /* Return the expanded run */
                return old_run + RUN_HEADER_SIZE;
            }
        }
    }

    /* If ptr can't be expanded just allocate a new run and copy */
    void *new_ptr = mm_malloc(size);

    if (new_ptr) {
        /*
         * Copy no more than the old payload. Bounding the copy by the raw
         * run size would read up to RUN_HEADER_SIZE bytes past the end of
         * a large run.
         */
        size_t copy_len = (size < old_capacity) ? size : old_capacity;
        memcpy(new_ptr, ptr, copy_len);

        /* Free the old pointer */
        mm_free(ptr);
    }

    return new_ptr;
}
コード例 #23
0
ファイル: ama.c プロジェクト: NeonTheBlackstar/RiboDatabase
/*************************************************************************
 * Entry point for ama
 *************************************************************************/
int main(int argc, char **argv) {
  AMA_OPTIONS_T options;
  ARRAYLST_T *motifs;
  clock_t c0, c1; // measuring cpu_time
  MOTIF_AND_PSSM_T *combo;
  CISML_T *cisml;
  PATTERN_T** patterns;
  PATTERN_T *pattern;
  FILE *fasta_file, *text_output, *cisml_output;
  int i, seq_loading_num, seq_counter, unique_seqs, seq_len, scan_len, x1, x2, y1, y2;
  char *seq_name, *path;
  bool need_postprocessing, created;
  SEQ_T *sequence;
  RBTREE_T *seq_ids;
  RBNODE_T *seq_node;
  double *logcumback;
  ALPH_T *alph;

  // process the command
  process_command_line(argc, argv, &options);

  // load DNA motifs
  motifs = load_motifs(&options);

  // get the alphabet
  if (arraylst_size(motifs) > 0) {
    combo = (MOTIF_AND_PSSM_T*)arraylst_get(0, motifs);
    alph = alph_hold(get_motif_alph(combo->motif));
  } else {
    alph = alph_dna();
  }

  // pick columns for GC operations
  x1 = -1; x2 = -1; y1 = -1; y2 = -1;
  if (alph_size_core(alph) == 4 && alph_size_pairs(alph) == 2) {
    x1 = 0; // A
    x2 = alph_complement(alph, x1); // T
    y1 = (x2 == 1 ? 2 : 1); // C
    y2 = alph_complement(alph, y1); // G
    assert(x1 != x2 && y1 != y2 && x1 != y1 && x2 != y2 && x1 != y2 && x2 != y1);
  }

  // record starting time
  c0 = clock();

  // Create cisml data structure for recording results
  cisml = allocate_cisml(PROGRAM_NAME, options.command_line, options.motif_filename, options.fasta_filename);
  set_cisml_background_file(cisml, options.bg_filename);

  // make a CISML pattern to hold scores for each motif
  for (i = 0; i < arraylst_size(motifs); i++) {
    combo = (MOTIF_AND_PSSM_T*)arraylst_get(i, motifs);
    add_cisml_pattern(cisml, allocate_pattern(get_motif_id(combo->motif), ""));
  }

  // Open the FASTA file for reading.
  fasta_file = NULL;
  if (!open_file(options.fasta_filename, "r", false, "FASTA", "sequences", &fasta_file)) {
    die("Couldn't open the file %s.\n", options.fasta_filename);
  }
  if (verbosity >= NORMAL_VERBOSE) {
    if (options.last == 0) {
      fprintf(stderr, "Using entire sequence\n");
    } else {
      fprintf(stderr, "Limiting sequence to last %d positions.\n", options.last);
    }
  }

  //
  // Read in all sequences and score with all motifs
  //
  seq_loading_num = 0;  // keeps track on the number of sequences read in total
  seq_counter = 0;      // holds the index to the seq in the pattern
  unique_seqs = 0;      // keeps track on the number of unique sequences
  need_postprocessing = false;
  sequence = NULL;
  logcumback = NULL;
  seq_ids = rbtree_create(rbtree_strcasecmp,rbtree_strcpy,free,rbtree_intcpy,free);
  while (read_one_fasta(alph, fasta_file, options.max_seq_length, &sequence)) {
    ++seq_loading_num;
    seq_name = get_seq_name(sequence);
    seq_len = get_seq_length(sequence);
    scan_len = (options.last != 0 ? options.last : seq_len);
    // red-black trees are only required if duplicates should be combined
    if (options.combine_duplicates){
      //lookup seq id and create new entry if required, return sequence index
      seq_node = rbtree_lookup(seq_ids, get_seq_name(sequence), true, &created);
      if (created) { // assign it a loading number
        rbtree_set(seq_ids, seq_node, &unique_seqs);
        seq_counter = unique_seqs;
        ++unique_seqs;
      } else {
        seq_counter = *((int*)rbnode_get(seq_node));
      }
    }
          
    //
    // Set up sequence-dependent background model and compute
    // log cumulative probability of sequence.
    // This needs the sequence in raw format.
    //
    if (options.sdbg_order >= 0)
      logcumback = log_cumulative_background(alph, options.sdbg_order, sequence);

    // Index the sequence, throwing away the raw format and ambiguous characters
    index_sequence(sequence, alph, SEQ_NOAMBIG);

    // Get the GC content of the sequence if binning p-values by GC
    // and store it in the sequence object.
    if (options.num_gc_bins > 1) {
      ARRAY_T *freqs = get_sequence_freqs(sequence, alph);
      set_total_gc_sequence(sequence, get_array_item(y1, freqs) + get_array_item(y2, freqs)); // f(C) + f(G)
      free_array(freqs);                        // clean up
    } else {
      set_total_gc_sequence(sequence, -1);      // flag ignore
    }

    // Scan with motifs.
    for (i = 0; i < arraylst_size(motifs); i++) {
      pattern = get_cisml_patterns(cisml)[i];
      combo = (MOTIF_AND_PSSM_T*)arraylst_get(i, motifs);
      if (verbosity >= HIGHER_VERBOSE) {
        fprintf(stderr, "Scanning %s sequence with length %d "
            "abbreviated to %d with motif %s with length %d.\n",
            seq_name, seq_len, scan_len, 
            get_motif_id(combo->motif), get_motif_length(combo->motif));
      }
      SCANNED_SEQUENCE_T* scanned_seq = NULL;
      if (!options.combine_duplicates || get_pattern_num_scanned_sequences(pattern) <= seq_counter) {
        // Create a scanned_sequence record and save it in the pattern.
        scanned_seq = allocate_scanned_sequence(seq_name, seq_name, pattern);
        set_scanned_sequence_length(scanned_seq, scan_len);
      } else {
        // get existing sequence record
        scanned_seq = get_pattern_scanned_sequences(pattern)[seq_counter];
        set_scanned_sequence_length(scanned_seq, max(scan_len, get_scanned_sequence_length(scanned_seq)));
      }
      
      // check if scanned component of sequence has sufficient length for the motif
      if (scan_len < get_motif_length(combo->motif)) {
        // set score to zero and p-value to 1 if not set yet
        if(!has_scanned_sequence_score(scanned_seq)){
          set_scanned_sequence_score(scanned_seq, 0.0);
        }
        if(options.pvalues && !has_scanned_sequence_pvalue(scanned_seq)){
          set_scanned_sequence_pvalue(scanned_seq, 1.0);
        } 
        add_scanned_sequence_scanned_position(scanned_seq); 
        if (get_scanned_sequence_num_scanned_positions(scanned_seq) > 0L) {
          need_postprocessing = true;
        }
        if (verbosity >= HIGH_VERBOSE) {
          fprintf(stderr, "%s too short for motif %s. Score set to 0.\n",
              seq_name, get_motif_id(combo->motif));
        }
      } else {
        // scan the sequence using average/maximum motif affinity
        ama_sequence_scan(alph, sequence, logcumback, combo->pssm_pair,
            options.scoring, options.pvalues, options.last, scanned_seq,
            &need_postprocessing);
      }
    } // All motifs scanned

    free_seq(sequence);
    if (options.sdbg_order >= 0) myfree(logcumback);

  } // read sequences

  fclose(fasta_file);
  if (verbosity >= HIGH_VERBOSE) fprintf(stderr, "(%d) sequences read in.\n", seq_loading_num);
  if (verbosity >= NORMAL_VERBOSE) fprintf(stderr, "Finished          \n");

        
  // if any sequence identifier was multiple times in the sequence set  then
  // postprocess of the data is required
  if (need_postprocessing || options.normalize_scores) {
    post_process(cisml, motifs, options.normalize_scores);
  }
        
  // output results
  if (options.output_format == DIRECTORY_FORMAT) {
    if (create_output_directory(options.out_dir, options.clobber, verbosity > QUIET_VERBOSE)) {
      // only warn in higher verbose modes
      fprintf(stderr, "failed to create output directory `%s' or already exists\n", options.out_dir);
      exit(1);
    }
    path = make_path_to_file(options.out_dir, text_filename);
    //FIXME check for errors: MEME doesn't either and we at least know we have a good directory
    text_output = fopen(path, "w");
    free(path);
    path = make_path_to_file(options.out_dir, cisml_filename);
    //FIXME check for errors
    cisml_output = fopen(path, "w");
    free(path);
    print_cisml(cisml_output, cisml, true, NULL, false);
    print_score(cisml, text_output);
    fclose(cisml_output);
    fclose(text_output);
  } else if (options.output_format == GFF_FORMAT) {
    print_score(cisml, stdout);
  } else if (options.output_format == CISML_FORMAT) {
    print_cisml(stdout, cisml, true, NULL, false);
  } else {
    die("Output format invalid!\n");
  }

  //
  // Clean up.
  //
  rbtree_destroy(seq_ids);
  arraylst_destroy(motif_and_pssm_destroy, motifs);
  free_cisml(cisml);
  rbtree_destroy(options.selected_motifs);
  alph_release(alph);
        
  // measure time
  if (verbosity >= NORMAL_VERBOSE) { // starting time
    c1 = clock();
    fprintf(stderr, "cycles (CPU);            %ld cycles\n", (long) c1);
    fprintf(stderr, "elapsed CPU time:        %f seconds\n", (float) (c1-c0) / CLOCKS_PER_SEC);
  }
  return 0;
}
コード例 #24
0
ファイル: data.bis.c プロジェクト: Memprof/parser
/**
 * Reads the allocation trace `mmaped_file` and queues one data event per
 * event line into the global `data_events` priority queue (created on
 * first use). Also creates the global `active_data` tree if it does not
 * exist yet.
 *
 * Two kinds of lines are recognized:
 *
 *   <rdt> <begin> <size> <type> <cpu> <tid>
 *       An event. <type> 0 is a free and 2 is a munmap (handled as a free).
 *       Any other type is an allocation whose end address is begin + size:
 *       type 1 is a malloc, the remaining types are mmaps whose low bits
 *       are tested as the MAP_SHARED (0x01) / MAP_PRIVATE (0x02) flags.
 *
 *   #<time> 0x<loc> <text>
 *       Metadata. <text> is stored under <time> and used as the description
 *       of a malloc event whose rdt equals <time>, provided the metadata
 *       line comes before that event in the file.
 *
 * Lines matching neither form are ignored. If the file cannot be opened a
 * warning is printed and nothing else happens.
 **/
void read_data_events(char *mmaped_file) {
   /** Header **/
   /* NOTE(review): `data` is never closed in this function; whether fclose()
    * is the right call depends on what open_file() returns -- confirm. */
   FILE *data = open_file(mmaped_file);
   if(!data) {
      printf("#Warning: data file %s not found\n", mmaped_file);
      return;
   }

   if(!data_events)
      data_events = pqueue_init(10, cmp_pri, get_pri, set_pri, get_pos, set_pos);

   /* Maps the timestamp of a metadata line to its description string. */
   rbtree metadata = rbtree_create();

   char line[512];
   struct data_ev *event;
   int nb_lines = 0;
   uint64_t type;


   while(fgets(line, sizeof(line), data)) {
      nb_lines++;
      event = malloc(sizeof(*event));
      if(!event) {
         /* sscanf below writes through event, so stop cleanly instead */
         fprintf(stderr, "#Error: out of memory while reading %s\n", mmaped_file);
         exit(EXIT_FAILURE);
      }

      if(sscanf(line, "%lu %lu %lu %lu %d %u", &event->rdt, &event->malloc.begin, &event->malloc.end, &type, &event->cpu, &event->tid) != 6) {
         goto test_info;
      }
      if(type == 0) {               // free
         event->type = FREE;
      } else if(type == 2) {        // munmap
         event->type = FREE;        //munmap is not handled correctly yet => fake free
      } else {                      // malloc / mmap
         event->type = MALLOC;
         /* the third field was a size: turn it into an end address */
         event->malloc.end = event->malloc.begin + event->malloc.end;
         if(type == 1) {
            /* the description string stays shared with the metadata tree */
            char * val = rbtree_lookup(metadata, (void*)event->rdt, pointer_cmp);
            if(val)
               event->malloc.info = val;
            else
               asprintf(&event->malloc.info, "datasize%lu-%d", event->malloc.end - event->malloc.begin, nb_lines);
         } else {
            /*#define MAP_SHARED    0x01 
            #define MAP_PRIVATE     0x02*/
            if(event->malloc.end - event->malloc.begin == 8392704) { /* All stacks seem to be of that size */
               asprintf(&event->malloc.info, "thread-stack-%d", nb_lines);
            } else if(type & 0x01) {
               asprintf(&event->malloc.info, "mmap-shared%lu-%d", event->malloc.end - event->malloc.begin, nb_lines);
            } else if(type & 0x02) {
               asprintf(&event->malloc.info, "mmap-priv%lu-%d", event->malloc.end - event->malloc.begin, nb_lines);
            } else {
               asprintf(&event->malloc.info, "mmap-??%lu-%d", event->malloc.end - event->malloc.begin, nb_lines);
            }
         }
      }


      pqueue_insert(data_events, event);
      total_data_samples++;
      continue;

test_info:;
      /* Not an event line: try to read it as a metadata line. */
      uint64_t time, loc;
      int read;
      if(sscanf(line, "#%lu 0x%lx %n\n", &time, &loc, &read) != 2) {
         //printf("fail %s %d\n", line, read);
         goto fail;
      }
      /* Everything after the location is the description. Its last
       * character (normally the newline kept by fgets) is dropped. */
      const char *raw_value = line + read;
      int met_len = (int)strlen(raw_value)-1;
      char *met_value;
      if(met_len < 5) { // malloc probably not correctly resolved
         /* Build the fallback directly; duplicating the text first and then
          * overwriting the pointer would leak the duplicate. */
         asprintf(&met_value, "%lu", time);
      } else {
         met_value = strdup(raw_value);
         met_value[met_len] = '\0';
      }
      rbtree_insert(metadata, (void*)time, met_value, pointer_cmp);
      /* A metadata line produces no event, so control falls through to
       * release the event allocated for this line. */

fail:
      //printf("#Unrecognized line: %s", line);
      free(event);
      continue;
   }

   if(!active_data)
      active_data = rbtree_create();

   if(verbose)
      printf("#All data events added successfully ; now processing samples\n");
}