Ejemplo n.º 1
0
int create_relation_nonunique(relation_t *relation, int64_t num_tuples,
                              const int64_t maxid) 
{
    check_seed();

    relation->num_tuples = num_tuples;
    
    if (!relation->tuples) { 
        perror("memory must be allocated first");
        return -1; 
    }

    random_gen(relation, maxid);

    return 0;
}
Ejemplo n.º 2
0
int 
create_relation_zipf(relation_t * relation, int64_t num_tuples,
                     const int64_t maxid, const double zipf_param) 
{
    check_seed();

    relation->num_tuples = num_tuples;
    
    if (!relation->tuples) {
        perror("memory must be allocated first");
        return -1; 
    }

    gen_zipf(num_tuples, maxid, zipf_param, &relation->tuples);
    
    /* write_relation(relation, "S128M-skew1.tbl"); */
    return 0;
}
Ejemplo n.º 3
0
int 
create_relation_pk(relation_t *relation, int64_t num_tuples) 
{
    check_seed();

    relation->num_tuples = num_tuples;
    
    if (!relation->tuples) {
        perror("memory must be allocated first");
        return -1; 
    }
  
    random_unique_gen(relation);

#ifdef PERSIST_RELATIONS
    write_relation(relation, "R.tbl");
#endif

    return 0;
}
Ejemplo n.º 4
0
int 
create_relation_fk(relation_t *relation, int64_t num_tuples, const int64_t maxid)
{
    int32_t i, iters;
    int64_t remainder;
    relation_t tmp;

    check_seed();

    relation->num_tuples = num_tuples;
      
    if (!relation->tuples) { 
        perror("memory must be allocated first");
        return -1; 
    }
  
    /* alternative generation method */
    iters = num_tuples / maxid;
    for(i = 0; i < iters; i++){
        tmp.num_tuples = maxid;
        tmp.tuples = relation->tuples + maxid * i;
        random_unique_gen(&tmp);
    }

    /* if num_tuples is not an exact multiple of maxid */
    remainder = num_tuples % maxid;
    if(remainder > 0) {
        tmp.num_tuples = remainder;
        tmp.tuples = relation->tuples + maxid * iters;
        random_unique_gen(&tmp);
    }

#ifdef PERSIST_RELATIONS
    write_relation(relation, "S.tbl");
#endif

    return 0;
}
Ejemplo n.º 5
0
int 
parallel_create_relation(relation_t *relation, uint64_t num_tuples, 
                         uint32_t nthreads, uint64_t maxid) 
{
    int rv;
    uint32_t i;
    uint64_t offset = 0;
    
    check_seed();

    relation->num_tuples = num_tuples;


    if (!relation->tuples) {
        perror("memory must be allocated first");
        return -1; 
    }

    create_arg_t args[nthreads];
    pthread_t tid[nthreads];
    cpu_set_t set;
    pthread_attr_t attr;
    pthread_barrier_t barrier;

    unsigned int pagesize;
    unsigned int npages;
    unsigned int npages_perthr;
    uint64_t ntuples_perthr;
    uint64_t ntuples_lastthr;

    pagesize        = getpagesize();
    npages          = (num_tuples * sizeof(tuple_t)) / pagesize + 1;
    npages_perthr   = npages / nthreads;
    ntuples_perthr  =  npages_perthr * (pagesize/sizeof(tuple_t));

    if(npages_perthr == 0) 
       ntuples_perthr = num_tuples / nthreads;

    ntuples_lastthr = num_tuples - ntuples_perthr * (nthreads-1);

    pthread_attr_init(&attr);

    rv = pthread_barrier_init(&barrier, NULL, nthreads);
    if(rv != 0){
        printf("[ERROR] Couldn't create the barrier\n");
        exit(EXIT_FAILURE);
    }


    volatile void * locks = (volatile void *)calloc(num_tuples, sizeof(char));

    for( i = 0; i < nthreads; i++ ) {
        int cpu_idx = get_cpu_id(i);
        
        CPU_ZERO(&set);
        CPU_SET(cpu_idx, &set);
        pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &set);

        args[i].firstkey       = (offset + 1) % maxid;
        args[i].maxid          = maxid;
        args[i].rel.tuples     = relation->tuples + offset;
        args[i].rel.num_tuples = (i == nthreads-1) ? ntuples_lastthr 
                                 : ntuples_perthr;

        args[i].fullrel = relation;
        args[i].locks   = locks;
        args[i].barrier = &barrier;

        offset += ntuples_perthr;

        rv = pthread_create(&tid[i], &attr, random_unique_gen_thread, 
                            (void*)&args[i]);
        if (rv){
            fprintf(stderr, "[ERROR] pthread_create() return code is %d\n", rv);
            exit(-1);
        }
    }

    for(i = 0; i < nthreads; i++){
        pthread_join(tid[i], NULL);
    }

    /* randomly shuffle elements */
    /* knuth_shuffle(relation); */

    /* clean up */
    free((char*)locks);
    pthread_barrier_destroy(&barrier);

#ifdef PERSIST_RELATIONS
    char * const tables[] = {"R.tbl", "S.tbl"};
    static int rs = 0;
    write_relation(relation, tables[(rs++)%2]);
#endif

    return 0;
}
Ejemplo n.º 6
0
/* Core algorithm */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;
	
	int i, j, k, components;
	AllocArray(profile, cols);
	for (j = 0; j < cols; j++) 
		AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);

    

	bool *candidates;
	AllocArray(candidates, rows);

	e = *el; 
	i = 0;
	while (i++ < n)
	{	
		/*printf ("%d\n",i);*/
		e = *el++;
		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* speed up the program if the rows bigger than 200 */
	        if (rows > 250)
		{ 
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
				flag = FALSE;
			else if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			else if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag =FALSE;
		}
		else   
		{
			flag = check_seed(e, bb, block_id);
			if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
				flag = FALSE;
			if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
				flag = FALSE;
		}
		if (!flag) continue;

		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++) 
				profile[j][k] = 0;

		/*you must allocate a struct if you want to use the pointers related to it*/
		AllocVar(b);
		/*initial the b->score*/
                b->score = MIN(2, e->score);
	
		/* initialize the stacks genes and scores */		
		int ii;		
		dsClear(genes);
		dsClear(scores);		
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}		
		dsClear(genes);
		dsClear(scores);
		
		/*printf ("%d\t%d\n",e->gene_one,e->gene_two);*/
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
                if (cand_threshold < 2) 
			cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */		
		for (j = 0; j < rows; j++) 
			candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find the genes by which we get the best score*/
		for(k = 0; k < components; k++)
		{
/*			printf ("******%d\t%d\n",dsItem(scores,k),b->score);*/
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
		}
		components = k + 1;
		/*printf ("%d",components);*/
		int ki;
		for (ki=0; ki < rows; ki++)
			candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		genes->top = k ;
		int cnt = 0;
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++) 
			colcand[ki] = FALSE;             
    
		/* add columns satisfy the conservative r */ 
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);
		
		/* add some new possible genes */
		int m_cnt=0;
		continuous KL_score=0;
		discrete *sub_array;
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				sub_array = get_intersect_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				/*printf ("%d\t%.2f\n",m_cnt,KL_score);*/
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}

                b->block_rows_pre = components;
		
		/* add genes that negative regulated to the consensus */
		for ( ki = 0; ki < rows; ki++)
		{
			if (po->IS_list && !sublist[ki]) continue;
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				sub_array = get_intersect_reverse_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
				KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
				if (KL_score>=b->significance * po->TOLERANCE)
				{
					dsPush(genes,ki);
					components++;
					candidates[ki] = FALSE;
				}
			}
		}
		free(colcand);

		/* save the current cluster*/
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);
	
		scan_block(b_genes, b);
		if (b->block_cols == 0) continue;
		b->block_rows = components;

		b->score = b->score;
		/*	b->score = b->block_rows * b->block_cols;		*/

		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki))) 
				dsPush(allincluster,dsItem(genes,ki));	
		/*save the current block b to the block list bb so that we can sort the blocks by their score*/
		bb[block_id++] = b;

		/* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();	
	}
	/* writes character to the current position in the standard output (stdout) and advances the internal file position indicator to the next position.
	 * It is equivalent to putc(character,stdout).*/
	putchar('\n');
	/* free-up the candidate list */
	free(candidates);
	free(allincluster);
	block_enrichment(fw, bb, block_id);
	return report_blocks(fw, bb, block_id);
}
Ejemplo n.º 7
0
/* Core algorithm */
int cluster (FILE *fw, Edge **el, int n)
{
	int block_id = 0;
	Block **bb;
	int allocated = po->SCH_BLOCK;
	AllocArray(bb, allocated);

	Edge *e;
	Block *b;
	struct dyStack *genes, *scores, *b_genes, *allincluster;
	
	int i, j, k, components;

	AllocArray(profile, cols);
	for (j = 0; j < cols; j++) AllocArray(profile[j], sigma);

	genes = dsNew(rows);
	scores = dsNew(rows);
	allincluster = dsNew(rows);
    
	bool *candidates;
	AllocArray(candidates, rows);

	e = *el; i = 0;
	while (i++ < n)
	{	
		e = *el++;
        /*printf("a:%d b:%d score:%d\n",e->gene_one,e->gene_two,e->score);*/

		/* check if both genes already enumerated in previous blocks */
		bool flag = TRUE;
		/* speed up the program if the rows bigger than 200 */
	        if (rows > 200)
		{ 
			if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
			flag = FALSE;
		}
		else   
                    {
		     flag = check_seed(e, bb, block_id);
		    }
		if (!flag) continue;

		for (j = 0; j < cols; j++)
			for (k = 0; k < sigma; k++) profile[j][k] = 0;

		AllocVar(b);
                b->score = MIN(2, e->score);
	
		/* initialize the stacks genes and scores */		
		int ii;		
		dsClear(genes);
		dsClear(scores);		
		for(ii = 0; ii < rows; ii ++)
		{
			dsPush(genes,-1);
			dsPush(scores,-1);
		}		
		dsClear(genes);
		dsClear(scores);
		
		dsPush(genes, e->gene_one);
		dsPush(genes, e->gene_two);
		dsPush(scores, 1);
		dsPush(scores, b->score);

		/* branch-and-cut condition for seed expansion */
		int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
                if (cand_threshold < 2) cand_threshold = 2;

		/* maintain a candidate list to avoid looping through all rows */		
		for (j = 0; j < rows; j++) candidates[j] = TRUE;
		candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
		
		components = 2;

		/* expansion step, generate a bicluster without noise */
		block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

		/* track back to find the best score that which genes makes it */
		for(k = 0; k < components; k++)
			if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
		components = k + 1;

		int ki;
		for (ki=0; ki < rows; ki++)
		candidates[ki] = TRUE;

		for (ki=0; ki < components - 1 ; ki++)
		{
			seed_update(arr_c[dsItem(genes,ki)]);
			candidates[dsItem(genes,ki)] = FALSE;
		}
		candidates[dsItem(genes,k)] = FALSE;
		genes->top = k ;
		int cnt = 0;
		bool *colcand;
		AllocArray(colcand, cols);
		for(ki = 0; ki < cols; ki++) colcand[ki] = FALSE;             
    
        /* add columns satisfy the conservative r */ 
		seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);
		
        /* add some new possible genes */
		int m_cnt;
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
                b->block_rows_pre = components;
		
        /* add genes that negative regulated to the consensus */
		for ( ki = 0; ki < rows; ki++)
		{
			m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
			if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
			{
				dsPush(genes,ki);
				components++;
				candidates[ki] = FALSE;
			}
		}
		free(colcand);

		/* save the current cluster*/
		b_genes = dsNew(b->block_rows_pre);
		for (ki = 0; ki < b->block_rows_pre; ki++)
			dsPush(b_genes, dsItem(genes,ki));

		/* store gene arrays inside block */
		b->genes = dsNew(components);
		b->conds = dsNew(cols);
	
		scan_block(b_genes, b);
		if (b->block_cols == 0) continue;
		b->block_rows = components;
                b->score = b->block_rows * b->block_cols;		
		dsClear(b->genes);
		for ( ki=0; ki < components; ki++)
			dsPush(b->genes,dsItem(genes,ki));
		for(ki = 0; ki < components; ki++)
			if(!isInStack(allincluster, dsItem(genes,ki))) dsPush(allincluster,dsItem(genes,ki));	

		bb[block_id++] = b;

        /* reaching the results number limit */
		if (block_id == po->SCH_BLOCK) break;
		verboseDot();	

	}

	putchar('\n');
    /* free-up the candidate list */
	free(candidates);
	free(allincluster);

	return report_blocks(fw, bb, block_id);
}