/**
 * Populate a caller-allocated relation through random_gen().
 *
 * The requested tuple count is recorded on the relation before the storage
 * check, so relation->num_tuples is updated even when the call fails.
 *
 * @param relation   relation whose tuples buffer must already be allocated
 * @param num_tuples number of tuples to record on the relation
 * @param maxid      forwarded unchanged to random_gen()
 * @return 0 on success, -1 if relation->tuples is NULL
 */
int create_relation_nonunique(relation_t *relation, int64_t num_tuples,
                              const int64_t maxid)
{
    int status = 0;

    check_seed();
    relation->num_tuples = num_tuples;

    if (relation->tuples != NULL) {
        random_gen(relation, maxid);
    } else {
        /* This function does not allocate; the caller has to. */
        perror("memory must be allocated first");
        status = -1;
    }

    return status;
}
/**
 * Populate a caller-allocated relation through gen_zipf().
 *
 * The requested tuple count is stored on the relation first; the storage
 * check follows, so relation->num_tuples changes even on failure.
 *
 * @param relation   relation whose tuples buffer must already be allocated
 * @param num_tuples number of tuples; stored and forwarded to gen_zipf()
 * @param maxid      forwarded unchanged to gen_zipf()
 * @param zipf_param forwarded unchanged to gen_zipf()
 * @return 0 on success, -1 if relation->tuples is NULL
 */
int create_relation_zipf(relation_t * relation, int64_t num_tuples,
                         const int64_t maxid, const double zipf_param)
{
    check_seed();
    relation->num_tuples = num_tuples;

    if (relation->tuples == NULL) {
        perror("memory must be allocated first");
        return -1;
    }

    /* gen_zipf() receives the address of the tuples pointer, not the pointer. */
    gen_zipf(num_tuples, maxid, zipf_param, &relation->tuples);

    return 0;
}
/**
 * Populate a caller-allocated relation through random_unique_gen().
 *
 * relation->num_tuples is set before the storage check and therefore also
 * changes when the call fails. When PERSIST_RELATIONS is defined the
 * finished relation is additionally handed to write_relation() with the
 * file name "R.tbl".
 *
 * @param relation   relation whose tuples buffer must already be allocated
 * @param num_tuples number of tuples to record on the relation
 * @return 0 on success, -1 if relation->tuples is NULL
 */
int create_relation_pk(relation_t *relation, int64_t num_tuples)
{
    int rc = -1;

    check_seed();
    relation->num_tuples = num_tuples;

    if (relation->tuples == NULL) {
        perror("memory must be allocated first");
        return rc;
    }

    random_unique_gen(relation);

#ifdef PERSIST_RELATIONS
    write_relation(relation, "R.tbl");
#endif

    rc = 0;
    return rc;
}
/**
 * Populate a caller-allocated relation in consecutive chunks of at most
 * maxid tuples, calling random_unique_gen() once per chunk.
 *
 * The buffer is split into floor(num_tuples / maxid) chunks of exactly
 * maxid tuples, followed by one shorter chunk when num_tuples is not an
 * exact multiple of maxid. When PERSIST_RELATIONS is defined the finished
 * relation is handed to write_relation() with the file name "S.tbl".
 *
 * relation->num_tuples is set before any validation, so it changes even
 * when the call fails.
 *
 * @param relation   relation whose tuples buffer must already be allocated
 * @param num_tuples total number of tuples to generate
 * @param maxid      chunk length; must be positive
 * @return 0 on success, -1 if relation->tuples is NULL or maxid <= 0
 */
int create_relation_fk(relation_t *relation, int64_t num_tuples,
                       const int64_t maxid)
{
    /* 64-bit counters: num_tuples / maxid need not fit in 32 bits. */
    int64_t i, iters;
    int64_t remainder;
    relation_t tmp;

    check_seed();

    relation->num_tuples = num_tuples;

    if (!relation->tuples) {
        perror("memory must be allocated first");
        return -1;
    }

    /* maxid is used as a divisor and as a chunk length below. */
    if (maxid <= 0) {
        fprintf(stderr, "[ERROR] maxid must be positive\n");
        return -1;
    }

    /* alternative generation method */
    iters = num_tuples / maxid;
    for (i = 0; i < iters; i++) {
        tmp.num_tuples = maxid;
        tmp.tuples = relation->tuples + maxid * i;
        random_unique_gen(&tmp);
    }

    /* if num_tuples is not an exact multiple of maxid */
    remainder = num_tuples % maxid;
    if (remainder > 0) {
        tmp.num_tuples = remainder;
        tmp.tuples = relation->tuples + maxid * iters;
        random_unique_gen(&tmp);
    }

#ifdef PERSIST_RELATIONS
    write_relation(relation, "S.tbl");
#endif

    return 0;
}
/**
 * Populate a caller-allocated relation using nthreads worker threads, each
 * running random_unique_gen_thread() on its own slice of the tuple buffer.
 *
 * Partitioning: the buffer size is converted to a page count, and each
 * thread receives the number of tuples that fit in (npages / nthreads)
 * whole pages. If that page count is zero, tuples are split evenly by
 * count instead. The last thread takes whatever remains.
 *
 * Thread i is created with its affinity set to the CPU returned by
 * get_cpu_id(i). Every worker also receives a pointer to the full
 * relation, a shared zero-initialised byte array with one byte per tuple
 * ("locks"), and a barrier sized for nthreads participants.
 *
 * relation->num_tuples is set before any validation, so it changes even
 * when the call fails. When PERSIST_RELATIONS is defined the result is
 * passed to write_relation(), alternating between "R.tbl" and "S.tbl" on
 * successive calls.
 *
 * @param relation   relation whose tuples buffer must already be allocated
 * @param num_tuples total number of tuples to generate
 * @param nthreads   number of worker threads; must be non-zero
 * @param maxid      modulus applied to each thread's first key; must be
 *                   non-zero
 * @return 0 on success; -1 if relation->tuples is NULL, if nthreads or
 *         maxid is zero, or if the lock array cannot be allocated.
 *         Barrier or thread creation failures terminate the process.
 */
int parallel_create_relation(relation_t *relation, uint64_t num_tuples,
                             uint32_t nthreads, uint64_t maxid)
{
    int rv;
    uint32_t i;
    uint64_t offset = 0;

    check_seed();

    relation->num_tuples = num_tuples;

    if (!relation->tuples) {
        perror("memory must be allocated first");
        return -1;
    }

    /* nthreads sizes the arrays below and is a divisor; maxid is a modulus. */
    if (nthreads == 0 || maxid == 0) {
        fprintf(stderr, "[ERROR] nthreads and maxid must be non-zero\n");
        return -1;
    }

    create_arg_t args[nthreads];
    pthread_t tid[nthreads];
    cpu_set_t set;
    pthread_attr_t attr;
    pthread_barrier_t barrier;

    unsigned int pagesize;
    unsigned int npages;
    unsigned int npages_perthr;
    uint64_t ntuples_perthr;
    uint64_t ntuples_lastthr;

    pagesize = getpagesize();
    npages = (num_tuples * sizeof(tuple_t)) / pagesize + 1;
    npages_perthr = npages / nthreads;
    ntuples_perthr = npages_perthr * (pagesize / sizeof(tuple_t));

    /* Fewer pages than threads: fall back to an even split by count. */
    if (npages_perthr == 0)
        ntuples_perthr = num_tuples / nthreads;

    ntuples_lastthr = num_tuples - ntuples_perthr * (nthreads - 1);

    /*
     * Allocate the shared byte array before any pthread object exists so
     * that a failure here needs no further cleanup. calloc(0, ...) may
     * legitimately return NULL, hence the num_tuples test.
     */
    volatile void *locks = (volatile void *)calloc(num_tuples, sizeof(char));
    if (locks == NULL && num_tuples != 0) {
        fprintf(stderr, "[ERROR] Couldn't allocate the lock array\n");
        return -1;
    }

    pthread_attr_init(&attr);

    rv = pthread_barrier_init(&barrier, NULL, nthreads);
    if (rv != 0) {
        printf("[ERROR] Couldn't create the barrier\n");
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < nthreads; i++) {
        int cpu_idx = get_cpu_id(i);

        CPU_ZERO(&set);
        CPU_SET(cpu_idx, &set);
        pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &set);

        args[i].firstkey       = (offset + 1) % maxid;
        args[i].maxid          = maxid;
        args[i].rel.tuples     = relation->tuples + offset;
        args[i].rel.num_tuples = (i == nthreads - 1) ? ntuples_lastthr
                                                     : ntuples_perthr;
        args[i].fullrel        = relation;
        args[i].locks          = locks;
        args[i].barrier        = &barrier;

        offset += ntuples_perthr;

        rv = pthread_create(&tid[i], &attr, random_unique_gen_thread,
                            (void *)&args[i]);
        if (rv) {
            fprintf(stderr, "[ERROR] pthread_create() return code is %d\n", rv);
            exit(-1);
        }
    }

    for (i = 0; i < nthreads; i++) {
        pthread_join(tid[i], NULL);
    }

    /* clean up */
    free((char *)locks);
    pthread_barrier_destroy(&barrier);
    pthread_attr_destroy(&attr);

#ifdef PERSIST_RELATIONS
    char * const tables[] = {"R.tbl", "S.tbl"};
    static int rs = 0;
    write_relation(relation, tables[(rs++) % 2]);
#endif

    return 0;
}
/*
 * Core algorithm.
 *
 * Walks the first n entries of the edge list el. Every edge that passes
 * the seed filter is expanded into a Block: block_init() grows the gene
 * set, the set is trimmed back to the point where the best score was
 * reached, further rows are added when they agree (or, in a second pass,
 * agree in the reversed sense) with the first gene on enough candidate
 * columns and pass the KL-score test, and scan_block() fills in the
 * block's columns. Blocks with at least one column are stored in bb until
 * po->SCH_BLOCK blocks have been collected.
 *
 * Uses names declared outside this function: po, rows, cols, sigma,
 * profile, arr_c, sublist, TFindex.
 *
 * fw : output stream, passed to block_enrichment() and report_blocks()
 * el : array of at least n Edge pointers, consumed in order
 * n  : number of edges to consider
 *
 * Returns whatever report_blocks() returns.
 *
 * NOTE(review): b_genes, the arrays returned by get_intersect_row() /
 * get_intersect_reverse_row(), and a block discarded because it has no
 * columns are never released here. Whether freeing them is safe depends on
 * helpers not visible in this function - confirm before changing.
 */
int cluster (FILE *fw, Edge **el, int n)
{
    int block_id = 0;
    Block **bb;
    int allocated = po->SCH_BLOCK;
    AllocArray(bb, allocated);

    Edge *e;
    Block *b;
    struct dyStack *genes, *scores, *b_genes, *allincluster;

    int i, j, k, components;

    AllocArray(profile, cols);
    for (j = 0; j < cols; j++)
        AllocArray(profile[j], sigma);

    genes = dsNew(rows);
    scores = dsNew(rows);
    allincluster = dsNew(rows);

    bool *candidates;
    AllocArray(candidates, rows);

    i = 0;
    while (i++ < n)
    {
        e = *el++;

        /* check if both genes already enumerated in previous blocks */
        bool flag = TRUE;
        /* with more than 250 rows use the cheaper membership test on
         * allincluster instead of check_seed() */
        if (rows > 250)
        {
            if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
                flag = FALSE;
            else if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
                flag = FALSE;
            else if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
                flag = FALSE;
        }
        else
        {
            flag = check_seed(e, bb, block_id);
            if ((po->IS_TFname)&&(e->gene_one!= TFindex)&&(e->gene_two!=TFindex))
                flag = FALSE;
            if ((po->IS_list)&&(!sublist[e->gene_one] || !sublist[e->gene_two]))
                flag = FALSE;
        }
        if (!flag) continue;

        /* reset the column profile for this seed */
        for (j = 0; j < cols; j++)
            for (k = 0; k < sigma; k++)
                profile[j][k] = 0;

        /* the block must exist before any of its fields are used */
        AllocVar(b);
        /* initial score of the block, capped at 2 */
        b->score = MIN(2, e->score);

        /* initialize the stacks genes and scores: fill every slot with -1,
         * then empty them again so stale entries are never read back */
        int ii;
        dsClear(genes);
        dsClear(scores);
        for (ii = 0; ii < rows; ii++)
        {
            dsPush(genes,-1);
            dsPush(scores,-1);
        }
        dsClear(genes);
        dsClear(scores);

        dsPush(genes, e->gene_one);
        dsPush(genes, e->gene_two);
        dsPush(scores, 1);
        dsPush(scores, b->score);

        /* branch-and-cut condition for seed expansion */
        int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
        if (cand_threshold < 2)
            cand_threshold = 2;

        /* maintain a candidate list to avoid looping through all rows */
        for (j = 0; j < rows; j++)
            candidates[j] = TRUE;
        candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
        components = 2;

        /* expansion step, generate a bicluster without noise */
        block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

        /* track back to the last position holding the best score */
        for (k = 0; k < components; k++)
        {
            if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
        }
        components = k + 1;

        /* rebuild the candidate list: only the kept genes are excluded */
        int ki;
        for (ki = 0; ki < rows; ki++)
            candidates[ki] = TRUE;

        for (ki = 0; ki < components - 1; ki++)
        {
            seed_update(arr_c[dsItem(genes,ki)]);
            candidates[dsItem(genes,ki)] = FALSE;
        }
        candidates[dsItem(genes,k)] = FALSE;
        /* truncate the gene stack to the kept prefix */
        genes->top = k;

        int cnt = 0;
        bool *colcand;
        AllocArray(colcand, cols);
        for (ki = 0; ki < cols; ki++)
            colcand[ki] = FALSE;

        /* add columns satisfy the conservative r */
        seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);

        /* add some new possible genes */
        int m_cnt = 0;
        continuous KL_score = 0;
        discrete *sub_array;
        for (ki = 0; ki < rows; ki++)
        {
            if (po->IS_list && !sublist[ki]) continue;
            m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
            {
                sub_array = get_intersect_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
                KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
                if (KL_score>=b->significance * po->TOLERANCE)
                {
                    dsPush(genes,ki);
                    components++;
                    candidates[ki] = FALSE;
                }
            }
        }
        /* number of genes before the negatively regulated ones are added */
        b->block_rows_pre = components;

        /* add genes that negative regulated to the consensus */
        for (ki = 0; ki < rows; ki++)
        {
            if (po->IS_list && !sublist[ki]) continue;
            m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
            {
                sub_array = get_intersect_reverse_row(colcand,arr_c[dsItem(genes,0)],arr_c[ki],m_cnt);
                KL_score = get_KL (sub_array, arr_c[ki], m_cnt, cols);
                if (KL_score>=b->significance * po->TOLERANCE)
                {
                    dsPush(genes,ki);
                    components++;
                    candidates[ki] = FALSE;
                }
            }
        }
        free(colcand);

        /* save the current cluster: only the first block_rows_pre genes */
        b_genes = dsNew(b->block_rows_pre);
        for (ki = 0; ki < b->block_rows_pre; ki++)
            dsPush(b_genes, dsItem(genes,ki));

        /* store gene arrays inside block */
        b->genes = dsNew(components);
        b->conds = dsNew(cols);

        scan_block(b_genes, b);
        if (b->block_cols == 0) continue;
        b->block_rows = components;
        /* b->score keeps the value left after block_init() */

        dsClear(b->genes);
        for (ki = 0; ki < components; ki++)
            dsPush(b->genes,dsItem(genes,ki));
        for (ki = 0; ki < components; ki++)
            if (!isInStack(allincluster, dsItem(genes,ki)))
                dsPush(allincluster,dsItem(genes,ki));

        /* keep the block so that the list can be processed afterwards */
        bb[block_id++] = b;

        /* reaching the results number limit */
        if (block_id == po->SCH_BLOCK) break;
        verboseDot();
    }
    putchar('\n');

    /* release the working storage owned by this function; the stacks are
     * released with free(), as allincluster always was */
    free(candidates);
    free(genes);
    free(scores);
    free(allincluster);

    block_enrichment(fw, bb, block_id);
    return report_blocks(fw, bb, block_id);
}
/*
 * Core algorithm.
 *
 * Walks the first n entries of the edge list el. Every edge that passes
 * the seed filter is expanded into a Block: block_init() grows the gene
 * set, the set is trimmed back to the point where the best score was
 * reached, further rows are added when they agree (or, in a second pass,
 * agree in the reversed sense) with the first gene on enough candidate
 * columns, and scan_block() fills in the block's columns. Blocks with at
 * least one column are scored as rows * columns and stored in bb until
 * po->SCH_BLOCK blocks have been collected.
 *
 * Uses names declared outside this function: po, rows, cols, sigma,
 * profile, arr_c.
 *
 * fw : output stream, passed to report_blocks()
 * el : array of at least n Edge pointers, consumed in order
 * n  : number of edges to consider
 *
 * Returns whatever report_blocks() returns.
 *
 * NOTE(review): b_genes and a block discarded because it has no columns
 * are never released here. Whether freeing them is safe depends on helpers
 * not visible in this function - confirm before changing.
 */
int cluster (FILE *fw, Edge **el, int n)
{
    int block_id = 0;
    Block **bb;
    int allocated = po->SCH_BLOCK;
    AllocArray(bb, allocated);

    Edge *e;
    Block *b;
    struct dyStack *genes, *scores, *b_genes, *allincluster;

    int i, j, k, components;

    AllocArray(profile, cols);
    for (j = 0; j < cols; j++)
        AllocArray(profile[j], sigma);

    genes = dsNew(rows);
    scores = dsNew(rows);
    allincluster = dsNew(rows);

    bool *candidates;
    AllocArray(candidates, rows);

    i = 0;
    while (i++ < n)
    {
        e = *el++;

        /* check if both genes already enumerated in previous blocks */
        bool flag = TRUE;
        /* with more than 200 rows use the cheaper membership test on
         * allincluster instead of check_seed() */
        if (rows > 200)
        {
            if ( isInStack(allincluster,e->gene_one) && isInStack(allincluster,e->gene_two) )
                flag = FALSE;
        }
        else
        {
            flag = check_seed(e, bb, block_id);
        }
        if (!flag) continue;

        /* reset the column profile for this seed */
        for (j = 0; j < cols; j++)
            for (k = 0; k < sigma; k++)
                profile[j][k] = 0;

        AllocVar(b);
        /* initial score of the block, capped at 2 */
        b->score = MIN(2, e->score);

        /* initialize the stacks genes and scores: fill every slot with -1,
         * then empty them again so stale entries are never read back */
        int ii;
        dsClear(genes);
        dsClear(scores);
        for (ii = 0; ii < rows; ii++)
        {
            dsPush(genes,-1);
            dsPush(scores,-1);
        }
        dsClear(genes);
        dsClear(scores);

        dsPush(genes, e->gene_one);
        dsPush(genes, e->gene_two);
        dsPush(scores, 1);
        dsPush(scores, b->score);

        /* branch-and-cut condition for seed expansion */
        int cand_threshold = floor(po->COL_WIDTH * po->TOLERANCE);
        if (cand_threshold < 2)
            cand_threshold = 2;

        /* maintain a candidate list to avoid looping through all rows */
        for (j = 0; j < rows; j++)
            candidates[j] = TRUE;
        candidates[e->gene_one] = candidates[e->gene_two] = FALSE;
        components = 2;

        /* expansion step, generate a bicluster without noise */
        block_init(e, b, genes, scores, candidates, cand_threshold, &components, allincluster);

        /* track back to the last position holding the best score */
        for (k = 0; k < components; k++)
            if ((dsItem(scores,k) == b->score)&&(dsItem(scores,k+1)!= b->score)) break;
        components = k + 1;

        /* rebuild the candidate list: only the kept genes are excluded */
        int ki;
        for (ki = 0; ki < rows; ki++)
            candidates[ki] = TRUE;

        for (ki = 0; ki < components - 1; ki++)
        {
            seed_update(arr_c[dsItem(genes,ki)]);
            candidates[dsItem(genes,ki)] = FALSE;
        }
        candidates[dsItem(genes,k)] = FALSE;
        /* truncate the gene stack to the kept prefix */
        genes->top = k;

        int cnt = 0;
        bool *colcand;
        AllocArray(colcand, cols);
        for (ki = 0; ki < cols; ki++)
            colcand[ki] = FALSE;

        /* add columns satisfy the conservative r */
        seed_current_modify(arr_c[dsItem(genes,k)], colcand, &cnt, components);

        /* add some new possible genes */
        int m_cnt;
        for (ki = 0; ki < rows; ki++)
        {
            m_cnt = intersect_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt* po->TOLERANCE)) )
            {
                dsPush(genes,ki);
                components++;
                candidates[ki] = FALSE;
            }
        }
        /* number of genes before the negatively regulated ones are added */
        b->block_rows_pre = components;

        /* add genes that negative regulated to the consensus */
        for (ki = 0; ki < rows; ki++)
        {
            m_cnt = reverse_row(colcand, arr_c[dsItem(genes,0)], arr_c[ki]);
            if ( candidates[ki] && (m_cnt >= floor(cnt * po->TOLERANCE)) )
            {
                dsPush(genes,ki);
                components++;
                candidates[ki] = FALSE;
            }
        }
        free(colcand);

        /* save the current cluster: only the first block_rows_pre genes */
        b_genes = dsNew(b->block_rows_pre);
        for (ki = 0; ki < b->block_rows_pre; ki++)
            dsPush(b_genes, dsItem(genes,ki));

        /* store gene arrays inside block */
        b->genes = dsNew(components);
        b->conds = dsNew(cols);

        scan_block(b_genes, b);
        if (b->block_cols == 0) continue;
        b->block_rows = components;
        b->score = b->block_rows * b->block_cols;

        dsClear(b->genes);
        for (ki = 0; ki < components; ki++)
            dsPush(b->genes,dsItem(genes,ki));
        for (ki = 0; ki < components; ki++)
            if (!isInStack(allincluster, dsItem(genes,ki)))
                dsPush(allincluster,dsItem(genes,ki));

        bb[block_id++] = b;

        /* reaching the results number limit */
        if (block_id == po->SCH_BLOCK) break;
        verboseDot();
    }
    putchar('\n');

    /* release the working storage owned by this function; the stacks are
     * released with free(), as allincluster always was */
    free(candidates);
    free(genes);
    free(scores);
    free(allincluster);

    return report_blocks(fw, bb, block_id);
}