/* ******************************** Multiplicative model for the placement of a file at a given depth taking into account the mean bytes at the depth and the count of files at that depth ********************************/ int fn_depthsize_prob (long double filesize) { double meansizediff[DEPTH_ENTRIES]; double final_prob[DEPTH_ENTRIES]; double totalsize_prob=0, totalprob=0; double sum1=0, sum2=0, sum3=0; int i =0; float token_until_now=0; int token; int factor = 100000; double depthsize_prob[DEPTH_ENTRIES]; srand(deseeder()); if(filesize ==0) return rand()%max_dir_depth+1; for(i=0; i< DEPTH_ENTRIES; i++) { meansizediff[i]=(double) 1/fabsl(log2l(filesize)-(long double)depth_meansize[i]); print_debug(0,"%Lf %Lf %f\n", log2l(filesize), (long double)depth_meansize[i], meansizediff[i]); totalsize_prob+=meansizediff[i]; } for(i=0; i< DEPTH_ENTRIES; i++) { final_prob[i]=(depthcount_prob[i]/Total_depthcount_prob)*\ (meansizediff[i]/totalsize_prob); } for(i=0; i< DEPTH_ENTRIES; i++) { totalprob+=final_prob[i]; } for(i=0; i< DEPTH_ENTRIES; i++) { print_debug(0, "Probsize[%d] %f; Probcount %f; Finalprob %f\n", i+1, \ meansizediff[i]/totalsize_prob*100, depthcount_prob[i]/Total_depthcount_prob*100,\ final_prob[i]/totalprob*100); sum1+= meansizediff[i]/totalsize_prob; sum2+= depthcount_prob[i]/Total_depthcount_prob; sum3+= final_prob[i]/totalprob; } print_debug(0,"sums %f, sumc %f sumt %f\n", sum1, sum2, sum3); i=0; do { token_until_now=0; token = rand() % factor; i=0; token_until_now=final_prob[i]/totalprob*factor; while (token_until_now < token) { print_debug(0,"%f %d\n", token_until_now , token); i++; token_until_now+=final_prob[i]/totalprob*factor; } if(i== DEPTH_ENTRIES-1) { // last bin is actually 20 to infinite ..not just 20 i+= rand()%10; // e.g., any depth between 20 and 30, if DEPTH_ENTRIES=20 } print_debug(0,"Chosen %d, max_dir %d\n", i, max_dir_depth); } while(i > max_dir_depth); return i+1; }
//int main () { int subsetsumconstraint(long double * Numbers_orig, int N) { int MAX_ALPHA = N * 1; int MAX_RAND_TRIALS = N * 100; //double Num_soln[N]; long double * Numbers = (long double *) malloc(sizeof(long double)*(N+MAX_ALPHA)); long double * T = (long double *) malloc(sizeof(long double)*MAX_ALPHA); int * soln_vector = (int *) malloc(sizeof(int)*(N+MAX_ALPHA)); int * num_vector = (int *) malloc(sizeof(int)*(N)); int * t_vector = (int *) malloc(sizeof(int)*(MAX_ALPHA)); if(!Numbers || !T || !soln_vector || !num_vector || !t_vector) { print_debug(1, "ERROR: allocating memory for constraint solving\n"); return 1; } /* int soln_vector[N+MAX_ALPHA]; int num_vector[N]; int t_vector[MAX_ALPHA]; */ long double Sum_D= IMP_input->FSused; //60000*N; long double Sum_C=0, Sum_phase1=0; int toggle =1, alpha =0, Na=N, i =0, j=0, num_soln=0; double beta=0, best_beta=0, abs_error =0; //allowed error, abs error int from_start=0; //double mu= 8.34, sigma = 2.38; Random rv_constraint(deseeder()); int LOCKER = 0; for (i =0; i< N ; i++) { Numbers[i]= Numbers_orig[i]; //floor(rv_constraint.lognormal(0, mu, sigma)); #ifdef Accuracy_Mode //experimental_center_1(Numbers[i]); #endif soln_vector[i]=0; // empty Sum_C += (long double) Numbers[i]; beta = abs(Sum_D - Sum_C)/Sum_D; } for (i=N; i< N+MAX_ALPHA; i++) { soln_vector[i]=0; } /* if (Sum_C < Sum_D ) { // pre-LOCKED! LOCKER=1; Sum_phase1 = Sum_C; print_debug(1,"Prelocked!! ... 
"); } */ print_debug(1,"Initial stat beta: %f Sum_C: %Lf Sum_D: %Lf num_soln %d T# %d\n",\ beta, Sum_C/1000, Sum_D/1000, num_soln, N); if (beta <= BETA_MAX){ print_debug(1,"beta_0: %f Sum_C: %Lf Sum_D: %Lf num_soln %d T# %d\n",\ beta, Sum_C/1000, Sum_D/1000, num_soln, N); print_debug(1,"Initial success!!!\n"); return 1; } else if (beta <= BETA_MAX_1){ print_debug(1,"beta_1: %f Sum_C: %Lf Sum_D: %Lf num_soln %d T# %d\n",\ beta, Sum_C/1000, Sum_D/1000, num_soln, N); print_debug(1,"Initial Half success!!!\n"); } /* ******************************************************** Start resampling * ********************************************************/ i=0; while (i<MAX_ALPHA && beta > BETA_MAX) { Na = N + alpha; if( (bias = (double)(rv_constraint.uniformDiscrete(0, 100000-1))) <= alpha1) Numbers[Na] = (long double) floor(rv_constraint.lognormal(0, poisson_mu, poisson_sigma)); else if (bias > alpha1) //&& bias <= alpha2*100000) Numbers[Na] = (long double) floor(rv_constraint.pareto(pareto_shape1)*pareto_base1); //Numbers[Na]= floor(rv_constraint.lognormal(0, mu, sigma)); /* ***************** * First Phase of Approximation Algorithm * *****************/ if(LOCKER ==0) { for (j=0; j<= Na; j++) { soln_vector[j]=0; } Sum_C=0; qsort((void*)&Numbers,(size_t)(Na+1),(size_t)sizeof(long double),compfunc_ld); int choice =0, rand_trials=0; j=0; while(j<N && rand_trials < MAX_RAND_TRIALS) { // while we don't have N numbers //choice = rand()%N; // 0 to N-1 choice = j; // 0 to N-1 if(Numbers[choice]+Sum_C <= Sum_D && soln_vector[choice]==0) { soln_vector[choice]=1; Sum_C+=Numbers[choice]; //num_vector[j]=choice; //Num_soln[j]=Numbers[choice];// soln array. 
Not used rgt now j++; } rand_trials++; } print_debug(1,"test: J: %d Sum_C: %Lf Sum_D: %Lf rand_trials %d T# %d\n",\ j, Sum_C/1000, Sum_D/1000, rand_trials, Na); if(j==N) // Lock the first phase { print_debug(1,"LOCKING initial set: J: %d Sum_C: %Lf\n", j, Sum_C/1000); LOCKER=1; } Sum_phase1 = Sum_C; } /* ***************** * First Phase Ends, Phase 2 Begins * *****************/ int k=0; Sum_C = Sum_phase1; for (k=0; k<N; k++) { soln_vector[k]=1; } for (k=N; k<=Na; k++) { soln_vector[k]=0; } k=0; for(int c =0; c <= Na; c++) { if(soln_vector[c]==0) { t_vector[k]=c; T[k]=Numbers[c]; k++; } } qsort((void*) &T, (size_t) (alpha+1), (size_t) sizeof(long double),compfunc_ld); #ifdef PRINTER print_debug(1,"\n-------- T -------------------\n"); for (k =0; k<=alpha ; k++) { print_debug(1,"%f ", T[k]/1000); } print_debug(1,"\n-----------N ----------------\n"); for (k =0; k< N ; k++) { print_debug(1,"%f ", Num_soln[k]/1000); } for (k =0; k<=Na ; k++) { print_debug(1,"%f ", Numbers[k]/1000); } print_debug(1,"\n------------- J: %d\n", j); #endif /* Sum_C=0; for (k =0; k<= Na; k++) { Sum_C += (long double) Numbers[k]*soln_vector[k]; } */ // REMEMBER THAT SOLUTION ARRAY IS SCATTERED ACROSS N+ALPHA if( j== N && Sum_C <= Sum_D) { /* There exists N random numbers such that their sum < Sum_D Phase 2 continues */ //int random_k=0; abs_error = abs(Sum_C - Sum_D); for (k=0; k<N; k++) { /* Traverse in random order */ //random_k = rand()%N; // do this for all soln_vector entries // go in RANDOM ORDER NOT IN SERIAL ORDER IF NUMBERS ARE SORTED // do not bias the file sizes to remove smaller numbers for(int l= alpha; l>=0; l--) { /* start with the biggest */ // if(soln_vector[t_vector[l]]==0 && T[l]> Num_soln[k] && (T[l]-Num_soln[k]) <= abs_error) { if(soln_vector[N+l]==0 && T[l]> Numbers[k] && (T[l]-Numbers[k]) <= abs_error) { /* since our array is sorted we stop at first match */ //soln_vector[num_vector[k]]=0; //soln_vector[t_vector[l]]=1; soln_vector[N+l]=1; soln_vector[k]=0; break; } } 
Sum_C=0; num_soln=0; for (int kk =0; kk<= Na; kk++) { Sum_C += (long double) Numbers[kk]*soln_vector[kk]; if(soln_vector[kk]==1) { num_soln++; } } abs_error = abs(Sum_C - Sum_D); beta = abs_error/Sum_D; /* if there is no match move onto the next */ } /* completed one pass of local improvement */ if(beta < best_beta) best_beta = beta; } else { // if solution is not feasible with current data: resample print_debug(0,"Sum_C exceeds Sum_D: resampling...\n"); } #ifdef PRINTER for (int k =0; k<= Na; k++) { print_debug(1,"(%f,%d) ", Numbers[k]/1000, soln_vector[k]); } #endif if(beta <= BETA_MAX_1 && toggle) { print_debug(1,"beta_2: %f Sum_C: %Lf Sum_D: %Lf num_soln %d T# %d\n",\ beta, Sum_C/1000, Sum_D/1000, num_soln, i+1+N); toggle=0; } print_debug(0,"beta_trial: %f Sum_C: %Lf Sum_D: %Lf num_soln %d T# %d\n",\ beta, Sum_C/1000, Sum_D/1000, num_soln, i+1+N); alpha++; i++; Sum_C =0; } num_soln=0; from_start=0; for(j=0; j<= Na; j++){ if(soln_vector[j]==1) { num_soln++; Sum_C += Numbers[j]*soln_vector[j]; Numbers_orig[from_start++]=Numbers[j]; } } abs_error = abs(Sum_C - Sum_D); beta = abs_error/Sum_D; print_debug(1,"beta_f: %f Sum_C: %Lf Sum_D: %Lf num_soln %d T# %d from_start %d\n",\ beta, Sum_C/1000, Sum_D/1000, num_soln, i+N, from_start); #ifdef PRINTER for (int k =0; k<= Na; k++) { print_debug(1,"(%f,%d) ", Numbers[k]/1000, soln_vector[k]); } #endif #ifdef Accuracy_Mode for(j=0; j<= Na; j++){ if(soln_vector[j]==1) { print_debug(0,"Number2: %f\n", Numbers[j]); experimental_center_2(Numbers[j]); } } #endif #ifdef PRINTER for(j =0; j< 40; j++) { print_debug(1,"%d \t%d \t%d \t%Lf \t%Lf\n", j, bincounter_1[j], \ bincounter_2[j], binsize_1[j], binsize_2[j]); } print_debug(1,"num_soln %d\n", num_soln); #endif /* for(j =0; j< N; j++) { print_debug(0,"%Lf ",Num_soln[j]); print_debug(1,"%d ", num_vector[j]); } print_debug(1,"=============================\n"); print_debug(1,"\n"); for(j =0; j<= alpha; j++) { print_debug(0,"%Lf ",T[j]); print_debug(1,"%d ", t_vector[j]); } 
print_debug(1,"=============================\n"); print_debug(1,"\n"); for(j =0; j<=Na; j++) { print_debug(1,"%d ",soln_vector[j]); } print_debug(1,"\n"); */ if(Numbers) free(Numbers); if(T) free(T); if(soln_vector) free(soln_vector); if(num_vector) free(num_vector); if(t_vector) free(t_vector); return 1; }
/* ****************************************************
   Run the montecarlo simulation for creating a directory tree
   according to the generative model in Agrawal Et. Al. FAST 2007.
   Builds numdirs directories by preferential attachment (each existing
   directory is chosen as parent with weight subdirs+2), records them in
   DirIDmap / DirDepthmultimap / LD, tracks max_dir_depth, and finally
   mkdir()s the tree on disk if Actualfilecreation requests it.
   Always returns 1; mkdir failures are only logged.
   **************************************************** */
int montecarlo(int numdirs)
{
    /* Depth and id keys used when registering each dir in the maps below. */
    int mapdepth=0, mapid=0;
    extern int ACTUAL_FILE_CREATION;
    int local_err=0;
    mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH | S_IRWXU;
    srand(deseeder());
    int root=0;
    long i=0, j=0;
    char parent_path[1024], strerr[100];
    int my_parent=0, parent_depth =0;

    /* Assign sequential ids to the pre-allocated Dirs[] array. */
    for (i =0;i<numdirs; i++)
        Dirs[i].setid(i);

    long current_dirs=0, token=0, token_uptil_now =0, sum_childs_plus2=0;

    /* Register the root directory (id 0, depth 0) in the id map, the
       per-depth multimap and the live list LD. */
    Dirs[0].setroot();
    DirIDmap[mapid] = Dirs[0];//Mdir;//(Dirs+sizeof(Dirs)*i);
    DirDepthmultimap.insert(pair<int, dir>(mapdepth,Dirs[0]));
    LD.push_front(Dirs[0]);
    current_dirs++;
    sum_childs_plus2+=2;   /* root starts with weight 0 subdirs + 2 */

    for(i=1; i < numdirs; i++) {
        /* Preferential attachment: pick a token in [1, total weight];
           each existing dir owns a slice of size (subdirs + 2). */
        token_uptil_now =0;
        token = (rand() % sum_childs_plus2) + 1; // any one will be parent
#ifdef DEBUG
        for(li=LD.begin(), j=0; j< current_dirs; li++, j++)
            cout << (*li).subdirs+2 << " ";
        cout << "======" << endl;
        cout << "Token: " << token << " CurrentDirs " << current_dirs \
             << " sum_childs_plus2 " << sum_childs_plus2 << endl;
#endif
        /* Walk LD accumulating weights until the token's slice is found. */
        ni=LD.begin();
        token_uptil_now+= (*ni).subdirs+2;
        while(token_uptil_now < token) {
            ni++;
            token_uptil_now+= (*ni).subdirs+2; // fix this
        }
        // ni is the chosen parent
#ifdef DEBUG
        cout << "Chosen parent " << (*ni).id << endl;
#endif
        (*ni).subdirs++;
        my_parent= (*ni).id;
        parent_depth = (*ni).depth;
        strcpy(parent_path, (*ni).path);
        /* New dir inherits parent's id/depth/path (depth presumably +1
           inside setparent_depth -- see the dir class). */
        Dirs[i].setparent_depth((*ni).id, (*ni).depth, parent_path);

        // Add to list
        mapdepth = Dirs[i].getdepth();
        mapid = Dirs[i].getid();
        print_debug(0, "mapdepth: %d %d\n", mapdepth, mapid);
        LD.push_back(Dirs[i]);
        //DirIDmap.insert(std::pair<int, dir>((*li).id), Mdir);
        DirIDmap[mapid] = Dirs[i];//Mdir;//(Dirs+sizeof(Dirs)*i);
        DirDepthmultimap.insert(pair<int, dir>(mapdepth,Dirs[i]));
        //LD.sort();
        current_dirs++;
        /* +2 for the new dir's own base weight, +1 because the parent's
           subdirs count (hence its weight) just grew by one. */
        sum_childs_plus2+=2+1;
        /* Track the deepest level created so far. */
        if(((*ni).depth+1) > max_dir_depth)
            max_dir_depth = (*ni).depth+1;
    }

    /* IMP_input->Actualfilecreation
       0: Do not create files or dir
       1: Create both
       2: Create only dir (for testing maybe) */
    if(IMP_input->Actualfilecreation==1 || IMP_input->Actualfilecreation==2){
        li = LD.begin();
        li++; // skip the root, already created
        for(; li != LD.end(); li++) {
            sprintf( parent_path,"%s/%s", PARENT_PATH, (*li).path);
            if((local_err = pos_mkdir(parent_path, mode)) <0) {
                /* Log and continue: one failed mkdir does not abort the run. */
                strerror_r(errno, strerr, 100);
                print_debug(1, "Error: Unable to mkdir (pathname = %s %d\n", parent_path, errno);
            }
        }
    }
    return 1;
}