void compute_step_factor(int nelr, double* variables, double* areas, double* step_factors) { { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for default(shared) schedule(static) for(int i = 0; i < nelr; i++) { double density = variables[NVAR*i + VAR_DENSITY]; cfd_double3 momentum; momentum.x = variables[NVAR*i + (VAR_MOMENTUM+0)]; momentum.y = variables[NVAR*i + (VAR_MOMENTUM+1)]; momentum.z = variables[NVAR*i + (VAR_MOMENTUM+2)]; double density_energy = variables[NVAR*i + VAR_DENSITY_ENERGY]; cfd_double3 velocity; compute_velocity(density, momentum, velocity); double speed_sqd = compute_speed_sqd(velocity); double pressure = compute_pressure(density, density_energy, speed_sqd); double speed_of_sound = compute_speed_of_sound(density, pressure); // dt = double(0.5) * std::sqrt(areas[i]) / (||v|| + c).... but when we do time stepping, this later would need to be divided by the area, so we just do it all at once step_factors[i] = double(0.5) / (std::sqrt(areas[i]) * (std::sqrt(speed_sqd) + speed_of_sound)); } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma155_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } }
/*
 * Checks the sorted output held by this PE.
 *   1. Every local key lies in
 *      [my_rank * BUCKET_WIDTH, (my_rank+1) * BUCKET_WIDTH - 1].
 *   2. The per-key counts add up to my_bucket_size.
 *   3. The bucket sizes summed over all PEs equal NUM_KEYS_PER_PE * NUM_PES.
 *
 * Returns 1 if a check failed on this PE and 0 otherwise. A mismatch in
 * check 3 is reported, and reflected in the return value, on ROOT_PE only.
 */
static int verify_results(int const * const my_local_key_counts,
                          KEY_TYPE const * const my_local_keys)
{
  shmem_barrier_all();

  const int rank = shmem_my_pe();
  const int lowest_key = rank * BUCKET_WIDTH;
  const int highest_key = (rank+1) * BUCKET_WIDTH - 1;
  int failed = 0;

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  // Check 1: all keys fall inside this PE's bucket boundaries
  for(long long int pos = 0; pos < my_bucket_size; ++pos){
    const int key = my_local_keys[pos];
    if((key >= lowest_key) && (key <= highest_key)){
      continue;
    }
    printf("Rank %d Failed Verification!\n",rank);
    printf("Key: %d is outside of bounds [%d, %d]\n", key, lowest_key, highest_key);
    failed = 1;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0)
    printf("Verifying took %llu ns\n", end - start);
#endif

  // Check 2: the key population equals the expected bucket size
  long long int counted_keys = 0;
  for(uint64_t slot = 0; slot < BUCKET_WIDTH; ++slot){
    counted_keys += my_local_key_counts[slot];
  }

  if(counted_keys != my_bucket_size){
    printf("Rank %d Failed Verification!\n",rank);
    printf("Actual Bucket Size: %lld Should be %lld\n", counted_keys, my_bucket_size);
    failed = 1;
  }

  // Check 3: the final number of keys equals the initial number of keys.
  // NOTE(review): the reduction target is static, presumably so that it is a
  // symmetric object as the SHMEM reduction requires - confirm.
  static long long int total_num_keys = 0;
  shmem_longlong_sum_to_all(&total_num_keys, &my_bucket_size, 1, 0, 0, NUM_PES, llWrk, pSync);
  shmem_barrier_all();

  if((total_num_keys != (long long int)(NUM_KEYS_PER_PE * NUM_PES)) && (rank == ROOT_PE)){
    printf("Verification Failed!\n");
    printf("Actual total number of keys: %lld Expected %" PRIu64 "\n", total_num_keys, NUM_KEYS_PER_PE * NUM_PES );
    failed = 1;
  }

  return failed;
}
/*
 * Copies the first N doubles of `src` into `dst` with an OpenMP parallel
 * loop and prints how long the loop took.
 */
void copy(double *dst, double *src, int N)
{
	const unsigned long long region_begin = current_time_ns();

#pragma omp parallel for default(shared) schedule(static)
	for(int idx = 0; idx < N; idx++)
	{
		dst[idx] = src[idx];
	}

	const unsigned long long region_end = current_time_ns();
	printf("pragma53_omp_parallel %llu ns\n", region_end - region_begin);
}
void initialize_variables(int nelr, double* variables) { { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for default(shared) schedule(static) for(int i = 0; i < nelr; i++) { for(int j = 0; j < NVAR; j++) variables[i*NVAR + j] = ff_variable[j]; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma102_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } }
/*
 * Counts the occurrence of each key in my bucket.
 * Key indices into the count array are the key's value minus my bucket's
 * minimum key value to allow indexing from 0.
 *
 * my_bucket_keys: all keys in my bucket, unsorted, expected to lie in
 *                 [my_rank * BUCKET_WIDTH, (my_rank+1)*BUCKET_WIDTH)
 *
 * Returns a freshly malloc'd array of BUCKET_WIDTH counters; ownership
 * passes to the caller.
 */
static int * count_local_keys(KEY_TYPE const * const my_bucket_keys)
{
  int * const my_local_key_counts = malloc(BUCKET_WIDTH * sizeof(int));
  assert(my_local_key_counts);
  memset(my_local_key_counts, 0, BUCKET_WIDTH * sizeof(int));

  timer_start(&timers[TIMER_SORT]);

  const int my_rank = shmem_my_pe();
  const int my_min_key = my_rank * BUCKET_WIDTH;

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  // Count the occurrences of each key in my bucket
  for(long long int i = 0; i < my_bucket_size; ++i){
    const unsigned int key_index = my_bucket_keys[i] - my_min_key;

    assert(my_bucket_keys[i] >= my_min_key);
    assert(key_index < BUCKET_WIDTH);

    my_local_key_counts[key_index]++;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  // my_bucket_size is a long long (it is printed with %lld elsewhere), so it
  // must not be printed with %u; both arguments are cast so the conversion
  // specifiers match on every platform.
  if (shmem_my_pe() == 0)
    printf("Counting local took %llu ns, my_bucket_size = %lld, BUCKET_WIDTH = "
        "%llu\n", end - start, (long long)my_bucket_size,
        (unsigned long long)BUCKET_WIDTH);
#endif

  timer_stop(&timers[TIMER_SORT]);

#ifdef DEBUG
  wait_my_turn();
  char msg[4096];
  sprintf(msg,"Rank %d: Bucket Size %lld | Local Key Counts:", my_rank, my_bucket_size);
  // Only the first PRINT_MAX counters are shown, so stop iterating there.
  for(uint64_t i = 0; i < BUCKET_WIDTH && i < PRINT_MAX; ++i){
    sprintf(msg + strlen(msg),"%d ", my_local_key_counts[i]);
  }
  sprintf(msg + strlen(msg),"\n");
  printf("%s",msg);
  fflush(stdout);
  my_turn_complete();
#endif

  return my_local_key_counts;
}
/*
 * Scatters this PE's keys into a new array grouped by destination bucket.
 *
 * For every key the bucket is key / BUCKET_WIDTH; the key is written at the
 * position held in local_bucket_offsets[bucket], which is then advanced by
 * one. Keys inside a bucket are left unsorted.
 *
 * Returns a malloc'd array of NUM_KEYS_PER_PE keys owned by the caller.
 * local_bucket_offsets is modified in place.
 */
static KEY_TYPE * bucketize_local_keys(KEY_TYPE const * const my_keys,
                                       int * const local_bucket_offsets)
{
  KEY_TYPE * const my_local_bucketed_keys = malloc(NUM_KEYS_PER_PE * sizeof(KEY_TYPE));
  assert(my_local_bucketed_keys);

  timer_start(&timers[TIMER_BUCKETIZE]);

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  for(uint64_t k = 0; k < NUM_KEYS_PER_PE; ++k){
    const KEY_TYPE key = my_keys[k];
    const uint32_t bucket = key / BUCKET_WIDTH;

    assert(local_bucket_offsets[bucket] >= 0);
    // take the next free slot of this bucket and advance its cursor
    const uint32_t slot = local_bucket_offsets[bucket]++;
    assert(slot < NUM_KEYS_PER_PE);

    my_local_bucketed_keys[slot] = key;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0)
    printf("Bucketizing took %llu ns\n", end - start);
#endif

  timer_stop(&timers[TIMER_BUCKETIZE]);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  const int my_rank = shmem_my_pe();
  sprintf(msg,"Rank %d: local bucketed keys: ", my_rank);
  for(uint64_t k = 0; k < NUM_KEYS_PER_PE; ++k){
    if(k < PRINT_MAX)
      sprintf(msg + strlen(msg),"%d ", my_local_bucketed_keys[k]);
  }
  sprintf(msg + strlen(msg),"\n");
  printf("%s",msg);
  fflush(stdout);
  my_turn_complete();
#endif

  return my_local_bucketed_keys;
}
void time_step(int j, int nelr, double* old_variables, double* variables, double* step_factors, double* fluxes) { { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for default(shared) schedule(static) for(int i = 0; i < nelr; i++) { double factor = step_factors[i]/double(RK+1-j); variables[NVAR*i + VAR_DENSITY] = old_variables[NVAR*i + VAR_DENSITY] + factor*fluxes[NVAR*i + VAR_DENSITY]; variables[NVAR*i + VAR_DENSITY_ENERGY] = old_variables[NVAR*i + VAR_DENSITY_ENERGY] + factor*fluxes[NVAR*i + VAR_DENSITY_ENERGY]; variables[NVAR*i + (VAR_MOMENTUM+0)] = old_variables[NVAR*i + (VAR_MOMENTUM+0)] + factor*fluxes[NVAR*i + (VAR_MOMENTUM+0)]; variables[NVAR*i + (VAR_MOMENTUM+1)] = old_variables[NVAR*i + (VAR_MOMENTUM+1)] + factor*fluxes[NVAR*i + (VAR_MOMENTUM+1)]; variables[NVAR*i + (VAR_MOMENTUM+2)] = old_variables[NVAR*i + (VAR_MOMENTUM+2)] + factor*fluxes[NVAR*i + (VAR_MOMENTUM+2)]; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma317_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } }
/*
 * Builds a histogram of this PE's keys over the NUM_BUCKETS buckets: each
 * key increments the counter at index key / BUCKET_WIDTH.
 *
 * Returns a malloc'd array of NUM_BUCKETS counters owned by the caller.
 */
static int * count_local_bucket_sizes(KEY_TYPE const * const my_keys)
{
  int * const local_bucket_sizes = malloc(NUM_BUCKETS * sizeof(int));
  assert(local_bucket_sizes);

  timer_start(&timers[TIMER_BCOUNT]);

  init_array(local_bucket_sizes, NUM_BUCKETS);

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  for(uint64_t k = 0; k < NUM_KEYS_PER_PE; ++k){
    const uint32_t bucket = my_keys[k]/BUCKET_WIDTH;
    local_bucket_sizes[bucket]++;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0)
    printf("Counting local bucket sizes took %llu ns\n", end - start);
#endif

  timer_stop(&timers[TIMER_BCOUNT]);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  const int my_rank = shmem_my_pe();
  sprintf(msg,"Rank %d: local bucket sizes: ", my_rank);
  for(uint64_t b = 0; b < NUM_BUCKETS; ++b){
    if(b < PRINT_MAX)
      sprintf(msg + strlen(msg),"%d ", local_bucket_sizes[b]);
  }
  sprintf(msg + strlen(msg),"\n");
  printf("%s",msg);
  fflush(stdout);
  my_turn_complete();
#endif

  return local_bucket_sizes;
}
/*
 * Runs fct_ptr(arg) under the hclib runtime: initialises the runtime with
 * the given dependencies, spawns the function as an async at the closest
 * locale, and finalises the runtime.
 *
 * Instrumentation is requested when the HCLIB_INSTRUMENT environment
 * variable is set. When profile_launch_body is non-zero, the time spent
 * between the async spawn and the end of hclib_finalize is printed.
 */
void hclib_launch(generic_frame_ptr fct_ptr, void *arg, const char **deps,
        int ndeps) {
    unsigned long long launch_begin = 0;
    const int instrument = (getenv("HCLIB_INSTRUMENT") != NULL);

    hclib_init(deps, ndeps, instrument);

    if (profile_launch_body) {
        launch_begin = current_time_ns();
    }

    hclib_async(fct_ptr, arg, NULL, 0, hclib_get_closest_locale());
    hclib_finalize(instrument);

    if (profile_launch_body) {
        const unsigned long long launch_end = current_time_ns();
        printf("\nHCLIB TIME %llu ns\n", launch_end - launch_begin);
    }
}
void sim_village_main_par(struct Village *top) { long i; const unsigned long long full_program_start = current_time_ns(); { #pragma omp parallel { #pragma omp single { #pragma omp task untied { for (i = 0; i < sim_time; i++) sim_village_par(top); } } } } ; const unsigned long long full_program_end = current_time_ns(); printf("full_program %llu ns\n", full_program_end - full_program_start); }
/* * Generates uniformly random keys [0, MAX_KEY_VAL] on each rank using the time and rank * number as a seed */ static KEY_TYPE * make_input(void) { timer_start(&timers[TIMER_INPUT]); KEY_TYPE * const my_keys = malloc(NUM_KEYS_PER_PE * sizeof(KEY_TYPE)); assert(my_keys); pcg32_random_t rng = seed_my_rank(); #ifdef ISX_PROFILING unsigned long long start = current_time_ns(); #endif for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i) { my_keys[i] = pcg32_boundedrand_r(&rng, MAX_KEY_VAL); } #ifdef ISX_PROFILING unsigned long long end = current_time_ns(); if (shmem_my_pe() == 0) printf("Making input took %llu ns\n", end - start); #endif timer_stop(&timers[TIMER_INPUT]); #ifdef DEBUG wait_my_turn(); char msg[1024]; const int my_rank = shmem_my_pe(); sprintf(msg,"Rank %d: Initial Keys: ", my_rank); for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i){ if(i < PRINT_MAX) sprintf(msg + strlen(msg),"%d ", my_keys[i]); } sprintf(msg + strlen(msg),"\n"); printf("%s",msg); fflush(stdout); my_turn_complete(); #endif return my_keys; }
/*---< main() >-------------------------------------------------------------*/
/*
 * Reads an object/attribute matrix from a text or binary file and runs
 * cluster() on it, printing the time spent clustering.
 *
 * Options:
 *   -i <file>   input file (required)
 *   -b <arg>    treat the input as binary (two ints, then the float matrix)
 *   -t <value>  threshold passed to cluster()
 *   -k <n>      number of clusters
 *
 * In text mode the first token of every non-empty line is skipped as an id
 * and the remaining tokens are the attributes. Malformed input (short rows,
 * truncated binary files, empty files) and allocation failures terminate
 * the program with an error message instead of invoking undefined behaviour.
 */
int main(int argc, char **argv) {
    int     opt;
    extern char *optarg;
    extern int   optind;
    int     nclusters = 5;
    char   *filename = 0;
    float  *attributes;
    float  *cluster_centres = NULL;
    int     i, j;
    int     numAttributes;
    int     numObjects;
    char    line[1024];
    int     isBinaryFile = 0;
    int     nloops = 1;
    float   threshold = 0.001;

    while ( (opt=getopt(argc,argv,"i:k:t:b:n:?"))!= EOF) {
        switch (opt) {
            case 'i': filename=optarg;
                      break;
            case 'b': isBinaryFile = 1;
                      break;
            case 't': threshold=atof(optarg);
                      break;
            case 'k': nclusters = atoi(optarg);
                      break;
            case '?': usage(argv[0]);
                      break;
            default: usage(argv[0]);
                      break;
        }
    }

    if (filename == 0) usage(argv[0]);

    numAttributes = numObjects = 0;

    /* from the input file, get the numAttributes and numObjects ------------*/

    if (isBinaryFile) {
        int    infile;
        size_t nbytes;
        size_t done = 0;

        /* O_RDONLY never creates a file, so no mode argument is needed */
        if ((infile = open(filename, O_RDONLY)) == -1) {
            fprintf(stderr, "Error: no such file (%s)\n", filename);
            exit(1);
        }
        if (read(infile, &numObjects,    sizeof(int)) != (ssize_t)sizeof(int) ||
            read(infile, &numAttributes, sizeof(int)) != (ssize_t)sizeof(int) ||
            numObjects <= 0 || numAttributes <= 0) {
            fprintf(stderr, "Error: malformed header in file (%s)\n", filename);
            close(infile);
            exit(1);
        }

        /* allocate space for attributes[] and read attributes of all objects */
        nbytes = (size_t)numObjects * (size_t)numAttributes * sizeof(float);
        attributes = (float*) malloc(nbytes);
        if (attributes == NULL) {
            fprintf(stderr, "Error: cannot allocate %lu bytes\n", (unsigned long)nbytes);
            close(infile);
            exit(1);
        }

        /* read() may legitimately return fewer bytes than requested */
        while (done < nbytes) {
            ssize_t got = read(infile, (char*)attributes + done, nbytes - done);
            if (got <= 0) {
                fprintf(stderr, "Error: truncated file (%s)\n", filename);
                close(infile);
                exit(1);
            }
            done += (size_t)got;
        }

        close(infile);
    }
    else {
        FILE  *infile;
        size_t nbytes;

        if ((infile = fopen(filename, "r")) == NULL) {
            fprintf(stderr, "Error: no such file (%s)\n", filename);
            exit(1);
        }

        /* pass 1: count non-empty lines */
        while (fgets(line, sizeof(line), infile) != NULL)
            if (strtok(line, " \t\n") != 0)
                numObjects++;
        rewind(infile);

        /* pass 2: count the attributes on the first non-empty line */
        while (fgets(line, sizeof(line), infile) != NULL) {
            if (strtok(line, " \t\n") != 0) {
                /* ignore the id (first attribute): numAttributes = 1; */
                while (strtok(NULL, " ,\t\n") != NULL) numAttributes++;
                break;
            }
        }

        if (numObjects <= 0 || numAttributes <= 0) {
            fprintf(stderr, "Error: no data found in file (%s)\n", filename);
            fclose(infile);
            exit(1);
        }

        /* allocate space for attributes[] and read attributes of all objects */
        nbytes = (size_t)numObjects * (size_t)numAttributes * sizeof(float);
        attributes = (float*) malloc(nbytes);
        if (attributes == NULL) {
            fprintf(stderr, "Error: cannot allocate %lu bytes\n", (unsigned long)nbytes);
            fclose(infile);
            exit(1);
        }

        /* pass 3: parse the values */
        rewind(infile);
        i = 0;
        while (fgets(line, sizeof(line), infile) != NULL) {
            if (strtok(line, " \t\n") == NULL) continue;
            for (j=0; j<numAttributes; j++) {
                char *token = strtok(NULL, " ,\t\n");
                if (token == NULL) {
                    /* a row shorter than the first one would hand NULL to atof */
                    fprintf(stderr, "Error: too few attributes on a line of file (%s)\n", filename);
                    fclose(infile);
                    exit(1);
                }
                attributes[i] = atof(token);
                i++;
            }
        }
        fclose(infile);
    }

    printf("I/O completed\n");

    const unsigned long long full_program_start = current_time_ns();

    for (i=0; i<nloops; i++) {
        cluster_centres = NULL;
        cluster(numObjects,
                numAttributes,
                attributes,           /* [numObjects][numAttributes] */
                nclusters,
                threshold,
                &cluster_centres
               );
    }

    const unsigned long long full_program_end = current_time_ns();
    printf("full_program %llu ns\n", full_program_end - full_program_start);

    printf("number of Clusters %d\n",nclusters);
    printf("number of Attributes %d\n\n",numAttributes);
    /*
    printf("Cluster Centers Output\n");
    printf("The first number is cluster number and the following data is arribute value\n");
    printf("=============================================================================\n\n");

    for (i=0; i< nclusters; i++) {
        printf("%d: ", i);
        for (j=0; j<numAttributes; j++)
            printf("%.2f ", cluster_centres[i][j]);
        printf("\n\n");
    }
    */

    free(attributes);
    return(0);
}
int main() { int l; uint64_t start, end; long int diff = 0; int i, j, k; long double h, t1, t2, dppi, ans = 5.795776322412856L; long double s1; // variables for logging/checking long double log[INPUTS]; long double threshold = 0.0; long double epsilon = -4.0; // 0. read input from the file final_inputs int finputs[INPUTS]; FILE* infile = fopen("final_inputs", "r"); if (!infile) { printf("Could not open final_inputs\n"); } char *s = malloc(10); for (i = 0; i < INPUTS; i++) { if (!feof(infile)) { fscanf(infile, "%s", s); finputs[i] = (int)cov_deserialize(s, 10); } } // dummy calls sqrtf(0); acosf(0); sinf(0); start = current_time_ns(); for (l = 0; l < INPUTS; l++) { int n = finputs[l]; t1 = -1.0; dppi = acos(t1); s1 = 0.0; t1 = 0.0; h = dppi / n; for( i = 1; i <= n; i++ ) { t2 = fun (i * h); s1 = s1 + sqrt (h*h + (t2 - t1)*(t2 - t1)); t1 = t2; } // 1. compute threshold and record result log[l] = (long double) s1; if (s1*pow(10, epsilon) > threshold) { threshold = s1*pow(10, epsilon); } } end = current_time_ns(); diff = (end-start); // 2. create spec, or checking results cov_arr_spec_log("spec.cov", threshold, INPUTS, log); cov_arr_log(log, INPUTS, "result", "log.cov"); cov_check("log.cov", "spec.cov", INPUTS); // 3. print score (diff) to a file FILE* file; file = fopen("score.cov", "w"); fprintf(file, "%ld\n", diff); fclose(file); return 0; }
/* * Main function */ int main(int argc, char** argv) { if (argc < 2) { std::cout << "specify data file name" << std::endl; return 0; } const char* data_file_name = argv[1]; const unsigned long long full_program_start = current_time_ns(); { // set far field conditions { const double angle_of_attack = double(3.1415926535897931 / 180.0) * double(deg_angle_of_attack); ff_variable[VAR_DENSITY] = double(1.4); double ff_pressure = double(1.0); double ff_speed_of_sound = sqrt(GAMMA*ff_pressure / ff_variable[VAR_DENSITY]); double ff_speed = double(ff_mach)*ff_speed_of_sound; cfd_double3 ff_velocity; ff_velocity.x = ff_speed*double(cos((double)angle_of_attack)); ff_velocity.y = ff_speed*double(sin((double)angle_of_attack)); ff_velocity.z = 0.0; ff_variable[VAR_MOMENTUM+0] = ff_variable[VAR_DENSITY] * ff_velocity.x; ff_variable[VAR_MOMENTUM+1] = ff_variable[VAR_DENSITY] * ff_velocity.y; ff_variable[VAR_MOMENTUM+2] = ff_variable[VAR_DENSITY] * ff_velocity.z; ff_variable[VAR_DENSITY_ENERGY] = ff_variable[VAR_DENSITY]*(double(0.5)*(ff_speed*ff_speed)) + (ff_pressure / double(GAMMA-1.0)); cfd_double3 ff_momentum; ff_momentum.x = *(ff_variable+VAR_MOMENTUM+0); ff_momentum.y = *(ff_variable+VAR_MOMENTUM+1); ff_momentum.z = *(ff_variable+VAR_MOMENTUM+2); compute_flux_contribution(ff_variable[VAR_DENSITY], ff_momentum, ff_variable[VAR_DENSITY_ENERGY], ff_pressure, ff_velocity, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy); } int nel; int nelr; // read in domain geometry double* areas; int* elements_surrounding_elements; double* normals; { std::ifstream file(data_file_name); file >> nel; nelr = block_length*((nel / block_length )+ std::min(1, nel % block_length)); areas = new double[nelr]; elements_surrounding_elements = new int[nelr*NNB]; normals = new double[NDIM*NNB*nelr]; // read in data for(int i = 0; i < nel; i++) { file >> areas[i]; for(int j = 0; j < NNB; j++) { file >> 
elements_surrounding_elements[i*NNB + j]; if(elements_surrounding_elements[i*NNB+j] < 0) elements_surrounding_elements[i*NNB+j] = -1; elements_surrounding_elements[i*NNB + j]--; //it's coming in with Fortran numbering for(int k = 0; k < NDIM; k++) { file >> normals[(i*NNB + j)*NDIM + k]; normals[(i*NNB + j)*NDIM + k] = -normals[(i*NNB + j)*NDIM + k]; } } } // fill in remaining data int last = nel-1; for(int i = nel; i < nelr; i++) { areas[i] = areas[last]; for(int j = 0; j < NNB; j++) { // duplicate the last element elements_surrounding_elements[i*NNB + j] = elements_surrounding_elements[last*NNB + j]; for(int k = 0; k < NDIM; k++) normals[(i*NNB + j)*NDIM + k] = normals[(last*NNB + j)*NDIM + k]; } } } // Create arrays and set initial conditions double* variables = alloc<double>(nelr*NVAR); initialize_variables(nelr, variables); double* old_variables = alloc<double>(nelr*NVAR); double* fluxes = alloc<double>(nelr*NVAR); double* step_factors = alloc<double>(nelr); // these need to be computed the first time in order to compute time step std::cout << "Starting..." << std::endl; // Begin iterations for(int i = 0; i < iterations; i++) { copy(old_variables, variables, nelr*NVAR); // for the first iteration we compute the time step compute_step_factor(nelr, variables, areas, step_factors); for(int j = 0; j < RK; j++) { compute_flux(nelr, elements_surrounding_elements, normals, variables, fluxes); time_step(j, nelr, old_variables, variables, step_factors, fluxes); } } std::cout << "Saving solution..." << std::endl; dump(variables, nel, nelr); std::cout << "Saved solution..." << std::endl; std::cout << "Cleaning up..." 
<< std::endl; dealloc<double>(areas); dealloc<int>(elements_surrounding_elements); dealloc<double>(normals); dealloc<double>(variables); dealloc<double>(old_variables); dealloc<double>(fluxes); dealloc<double>(step_factors); } ; const unsigned long long full_program_end = current_time_ns(); printf("full_program %llu ns\n", full_program_end - full_program_start); std::cout << "Done..." << std::endl; return 0; }
int main (int argc, char *argv[]) { /**** Initialising ****/ const unsigned long long full_program_start = current_time_ns(); { shmem_init (); /* Variable Declarations */ int Numprocs,MyRank, Root = 0; int i,j,k, NoofElements, NoofElements_Bloc, NoElementsToSort; int count, temp; TYPE *Input, *InputData; TYPE *Splitter, *AllSplitter; TYPE *Buckets, *BucketBuffer, *LocalBucket; TYPE *OutputBuffer, *Output; MyRank = shmem_my_pe (); Numprocs = shmem_n_pes (); NoofElements = SIZE; if(( NoofElements % Numprocs) != 0){ if(MyRank == Root) printf("Number of Elements are not divisible by Numprocs \n"); shmem_finalize (); exit(0); } /**** Reading Input ****/ Input = (TYPE *) shmem_malloc (NoofElements*sizeof(*Input)); if(Input == NULL) { printf("Error : Can not allocate memory \n"); } if (MyRank == Root){ /* Initialise random number generator */ printf ("Generating input Array for Sorting %d uint64_t numbers\n",SIZE); srand48((TYPE)NoofElements); for(i=0; i< NoofElements; i++) { Input[i] = rand(); } } /**** Sending Data ****/ NoofElements_Bloc = NoofElements / Numprocs; InputData = (TYPE *) shmem_malloc (NoofElements_Bloc * sizeof (*InputData)); if(InputData == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Scatter(Input, NoofElements_Bloc, TYPE_MPI, InputData, // NoofElements_Bloc, TYPE_MPI, Root, MPI_COMM_WORLD); shmem_barrier_all(); if(MyRank == Root) { for(i=0; i<Numprocs; i++) { TYPE* start = &Input[i * NoofElements_Bloc]; shmem_put64(InputData, start, NoofElements_Bloc, i); } } shmem_barrier_all(); /**** Sorting Locally ****/ sorting(InputData, NoofElements_Bloc); /**** Choosing Local Splitters ****/ Splitter = (TYPE *) shmem_malloc (sizeof (TYPE) * (Numprocs-1)); if(Splitter == NULL) { printf("Error : Can not allocate memory \n"); } for (i=0; i< (Numprocs-1); i++){ Splitter[i] = InputData[NoofElements/(Numprocs*Numprocs) * (i+1)]; } /**** Gathering Local Splitters at Root ****/ AllSplitter = (TYPE *) shmem_malloc (sizeof (TYPE) * Numprocs * 
(Numprocs-1)); if(AllSplitter == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Gather (Splitter, Numprocs-1, TYPE_MPI, AllSplitter, Numprocs-1, // TYPE_MPI, Root, MPI_COMM_WORLD); shmem_barrier_all(); TYPE* target_index = &AllSplitter[MyRank * (Numprocs-1)]; shmem_put64(target_index, Splitter, Numprocs-1, Root); shmem_barrier_all(); /**** Choosing Global Splitters ****/ if (MyRank == Root){ sorting (AllSplitter, Numprocs*(Numprocs-1)); for (i=0; i<Numprocs-1; i++) Splitter[i] = AllSplitter[(Numprocs-1)*(i+1)]; } /**** Broadcasting Global Splitters ****/ //MPI_Bcast (Splitter, Numprocs-1, TYPE_MPI, 0, MPI_COMM_WORLD); { int _i; for(_i=0; _i<_SHMEM_BCAST_SYNC_SIZE; _i++) { pSync[_i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); } shmem_broadcast64(Splitter, Splitter, Numprocs-1, 0, 0, 0, Numprocs, pSync); shmem_barrier_all(); /**** Creating Numprocs Buckets locally ****/ Buckets = (TYPE *) shmem_malloc (sizeof (TYPE) * (NoofElements + Numprocs)); if(Buckets == NULL) { printf("Error : Can not allocate memory \n"); } j = 0; k = 1; for (i=0; i<NoofElements_Bloc; i++){ if(j < (Numprocs-1)){ if (InputData[i] < Splitter[j]) Buckets[((NoofElements_Bloc + 1) * j) + k++] = InputData[i]; else{ Buckets[(NoofElements_Bloc + 1) * j] = k-1; k=1; j++; i--; } } else Buckets[((NoofElements_Bloc + 1) * j) + k++] = InputData[i]; } Buckets[(NoofElements_Bloc + 1) * j] = k - 1; shmem_free(Splitter); shmem_free(AllSplitter); /**** Sending buckets to respective processors ****/ BucketBuffer = (TYPE *) shmem_malloc (sizeof (TYPE) * (NoofElements + Numprocs)); if(BucketBuffer == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Alltoall (Buckets, NoofElements_Bloc + 1, TYPE_MPI, BucketBuffer, // NoofElements_Bloc + 1, TYPE_MPI, MPI_COMM_WORLD); shmem_barrier_all(); for(i=0; i<Numprocs; i++) { shmem_put64(&BucketBuffer[MyRank*(NoofElements_Bloc + 1)], &Buckets[i*(NoofElements_Bloc + 1)], NoofElements_Bloc + 1, i); } shmem_barrier_all(); /**** Rearranging BucketBuffer 
****/ LocalBucket = (TYPE *) shmem_malloc (sizeof (TYPE) * 2 * NoofElements / Numprocs); if(LocalBucket == NULL) { printf("Error : Can not allocate memory \n"); } count = 1; for (j=0; j<Numprocs; j++) { k = 1; for (i=0; i<BucketBuffer[(NoofElements/Numprocs + 1) * j]; i++) LocalBucket[count++] = BucketBuffer[(NoofElements/Numprocs + 1) * j + k++]; } LocalBucket[0] = count-1; /**** Sorting Local Buckets using Bubble Sort ****/ /*sorting (InputData, NoofElements_Bloc, sizeof(int), intcompare); */ NoElementsToSort = LocalBucket[0]; sorting (&LocalBucket[1], NoElementsToSort); /**** Gathering sorted sub blocks at root ****/ OutputBuffer = (TYPE *) shmem_malloc (sizeof(TYPE) * 2 * NoofElements); if(OutputBuffer == NULL) { printf("Error : Can not allocate memory \n"); } //MPI_Gather (LocalBucket, 2*NoofElements_Bloc, TYPE_MPI, OutputBuffer, // 2*NoofElements_Bloc, TYPE_MPI, Root, MPI_COMM_WORLD); shmem_barrier_all(); target_index = &OutputBuffer[MyRank * (2*NoofElements_Bloc)]; shmem_put64(target_index, LocalBucket, 2*NoofElements_Bloc, Root); shmem_barrier_all(); /**** Rearranging output buffer ****/ if (MyRank == Root){ Output = (TYPE *) malloc (sizeof (TYPE) * NoofElements); count = 0; for(j=0; j<Numprocs; j++){ k = 1; for(i=0; i<OutputBuffer[(2 * NoofElements/Numprocs) * j]; i++) Output[count++] = OutputBuffer[(2*NoofElements/Numprocs) * j + k++]; } printf ( "Number of Elements to be sorted : %d \n", NoofElements); TYPE prev = 0; int fail = 0; for (i=0; i<NoofElements; i++){ if(Output[i] < prev) { printf("Failed at index %d\n",i); fail = 1; } prev = Output[i]; } if(fail) printf("Sorting FAILED\n"); else printf("Sorting PASSED\n"); free(Output); }/* MyRank==0*/ shmem_free(Input); shmem_free(OutputBuffer); shmem_free(InputData); shmem_free(Buckets); shmem_free(BucketBuffer); shmem_free(LocalBucket); /**** Finalize ****/ shmem_finalize(); } ; const unsigned long long full_program_end = current_time_ns(); printf("full_program %llu ns\n", full_program_end - 
full_program_start); }
void compute_flux(int nelr, int* elements_surrounding_elements, double* normals, double* variables, double* fluxes) { double smoothing_coefficient = double(0.2f); { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for default(shared) schedule(static) for(int i = 0; i < nelr; i++) { int j, nb; cfd_double3 normal; double normal_len; double factor; double density_i = variables[NVAR*i + VAR_DENSITY]; cfd_double3 momentum_i; momentum_i.x = variables[NVAR*i + (VAR_MOMENTUM+0)]; momentum_i.y = variables[NVAR*i + (VAR_MOMENTUM+1)]; momentum_i.z = variables[NVAR*i + (VAR_MOMENTUM+2)]; double density_energy_i = variables[NVAR*i + VAR_DENSITY_ENERGY]; cfd_double3 velocity_i; compute_velocity(density_i, momentum_i, velocity_i); double speed_sqd_i = compute_speed_sqd(velocity_i); double speed_i = std::sqrt(speed_sqd_i); double pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i); double speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i); cfd_double3 flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, flux_contribution_i_momentum_z; cfd_double3 flux_contribution_i_density_energy; compute_flux_contribution(density_i, momentum_i, density_energy_i, pressure_i, velocity_i, flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, flux_contribution_i_momentum_z, flux_contribution_i_density_energy); double flux_i_density = double(0.0); cfd_double3 flux_i_momentum; flux_i_momentum.x = double(0.0); flux_i_momentum.y = double(0.0); flux_i_momentum.z = double(0.0); double flux_i_density_energy = double(0.0); cfd_double3 velocity_nb; double density_nb, density_energy_nb; cfd_double3 momentum_nb; cfd_double3 flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, flux_contribution_nb_momentum_z; cfd_double3 flux_contribution_nb_density_energy; double speed_sqd_nb, speed_of_sound_nb, pressure_nb; for(j = 0; j < NNB; j++) { nb = elements_surrounding_elements[i*NNB + j]; normal.x = normals[(i*NNB 
+ j)*NDIM + 0]; normal.y = normals[(i*NNB + j)*NDIM + 1]; normal.z = normals[(i*NNB + j)*NDIM + 2]; normal_len = std::sqrt(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z); if(nb >= 0) // a legitimate neighbor { density_nb = variables[nb*NVAR + VAR_DENSITY]; momentum_nb.x = variables[nb*NVAR + (VAR_MOMENTUM+0)]; momentum_nb.y = variables[nb*NVAR + (VAR_MOMENTUM+1)]; momentum_nb.z = variables[nb*NVAR + (VAR_MOMENTUM+2)]; density_energy_nb = variables[nb*NVAR + VAR_DENSITY_ENERGY]; compute_velocity(density_nb, momentum_nb, velocity_nb); speed_sqd_nb = compute_speed_sqd(velocity_nb); pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb); speed_of_sound_nb = compute_speed_of_sound(density_nb, pressure_nb); compute_flux_contribution(density_nb, momentum_nb, density_energy_nb, pressure_nb, velocity_nb, flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, flux_contribution_nb_momentum_z, flux_contribution_nb_density_energy); // artificial viscosity factor = -normal_len*smoothing_coefficient*double(0.5)*(speed_i + std::sqrt(speed_sqd_nb) + speed_of_sound_i + speed_of_sound_nb); flux_i_density += factor*(density_i-density_nb); flux_i_density_energy += factor*(density_energy_i-density_energy_nb); flux_i_momentum.x += factor*(momentum_i.x-momentum_nb.x); flux_i_momentum.y += factor*(momentum_i.y-momentum_nb.y); flux_i_momentum.z += factor*(momentum_i.z-momentum_nb.z); // accumulate cell-centered fluxes factor = double(0.5)*normal.x; flux_i_density += factor*(momentum_nb.x+momentum_i.x); flux_i_density_energy += factor*(flux_contribution_nb_density_energy.x+flux_contribution_i_density_energy.x); flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.x+flux_contribution_i_momentum_x.x); flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.x+flux_contribution_i_momentum_y.x); flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.x+flux_contribution_i_momentum_z.x); factor = double(0.5)*normal.y; flux_i_density += 
factor*(momentum_nb.y+momentum_i.y); flux_i_density_energy += factor*(flux_contribution_nb_density_energy.y+flux_contribution_i_density_energy.y); flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.y+flux_contribution_i_momentum_x.y); flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.y+flux_contribution_i_momentum_y.y); flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.y+flux_contribution_i_momentum_z.y); factor = double(0.5)*normal.z; flux_i_density += factor*(momentum_nb.z+momentum_i.z); flux_i_density_energy += factor*(flux_contribution_nb_density_energy.z+flux_contribution_i_density_energy.z); flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.z+flux_contribution_i_momentum_x.z); flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.z+flux_contribution_i_momentum_y.z); flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.z+flux_contribution_i_momentum_z.z); } else if(nb == -1) // a wing boundary { flux_i_momentum.x += normal.x*pressure_i; flux_i_momentum.y += normal.y*pressure_i; flux_i_momentum.z += normal.z*pressure_i; } else if(nb == -2) // a far field boundary { factor = double(0.5)*normal.x; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+0]+momentum_i.x); flux_i_density_energy += factor*(ff_flux_contribution_density_energy.x+flux_contribution_i_density_energy.x); flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x.x + flux_contribution_i_momentum_x.x); flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y.x + flux_contribution_i_momentum_y.x); flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z.x + flux_contribution_i_momentum_z.x); factor = double(0.5)*normal.y; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+1]+momentum_i.y); flux_i_density_energy += factor*(ff_flux_contribution_density_energy.y+flux_contribution_i_density_energy.y); flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x.y + flux_contribution_i_momentum_x.y); flux_i_momentum.y += 
factor*(ff_flux_contribution_momentum_y.y + flux_contribution_i_momentum_y.y); flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z.y + flux_contribution_i_momentum_z.y); factor = double(0.5)*normal.z; flux_i_density += factor*(ff_variable[VAR_MOMENTUM+2]+momentum_i.z); flux_i_density_energy += factor*(ff_flux_contribution_density_energy.z+flux_contribution_i_density_energy.z); flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x.z + flux_contribution_i_momentum_x.z); flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y.z + flux_contribution_i_momentum_y.z); flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z.z + flux_contribution_i_momentum_z.z); } } fluxes[i*NVAR + VAR_DENSITY] = flux_i_density; fluxes[i*NVAR + (VAR_MOMENTUM+0)] = flux_i_momentum.x; fluxes[i*NVAR + (VAR_MOMENTUM+1)] = flux_i_momentum.y; fluxes[i*NVAR + (VAR_MOMENTUM+2)] = flux_i_momentum.z; fluxes[i*NVAR + VAR_DENSITY_ENERGY] = flux_i_density_energy; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma186_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } }
/* Exposes current_time_ns() under the hclib_ prefix: returns its value unchanged. */
unsigned long long hclib_current_time_ns() {
    const unsigned long long now_ns = current_time_ns();
    return now_ns;
}
/** * The implementation of the particle filter using OpenMP for many frames * @see http://openmp.org/wp/ * @note This function is designed to work with a video of several frames. In addition, it references a provided MATLAB function which takes the video, the objxy matrix and the x and y arrays as arguments and returns the likelihoods * @param I The video to be run * @param IszX The x dimension of the video * @param IszY The y dimension of the video * @param Nfr The number of frames * @param seed The seed array used for random number generation * @param Nparticles The number of particles to be used */ void particleFilter(int * I, int IszX, int IszY, int Nfr, int * seed, int Nparticles){ int max_size = IszX*IszY*Nfr; long long start = get_time(); //original particle centroid double xe = roundDouble(IszY/2.0); double ye = roundDouble(IszX/2.0); //expected object locations, compared to center int radius = 5; int diameter = radius*2 - 1; int * disk = (int *)malloc(diameter*diameter*sizeof(int)); strelDisk(disk, radius); int countOnes = 0; int x, y; for(x = 0; x < diameter; x++){ for(y = 0; y < diameter; y++){ if(disk[x*diameter + y] == 1) countOnes++; } } double * objxy = (double *)malloc(countOnes*2*sizeof(double)); getneighbors(disk, countOnes, objxy, radius); long long get_neighbors = get_time(); printf("TIME TO GET NEIGHBORS TOOK: %f\n", elapsed_time(start, get_neighbors)); //initial weights are all equal (1/Nparticles) double * weights = (double *)malloc(sizeof(double)*Nparticles); { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(weights, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = 1/((double)(Nparticles)); } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma373_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long get_weights = get_time(); printf("TIME TO GET WEIGHTSTOOK: %f\n", elapsed_time(get_neighbors, get_weights)); //initial 
likelihood to 0.0 double * likelihood = (double *)malloc(sizeof(double)*Nparticles); double * arrayX = (double *)malloc(sizeof(double)*Nparticles); double * arrayY = (double *)malloc(sizeof(double)*Nparticles); double * xj = (double *)malloc(sizeof(double)*Nparticles); double * yj = (double *)malloc(sizeof(double)*Nparticles); double * CDF = (double *)malloc(sizeof(double)*Nparticles); double * u = (double *)malloc(sizeof(double)*Nparticles); int * ind = (int*)malloc(sizeof(int)*countOnes*Nparticles); { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(arrayX, arrayY, xe, ye) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] = xe; arrayY[x] = ye; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma388_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } int k; printf("TIME TO SET ARRAYS TOOK: %f\n", elapsed_time(get_weights, get_time())); int indX, indY; for(k = 1; k < Nfr; k++){ long long set_arrays = get_time(); //apply motion model //draws sample from motion model (random walk). 
The only prior information //is that the object moves 2x as fast as in the y direction { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(arrayX, arrayY, Nparticles, seed) private(x) for(x = 0; x < Nparticles; x++){ arrayX[x] += 1 + 5*randn(seed, x); arrayY[x] += -2 + 2*randn(seed, x); } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma402_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long error = get_time(); printf("TIME TO SET ERROR TOOK: %f\n", elapsed_time(set_arrays, error)); //particle filter likelihood { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(likelihood, I, arrayX, arrayY, objxy, ind) private(x, y, indX, indY) for(x = 0; x < Nparticles; x++){ //compute the likelihood: remember our assumption is that you know // foreground and the background image intensity distribution. // Notice that we consider here a likelihood ratio, instead of // p(z|x). It is possible in this case. why? a hometask for you. 
//calc ind for(y = 0; y < countOnes; y++){ indX = roundDouble(arrayX[x]) + objxy[y*2 + 1]; indY = roundDouble(arrayY[x]) + objxy[y*2]; ind[x*countOnes + y] = fabs((double)(indX*IszY*Nfr + indY*Nfr + k)); if(ind[x*countOnes + y] >= max_size) ind[x*countOnes + y] = 0; } likelihood[x] = 0; for(y = 0; y < countOnes; y++) likelihood[x] += (pow((I[ind[x*countOnes + y]] - 100),2) - pow((I[ind[x*countOnes + y]]-228),2))/50.0; likelihood[x] = likelihood[x]/((double) countOnes); } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma410_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long likelihood_time = get_time(); printf("TIME TO GET LIKELIHOODS TOOK: %f\n", elapsed_time(error, likelihood_time)); // update & normalize weights // using equation (63) of Arulampalam Tutorial { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(Nparticles, weights, likelihood) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x] * exp(likelihood[x]); } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma433_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long exponential = get_time(); printf("TIME TO GET EXP TOOK: %f\n", elapsed_time(likelihood_time, exponential)); double sumWeights = 0; { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for private(x) reduction(+:sumWeights) for(x = 0; x < Nparticles; x++){ sumWeights += weights[x]; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma440_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long sum_time = get_time(); printf("TIME TO SUM WEIGHTS TOOK: %f\n", elapsed_time(exponential, sum_time)); { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(sumWeights, weights) private(x) for(x = 0; x < Nparticles; x++){ weights[x] = weights[x]/sumWeights; } ; 
const unsigned long long parallel_for_end = current_time_ns(); printf("pragma446_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long normalize = get_time(); printf("TIME TO NORMALIZE WEIGHTS TOOK: %f\n", elapsed_time(sum_time, normalize)); xe = 0; ye = 0; // estimate the object location by expected values { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for private(x) reduction(+:xe, ye) for(x = 0; x < Nparticles; x++){ xe += arrayX[x] * weights[x]; ye += arrayY[x] * weights[x]; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma455_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long move_time = get_time(); printf("TIME TO MOVE OBJECT TOOK: %f\n", elapsed_time(normalize, move_time)); printf("XE: %lf\n", xe); printf("YE: %lf\n", ye); double distance = sqrt( pow((double)(xe-(int)roundDouble(IszY/2.0)),2) + pow((double)(ye-(int)roundDouble(IszX/2.0)),2) ); printf("%lf\n", distance); //display(hold off for now) //pause(hold off for now) //resampling CDF[0] = weights[0]; for(x = 1; x < Nparticles; x++){ CDF[x] = weights[x] + CDF[x-1]; } long long cum_sum = get_time(); printf("TIME TO CALC CUM SUM TOOK: %f\n", elapsed_time(move_time, cum_sum)); double u1 = (1/((double)(Nparticles)))*randu(seed, 0); { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(u, u1, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ u[x] = u1 + x/((double)(Nparticles)); } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma480_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long u_time = get_time(); printf("TIME TO CALC U TOOK: %f\n", elapsed_time(cum_sum, u_time)); int j, i; { const unsigned long long parallel_for_start = current_time_ns(); #pragma omp parallel for shared(CDF, Nparticles, xj, yj, u, arrayX, arrayY) private(i, j) for(j = 0; j < Nparticles; j++){ i = 
findIndex(CDF, Nparticles, u[j]); if(i == -1) i = Nparticles-1; xj[j] = arrayX[i]; yj[j] = arrayY[i]; } ; const unsigned long long parallel_for_end = current_time_ns(); printf("pragma488_omp_parallel %llu ns\n", parallel_for_end - parallel_for_start); } long long xyj_time = get_time(); printf("TIME TO CALC NEW ARRAY X AND Y TOOK: %f\n", elapsed_time(u_time, xyj_time)); //#pragma omp parallel for shared(weights, Nparticles) private(x) for(x = 0; x < Nparticles; x++){ //reassign arrayX and arrayY arrayX[x] = xj[x]; arrayY[x] = yj[x]; weights[x] = 1/((double)(Nparticles)); } long long reset = get_time(); printf("TIME TO RESET WEIGHTS TOOK: %f\n", elapsed_time(xyj_time, reset)); } free(disk); free(objxy); free(weights); free(likelihood); free(xj); free(yj); free(arrayX); free(arrayY); free(CDF); free(u); free(ind); }
int main(int argc, char * argv[]){ char* usage = "openmp.out -x <dimX> -y <dimY> -z <Nfr> -np <Nparticles>"; //check number of arguments if(argc != 9) { printf("%s\n", usage); return 0; } //check args deliminators if( strcmp( argv[1], "-x" ) || strcmp( argv[3], "-y" ) || strcmp( argv[5], "-z" ) || strcmp( argv[7], "-np" ) ) { printf( "%s\n",usage ); return 0; } int IszX, IszY, Nfr, Nparticles; //converting a string to a integer if( sscanf( argv[2], "%d", &IszX ) == EOF ) { printf("ERROR: dimX input is incorrect"); return 0; } if( IszX <= 0 ) { printf("dimX must be > 0\n"); return 0; } //converting a string to a integer if( sscanf( argv[4], "%d", &IszY ) == EOF ) { printf("ERROR: dimY input is incorrect"); return 0; } if( IszY <= 0 ) { printf("dimY must be > 0\n"); return 0; } //converting a string to a integer if( sscanf( argv[6], "%d", &Nfr ) == EOF ) { printf("ERROR: Number of frames input is incorrect"); return 0; } if( Nfr <= 0 ) { printf("number of frames must be > 0\n"); return 0; } //converting a string to a integer if( sscanf( argv[8], "%d", &Nparticles ) == EOF ) { printf("ERROR: Number of particles input is incorrect"); return 0; } if( Nparticles <= 0 ) { printf("Number of particles must be > 0\n"); return 0; } //establish seed int * seed = (int *)malloc(sizeof(int)*Nparticles); int i; for(i = 0; i < Nparticles; i++) seed[i] = time(0)*i; //malloc matrix int * I = (int *)malloc(sizeof(int)*IszX*IszY*Nfr); long long start = get_time(); //call video sequence videoSequence(I, IszX, IszY, Nfr, seed); long long endVideoSequence = get_time(); printf("VIDEO SEQUENCE TOOK %f\n", elapsed_time(start, endVideoSequence)); //call particle filter const unsigned long long full_program_start = current_time_ns(); particleFilter(I, IszX, IszY, Nfr, seed, Nparticles) ; const unsigned long long full_program_end = current_time_ns(); printf("full_program %llu ns\n", full_program_end - full_program_start); ; long long endParticleFilter = get_time(); printf("PARTICLE FILTER TOOK 
%f\n", elapsed_time(endVideoSequence, endParticleFilter)); printf("ENTIRE PROGRAM TOOK %f\n", elapsed_time(start, endParticleFilter)); free(seed); free(I); return 0; }
/* Returns current_time_ns() divided by 1000000 (integer division), i.e. the
 * same clock reading expressed in whole milliseconds. */
unsigned long long hclib_current_time_ms() {
    const unsigned long long now_ms = current_time_ns() / 1000000;
    return now_ms;
}