/**
 * Compute command.
 *
 * Parses a line of the form "<cmd> <func> <key> [arg1]", evaluates <func> on
 * the matrix selected by <key> and prints the unsigned 32-bit result followed
 * by a newline.
 *
 * Recognised functions (matched case-insensitively): sum, trace, minimum,
 * maximum and frequency. "frequency" additionally requires arg1, which is
 * converted with atoll() and passed as the value to count.
 *
 * Prints "invalid arguments" when fewer than three tokens are present, when
 * the function name is unknown, or when "frequency" is given without arg1.
 *
 * MATRIX_GUARD(key) is expected to bring the matrix `m` into scope, since `m`
 * is not declared in this function.
 * NOTE(review): confirm what the macro does when the key is unknown.
 *
 * @param line  NUL-terminated command line to parse.
 */
void command_compute(char* line) {
    char cmd[MAX_BUFFER];
    char key[MAX_BUFFER];
    char func[MAX_BUFFER];
    char arg1[MAX_BUFFER];

    /*
     * NOTE(review): "%s" has no field width, so a token of MAX_BUFFER or more
     * characters overruns its buffer. Presumably the caller limits the line
     * length -- confirm, or add explicit widths.
     */
    int argc = sscanf(line, "%s %s %s %s", cmd, func, key, arg1);
    if (argc < 3) {
        goto invalid;
    }

    MATRIX_GUARD(key);

    uint32_t result = 0;

    if (strcasecmp(func, "sum") == 0) {
        result = get_sum(m);
    } else if (strcasecmp(func, "trace") == 0) {
        result = get_trace(m);
    } else if (strcasecmp(func, "minimum") == 0) {
        result = get_minimum(m);
    } else if (strcasecmp(func, "maximum") == 0) {
        result = get_maximum(m);
    } else if (strcasecmp(func, "frequency") == 0) {
        /* arg1 is only written when sscanf converted a fourth token. */
        if (argc < 4) {
            goto invalid;
        }
        result = get_frequency(m, atoll(arg1));
    } else {
        goto invalid;
    }

    printf("%" PRIu32 "\n", result);
    return;

invalid:
    puts("invalid arguments");
}
/*
 * Saturation of an RGB triple, defined here as the spread between the
 * largest and the smallest channel:
 *
 *     S = max(R, G, B) - min(R, G, B)
 *
 * The difference is formed in unsigned arithmetic and then converted to
 * float for the return value.
 */
float retrieve_saturation(unsigned int r, unsigned int g, unsigned int b)
{
    const unsigned int highest = get_maximum(r, g, b);
    const unsigned int lowest = get_minimum(r, g, b);

    return (float)(highest - lowest);
}
/**
 * Saturation is calculated as the spread between the strongest and the
 * weakest colour channel:
 *
 *     S = max(R, G, B) - min(R, G, B)
 *
 * The subtraction is carried out on tUInt values and the result is then
 * converted to tFloat32.
 */
tFloat32 retrieve_saturation(tUInt r, tUInt g, tUInt b)
{
    const tUInt highest = get_maximum(r, g, b);
    const tUInt lowest = get_minimum(r, g, b);

    return static_cast<tFloat32>(highest - lowest);
}
/*
 * Sorts vec[0 .. len) in place with a least-significant-digit radix sort in
 * base 10.
 *
 * Each pass is a stable counting sort on one decimal digit: build a histogram
 * of the digit, turn it into start offsets, scatter into a scratch buffer and
 * copy back. One scratch buffer of len elements is allocated up front and
 * released before returning, so no memory is allocated per pass.
 *
 * Parameters:
 *   vec - values to sort; overwritten with the sorted sequence
 *   len - number of elements in vec
 *
 * Does nothing when vec is NULL or len <= 0. If the scratch buffer cannot be
 * allocated the function returns with vec unchanged.
 *
 * getdigit(value, pass) must return a value in 0..9; it is used directly as
 * an index into a ten-entry table.
 *
 * NOTE(review): the original carried a TODO about an assumed vector length of
 * 5. Nothing in this function depends on a fixed length, but get_maximum(vec)
 * receives no length -- confirm it inspects exactly len elements.
 */
void radix_sort (int64_t * vec, int64_t len) {
    if (vec == NULL || len <= 0)
        return;

    int64_t * scratch = calloc((size_t) len, sizeof *scratch);
    if (scratch == NULL)
        return;

    /*
     * Number of passes. The counter deliberately starts at 1, as in the
     * original, so one pass more than the digit count of the maximum is run.
     * NOTE(review): whether getdigit() numbers digits from 0 or 1 is not
     * visible here, so the original pass range is kept unchanged.
     */
    int64_t max = get_maximum(vec);
    int64_t digits = 1;
    while (max) {
        max /= 10;
        digits++;
    }

    for (int pass = 0; pass < digits; pass++) {
        int64_t slot[10] = {0};

        /* Histogram of the current digit. */
        for (int64_t j = 0; j < len; j++)
            slot[getdigit(vec[j], pass)]++;

        /* Exclusive prefix sum: slot[d] becomes the first output index of d. */
        int64_t offset = 0;
        for (int d = 0; d < 10; d++) {
            int64_t count = slot[d];
            slot[d] = offset;
            offset += count;
        }

        /* Stable scatter: equal digits keep their relative order. */
        for (int64_t j = 0; j < len; j++)
            scratch[slot[getdigit(vec[j], pass)]++] = vec[j];

        for (int64_t j = 0; j < len; j++)
            vec[j] = scratch[j];
    }

    free(scratch);
}
// ******************************************************************* // Function print_employee_wages // // Purpose: This function will print out the gross // wages of each employee. // // Parameters: employees - the array of structures for employees // size - the number of employees to input // // Returns: Nothing (void) // // ******************************************************************* void print_employee_wages (struct employee employees[], int size) { int i; // To increment the loop float maximum[3] = {0}; // stores the maximum values float minimum[3] = {0}; // stores the minimum values float total[3] = {0}; // Totals to be used for the total and average // Declare functions called void get_totals (struct employee employees[], float total_array[], int size); void get_minimum (struct employee employees[], float minimum[], int size); void get_maximum (struct employee employees[], float maximum[], int size); // Print out header information for data to be displayed printf ("\n--------------------------------------------------------------\n"); printf ("Name\t\t\tClock#\tWage\tHours\tOT\tGross\n"); printf ("--------------------------------------------------------------\n"); // Print out employee information to the screen for (i = 0; i < size; ++i) { printf ("%s\t\t%06li\t%.2f\t%.1f\t%.1f\t%4.2f\n", employees[i].name ,employees[i].id_number, employees[i].wage, employees[i].hours, employees[i].overtime, employees[i].gross); } printf("\n"); // run the stats functions get_totals (employees, total, size); get_minimum (employees, minimum, size); get_maximum (employees, maximum, size); // Print out various stats printf ("--------------------------------------------------------------\n"); // Total of hours, overtime hours, and gross wages paid printf("Total:\t\t\t\t\t%.1f\t%.1f\t\%.2f\n", total[0], total[1], total[2]); // Average of hours, overtime hours, and gross wages paid printf("Average:\t\t\t\t%.1f\t%.1f\t\%.2f\n", total[0] / size, total[1] / size, total[2] / 
size); // Minimum hours, overtime, and gross printf("Minimum:\t\t\t\t%.1f\t%.1f\t\%.2f\n", minimum[0], minimum[1], minimum[2]); // Maximum hours, overtime, and gross printf("Maximum:\t\t\t\t%.1f\t%.1f\t\%.2f\n", maximum[0], maximum[1], maximum[2]); printf("\n"); }
/**
 * Compute command.
 *
 * Parses a line of the form "<cmd> <func> <key> [arg1]", evaluates <func> on
 * the matrix selected by <key> and prints the result.
 *
 * Recognised functions (matched case-insensitively):
 *   sum, trace, minimum, maximum, determinant - printed with "%.2f"
 *   frequency - requires arg1 (converted with atof()); the count is printed
 *               as a signed size value
 *
 * Prints "invalid arguments" when fewer than three tokens are present, when
 * the function name is unknown, or when "frequency" is given without arg1.
 *
 * MATRIX_GUARD(key) is expected to bring the matrix `m` into scope, since `m`
 * is not declared in this function.
 * NOTE(review): confirm what the macro does when the key is unknown.
 *
 * @param line  NUL-terminated command line to parse.
 */
void command_compute(char* line) {
    char cmd[MAX_BUFFER];
    char key[MAX_BUFFER];
    char func[MAX_BUFFER];
    char arg1[MAX_BUFFER];

    /*
     * NOTE(review): "%s" has no field width, so a token of MAX_BUFFER or more
     * characters overruns its buffer. Presumably the caller limits the line
     * length -- confirm, or add explicit widths.
     */
    int argc = sscanf(line, "%s %s %s %s", cmd, func, key, arg1);
    if (argc < 3) {
        goto invalid;
    }

    MATRIX_GUARD(key);

    float result = 0;

    if (strcasecmp(func, "sum") == 0) {
        result = get_sum(m);
    } else if (strcasecmp(func, "trace") == 0) {
        result = get_trace(m);
    } else if (strcasecmp(func, "minimum") == 0) {
        result = get_minimum(m);
    } else if (strcasecmp(func, "maximum") == 0) {
        result = get_maximum(m);
    } else if (strcasecmp(func, "determinant") == 0) {
        result = get_determinant(m);
    } else if (strcasecmp(func, "frequency") == 0) {
        /* arg1 is only written when sscanf converted a fourth token. */
        if (argc < 4) {
            goto invalid;
        }
        ssize_t count = get_frequency(m, atof(arg1));
        /* ssize_t is signed, so it needs %zd rather than %zu. */
        printf("%zd\n", count);
        return;
    } else {
        goto invalid;
    }

    printf("%.2f\n", result);
    return;

invalid:
    puts("invalid arguments");
}
// TODO: Change this function depending on model implied by activation functions double predict(const Eigen::VectorXd& parameters, const Eigen::VectorXd& inputs) { forward_propagation(parameters, inputs, outputs()); return get_maximum(outputs()); }
// Clusters every voxel of an r x c x z volume to a seed point by repeatedly
// following "pointer to my local maximum" links.
//
// Parameters:
//   im_vals        - per-voxel response image (Laplacian of Gaussian, per the
//                    original author's note); read only
//   local_max_vals - seed labels; a non-zero entry marks a seed. The value
//                    65535 (the original comment's "-1" stored in an unsigned
//                    short) is mapped to 0 in the output
//   bImg           - mask; voxels where it is 0 get output 0
//   out1           - receives the clustering result, one label per voxel
//   r, c, z        - volume extents; loops use i in [0,r), j in [0,c), k in [0,z)
//   scale_xy       - half-width of the search window along i and j
//   scale_z        - half-width of the search window along k
//
// Two index layouts are used and must not be confused:
//   * im-style arrays (local_max_vals, bImg, out1) and the node ids stored in
//     max_nghbr_im use (k*r*c)+(i*c)+j
//   * the max_response_* scratch arrays use i*(c*z) + j*z + k
//
// Progress messages are written to std::cout.
void local_max_clust_3D(float* im_vals, unsigned short* local_max_vals, unsigned short* bImg, unsigned short* out1, int r, int c, int z, int scale_xy, int scale_z)
{
    //im_vals is the Laplacian of Gaussian
    //local_max_vals is the seed points (local maximum) with foreground seeds assigned an id > 0 and background seeds id == -1
    // out1 will contain the clustering output
    int*** max_nghbr_im;

    //create max_nghbr_im and initialize it with its index (node) value
    // NOTE(review): none of the malloc results are checked before use.
    max_nghbr_im = (int ***) malloc(r*sizeof(int**));
    #pragma omp parallel for
    for(int i=0; i<r; i++)
    {
        max_nghbr_im[i] = (int **) malloc(c*sizeof(int*));
        for(int j=0; j<c; j++)
        {
            max_nghbr_im[i][j] = (int *) malloc(z*sizeof(int));
            for(int k=0; k<z; k++)
            {
                // Every voxel starts out pointing at itself.
                max_nghbr_im[i][j][k] = (k*r*c)+(i*c)+j;//LMX;
            }
        }
    }
    std::cout << "max_nghbr_im initialized" << std::endl;

    //In this loop we look in a local region around each point and find the maximum value in the LoG image
    //Set the value to the index of the local maximum, (so if I am a seed point do nothing).

    /*cerr << "sizeof(int ***) = " << sizeof(int ***) << endl;
    cerr << "sizeof(int **) = " << sizeof(int **) << endl;
    cerr << "sizeof(int *) = " << sizeof(int *) << endl;
    cerr << "sizeof(int) = " << sizeof(int) << endl;
    cerr << "Total size of array = " << (r * c * z * 3 * sizeof(int) + r * c * z * sizeof(int *) + r * c * sizeof(int **) + r * sizeof(int ***) + sizeof (int ****)) / (1024 * 1024) << " MB" << endl; //Probably incorrect
    */

    clock_t start_time_init_max_nghbr_im = clock();

    // Per-voxel coordinates of the window maximum, one array per axis.
    // NOTE(review): stored as unsigned short, so coordinates above 65535
    // would be truncated; r * c * z is also evaluated in int.
    unsigned short *max_response_r;
    unsigned short *max_response_c;
    unsigned short *max_response_z;

    max_response_r = new unsigned short[r * c * z];
    max_response_c = new unsigned short[r * c * z];
    max_response_z = new unsigned short[r * c * z];

#ifdef OPENCL
    // START OPENCL BOILERPLATE ----------------------------------------------------------------------------------------------------------------
    // NOTE(review): the return codes of most OpenCL calls in this branch are
    // not inspected.
    cl_platform_id platforms[10];
    cl_uint num_platforms;
    cl_device_id device[10];
    cl_uint num_devices;
    cl_context context;
    cl_command_queue queue;
    cl_program program;
    cl_kernel kernel;
    cl_int errorcode;

    // Platform
    clGetPlatformIDs(10, platforms, &num_platforms); //Get OpenCL Platforms
    clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, 10, device, &num_devices); //Get the first platform and get a list of devices
    context = clCreateContext(0, 1, device, &clustering_pfn_notify, NULL, NULL); //Create context from first device
    queue = clCreateCommandQueue(context, device[0], 0, NULL); //Create a command queue for the first device

    //cout << endl << LocalMaximaKernel << endl << endl; //print out stringified kernel

    program = clCreateProgramWithSource(context, 1, (const char **) &InitialClusteringKernel, 0, &errorcode); //Read in kernel and create a program
    if (errorcode != CL_SUCCESS)
        cout << "clCreateProgramWithSource Error code: " << errorcode << endl;

    errorcode = clBuildProgram(program, 0, 0, 0, 0, 0); //Build the program
    if (errorcode != CL_SUCCESS) //If there was a build error, print out build_info
    {
        cout << "clBuildProgram Error Code: " << errorcode << endl;
        // 1 MiB log buffer with automatic storage.
        char build_log[1024*1024];
        errorcode = clGetProgramBuildInfo(program, device[0], CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, NULL); //Get the build log
        if (errorcode == CL_SUCCESS)
            cout << "Build Log:" << endl << build_log << endl;
        else
            cout << "clGetProgramBuildInfo Error Code: " << errorcode << endl;
    }

    kernel = clCreateKernel(program, "InitialClusteringKernel", &errorcode); //Create the kernel from the source code
    if (errorcode != CL_SUCCESS)
        cout << "clCreateKernel Error code: " << errorcode << endl;
    //END OPENCL BOILERPLATE ---------------------------------------------------------------------------------------------------------------------

    size_t cnDimension = r * c * z; //array size

    cl_ulong device_max_mem_alloc_size, device_global_mem_size;

    // NOTE(review): 1024 is passed as the parameter size although the
    // destinations are single cl_ulong values -- confirm this is intended.
    clGetDeviceInfo(device[0], CL_DEVICE_MAX_MEM_ALLOC_SIZE, 1024, &device_max_mem_alloc_size, NULL);
    clGetDeviceInfo(device[0], CL_DEVICE_GLOBAL_MEM_SIZE, 1024, &device_global_mem_size, NULL);

    cout << "Maximum memory allocation size: " << device_max_mem_alloc_size / (double)(1024*1024) << " MB" << endl;
    cout << "Maximum global memory allocation size: " << device_global_mem_size / (double)(1024*1024) << " MB" << endl;

    cout << "Allocating " << (sizeof(*im_vals) * cnDimension)/(double)(1024*1024) << " MB of memory on GPU for im_vals" << endl;
    cout << "Allocating " << (sizeof(*local_max_vals) * cnDimension)/(double)(1024*1024) << " MB of memory on GPU for local_max_vals" << endl;
    cout << "Allocating " << (sizeof(*max_response_r) * cnDimension)/(double)(1024*1024) << " MB of memory on GPU for max_response_r" << endl;
    cout << "Allocating " << (sizeof(*max_response_c) * cnDimension)/(double)(1024*1024) << " MB of memory on GPU for max_response_c" << endl;
    cout << "Allocating " << (sizeof(*max_response_z) * cnDimension)/(double)(1024*1024) << " MB of memory on GPU for max_response_z" << endl;

    //Allocate device memory
    cl_int errorcode1, errorcode2, errorcode3, errorcode4, errorcode5;
    cl_mem device_mem_im_vals = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_float) * cnDimension, NULL, &errorcode1);
    cl_mem device_mem_local_max_vals = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_ushort) * cnDimension, NULL, &errorcode2);
    cl_mem device_mem_max_response_r = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_ushort) * cnDimension, NULL, &errorcode3);
    cl_mem device_mem_max_response_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_ushort) * cnDimension, NULL, &errorcode4);
    cl_mem device_mem_max_response_z = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_ushort) * cnDimension, NULL, &errorcode5);

    // A failure is only reported; execution continues regardless.
    if (errorcode1 || errorcode2 || errorcode3 || errorcode4 || errorcode5)
    {
        cout << "Failed to allocate buffer memory on GPU" << endl;
    }

    //Write memory from host to device
    clEnqueueWriteBuffer(queue, device_mem_im_vals, CL_TRUE, 0, sizeof(cl_float) * cnDimension, im_vals, NULL, NULL, NULL);
    clEnqueueWriteBuffer(queue, device_mem_local_max_vals, CL_TRUE, 0, sizeof(cl_ushort) * cnDimension, local_max_vals, NULL, NULL, NULL);

    //Set kernel arguments
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *) &device_mem_im_vals);
    clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *) &device_mem_local_max_vals);
    clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *) &device_mem_max_response_r);
    clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *) &device_mem_max_response_c);
    clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *) &device_mem_max_response_z);
    clSetKernelArg(kernel, 5, sizeof(int), &r);
    clSetKernelArg(kernel, 6, sizeof(int), &c);
    clSetKernelArg(kernel, 7, sizeof(int), &z);
    clSetKernelArg(kernel, 8, sizeof(int), &scale_xy);
    clSetKernelArg(kernel, 9, sizeof(int), &scale_z);

    //Execute the kernel
    // One-dimensional launch with cnDimension work items.
    clEnqueueNDRangeKernel(queue, kernel, 1, 0, (const size_t *) &cnDimension, 0, 0, 0, 0);

    //Read the output from the device back into host memory
    clEnqueueReadBuffer(queue, device_mem_max_response_r, CL_TRUE, 0, sizeof(cl_ushort) * cnDimension, max_response_r, NULL, NULL, NULL);
    clEnqueueReadBuffer(queue, device_mem_max_response_c, CL_TRUE, 0, sizeof(cl_ushort) * cnDimension, max_response_c, NULL, NULL, NULL);
    clEnqueueReadBuffer(queue, device_mem_max_response_z, CL_TRUE, 0, sizeof(cl_ushort) * cnDimension, max_response_z, NULL, NULL, NULL);

    //Block till all commands are complete
    clFinish(queue);

    /*for (int i = 0; i < cnDimension * 3; i+=3)
        cout << i << " " << max_response[i] << " " << max_response[i+1] << " " << max_response[i+2] << endl;*/

    cout << endl;

    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseMemObject(device_mem_im_vals);
    clReleaseMemObject(device_mem_local_max_vals);
    clReleaseMemObject(device_mem_max_response_r);
    clReleaseMemObject(device_mem_max_response_c);
    clReleaseMemObject(device_mem_max_response_z);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);

    cout << endl;
#elif CUDA
    initialClustering_CUDA(im_vals, local_max_vals, max_response_r, max_response_c, max_response_z, r, c, z, scale_xy, scale_z);
#else
    // CPU path: for every non-seed voxel, search the window clamped to the
    // volume bounds and record the coordinates reported by get_maximum().
    int min_r, min_c, min_z, max_r, max_c, max_z;

    #pragma omp parallel for private(min_r, min_c, min_z, max_r, max_c, max_z)
    for(int i=0; i<r; i++)
    {
        for(int j=0; j<c; j++)
        {
            for(int k=0; k<z; k++)
            {
                min_r = (int) std::max((double)(0.0),(double)(i-scale_xy));
                min_c = (int) std::max((double)(0.0),(double)(j-scale_xy));
                min_z = (int) std::max((double)(0.0),(double)(k-scale_z));
                max_r = (int) std::min((double)(r-1),(double)(i+scale_xy));
                max_c = (int) std::min((double)(c-1),(double)(j+scale_xy));
                max_z = (int) std::min((double)(z-1),(double)(k+scale_z));

                // Filled in by get_maximum() through the pointer arguments;
                // presumably the position of the window maximum -- confirm
                // against get_maximum().
                int R, C, Z;

                // Seeds are skipped, so their max_response_* entries stay
                // unwritten; the consumer loop below skips them as well.
                if(local_max_vals[(k*r*c)+(i*c)+j] !=0)//local_max_im[i][j][k]!=0)
                    continue;
                else
                {
                    get_maximum(im_vals, min_r, max_r, min_c, max_c, min_z, max_z, &R, &C, &Z, r, c, z);

                    max_response_r[i * (c * z) + j * z + k] = R;
                    max_response_c[i * (c * z) + j * z + k] = C;
                    max_response_z[i * (c * z) + j * z + k] = Z;
                }
            }
        }
    }
#endif // OPENCL

    /*for (int k = 0; k < r * c * z * 3; k+=3)
        cout << k << " " << max_response[k] << " " << max_response[k+1] << " " << max_response[k+2] << endl;*/

    std::cout << "Max_response array done" << std::endl;

    // Point every non-seed voxel at whatever its window maximum points to.
    // The update is in place, so a voxel visited later may read a link that
    // was already rewritten earlier in this same sweep.
    for(int i=0; i<r; i++)
    {
        for(int j=0; j<c; j++)
        {
            for(int k=0; k<z; k++)
            {
                if(local_max_vals[(k*r*c)+(i*c)+j] !=0)//local_max_im[i][j][k]!=0)
                    continue;
                else
                    max_nghbr_im[i][j][k] = max_nghbr_im[ max_response_r[i * (c * z) + j * z + k] ] [ max_response_c[i * (c * z) + j * z + k] ] [ max_response_z[i * (c * z) + j * z + k] ];
            }
        }
    }

    delete [] max_response_r;
    delete [] max_response_c;
    delete [] max_response_z;

    std::cout << "Initial max_nghbr_im took " << (clock() - start_time_init_max_nghbr_im)/(float)CLOCKS_PER_SEC << " seconds" << std::endl;

    int change = 1;
    double LM;
    std::cout << "Entering main Clustering Loop" << std::endl;

    //Now continue to update until no more changes occur, eventually will have clusters pointing to seeds
    int iterr = 0;
    while(change)
    {
        //For now, limit it to a maximum of 10 iterations
        // NOTE(review): the break fires before the tenth sweep starts, so at
        // most nine sweeps actually run.
        iterr++;
        if(iterr == 10)
            break;

        change=0;

        for(int i=0; i<r; i++)
        {
            for(int j=0; j<c; j++)
            {
                for(int k=0; k<z; k++)
                {
                    LM = max_nghbr_im[i][j][k];
                    // A link value of 0 (node id of voxel i=0, j=0, k=0) is
                    // skipped outright.
                    if(LM==0)
                        continue;

                    //Calculate coordinates of local maximum based on its index
                    // Inverse of id = (Z*r*c)+(R*c)+C.
                    int rem = ((long)LM) % (r*c);
                    int Z = ((int)LM-rem) / (r*c);
                    int C = ((long)rem) % c;
                    int R = (rem-C)/c;

                    //Check for seed (already a local max value) or connected to seed (my local maximum is a seed point)
                    if(local_max_vals[(k*r*c)+(i*c)+j] !=0 || local_max_vals[(Z*r*c)+(R*c)+C]!=0 )
                        continue;
                    else
                    {
                        // Follow the chain one step further; in place, so
                        // the result depends on traversal order.
                        change++;
                        max_nghbr_im[i][j][k]=max_nghbr_im[R][C][Z];
                    }
                }
            }
        }
        std::cout<< "change=" << change << std::endl;
    }

    //cerr << "Preparing Output" << endl;

    // Write the result: each voxel gets the label stored at the node it
    // points to, or 0 when that label is 65535 or the mask is 0.
    for(int i=0; i<r; i++)
    {
        for(int j=0; j<c; j++)
        {
            for(int k=0; k<z; k++)
            {
                LM = max_nghbr_im[i][j][k];
                //if(local_max_vals[(int)LM] == -1 || bImg[(k*r*c)+(i*c)+j]==0)
                if(local_max_vals[(int)LM] == 65535 || bImg[(k*r*c)+(i*c)+j]==0)
                    out1[(k*r*c)+(i*c)+j] = 0;
                else
                {
                    //modified by Yousef on 8/21/2009
                    //if the distance between me and my seed is more than a threshold.. then ignore me
                    /*int rem = ((long)LM) % (r*c);
                    int Z = (LM-rem) / (r*c);
                    int C = ((long)rem) % c;
                    int R = (rem-C)/c;
                    double d = (i-R)*(i-R) + (j-C)*(j-C) + 3*(k-Z)*(k-Z);
                    d = sqrt(d);
                    if(d>10)
                        out1[(k*r*c)+(i*c)+j] = 0;
                    else*/
                    out1[(k*r*c)+(i*c)+j] =local_max_vals[(int)LM];
                }
            }
        }
    }

    // Release the link volume in the reverse order of its allocation.
    #pragma omp parallel for
    for(int i=0; i<r; i++)
    {
        for(int j=0; j<c; j++)
        {
            free(max_nghbr_im[i][j]);
        }
        free(max_nghbr_im[i]);
    }
    free(max_nghbr_im);
}
//Returns the most frequently occuring element //or -1 if there is no such unique element int64_t get_mode(int64_t* vector) { if (g_existCalcs[arg1].modeFlag == 1) return g_existCalcs[arg1].mode; g_existCalcs[arg1].modeFlag = 1; //Mode of a uniform vector is any element within it if (g_length == 1 || g_vec_properties[arg1][0] == UNIFORM) { g_existCalcs[arg1].mode = vector[0]; return vector[0]; } //All vectors with unique values have no mode else if (g_vec_properties[arg1][0] == SEQUP || g_vec_properties[arg1][0] == SEQDOWN || g_vec_properties[arg1][0] == PRIME) { g_existCalcs[arg1].mode = -1; return -1; } int64_t max = get_maximum(vector); int64_t min = get_minimum(vector); if ((max - min) < 2147483648) //16gb can hold 2147483648 int64s //12gb can hold 1610612736 int64s //8gb can hold 1073741824 int64s //4gb can hold 536870912 int64s return fast_mode(vector, max, min); int64_t* result = cloned(vector); qsort(result, g_length, sizeof(int64_t), int64Ascend); int64_t curSpanCount = 0, hiSpanCount = 0, currentCheck = 0, repeatCount = 0, mode = 0; for (int64_t i = 0; i < g_length; i++) { if (currentCheck != result[i]) { currentCheck = result[i]; curSpanCount = 0; } curSpanCount++; if (curSpanCount > hiSpanCount) { repeatCount = 0; hiSpanCount = curSpanCount; mode = currentCheck; } else if (currentCheck != result[i+1] && curSpanCount == hiSpanCount) { repeatCount = 1; } } if (repeatCount > 0) { g_existCalcs[arg1].mode = -1; return -1; } g_existCalcs[arg1].mode = mode; return mode; }