// compute the real number of motifs
void met_motifs_search_real(Network *N, Res_tbl *met_res_tbl, int vec[14])
{
	// int rc = RC_OK;
	time_measure_start(&GNRL_ST.real_net_time);

	//if (GNRL_ST.run_prob_app == FALSE)
	count_subgraphs(N, 3, &met_res_tbl->real, REAL_NET);
	//else
	//	search_motifs_prob(N, GNRL_ST.mtf_sz, RES_TBL.real, REAL_NET);
	//free_network_mem(N);

	// calc result after isomorphism of ids
	met_join_subgraphs_res(&met_res_tbl->real, 3, 0);

	// fprintf(GNRL_ST.out_fp, "\n Summary motif results\n");
	// fprintf(GNRL_ST.out_fp, " =====================\n");

	met_dump_motifs_res(met_res_tbl->real, N->name, vec);

	//time_measure_stop(&GNRL_ST.real_net_time);
	//if (GNRL_ST.quiet_mode == FALSE)
	//	dump_time_measure(stdout, "Real network processing runtime was:", &GNRL_ST.real_net_time);
	// return rc;
}
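/*
 * A minimal, hypothetical call site for met_motifs_search_real(). The
 * sketch_* names below are illustrative and not part of mfinder; N and the
 * result table are assumed to have been set up by mfinder's load_network()
 * and initialization paths. vec[] receives per-motif counts, with the exact
 * slot semantics defined by met_dump_motifs_res().
 */
static void sketch_count_real_motifs(Network *N, Res_tbl *tbl)
{
	int vec[14] = { 0 };                  /* one slot per size-3 subgraph id */

	met_motifs_search_real(N, tbl, vec);  /* count size-3 subgraphs of the real network */
}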
void run(int argc, char **argv)
{
	int size;
	int grid_rows, grid_cols;
	float *FilesavingTemp, *FilesavingPower, *MatrixOut;
	char *tfile, *pfile, *ofile;

	int total_iterations = 60;
	int pyramid_height = 1; // number of iterations

	if (argc != 7)
		usage(argc, argv);
	/* the grid is square: rows and cols are both read from argv[1] */
	if ((grid_rows = atoi(argv[1])) <= 0 ||
	    (grid_cols = atoi(argv[1])) <= 0 ||
	    (pyramid_height = atoi(argv[2])) <= 0 ||
	    (total_iterations = atoi(argv[3])) <= 0)
		usage(argc, argv);

	tfile = argv[4];
	pfile = argv[5];
	ofile = argv[6];

	size = grid_rows * grid_cols;

	/* --------------- pyramid parameters --------------- */
#define EXPAND_RATE 2 // each additional iteration extends the pyramid base by 2 per borderline
	int borderCols = (pyramid_height) * EXPAND_RATE / 2;
	int borderRows = (pyramid_height) * EXPAND_RATE / 2;
	int smallBlockCol = BLOCK_SIZE - (pyramid_height) * EXPAND_RATE;
	int smallBlockRow = BLOCK_SIZE - (pyramid_height) * EXPAND_RATE;
	int blockCols = grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1);
	int blockRows = grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 0 : 1);

	FilesavingTemp = (float *) malloc(size * sizeof(float));
	FilesavingPower = (float *) malloc(size * sizeof(float));
	MatrixOut = (float *) calloc(size, sizeof(float));

	if (!FilesavingPower || !FilesavingTemp || !MatrixOut)
		fatal("unable to allocate memory");

	printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, %d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n",
	       pyramid_height, grid_cols, grid_rows, borderCols, borderRows,
	       blockCols, blockRows, smallBlockCol, smallBlockRow);

	readinput(FilesavingTemp, grid_rows, grid_cols, tfile);
	readinput(FilesavingPower, grid_rows, grid_cols, pfile);

	struct timeval tv;
	CUdeviceptr MatrixTemp[2], MatrixPower;
	CUcontext ctx;
	CUmodule mod;
	CUresult res;
	int ret;

	/*
	 * call our common CUDA initialization utility function.
	 */
	res = cuda_driver_api_init(&ctx, &mod, "./hotspot.cubin");
	if (res != CUDA_SUCCESS) {
		printf("cuda_driver_api_init failed: res = %u\n", res);
		return;
	}

	res = cuMemAlloc(&MatrixTemp[0], sizeof(float) * size);
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
	res = cuMemAlloc(&MatrixTemp[1], sizeof(float) * size);
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
	res = cuMemAlloc(&MatrixPower, sizeof(float) * size);
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}

	/*
	 * measurement start!
	 */
	time_measure_start(&tv);

	res = cuMemcpyHtoD(MatrixTemp[0], FilesavingTemp, sizeof(float) * size);
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD failed: res = %u\n", res);
		return;
	}
	res = cuMemcpyHtoD(MatrixPower, FilesavingPower, sizeof(float) * size);
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD failed: res = %u\n", res);
		return;
	}

	ret = compute_tran_temp(mod, MatrixPower, MatrixTemp, grid_cols, grid_rows,
	                        total_iterations, pyramid_height,
	                        blockCols, blockRows, borderCols, borderRows);

	res = cuMemcpyDtoH(MatrixOut, MatrixTemp[ret], sizeof(float) * size);
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyDtoH failed: res = %u\n", res);
		return;
	}

	/*
	 * measurement end! will print out the time.
	 */
	time_measure_end(&tv);

	writeoutput(MatrixOut, grid_rows, grid_cols, ofile);

	cuMemFree(MatrixPower);
	cuMemFree(MatrixTemp[0]);
	cuMemFree(MatrixTemp[1]);
	/* release host buffers */
	free(MatrixOut);
	free(FilesavingTemp);
	free(FilesavingPower);

	res = cuda_driver_api_exit(ctx, mod);
	if (res != CUDA_SUCCESS) {
		printf("cuda_driver_api_exit failed: res = %u\n", res);
		return;
	}
}
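/*
 * The timing helpers time_measure_start()/time_measure_end() used above are
 * provided elsewhere in this suite and are not shown here. A minimal sketch
 * of what such gettimeofday()-based helpers typically look like follows;
 * the sketch_* names are an illustrative assumption, not the suite's code.
 */
#include <stdio.h>
#include <sys/time.h>

/* record the current wall-clock time in *tv */
static void sketch_time_measure_start(struct timeval *tv)
{
	gettimeofday(tv, NULL);
}

/* print the elapsed wall-clock time since *tv, in milliseconds */
static void sketch_time_measure_end(struct timeval *tv)
{
	struct timeval now;

	gettimeofday(&now, NULL);
	double elapsed_ms = (now.tv_sec - tv->tv_sec) * 1000.0 +
	                    (now.tv_usec - tv->tv_usec) / 1000.0;
	printf("elapsed: %.3f ms\n", elapsed_ms);
}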
void bpnn_train_cuda(BPNN *net, float *eo, float *eh)
{
	int j, k;
	int in, hid, out;
	float out_err, hid_err;
	struct timeval tv;

	in = net->input_n;
	hid = net->hidden_n;
	out = net->output_n;

#ifdef GPU
	int m = 0;
	float *partial_sum;
	float sum;
	float *input_weights_one_dim;
	float *input_weights_prev_one_dim;
	num_blocks = in / 16;
	CUdeviceptr input_cuda;
	CUdeviceptr input_hidden_cuda;
	CUdeviceptr output_hidden_cuda;
	CUdeviceptr hidden_partial_sum;
	CUdeviceptr hidden_delta_cuda;
	CUdeviceptr input_prev_weights_cuda;
	CUcontext ctx;
	CUmodule mod;
	CUresult res;

	input_weights_one_dim = (float *) malloc((in + 1) * (hid + 1) * sizeof(float));
	input_weights_prev_one_dim = (float *) malloc((in + 1) * (hid + 1) * sizeof(float));
	partial_sum = (float *) malloc(num_blocks * WIDTH * sizeof(float));

	/*
	 * this preprocessing stage flattens the two-dimensional
	 * net->input_weights into a contiguous one-dimensional array, working
	 * around a bug in which memcpy of the row-pointer representation
	 * copied the wrong data.
	 */
	for (k = 0; k <= in; k++) {
		for (j = 0; j <= hid; j++) {
			input_weights_one_dim[m] = net->input_weights[k][j];
			input_weights_prev_one_dim[m] = net->input_prev_weights[k][j];
			m++;
		}
	}

	/*
	 * call our common CUDA initialization utility function.
	 */
	res = cuda_driver_api_init(&ctx, &mod, "./backprop.cubin");
	if (res != CUDA_SUCCESS) {
		printf("cuda_driver_api_init failed: res = %u\n", res);
		return;
	}

	/*
	 * allocate device memory space
	 */
	res = cuMemAlloc(&input_cuda, (in + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
	res = cuMemAlloc(&output_hidden_cuda, (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
	res = cuMemAlloc(&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
	res = cuMemAlloc(&hidden_partial_sum, num_blocks * WIDTH * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
	res = cuMemAlloc(&hidden_delta_cuda, (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
	res = cuMemAlloc(&input_prev_weights_cuda, (in + 1) * (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return;
	}
#endif

#ifdef CPU
	printf("Performing CPU computation\n");
	bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, hid);
#endif

#ifdef GPU
	printf("Performing GPU computation\n");
	//printf("in = %d, hid = %d, num_blocks = %d\n", in, hid, num_blocks);

	/*
	 * measurement start!
	 */
	time_measure_start(&tv);

	res = cuMemcpyHtoD(input_cuda, net->input_units, (in + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD failed: res = %u\n", res);
		return;
	}
	res = cuMemcpyHtoD(input_hidden_cuda, input_weights_one_dim,
	                   (in + 1) * (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD failed: res = %u\n", res);
		return;
	}

	res = bpnn_layerforward_launch(mod, input_cuda, output_hidden_cuda,
	                               input_hidden_cuda, hidden_partial_sum, in, hid);
	if (res != CUDA_SUCCESS) {
		printf("bpnn_layerforward failed: res = %u\n", res);
		return;
	}
	cuCtxSynchronize();

#if 0
	cudaError_t error = cudaGetLastError();
	if (error != cudaSuccess) {
		printf("bpnn kernel error: %s\n", cudaGetErrorString(error));
		exit(EXIT_FAILURE);
	}
#endif

	res = cuMemcpyDtoH(partial_sum, hidden_partial_sum, num_blocks * WIDTH * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyDtoH(layerforward) failed: res = %u\n", res);
		return;
	}

	/* reduce the per-block partial sums on the host and apply the sigmoid */
	for (j = 1; j <= hid; j++) {
		sum = 0.0;
		for (k = 0; k < num_blocks; k++) {
			sum += partial_sum[k * hid + j - 1];
		}
		sum += net->input_weights[0][j]; /* bias weight */
		net->hidden_units[j] = (float) (1.0 / (1.0 + exp(-sum)));
	}
#endif

	bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, hid, out);
	bpnn_output_error(net->output_delta, net->target, net->output_units, out, &out_err);
	bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out,
	                  net->hidden_weights, net->hidden_units, &hid_err);
	bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid,
	                    net->hidden_weights, net->hidden_prev_weights);

#ifdef CPU
	bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in,
	                    net->input_weights, net->input_prev_weights);
#endif

#ifdef GPU
	res = cuMemcpyHtoD(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD failed: res = %u\n", res);
		return;
	}
	res = cuMemcpyHtoD(input_prev_weights_cuda, input_weights_prev_one_dim,
	                   (in + 1) * (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD failed: res = %u\n", res);
		return;
	}
	res = cuMemcpyHtoD(input_hidden_cuda, input_weights_one_dim,
	                   (in + 1) * (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD failed: res = %u\n", res);
		return;
	}

	res = bpnn_adjust_weights_launch(mod, hidden_delta_cuda, hid, input_cuda, in,
	                                 input_hidden_cuda, input_prev_weights_cuda);
	if (res != CUDA_SUCCESS) {
		printf("bpnn_adjust_weights failed: res = %u\n", res);
		return;
	}

	res = cuMemcpyDtoH(net->input_units, input_cuda, (in + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyDtoH(adjust_weights) failed: res = %u\n", res);
		return;
	}
	res = cuMemcpyDtoH(input_weights_one_dim, input_hidden_cuda,
	                   (in + 1) * (hid + 1) * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyDtoH(adjust_weights) failed: res = %u\n", res);
		return;
	}

	cuMemFree(input_cuda);
	cuMemFree(output_hidden_cuda);
	cuMemFree(input_hidden_cuda);
	cuMemFree(hidden_partial_sum);
	cuMemFree(input_prev_weights_cuda);
	cuMemFree(hidden_delta_cuda);

	/*
	 * measurement end! will print out the time.
	 */
	time_measure_end(&tv);

	res = cuda_driver_api_exit(ctx, mod);
	if (res != CUDA_SUCCESS) {
		printf("cuda_driver_api_exit failed: res = %u\n", res);
		return;
	}

	free(partial_sum);
	free(input_weights_one_dim);
	free(input_weights_prev_one_dim);
#endif
}
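/*
 * Note: bpnn_train_cuda() flattens net->input_weights into
 * input_weights_one_dim before the host-to-device copy, but after the final
 * device-to-host copy the updated values are never written back into the
 * two-dimensional array. If a caller relied on net->input_weights being
 * current, an unflatten step mirroring the flattening loop would be needed.
 * A sketch under that assumption (sketch_* is a hypothetical helper, not
 * part of this file):
 */
static void sketch_unflatten_weights(float **w2d, const float *w1d, int in, int hid)
{
	int m = 0;

	for (int k = 0; k <= in; k++)          /* (in + 1) rows */
		for (int j = 0; j <= hid; j++) /* (hid + 1) cols */
			w2d[k][j] = w1d[m++];  /* row-major, same order as the flatten loop */
}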
int main(int argc, char *argv[])
{
	int matrix_dim = 32; /* default matrix_dim */
	int opt, option_index = 0;
	func_ret_t ret;
	const char *input_file = NULL;
	const char *cubin_file = NULL;
	float *m, *mm;
	struct timeval tv;
	CUdeviceptr d_m;
	CUcontext ctx;
	CUmodule mod;
	CUresult res;

	while ((opt = getopt_long(argc, argv, "::vs:i:c:", long_options, &option_index)) != -1) {
		switch (opt) {
		case 'c':
			cubin_file = optarg;
			break;
		case 'i':
			input_file = optarg;
			break;
		case 'v':
			do_verify = 1;
			break;
		case 's':
			matrix_dim = atoi(optarg);
			fprintf(stderr, "Currently not supported, use -i instead\n");
			fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-c cubin]\n", argv[0]);
			exit(EXIT_FAILURE);
		case '?':
			fprintf(stderr, "invalid option\n");
			break;
		case ':':
			fprintf(stderr, "missing argument\n");
			break;
		default:
			fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-c cubin]\n", argv[0]);
			exit(EXIT_FAILURE);
		}
	}

	if ((optind < argc) || (optind == 1)) {
		fprintf(stderr, "Usage: %s [-v] [-s matrix_size|-i input_file|-c cubin]\n", argv[0]);
		exit(EXIT_FAILURE);
	}

	if (!cubin_file) {
		printf("No cubin file specified!\n");
		exit(EXIT_FAILURE);
	}

	if (input_file) {
		printf("Reading matrix from file %s\n", input_file);
		ret = create_matrix_from_file(&m, input_file, &matrix_dim);
		if (ret != RET_SUCCESS) {
			m = NULL;
			fprintf(stderr, "error creating matrix from file %s\n", input_file);
			exit(EXIT_FAILURE);
		}
	} else {
		printf("No input file specified!\n");
		exit(EXIT_FAILURE);
	}

	if (do_verify) {
		print_matrix(m, matrix_dim);
		matrix_duplicate(m, &mm, matrix_dim);
	}

	/*
	 * call our common CUDA initialization utility function.
	 */
	res = cuda_driver_api_init(&ctx, &mod, cubin_file);
	if (res != CUDA_SUCCESS) {
		printf("cuda_driver_api_init failed: res = %u\n", res);
		return -1;
	}

	res = cuMemAlloc(&d_m, matrix_dim * matrix_dim * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemAlloc failed: res = %u\n", res);
		return -1;
	}

	/*
	 * measurement start!
	 */
	time_measure_start(&tv);

	res = cuMemcpyHtoD(d_m, m, matrix_dim * matrix_dim * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyHtoD (a) failed: res = %u\n", res);
		return -1;
	}

	lud_launch(mod, d_m, matrix_dim);

	res = cuMemcpyDtoH(m, d_m, matrix_dim * matrix_dim * sizeof(float));
	if (res != CUDA_SUCCESS) {
		printf("cuMemcpyDtoH failed: res = %u\n", res);
		return -1;
	}

	/*
	 * measurement end! will print out the time.
	 */
	time_measure_end(&tv);

	res = cuMemFree(d_m);
	if (res != CUDA_SUCCESS) {
		printf("cuMemFree failed: res = %u\n", res);
		return -1;
	}

	res = cuda_driver_api_exit(ctx, mod);
	if (res != CUDA_SUCCESS) {
		printf("cuda_driver_api_exit failed: res = %u\n", res);
		return -1;
	}

	if (do_verify) {
		print_matrix(m, matrix_dim);
		printf(">>>Verify<<<\n");
		lud_verify(mm, m, matrix_dim);
		free(mm);
	}

	free(m);
	return EXIT_SUCCESS;
} /* ---------- end of function main ---------- */
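/*
 * Example invocation, inferred from the option parsing above (file names are
 * placeholders): read a matrix from a file, run the LU decomposition on the
 * GPU using the given cubin, and verify the result against a host-side copy:
 *
 *   ./lud -i matrix.dat -c ./lud.cubin -v
 */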
int main(int argc, char *argv[])
{
	int rc = 0;
	Network *N = NULL;

	time_measure_start(&GNRL_ST.total_time);

	// process input arguments and init global structure
	rc |= process_input_args(argc, argv);
	if (GNRL_ST.quiet_mode == FALSE)
		printf("mfinder Version %.2f\n\n", VERSION);
	if (rc == RC_ERR)
		at_exit(-1);

	// general initialization
	rc |= gnrl_init();
	if (rc == RC_ERR)
		at_exit(-1);

	// load network from input file
	if (GNRL_ST.quiet_mode == FALSE)
		printf("Loading Network\n");
	load_network(&G_N, input_network_fname);
	duplicate_network(G_N, &N, "real_network");
	init_random_seed();
	if (rc == RC_ERR)
		at_exit(-1);

	if (GNRL_ST.quiet_mode == FALSE)
		printf("Searching motifs size %d\nProcessing Real network...\n", GNRL_ST.mtf_sz);

	// search motifs of size n in the real network
	if (GNRL_ST.dont_search_real != TRUE)
		rc |= motifs_search_real(N);
	if (rc == RC_ERR)
		at_exit(-1);

	/*
	printf("RES TBL real\n");
	for (l_id = list64_get_next(RES_TBL.real, NULL); l_id != NULL;
	     l_id = list64_get_next(RES_TBL.real, l_id)) {
		printf("id: %d\ncount: %.0f\n",
		       (int)((Motif*)l_id->p)->id, ((Motif*)l_id->p)->count);
	}
	*/

	if (GNRL_ST.quiet_mode == FALSE)
		printf("Processing Random networks\n");

	if (GNRL_ST.rnd_net_num > 0) {
		// create random networks with the same single-node statistics as the input network
		if (GNRL_ST.r_grassberger == FALSE) {
			// use switches or stubs
			rc |= process_rand_networks(&RES_TBL, GNRL_ST.mtf_sz);
		} else {
			// use the Grassberger algorithm
			weights_arr = (double*) calloc(GNRL_ST.rnd_net_num + 1, sizeof(double));
			rc |= process_rand_networks_grassberger(&RES_TBL, GNRL_ST.mtf_sz, weights_arr);
		}
		if (rc == RC_ERR)
			at_exit(-1);
	}

	if (GNRL_ST.quiet_mode == FALSE)
		printf("Calculating Results...\n");

	// calculate final results and dump them to the results file
	if (GNRL_ST.rnd_net_num >= 0) {
		if (!GNRL_ST.r_grassberger) {
			calc_final_results(&RES_TBL, &final_res, &final_res_all, GNRL_ST.rnd_net_num);
		} else {
			// Grassberger: weighted result calculation
			calc_final_results_grassberger(&RES_TBL, FALSE, res_sub_motif,
			                               &final_res, &final_res_all,
			                               GNRL_ST.rnd_net_num, weights_arr);
		}
	}

	time_measure_stop(&GNRL_ST.total_time);

	// output results
	rc |= output_results(final_res, final_res_all);

	free_network_mem(G_N);
	free(G_N);
	final_res_free(final_res);
	final_res_free(final_res_all);
	if (GNRL_ST.r_grassberger)
		free(weights_arr);
	res_tbl_mem_free(&RES_TBL);

	if (GNRL_ST.calc_roles == TRUE) {
		free_mem_role_hash();
		free_roles_res_tbl(GNRL_ST.rnd_net_num);
		free_role_members();
	}

	if (rc == RC_ERR)
		at_exit(-1);
	exit(at_exit(rc));
}
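/*
 * main() above accumulates status with rc |= step() and then tests
 * rc == RC_ERR. A minimal sketch of that idiom, assuming RC_OK == 0 and
 * RC_ERR == -1 (all bits set) as the only failure code; the sketch_* names
 * are illustrative, not mfinder's:
 */
enum { SKETCH_RC_OK = 0, SKETCH_RC_ERR = -1 };

static int sketch_step_ok(void)   { return SKETCH_RC_OK; }
static int sketch_step_fail(void) { return SKETCH_RC_ERR; }

static int sketch_pipeline(void)
{
	int rc = SKETCH_RC_OK;

	rc |= sketch_step_ok();    /* rc stays RC_OK */
	rc |= sketch_step_fail();  /* any failure ORs in every bit of RC_ERR */

	/* the equality test works because RC_ERR sets all the bits a failure can set */
	return (rc == SKETCH_RC_ERR) ? 1 : 0;
}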