void SERVICE_fwd(float* in, int in_size, float* out, int out_size, Net<float>* net) { string net_name = net->name(); STATS_INIT("service", "DjiNN service inference"); PRINT_STAT_STRING("network", net_name.c_str()); if (Caffe::mode() == Caffe::CPU) PRINT_STAT_STRING("platform", "cpu"); else PRINT_STAT_STRING("platform", "gpu"); float loss; vector<Blob<float>*> in_blobs = net->input_blobs(); tic(); in_blobs[0]->set_cpu_data(in); vector<Blob<float>*> out_blobs = net->ForwardPrefilled(&loss); memcpy(out, out_blobs[0]->cpu_data(), sizeof(float)); PRINT_STAT_DOUBLE("inference latency", toc()); STATS_END(); if (out_size != out_blobs[0]->count()) LOG(FATAL) << "out_size =! out_blobs[0]->count())"; else memcpy(out, out_blobs[0]->cpu_data(), out_size * sizeof(float)); }
int main (const int argc, char *argv[]) { parse_args (argc, argv, &initial_graph_name, &action_stream_name, &batch_size, &nbatch); STATS_INIT (); load_graph_and_action_stream (initial_graph_name, &nv, &ne, (int64_t **) & off, (int64_t **) & ind, (int64_t **) & weight, (int64_t **) & graphmem, action_stream_name, &naction, (int64_t **) & action, (int64_t **) & actionmem); print_initial_graph_stats (nv, ne, batch_size, nbatch, naction); BATCH_SIZE_CHECK (); /* Convert to STINGER */ tic (); S = stinger_new (); stinger_set_initial_edges (S, nv, 0, off, ind, weight, NULL, NULL, 0); PRINT_STAT_DOUBLE ("time_stinger", toc ()); fflush (stdout); int64_t numSteps = 3; int64_t src_dest_pair[2] = { 124, 381 }; tic (); int64_t size_intersect = st_conn_stinger (S, nv, ne, src_dest_pair, 1, numSteps); PRINT_STAT_DOUBLE ("time_st_conn_stinger", toc ()); PRINT_STAT_INT64 ("size_intersect", size_intersect); stinger_free_all (S); free (graphmem); free (actionmem); STATS_END (); }
int main (const int argc, char *argv[]) { parse_args (argc, argv, &initial_graph_name, &action_stream_name, &batch_size, &nbatch); STATS_INIT (); load_graph_and_action_stream (initial_graph_name, &nv, &ne, (int64_t **) & off, (int64_t **) & ind, (int64_t **) & weight, (int64_t **) & graphmem, action_stream_name, &naction, (int64_t **) & action, (int64_t **) & actionmem); print_initial_graph_stats (nv, ne, batch_size, nbatch, naction); BATCH_SIZE_CHECK (); int64_t *component_map = xmalloc (nv * sizeof (int64_t)); /* Convert to STINGER */ tic (); S = stinger_new (); stinger_set_initial_edges (S, nv, 0, off, ind, weight, NULL, NULL, 0); PRINT_STAT_DOUBLE ("time_stinger", toc ()); fflush (stdout); tic (); int64_t num_comp_end = connected_components_stinger (S, nv, ne, component_map, NULL, NULL, NULL, NULL, NULL); PRINT_STAT_DOUBLE ("time_components_tree", toc ()); PRINT_STAT_INT64 ("number_of_components", num_comp_end); stinger_free_all (S); free (graphmem); free (actionmem); STATS_END (); }
int main(int argc, char **argv) { u16 (*bayer)[WAMI_DEBAYER_IMG_NUM_COLS] = NULL; rgb_pixel (*debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL; char *input_directory = NULL; #ifdef ENABLE_CORRECTNESS_CHECKING rgb_pixel (*gold_debayer)[WAMI_DEBAYER_IMG_NUM_COLS-2*PAD] = NULL; #endif const size_t num_bayer_pixels = WAMI_DEBAYER_IMG_NUM_ROWS * WAMI_DEBAYER_IMG_NUM_COLS; const size_t num_debayer_pixels = (WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD) * (WAMI_DEBAYER_IMG_NUM_COLS-2*PAD); if (argc != 2) { fprintf(stderr, "%s <directory-containing-input-files>\n", argv[0]); exit(EXIT_FAILURE); } input_directory = argv[1]; bayer = XMALLOC(sizeof(u16) * num_bayer_pixels); debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels); #ifdef ENABLE_CORRECTNESS_CHECKING gold_debayer = XMALLOC(sizeof(rgb_pixel) * num_debayer_pixels); #endif read_image_file( (char *) bayer, input_filename, input_directory, sizeof(u16) * num_bayer_pixels); memset(debayer, 0, sizeof(u16) * num_debayer_pixels); printf("WAMI kernel 1 parameters:\n\n"); printf("Input image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS); printf("Input image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS); printf("Output image width = %u pixels\n", WAMI_DEBAYER_IMG_NUM_COLS-2*PAD); printf("Output image height = %u pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS-2*PAD); printf("\nStarting WAMI kernel 1 (debayer).\n"); tic(); accept_roi_begin(); wami_debayer( debayer, bayer); accept_roi_end(); PRINT_STAT_DOUBLE("CPU time using func toc - ", toc()); #ifdef ENABLE_CORRECTNESS_CHECKING read_image_file( (char *) gold_debayer, golden_output_filename, input_directory, sizeof(rgb_pixel) * num_debayer_pixels); /* * An exact match is expected for the debayer kernel, so we check * each pixel individually and report either the first failure or * a success message. */ { /* // original error metric int r, c, success = 1; for (r = 0; success && r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r) { for (c = 0; c < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++c) { if (ENDORSE(debayer[r][c].r != gold_debayer[r][c].r)) { printf("Validation error: red pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].r, gold_debayer[r][c].r); success = 0; break; } if (ENDORSE(debayer[r][c].g != gold_debayer[r][c].g)) { printf("Validation error: green pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].g, gold_debayer[r][c].g); success = 0; break; } if (ENDORSE(debayer[r][c].b != gold_debayer[r][c].b)) { printf("Validation error: blue pixel mismatch at row=%d, col=%d : " "test value = %u, golden value = %u\n\n", r, c, debayer[r][c].b, gold_debayer[r][c].b); success = 0; break; } } } if (success) { printf("\nValidation checks passed -- the test output matches the golden output.\n\n"); } */ // new error metric int r, c; double err; for (r = 0; r < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++r) { for (c = 0; c < WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD; ++c) { double pixel_error = 0.0; pixel_error += ENDORSE(((double) abs(debayer[r][c].r - gold_debayer[r][c].r)) / ((double) 65535)); pixel_error += ENDORSE(((double) abs(debayer[r][c].g - gold_debayer[r][c].g)) / ((double) 65535)); pixel_error += ENDORSE(((double) abs(debayer[r][c].b - gold_debayer[r][c].b)) / ((double) 65535)); err += (pixel_error / ((double) 3)) / ((double) ((WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD) * (WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD))); } } FILE *fp = fopen("err.txt", "wb"); assert(fp != NULL); fprintf(fp, "%.2f\n", err); fclose(fp); } #endif #ifdef WRITE_OUTPUT_TO_DISK printf("Writing output to %s/%s.\n", output_directory, output_filename); { const u16 output_channels = 3; write_image_file( (char *) debayer, output_filename, output_directory, WAMI_DEBAYER_IMG_NUM_COLS - 2*PAD, WAMI_DEBAYER_IMG_NUM_ROWS - 2*PAD, output_channels); } #endif FREE_AND_NULL(bayer); FREE_AND_NULL(debayer); #ifdef ENABLE_CORRECTNESS_CHECKING FREE_AND_NULL(gold_debayer); #endif return 0; }
int main(int argc, char *argv[]) { if (argc < 4) { fprintf(stderr, "[ERROR] Invalid arguments provided.\n\n"); fprintf(stderr, "Usage: %s [NUMBER OF THREADS] [WORDS] [INPUT FILE]\n\n", argv[0]); exit(0); } /* Timing */ STATS_INIT("kernel", "pthread_porter_stemming"); PRINT_STAT_STRING("abrv", "pthread_stemmer"); NTHREADS = atoi(argv[1]); int WORDS = atoi(argv[2]); PRINT_STAT_INT("threads", NTHREADS); FILE *f = fopen(argv[3], "r"); if (f == 0) { fprintf(stderr, "File %s not found\n", argv[1]); exit(1); } stem_list = (struct stemmer **)sirius_malloc(WORDS * sizeof(struct stemmer *)); int words = load_data(WORDS, stem_list, f); fclose(f); if (words < 0) goto out; PRINT_STAT_INT("words", words); tic(); int start, tids[NTHREADS]; pthread_t threads[NTHREADS]; pthread_attr_t attr; iterations = words / NTHREADS; sirius_pthread_attr_init(&attr); sirius_pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for (int i = 0; i < NTHREADS; i++) { tids[i] = i; sirius_pthread_create(&threads[i], &attr, stem_thread, (void *)&tids[i]); } for (int i = 0; i < NTHREADS; i++) { sirius_pthread_join(threads[i], NULL); } PRINT_STAT_DOUBLE("pthread_stemmer", toc()); STATS_END(); #ifdef TESTING f = fopen("../input/stem_porter.pthread", "w"); for (int i = 0; i < words; ++i) fprintf(f, "%s\n", stem_list[i]->b); fclose(f); #endif out: sirius_free(s); // free up allocated data for (int i = 0; i < words; i++) { sirius_free(stem_list[i]->b); sirius_free(stem_list[i]); } return 0; }
int main (int argc, char * argv[]) { APPROX int * frame; APPROX int * output; int i; int nFilterRowsFD = 9; int nFilterColsFD = 9; APPROX fltPixel_t FD[] = { 1, 3, 4, 5, 6, 5, 4, 3, 1, 3, 9, 12, 15, 18, 15, 12, 9, 3, 4, 12, 16, 20, 24, 20, 16, 12, 4, 5, 15, 20, 25, 30, 25, 20, 15, 5, 6, 18, 24, 30, 36, 30, 24, 18, 6, 5, 15, 20, 25, 30, 25, 20, 15, 5, 4, 12, 16, 20, 24, 20, 16, 12, 4, 3, 9, 12, 15, 18, 15, 12, 9, 3, 1, 3, 4, 5, 6, 5, 4, 3, 1 }; for (i = 0; i < nFilterRowsFD * nFilterColsFD; i++) // ACCEPT_FORBID { FD[i] /= (1024.0); } srand (time (NULL)); STATS_INIT (); PRINT_STAT_STRING ("kernel", "2d_convolution"); PRINT_STAT_INT ("rows", N); PRINT_STAT_INT ("columns", M); PRINT_STAT_INT ("num_frames", BATCH_SIZE); frame = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); output = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); if (!frame || !output) { fprintf(stderr, "ERROR: Allocation failed.\n"); exit(-1); } /* load image */ tic (); read_array_from_octave (ENDORSE(frame), N, M, FILENAME); PRINT_STAT_DOUBLE ("time_load_image", toc ()); /* Make BATCH_SIZE-1 copies */ tic (); for (i = 1; i < BATCH_SIZE; i++) // ACCEPT_FORBID { memcpy (&frame[i * M * N], frame, M * N * sizeof(algPixel_t)); } PRINT_STAT_DOUBLE ("time_copy", toc ()); /* Perform the 2D convolution */ tic (); accept_roi_begin(); for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { conv2d (&frame[i * M * N], &output[i * M * N], N, M, FD, 1.0, nFilterRowsFD, nFilterColsFD); } accept_roi_end(); PRINT_STAT_DOUBLE ("time_2d_convolution", toc ()); /* Write the results out to disk */ for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { char buffer [30]; sprintf (buffer, "2dconv_output.%d.mat", i); write_array_to_octave (ENDORSE(&output[i * M * N]), N, M, buffer, "output_" SIZE); } PRINT_STAT_STRING ("output_file", "2dconv_output." SIZE ".#.mat"); STATS_END (); free (output); free (frame); return 0; }
int main(int argc, char **argv) { float (*mu)[WAMI_GMM_IMG_NUM_COLS][WAMI_GMM_NUM_MODELS] = NULL; float (*sigma)[WAMI_GMM_IMG_NUM_COLS][WAMI_GMM_NUM_MODELS] = NULL; float (*weights)[WAMI_GMM_IMG_NUM_COLS][WAMI_GMM_NUM_MODELS] = NULL; u8 (*foreground)[WAMI_GMM_IMG_NUM_ROWS][WAMI_GMM_IMG_NUM_COLS] = NULL; #ifdef ENABLE_CORRECTNESS_CHECKING u8 (*golden_foreground)[WAMI_GMM_IMG_NUM_ROWS][WAMI_GMM_IMG_NUM_COLS] = NULL; u8 (*golden_eroded)[WAMI_GMM_IMG_NUM_COLS] = NULL; u8 (*eroded)[WAMI_GMM_IMG_NUM_COLS] = NULL; #endif u8 (*morph)[WAMI_GMM_IMG_NUM_COLS] = NULL; u16 (*frames)[WAMI_GMM_IMG_NUM_ROWS][WAMI_GMM_IMG_NUM_COLS] = NULL; int i; char *input_directory = NULL; const size_t num_pixels = WAMI_GMM_IMG_NUM_ROWS * WAMI_GMM_IMG_NUM_COLS; if (argc != 2) { fprintf(stderr, "%s <directory-containing-input-files>\n", argv[0]); exit(EXIT_FAILURE); } input_directory = argv[1]; mu = XMALLOC(sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS); sigma = XMALLOC(sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS); weights = XMALLOC(sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS); foreground = XMALLOC(sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES); #ifdef ENABLE_CORRECTNESS_CHECKING golden_foreground = XMALLOC(sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES); eroded = XMALLOC(sizeof(u8) * num_pixels); golden_eroded = XMALLOC(sizeof(u8) * num_pixels); #endif morph = XMALLOC(sizeof(u8) * num_pixels); frames = XMALLOC(sizeof(u16) * num_pixels * WAMI_GMM_NUM_FRAMES); memset(mu, 0, sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS); memset(sigma, 0, sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS); memset(weights, 0, sizeof(float) * num_pixels * WAMI_GMM_NUM_MODELS); memset(foreground, 0, sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES); memset(morph, 0, sizeof(u8) * num_pixels); memset(frames, 0, sizeof(u16) * num_pixels * WAMI_GMM_NUM_FRAMES); read_gmm_input_data( mu, sigma, weights, frames, input_filename, input_directory); #ifdef ENABLE_CORRECTNESS_CHECKING read_data_file( (char *) golden_foreground, golden_output_filename, input_directory, sizeof(u8) * num_pixels * WAMI_GMM_NUM_FRAMES); #endif printf("WAMI kernel 2 parameters:\n\n"); printf("Image width = %d pixels\n", WAMI_DEBAYER_IMG_NUM_COLS); printf("Image height = %d pixels\n", WAMI_DEBAYER_IMG_NUM_ROWS); printf("Number of input frames = %d\n", WAMI_GMM_NUM_FRAMES); printf("\nStarting WAMI kernel 3 (Gaussian Mixture Model / Change Detection).\n\n"); tic(); accept_roi_begin(); for (i = 0; i < WAMI_GMM_NUM_FRAMES; ++i) { wami_gmm( (u8 (*)[WAMI_GMM_IMG_NUM_COLS]) &foreground[i][0][0], mu, sigma, weights, (u16 (*)[WAMI_GMM_IMG_NUM_COLS]) &frames[i][0][0]); } accept_roi_end(); PRINT_STAT_DOUBLE("CPU time using func toc - ", toc()); printf ("\n"); #ifdef ENABLE_CORRECTNESS_CHECKING { int j, k, validation_warning = 0; double err; for (i = 0; i < WAMI_GMM_NUM_FRAMES; ++i) { int num_misclassified = 0, num_foreground = 0; double misclassification_rate = 0; wami_morpho_erode( eroded, (u8 (*)[WAMI_GMM_IMG_NUM_COLS]) &foreground[i][0][0]); wami_morpho_erode( golden_eroded, (u8 (*)[WAMI_GMM_IMG_NUM_COLS]) &golden_foreground[i][0][0]); printf("\nValidating frame %d output...\n", i); for (j = 0; j < WAMI_GMM_IMG_NUM_ROWS; ++j) { for (k = 0; k < WAMI_GMM_IMG_NUM_COLS; ++k) { if (eroded[j][k] != golden_eroded[j][k]) { ++num_misclassified; } if (golden_eroded[j][k] != 0) { ++num_foreground; } } } misclassification_rate = (100.0*num_misclassified)/num_foreground; err += (((double) num_misclassified) / ((double) num_foreground)) / ((double) WAMI_GMM_NUM_FRAMES); printf("\tMisclassified pixels: %d\n", num_misclassified); printf("\tGolden foreground pixels (after erosion): %d\n", num_foreground); printf("\tMisclassification rate relative to foreground: %f%%\n", misclassification_rate); if (misclassification_rate > 0.1) { validation_warning = 1; } } FILE *fp = fopen("err.txt", "wb"); assert(fp != NULL); fprintf(fp, "%.2f\n", err); fclose(fp); if (validation_warning) { printf("\nValidation warning: Misclassification rate appears high; check images.\n\n"); } else { printf("\nValidation checks passed.\n\n"); } } #endif #ifdef WRITE_OUTPUT_TO_DISK printf("Writing output to %s.\n", output_filename); { FILE *fp = fopen(output_filename, "wb"); assert(fp != NULL); assert(fwrite(foreground, sizeof(u8), num_pixels * WAMI_GMM_NUM_FRAMES, fp) == num_pixels * WAMI_GMM_NUM_FRAMES); fclose(fp); } #endif FREE_AND_NULL(mu); FREE_AND_NULL(sigma); FREE_AND_NULL(weights); FREE_AND_NULL(foreground); #ifdef ENABLE_CORRECTNESS_CHECKING FREE_AND_NULL(golden_foreground); FREE_AND_NULL(eroded); FREE_AND_NULL(golden_eroded); #endif FREE_AND_NULL(morph); FREE_AND_NULL(frames); return 0; }