void SERVICE_fwd(float* in, int in_size, float* out, int out_size, Net<float>* net) { string net_name = net->name(); STATS_INIT("service", "DjiNN service inference"); PRINT_STAT_STRING("network", net_name.c_str()); if (Caffe::mode() == Caffe::CPU) PRINT_STAT_STRING("platform", "cpu"); else PRINT_STAT_STRING("platform", "gpu"); float loss; vector<Blob<float>*> in_blobs = net->input_blobs(); tic(); in_blobs[0]->set_cpu_data(in); vector<Blob<float>*> out_blobs = net->ForwardPrefilled(&loss); memcpy(out, out_blobs[0]->cpu_data(), sizeof(float)); PRINT_STAT_DOUBLE("inference latency", toc()); STATS_END(); if (out_size != out_blobs[0]->count()) LOG(FATAL) << "out_size =! out_blobs[0]->count())"; else memcpy(out, out_blobs[0]->cpu_data(), out_size * sizeof(float)); }
int main(int argc, char *argv[]) { if (argc < 4) { fprintf(stderr, "[ERROR] Invalid arguments provided.\n\n"); fprintf(stderr, "Usage: %s [NUMBER OF THREADS] [WORDS] [INPUT FILE]\n\n", argv[0]); exit(0); } /* Timing */ STATS_INIT("kernel", "pthread_porter_stemming"); PRINT_STAT_STRING("abrv", "pthread_stemmer"); NTHREADS = atoi(argv[1]); int WORDS = atoi(argv[2]); PRINT_STAT_INT("threads", NTHREADS); FILE *f = fopen(argv[3], "r"); if (f == 0) { fprintf(stderr, "File %s not found\n", argv[1]); exit(1); } stem_list = (struct stemmer **)sirius_malloc(WORDS * sizeof(struct stemmer *)); int words = load_data(WORDS, stem_list, f); fclose(f); if (words < 0) goto out; PRINT_STAT_INT("words", words); tic(); int start, tids[NTHREADS]; pthread_t threads[NTHREADS]; pthread_attr_t attr; iterations = words / NTHREADS; sirius_pthread_attr_init(&attr); sirius_pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for (int i = 0; i < NTHREADS; i++) { tids[i] = i; sirius_pthread_create(&threads[i], &attr, stem_thread, (void *)&tids[i]); } for (int i = 0; i < NTHREADS; i++) { sirius_pthread_join(threads[i], NULL); } PRINT_STAT_DOUBLE("pthread_stemmer", toc()); STATS_END(); #ifdef TESTING f = fopen("../input/stem_porter.pthread", "w"); for (int i = 0; i < words; ++i) fprintf(f, "%s\n", stem_list[i]->b); fclose(f); #endif out: sirius_free(s); // free up allocated data for (int i = 0; i < words; i++) { sirius_free(stem_list[i]->b); sirius_free(stem_list[i]); } return 0; }
int main (int argc, char * argv[]) { APPROX int * frame; APPROX int * output; int i; int nFilterRowsFD = 9; int nFilterColsFD = 9; APPROX fltPixel_t FD[] = { 1, 3, 4, 5, 6, 5, 4, 3, 1, 3, 9, 12, 15, 18, 15, 12, 9, 3, 4, 12, 16, 20, 24, 20, 16, 12, 4, 5, 15, 20, 25, 30, 25, 20, 15, 5, 6, 18, 24, 30, 36, 30, 24, 18, 6, 5, 15, 20, 25, 30, 25, 20, 15, 5, 4, 12, 16, 20, 24, 20, 16, 12, 4, 3, 9, 12, 15, 18, 15, 12, 9, 3, 1, 3, 4, 5, 6, 5, 4, 3, 1 }; for (i = 0; i < nFilterRowsFD * nFilterColsFD; i++) // ACCEPT_FORBID { FD[i] /= (1024.0); } srand (time (NULL)); STATS_INIT (); PRINT_STAT_STRING ("kernel", "2d_convolution"); PRINT_STAT_INT ("rows", N); PRINT_STAT_INT ("columns", M); PRINT_STAT_INT ("num_frames", BATCH_SIZE); frame = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); output = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); if (!frame || !output) { fprintf(stderr, "ERROR: Allocation failed.\n"); exit(-1); } /* load image */ tic (); read_array_from_octave (ENDORSE(frame), N, M, FILENAME); PRINT_STAT_DOUBLE ("time_load_image", toc ()); /* Make BATCH_SIZE-1 copies */ tic (); for (i = 1; i < BATCH_SIZE; i++) // ACCEPT_FORBID { memcpy (&frame[i * M * N], frame, M * N * sizeof(algPixel_t)); } PRINT_STAT_DOUBLE ("time_copy", toc ()); /* Perform the 2D convolution */ tic (); accept_roi_begin(); for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { conv2d (&frame[i * M * N], &output[i * M * N], N, M, FD, 1.0, nFilterRowsFD, nFilterColsFD); } accept_roi_end(); PRINT_STAT_DOUBLE ("time_2d_convolution", toc ()); /* Write the results out to disk */ for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { char buffer [30]; sprintf (buffer, "2dconv_output.%d.mat", i); write_array_to_octave (ENDORSE(&output[i * M * N]), N, M, buffer, "output_" SIZE); } PRINT_STAT_STRING ("output_file", "2dconv_output." SIZE ".#.mat"); STATS_END (); free (output); free (frame); return 0; }