예제 #1
0
파일: thread.cpp 프로젝트: Averroes/djinn
// Runs one forward pass of `net` on the caller-supplied input and copies the
// first output blob into `out`, reporting the network name, platform and
// latency through the STATS/PRINT_STAT macros.
//
//   in        input values, attached to the first input blob via
//             set_cpu_data()
//   in_size   not consulted by this function
//             NOTE(review): presumably the caller guarantees `in` holds at
//             least in_blobs[0]->count() floats -- confirm against callers.
//   out       destination buffer for the first output blob
//   out_size  number of floats the caller expects; must equal
//             out_blobs[0]->count(), otherwise LOG(FATAL) is raised
//   net       network to run
void SERVICE_fwd(float* in, int in_size, float* out, int out_size,
                 Net<float>* net) {
  string net_name = net->name();
  STATS_INIT("service", "DjiNN service inference");
  PRINT_STAT_STRING("network", net_name.c_str());

  if (Caffe::mode() == Caffe::CPU) {
    PRINT_STAT_STRING("platform", "cpu");
  } else {
    PRINT_STAT_STRING("platform", "gpu");
  }

  float loss;
  vector<Blob<float>*> in_blobs = net->input_blobs();

  tic();
  in_blobs[0]->set_cpu_data(in);
  vector<Blob<float>*> out_blobs = net->ForwardPrefilled(&loss);
  // Single-float copy kept inside the timed region.
  // NOTE(review): presumably this exists so that cpu_data() makes the result
  // available on the host before toc() is read -- confirm.
  memcpy(out, out_blobs[0]->cpu_data(), sizeof(float));

  PRINT_STAT_DOUBLE("inference latency", toc());

  STATS_END();

  // Only copy the full result when the caller's buffer size matches the blob.
  if (out_size != out_blobs[0]->count()) {
    LOG(FATAL) << "out_size (" << out_size
               << ") != out_blobs[0]->count() (" << out_blobs[0]->count()
               << ")";
  } else {
    memcpy(out, out_blobs[0]->cpu_data(), out_size * sizeof(float));
  }
}
예제 #2
0
파일: main.cpp 프로젝트: 123de7/lucida
/*
 * Benchmark driver for the pthread stemmer.
 *
 * Usage: <prog> [NUMBER OF THREADS] [WORDS] [INPUT FILE]
 *
 * Loads up to WORDS entries from the input file into the global stem_list,
 * starts NTHREADS joinable threads running stem_thread, waits for all of
 * them, and reports the elapsed time through the STATS macros.
 *
 * Returns 0 after a run (including when load_data reports a negative word
 * count); exits with status 1 on bad arguments or an unreadable input file.
 */
int main(int argc, char *argv[]) {
  if (argc < 4) {
    fprintf(stderr, "[ERROR] Invalid arguments provided.\n\n");
    fprintf(stderr, "Usage: %s [NUMBER OF THREADS] [WORDS] [INPUT FILE]\n\n", argv[0]);
    /* A usage error is a failure; report it to the caller as such. */
    exit(1);
  }

  /* Timing */
  STATS_INIT("kernel", "pthread_porter_stemming");
  PRINT_STAT_STRING("abrv", "pthread_stemmer");

  NTHREADS = atoi(argv[1]);
  int WORDS = atoi(argv[2]);
  /* NTHREADS sizes two arrays and is used as a divisor below, so it must be
   * positive; a negative WORDS would turn into a huge allocation size. */
  if (NTHREADS < 1 || WORDS < 0) {
    fprintf(stderr,
            "[ERROR] NUMBER OF THREADS must be positive and WORDS must not be negative.\n");
    exit(1);
  }
  PRINT_STAT_INT("threads", NTHREADS);

  FILE *f = fopen(argv[3], "r");
  if (f == NULL) {
    /* The input file is the third argument, not the first. */
    fprintf(stderr, "File %s not found\n", argv[3]);
    exit(1);
  }

  stem_list =
      (struct stemmer **)sirius_malloc(WORDS * sizeof(struct stemmer *));
  int words = load_data(WORDS, stem_list, f);
  fclose(f);

  /* The threaded section lives in its own block so that the failure path
   * never jumps into the scope of the variable-length arrays. */
  if (words >= 0) {
    PRINT_STAT_INT("words", words);

    tic();
    int tids[NTHREADS];
    pthread_t threads[NTHREADS];
    pthread_attr_t attr;
    /* NOTE(review): how stem_thread uses `iterations` (and whether the
     * remainder of words / NTHREADS is processed) is not visible here. */
    iterations = words / NTHREADS;

    sirius_pthread_attr_init(&attr);
    sirius_pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
    for (int i = 0; i < NTHREADS; i++) {
      tids[i] = i;
      sirius_pthread_create(&threads[i], &attr, stem_thread, (void *)&tids[i]);
    }

    for (int i = 0; i < NTHREADS; i++) {
      sirius_pthread_join(threads[i], NULL);
    }
    PRINT_STAT_DOUBLE("pthread_stemmer", toc());

    STATS_END();

#ifdef TESTING
    f = fopen("../input/stem_porter.pthread", "w");
    if (f == NULL) {
      fprintf(stderr, "Could not open ../input/stem_porter.pthread for writing\n");
    } else {
      for (int i = 0; i < words; ++i) fprintf(f, "%s\n", stem_list[i]->b);

      fclose(f);
    }
#endif
  }

  /* NOTE(review): `s` is not declared in this function; presumably a
   * file-scope buffer owned by the stemmer -- confirm. */
  sirius_free(s);

  // free up allocated data
  for (int i = 0; i < words; i++) {
    sirius_free(stem_list[i]->b);
    sirius_free(stem_list[i]);
  }
  /* The pointer table itself was allocated above and is released here too. */
  sirius_free(stem_list);

  return 0;
}
예제 #3
0
파일: main.c 프로젝트: hoangt/accept-apps
int main (int argc, char * argv[])
{
  APPROX int * frame;
  APPROX int * output;
  int i;

  int nFilterRowsFD = 9; 
  int nFilterColsFD = 9;
	  
  APPROX fltPixel_t FD[] =  {
			 1,   3,   4,   5,   6,   5,  4,    3,  1,
			 3,   9,  12,  15,  18,  15,  12,   9,  3,
			 4,  12,  16,  20,  24,  20,  16,  12,  4,
			 5,  15,  20,  25,  30,  25,  20,  15,  5,
			 6,  18,  24,  30,  36,  30,  24,  18,  6,
			 5,  15,  20,  25,  30,  25,  20,  15,  5,
			 4,  12,  16,  20,  24,  20,  16,  12,  4,
			 3,   9,  12,  15,  18,  15,  12,   9,  3,
			 1,   3,   4,   5,   6,   5,   4,   3,  1
  };

  for (i = 0; i < nFilterRowsFD * nFilterColsFD; i++) // ACCEPT_FORBID
  {
    FD[i] /= (1024.0);
  }

  srand (time (NULL));

  STATS_INIT ();
  PRINT_STAT_STRING ("kernel", "2d_convolution");
  PRINT_STAT_INT ("rows", N);
  PRINT_STAT_INT ("columns", M);
  PRINT_STAT_INT ("num_frames", BATCH_SIZE);

  frame = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t));
  output = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t));

  if (!frame || !output) {
    fprintf(stderr, "ERROR: Allocation failed.\n");
    exit(-1);
  }

  /* load image */
  tic ();
  read_array_from_octave (ENDORSE(frame), N, M, FILENAME);
  PRINT_STAT_DOUBLE ("time_load_image", toc ());

  /* Make BATCH_SIZE-1 copies */
  tic ();
  for (i = 1; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    memcpy (&frame[i * M * N], frame, M * N * sizeof(algPixel_t));
  }
  PRINT_STAT_DOUBLE ("time_copy", toc ());

  /* Perform the 2D convolution */
  tic ();
  accept_roi_begin();
  for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    conv2d (&frame[i * M * N], &output[i * M * N], N, M, FD, 1.0, nFilterRowsFD, nFilterColsFD);
  }
  accept_roi_end();
  PRINT_STAT_DOUBLE ("time_2d_convolution", toc ());

  /* Write the results out to disk */
  for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    char buffer [30];
    sprintf (buffer, "2dconv_output.%d.mat", i);
    write_array_to_octave (ENDORSE(&output[i * M * N]), N, M, buffer, "output_" SIZE);
  }
  PRINT_STAT_STRING ("output_file", "2dconv_output." SIZE ".#.mat");

  STATS_END ();

  free (output);
  free (frame);
  return 0;
}