Example #1
0
void SERVICE_fwd(float* in, int in_size, float* out, int out_size,
                 Net<float>* net) {
  string net_name = net->name();
  STATS_INIT("service", "DjiNN service inference");
  PRINT_STAT_STRING("network", net_name.c_str());

  if (Caffe::mode() == Caffe::CPU)
    PRINT_STAT_STRING("platform", "cpu");
  else
    PRINT_STAT_STRING("platform", "gpu");

  float loss;
  vector<Blob<float>*> in_blobs = net->input_blobs();

  tic();
  in_blobs[0]->set_cpu_data(in);
  vector<Blob<float>*> out_blobs = net->ForwardPrefilled(&loss);
  memcpy(out, out_blobs[0]->cpu_data(), sizeof(float));

  PRINT_STAT_DOUBLE("inference latency", toc());

  STATS_END();

  if (out_size != out_blobs[0]->count())
    LOG(FATAL) << "out_size =! out_blobs[0]->count())";
  else
    memcpy(out, out_blobs[0]->cpu_data(), out_size * sizeof(float));
}
Example #2
0
int
main (const int argc, char *argv[])
{
  parse_args (argc, argv, &initial_graph_name, &action_stream_name, &batch_size, &nbatch);
  STATS_INIT();

  load_graph_and_action_stream (initial_graph_name, &nv, &ne, (int64_t**)&off,
	      (int64_t**)&ind, (int64_t**)&weight, (int64_t**)&graphmem,
	      action_stream_name, &naction, (int64_t**)&action, (int64_t**)&actionmem);

  print_initial_graph_stats (nv, ne, batch_size, nbatch, naction);
  BATCH_SIZE_CHECK();

#if defined(_OPENMP)
  OMP(omp parallel)
  {
  OMP(omp master)
  PRINT_STAT_INT64 ("num_threads", (long int) omp_get_num_threads());
  }
Example #3
0
int
main (const int argc, char *argv[])
{
  parse_args (argc, argv, &initial_graph_name, &action_stream_name,
              &batch_size, &nbatch);
  STATS_INIT ();

  load_graph_and_action_stream (initial_graph_name, &nv, &ne,
                                (int64_t **) & off, (int64_t **) & ind,
                                (int64_t **) & weight,
                                (int64_t **) & graphmem, action_stream_name,
                                &naction, (int64_t **) & action,
                                (int64_t **) & actionmem);

  print_initial_graph_stats (nv, ne, batch_size, nbatch, naction);
  BATCH_SIZE_CHECK ();

  /* Convert to STINGER */
  tic ();
  S = stinger_new ();
  stinger_set_initial_edges (S, nv, 0, off, ind, weight, NULL, NULL, 0);
  PRINT_STAT_DOUBLE ("time_stinger", toc ());
  fflush (stdout);

  int64_t numSteps = 3;
  int64_t src_dest_pair[2] = { 124, 381 };

  tic ();
  int64_t size_intersect =
    st_conn_stinger (S, nv, ne, src_dest_pair, 1, numSteps);
  PRINT_STAT_DOUBLE ("time_st_conn_stinger", toc ());
  PRINT_STAT_INT64 ("size_intersect", size_intersect);

  stinger_free_all (S);
  free (graphmem);
  free (actionmem);
  STATS_END ();
}
Example #4
0
int
main (const int argc, char *argv[])
{
  parse_args (argc, argv, &initial_graph_name, &action_stream_name,
              &batch_size, &nbatch);
  STATS_INIT ();

  load_graph_and_action_stream (initial_graph_name, &nv, &ne,
                                (int64_t **) & off, (int64_t **) & ind,
                                (int64_t **) & weight,
                                (int64_t **) & graphmem, action_stream_name,
                                &naction, (int64_t **) & action,
                                (int64_t **) & actionmem);

  print_initial_graph_stats (nv, ne, batch_size, nbatch, naction);
  BATCH_SIZE_CHECK ();

  int64_t *component_map = xmalloc (nv * sizeof (int64_t));

  /* Convert to STINGER */
  tic ();
  S = stinger_new ();
  stinger_set_initial_edges (S, nv, 0, off, ind, weight, NULL, NULL, 0);
  PRINT_STAT_DOUBLE ("time_stinger", toc ());
  fflush (stdout);

  tic ();
  int64_t num_comp_end =
    connected_components_stinger (S, nv, ne, component_map, NULL, NULL, NULL,
                                  NULL, NULL);
  PRINT_STAT_DOUBLE ("time_components_tree", toc ());
  PRINT_STAT_INT64 ("number_of_components", num_comp_end);

  stinger_free_all (S);
  free (graphmem);
  free (actionmem);
  STATS_END ();
}
Example #5
0
int main(int argc, char *argv[]) {
  if (argc < 4) {
    fprintf(stderr, "[ERROR] Invalid arguments provided.\n\n");
    fprintf(stderr, "Usage: %s [NUMBER OF THREADS] [WORDS] [INPUT FILE]\n\n", argv[0]);
    exit(0);
  }

  /* Timing */
  STATS_INIT("kernel", "pthread_porter_stemming");
  PRINT_STAT_STRING("abrv", "pthread_stemmer");

  NTHREADS = atoi(argv[1]);
  int WORDS = atoi(argv[2]);
  PRINT_STAT_INT("threads", NTHREADS);
  FILE *f = fopen(argv[3], "r");
  if (f == 0) {
    fprintf(stderr, "File %s not found\n", argv[1]);
    exit(1);
  }

  stem_list =
      (struct stemmer **)sirius_malloc(WORDS * sizeof(struct stemmer *));
  int words = load_data(WORDS, stem_list, f);
  fclose(f);
 
 if (words < 0)
    goto out;

  PRINT_STAT_INT("words", words);

  tic();
  int start, tids[NTHREADS];
  pthread_t threads[NTHREADS];
  pthread_attr_t attr;
  iterations = words / NTHREADS;

  sirius_pthread_attr_init(&attr);
  sirius_pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
  for (int i = 0; i < NTHREADS; i++) {
    tids[i] = i;
    sirius_pthread_create(&threads[i], &attr, stem_thread, (void *)&tids[i]);
  }

  for (int i = 0; i < NTHREADS; i++) {
    sirius_pthread_join(threads[i], NULL);
  }
  PRINT_STAT_DOUBLE("pthread_stemmer", toc());

  STATS_END();

#ifdef TESTING
  f = fopen("../input/stem_porter.pthread", "w");

  for (int i = 0; i < words; ++i) fprintf(f, "%s\n", stem_list[i]->b);

  fclose(f);
#endif

out:
  sirius_free(s);

  // free up allocated data
  for (int i = 0; i < words; i++) {
    sirius_free(stem_list[i]->b);
    sirius_free(stem_list[i]);
  }

  return 0;
}
static int
ssh_aes_ctr_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv,
    int enc)
{
	struct ssh_aes_ctr_ctx *c;
	int i;

	if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) {
		c = xmalloc(sizeof(*c));

		c->state = HAVE_NONE;
		for (i = 0; i < NUMKQ; i++) {
			pthread_mutex_init(&c->q[i].lock, NULL);
			pthread_cond_init(&c->q[i].cond, NULL);
		}

		STATS_INIT(c->stats);
		EVP_CIPHER_CTX_set_app_data(ctx, c);
	}

	if (c->state == (HAVE_KEY | HAVE_IV)) {
		/* Cancel pregen threads */
		for (i = 0; i < CIPHER_THREADS; i++)
			pthread_cancel(c->tid[i]);
		for (i = 0; i < CIPHER_THREADS; i++)
			pthread_join(c->tid[i], NULL);
		/* Start over getting key & iv */
		c->state = HAVE_NONE;
	}

	if (key != NULL) {
		AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
		    &c->aes_ctx);
		c->state |= HAVE_KEY;
	}

	if (iv != NULL) {
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		c->state |= HAVE_IV;
	}

	if (c->state == (HAVE_KEY | HAVE_IV)) {
		/* Clear queues */
		memcpy(c->q[0].ctr, ctx->iv, AES_BLOCK_SIZE);
		c->q[0].qstate = KQINIT;
		for (i = 1; i < NUMKQ; i++) {
			memcpy(c->q[i].ctr, ctx->iv, AES_BLOCK_SIZE);
			ssh_ctr_add(c->q[i].ctr, i * KQLEN, AES_BLOCK_SIZE);
			c->q[i].qstate = KQEMPTY;
		}
		c->qidx = 0;
		c->ridx = 0;

		/* Start threads */
		for (i = 0; i < CIPHER_THREADS; i++) {
			debug("spawned a thread");
			pthread_create(&c->tid[i], NULL, thread_loop, c);
		}
		pthread_mutex_lock(&c->q[0].lock);
		while (c->q[0].qstate != KQDRAINING)
			pthread_cond_wait(&c->q[0].cond, &c->q[0].lock);
		pthread_mutex_unlock(&c->q[0].lock);
	}
	return 1;
}
/*
 * The life of a pregen thread:
 *    Find empty keystream queues and fill them using their counter.
 *    When done, update counter for the next fill.
 */
static void *
thread_loop(void *x)
{
	AES_KEY key;
	STATS_STRUCT(stats);
	struct ssh_aes_ctr_ctx *c = x;
	struct kq *q;
	int i;
	int qidx;

	/* Threads stats on cancellation */
	STATS_INIT(stats);
#ifdef CIPHER_THREAD_STATS
	pthread_cleanup_push(thread_loop_stats, &stats);
#endif

	/* Thread local copy of AES key */
	memcpy(&key, &c->aes_ctx, sizeof(key));

	/*
	 * Handle the special case of startup, one thread must fill
	 * the first KQ then mark it as draining. Lock held throughout.
	 */
	if (pthread_equal(pthread_self(), c->tid[0])) {
		q = &c->q[0];
		pthread_mutex_lock(&q->lock);
		if (q->qstate == KQINIT) {
			for (i = 0; i < KQLEN; i++) {
				AES_encrypt(q->ctr, q->keys[i], &key);
				ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
			}
			ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
			q->qstate = KQDRAINING;
			STATS_FILL(stats);
			pthread_cond_broadcast(&q->cond);
		}
		pthread_mutex_unlock(&q->lock);
	} else
		STATS_SKIP(stats);

	/*
	 * Normal case is to find empty queues and fill them, skipping over
	 * queues already filled by other threads and stopping to wait for
	 * a draining queue to become empty.
	 *
	 * Multiple threads may be waiting on a draining queue and awoken
	 * when empty.  The first thread to wake will mark it as filling,
	 * others will move on to fill, skip, or wait on the next queue.
	 */
	for (qidx = 1;; qidx = (qidx + 1) % NUMKQ) {
		/* Check if I was cancelled, also checked in cond_wait */
		pthread_testcancel();

		/* Lock queue and block if its draining */
		q = &c->q[qidx];
		pthread_mutex_lock(&q->lock);
		pthread_cleanup_push(thread_loop_cleanup, &q->lock);
		while (q->qstate == KQDRAINING || q->qstate == KQINIT) {
			STATS_WAIT(stats);
			pthread_cond_wait(&q->cond, &q->lock);
		}
		pthread_cleanup_pop(0);

		/* If filling or full, somebody else got it, skip */
		if (q->qstate != KQEMPTY) {
			pthread_mutex_unlock(&q->lock);
			STATS_SKIP(stats);
			continue;
		}

		/*
		 * Empty, let's fill it.
		 * Queue lock is relinquished while we do this so others
		 * can see that it's being filled.
		 */
		q->qstate = KQFILLING;
		pthread_mutex_unlock(&q->lock);
		for (i = 0; i < KQLEN; i++) {
			AES_encrypt(q->ctr, q->keys[i], &key);
			ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
		}

		/* Re-lock, mark full and signal consumer */
		pthread_mutex_lock(&q->lock);
		ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
		q->qstate = KQFULL;
		STATS_FILL(stats);
		pthread_cond_signal(&q->cond);
		pthread_mutex_unlock(&q->lock);
	}

#ifdef CIPHER_THREAD_STATS
	/* Stats */
	pthread_cleanup_pop(1);
#endif

	return NULL;
}
Example #8
0
int main (int argc, char * argv[])
{
  APPROX int * frame;
  APPROX int * output;
  int i;

  int nFilterRowsFD = 9; 
  int nFilterColsFD = 9;
	  
  APPROX fltPixel_t FD[] =  {
			 1,   3,   4,   5,   6,   5,  4,    3,  1,
			 3,   9,  12,  15,  18,  15,  12,   9,  3,
			 4,  12,  16,  20,  24,  20,  16,  12,  4,
			 5,  15,  20,  25,  30,  25,  20,  15,  5,
			 6,  18,  24,  30,  36,  30,  24,  18,  6,
			 5,  15,  20,  25,  30,  25,  20,  15,  5,
			 4,  12,  16,  20,  24,  20,  16,  12,  4,
			 3,   9,  12,  15,  18,  15,  12,   9,  3,
			 1,   3,   4,   5,   6,   5,   4,   3,  1
  };

  for (i = 0; i < nFilterRowsFD * nFilterColsFD; i++) // ACCEPT_FORBID
  {
    FD[i] /= (1024.0);
  }

  srand (time (NULL));

  STATS_INIT ();
  PRINT_STAT_STRING ("kernel", "2d_convolution");
  PRINT_STAT_INT ("rows", N);
  PRINT_STAT_INT ("columns", M);
  PRINT_STAT_INT ("num_frames", BATCH_SIZE);

  frame = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t));
  output = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t));

  if (!frame || !output) {
    fprintf(stderr, "ERROR: Allocation failed.\n");
    exit(-1);
  }

  /* load image */
  tic ();
  read_array_from_octave (ENDORSE(frame), N, M, FILENAME);
  PRINT_STAT_DOUBLE ("time_load_image", toc ());

  /* Make BATCH_SIZE-1 copies */
  tic ();
  for (i = 1; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    memcpy (&frame[i * M * N], frame, M * N * sizeof(algPixel_t));
  }
  PRINT_STAT_DOUBLE ("time_copy", toc ());

  /* Perform the 2D convolution */
  tic ();
  accept_roi_begin();
  for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    conv2d (&frame[i * M * N], &output[i * M * N], N, M, FD, 1.0, nFilterRowsFD, nFilterColsFD);
  }
  accept_roi_end();
  PRINT_STAT_DOUBLE ("time_2d_convolution", toc ());

  /* Write the results out to disk */
  for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID
  {
    char buffer [30];
    sprintf (buffer, "2dconv_output.%d.mat", i);
    write_array_to_octave (ENDORSE(&output[i * M * N]), N, M, buffer, "output_" SIZE);
  }
  PRINT_STAT_STRING ("output_file", "2dconv_output." SIZE ".#.mat");

  STATS_END ();

  free (output);
  free (frame);
  return 0;
}