void SERVICE_fwd(float* in, int in_size, float* out, int out_size, Net<float>* net) { string net_name = net->name(); STATS_INIT("service", "DjiNN service inference"); PRINT_STAT_STRING("network", net_name.c_str()); if (Caffe::mode() == Caffe::CPU) PRINT_STAT_STRING("platform", "cpu"); else PRINT_STAT_STRING("platform", "gpu"); float loss; vector<Blob<float>*> in_blobs = net->input_blobs(); tic(); in_blobs[0]->set_cpu_data(in); vector<Blob<float>*> out_blobs = net->ForwardPrefilled(&loss); memcpy(out, out_blobs[0]->cpu_data(), sizeof(float)); PRINT_STAT_DOUBLE("inference latency", toc()); STATS_END(); if (out_size != out_blobs[0]->count()) LOG(FATAL) << "out_size =! out_blobs[0]->count())"; else memcpy(out, out_blobs[0]->cpu_data(), out_size * sizeof(float)); }
/*
 * Program entry point: parse the command line, load the initial graph and the
 * action stream, print the initial statistics and, when built with OpenMP,
 * report the number of threads.
 *
 * NOTE(review): this definition is cut off in the visible source -- the
 * matching #endif and the closing brace of main() are not shown here.
 */
int main (const int argc, char *argv[]) {
  /* Fill in the graph/action-stream file names and batch parameters. */
  parse_args (argc, argv, &initial_graph_name, &action_stream_name, &batch_size, &nbatch);
  STATS_INIT();

  /* Load the graph arrays (off/ind/weight) and the action stream; graphmem
   * and actionmem receive pointers from the loader as well. */
  load_graph_and_action_stream (initial_graph_name, &nv, &ne,
                                (int64_t**)&off, (int64_t**)&ind, (int64_t**)&weight, (int64_t**)&graphmem,
                                action_stream_name, &naction, (int64_t**)&action, (int64_t**)&actionmem);

  print_initial_graph_stats (nv, ne, batch_size, nbatch, naction);
  BATCH_SIZE_CHECK();

#if defined(_OPENMP)
  /* Open a parallel region so omp_get_num_threads() reports the team size;
   * only the master thread prints it. */
  OMP(omp parallel) {
    OMP(omp master) PRINT_STAT_INT64 ("num_threads", (long int) omp_get_num_threads());
  }
int main (const int argc, char *argv[]) { parse_args (argc, argv, &initial_graph_name, &action_stream_name, &batch_size, &nbatch); STATS_INIT (); load_graph_and_action_stream (initial_graph_name, &nv, &ne, (int64_t **) & off, (int64_t **) & ind, (int64_t **) & weight, (int64_t **) & graphmem, action_stream_name, &naction, (int64_t **) & action, (int64_t **) & actionmem); print_initial_graph_stats (nv, ne, batch_size, nbatch, naction); BATCH_SIZE_CHECK (); /* Convert to STINGER */ tic (); S = stinger_new (); stinger_set_initial_edges (S, nv, 0, off, ind, weight, NULL, NULL, 0); PRINT_STAT_DOUBLE ("time_stinger", toc ()); fflush (stdout); int64_t numSteps = 3; int64_t src_dest_pair[2] = { 124, 381 }; tic (); int64_t size_intersect = st_conn_stinger (S, nv, ne, src_dest_pair, 1, numSteps); PRINT_STAT_DOUBLE ("time_st_conn_stinger", toc ()); PRINT_STAT_INT64 ("size_intersect", size_intersect); stinger_free_all (S); free (graphmem); free (actionmem); STATS_END (); }
int main (const int argc, char *argv[]) { parse_args (argc, argv, &initial_graph_name, &action_stream_name, &batch_size, &nbatch); STATS_INIT (); load_graph_and_action_stream (initial_graph_name, &nv, &ne, (int64_t **) & off, (int64_t **) & ind, (int64_t **) & weight, (int64_t **) & graphmem, action_stream_name, &naction, (int64_t **) & action, (int64_t **) & actionmem); print_initial_graph_stats (nv, ne, batch_size, nbatch, naction); BATCH_SIZE_CHECK (); int64_t *component_map = xmalloc (nv * sizeof (int64_t)); /* Convert to STINGER */ tic (); S = stinger_new (); stinger_set_initial_edges (S, nv, 0, off, ind, weight, NULL, NULL, 0); PRINT_STAT_DOUBLE ("time_stinger", toc ()); fflush (stdout); tic (); int64_t num_comp_end = connected_components_stinger (S, nv, ne, component_map, NULL, NULL, NULL, NULL, NULL); PRINT_STAT_DOUBLE ("time_components_tree", toc ()); PRINT_STAT_INT64 ("number_of_components", num_comp_end); stinger_free_all (S); free (graphmem); free (actionmem); STATS_END (); }
int main(int argc, char *argv[]) { if (argc < 4) { fprintf(stderr, "[ERROR] Invalid arguments provided.\n\n"); fprintf(stderr, "Usage: %s [NUMBER OF THREADS] [WORDS] [INPUT FILE]\n\n", argv[0]); exit(0); } /* Timing */ STATS_INIT("kernel", "pthread_porter_stemming"); PRINT_STAT_STRING("abrv", "pthread_stemmer"); NTHREADS = atoi(argv[1]); int WORDS = atoi(argv[2]); PRINT_STAT_INT("threads", NTHREADS); FILE *f = fopen(argv[3], "r"); if (f == 0) { fprintf(stderr, "File %s not found\n", argv[1]); exit(1); } stem_list = (struct stemmer **)sirius_malloc(WORDS * sizeof(struct stemmer *)); int words = load_data(WORDS, stem_list, f); fclose(f); if (words < 0) goto out; PRINT_STAT_INT("words", words); tic(); int start, tids[NTHREADS]; pthread_t threads[NTHREADS]; pthread_attr_t attr; iterations = words / NTHREADS; sirius_pthread_attr_init(&attr); sirius_pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); for (int i = 0; i < NTHREADS; i++) { tids[i] = i; sirius_pthread_create(&threads[i], &attr, stem_thread, (void *)&tids[i]); } for (int i = 0; i < NTHREADS; i++) { sirius_pthread_join(threads[i], NULL); } PRINT_STAT_DOUBLE("pthread_stemmer", toc()); STATS_END(); #ifdef TESTING f = fopen("../input/stem_porter.pthread", "w"); for (int i = 0; i < words; ++i) fprintf(f, "%s\n", stem_list[i]->b); fclose(f); #endif out: sirius_free(s); // free up allocated data for (int i = 0; i < words; i++) { sirius_free(stem_list[i]->b); sirius_free(stem_list[i]); } return 0; }
/*
 * Cipher init hook for the threaded AES-CTR implementation.
 *
 * ctx - cipher context; the per-context state (struct ssh_aes_ctr_ctx) is
 *       stored in its app data and created on first use.
 * key - AES key, or NULL to leave the key unchanged on this call.
 * iv  - initial counter block (AES_BLOCK_SIZE bytes), or NULL to leave it
 *       unchanged on this call.
 * enc - not used by this function.
 *
 * The key and IV may arrive in separate calls; c->state records which of them
 * have been seen. Once both are present the keystream queues are seeded, the
 * worker threads are started and the call blocks until queue 0 has reached
 * KQDRAINING. If both were already present on entry, the running threads are
 * cancelled and joined first and the state starts over from HAVE_NONE.
 *
 * Always returns 1. The return values of the pthread_* calls are not checked.
 */
static int
ssh_aes_ctr_init(EVP_CIPHER_CTX *ctx, const u_char *key, const u_char *iv, int enc)
{
	struct ssh_aes_ctr_ctx *c;
	int i;

	/* First use of this context: allocate the state and set up the
	 * per-queue mutex and condition variable. */
	if ((c = EVP_CIPHER_CTX_get_app_data(ctx)) == NULL) {
		c = xmalloc(sizeof(*c));
		c->state = HAVE_NONE;
		for (i = 0; i < NUMKQ; i++) {
			pthread_mutex_init(&c->q[i].lock, NULL);
			pthread_cond_init(&c->q[i].cond, NULL);
		}
		STATS_INIT(c->stats);
		EVP_CIPHER_CTX_set_app_data(ctx, c);
	}

	/* Already fully initialised: this is a re-init, stop the old workers. */
	if (c->state == (HAVE_KEY | HAVE_IV)) {
		/* Cancel pregen threads */
		for (i = 0; i < CIPHER_THREADS; i++)
			pthread_cancel(c->tid[i]);
		/* Wait for every worker to finish before the queues are reused. */
		for (i = 0; i < CIPHER_THREADS; i++)
			pthread_join(c->tid[i], NULL);
		/* Start over getting key & iv */
		c->state = HAVE_NONE;
	}

	if (key != NULL) {
		/* Key length is given in bytes by the context; AES wants bits. */
		AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8, &c->aes_ctx);
		c->state |= HAVE_KEY;
	}

	if (iv != NULL) {
		memcpy(ctx->iv, iv, AES_BLOCK_SIZE);
		c->state |= HAVE_IV;
	}

	if (c->state == (HAVE_KEY | HAVE_IV)) {
		/* Clear queues */
		/* Queue 0 starts at the IV and is marked KQINIT. */
		memcpy(c->q[0].ctr, ctx->iv, AES_BLOCK_SIZE);
		c->q[0].qstate = KQINIT;
		/* Queue i starts at IV + i * KQLEN and is marked empty. */
		for (i = 1; i < NUMKQ; i++) {
			memcpy(c->q[i].ctr, ctx->iv, AES_BLOCK_SIZE);
			ssh_ctr_add(c->q[i].ctr, i * KQLEN, AES_BLOCK_SIZE);
			c->q[i].qstate = KQEMPTY;
		}
		c->qidx = 0;
		c->ridx = 0;

		/* Start threads */
		for (i = 0; i < CIPHER_THREADS; i++) {
			debug("spawned a thread");
			pthread_create(&c->tid[i], NULL, thread_loop, c);
		}

		/* Block until queue 0 has been marked KQDRAINING. */
		pthread_mutex_lock(&c->q[0].lock);
		while (c->q[0].qstate != KQDRAINING)
			pthread_cond_wait(&c->q[0].cond, &c->q[0].lock);
		pthread_mutex_unlock(&c->q[0].lock);
	}
	return 1;
}
/*
 * The life of a pregen thread:
 *    Find empty keystream queues and fill them using their counter.
 *    When done, update counter for the next fill.
 *
 * x is the shared struct ssh_aes_ctr_ctx. The loop below has no exit
 * condition; the thread leaves it only through cancellation
 * (pthread_testcancel / pthread_cond_wait), so the trailing
 * "return NULL" is not reached in normal operation.
 */
static void *
thread_loop(void *x)
{
	AES_KEY key;
	STATS_STRUCT(stats);
	struct ssh_aes_ctr_ctx *c = x;
	struct kq *q;
	int i;
	int qidx;

	/* Threads stats on cancellation */
	STATS_INIT(stats);
#ifdef CIPHER_THREAD_STATS
	/* Paired with the pthread_cleanup_pop(1) at the end of the function. */
	pthread_cleanup_push(thread_loop_stats, &stats);
#endif

	/* Thread local copy of AES key */
	memcpy(&key, &c->aes_ctx, sizeof(key));

	/*
	 * Handle the special case of startup, one thread must fill
	 * the first KQ then mark it as draining. Lock held throughout.
	 *
	 * NOTE(review): the test reads c->tid[0], which the creating thread
	 * stores via pthread_create(); confirm that it is always written
	 * before this thread gets here.
	 */
	if (pthread_equal(pthread_self(), c->tid[0])) {
		q = &c->q[0];
		pthread_mutex_lock(&q->lock);
		if (q->qstate == KQINIT) {
			/* Encrypt KQLEN consecutive counter values into keys[]. */
			for (i = 0; i < KQLEN; i++) {
				AES_encrypt(q->ctr, q->keys[i], &key);
				ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
			}
			/* Skip the counter ranges owned by the other queues. */
			ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
			q->qstate = KQDRAINING;
			STATS_FILL(stats);
			/* Wake whoever is waiting on queue 0's condition. */
			pthread_cond_broadcast(&q->cond);
		}
		pthread_mutex_unlock(&q->lock);
	} else
		STATS_SKIP(stats);

	/*
	 * Normal case is to find empty queues and fill them, skipping over
	 * queues already filled by other threads and stopping to wait for
	 * a draining queue to become empty.
	 *
	 * Multiple threads may be waiting on a draining queue and awoken
	 * when empty.  The first thread to wake will mark it as filling,
	 * others will move on to fill, skip, or wait on the next queue.
	 */
	for (qidx = 1;; qidx = (qidx + 1) % NUMKQ) {
		/* Check if I was cancelled, also checked in cond_wait */
		pthread_testcancel();

		/* Lock queue and block if its draining */
		q = &c->q[qidx];
		pthread_mutex_lock(&q->lock);
		/* Handler is registered with the queue lock as its argument
		 * for the duration of the wait below. */
		pthread_cleanup_push(thread_loop_cleanup, &q->lock);
		while (q->qstate == KQDRAINING || q->qstate == KQINIT) {
			STATS_WAIT(stats);
			pthread_cond_wait(&q->cond, &q->lock);
		}
		/* Remove the handler without running it; the lock is still held. */
		pthread_cleanup_pop(0);

		/* If filling or full, somebody else got it, skip */
		if (q->qstate != KQEMPTY) {
			pthread_mutex_unlock(&q->lock);
			STATS_SKIP(stats);
			continue;
		}

		/*
		 * Empty, let's fill it.
		 * Queue lock is relinquished while we do this so others
		 * can see that it's being filled.
		 */
		q->qstate = KQFILLING;
		pthread_mutex_unlock(&q->lock);
		for (i = 0; i < KQLEN; i++) {
			AES_encrypt(q->ctr, q->keys[i], &key);
			ssh_ctr_inc(q->ctr, AES_BLOCK_SIZE);
		}

		/* Re-lock, mark full and signal consumer */
		pthread_mutex_lock(&q->lock);
		/* Advance the counter past the ranges of the other queues. */
		ssh_ctr_add(q->ctr, KQLEN * (NUMKQ - 1), AES_BLOCK_SIZE);
		q->qstate = KQFULL;
		STATS_FILL(stats);
		pthread_cond_signal(&q->cond);
		pthread_mutex_unlock(&q->lock);
	}

#ifdef CIPHER_THREAD_STATS
	/* Stats */
	pthread_cleanup_pop(1);
#endif

	return NULL;
}
int main (int argc, char * argv[]) { APPROX int * frame; APPROX int * output; int i; int nFilterRowsFD = 9; int nFilterColsFD = 9; APPROX fltPixel_t FD[] = { 1, 3, 4, 5, 6, 5, 4, 3, 1, 3, 9, 12, 15, 18, 15, 12, 9, 3, 4, 12, 16, 20, 24, 20, 16, 12, 4, 5, 15, 20, 25, 30, 25, 20, 15, 5, 6, 18, 24, 30, 36, 30, 24, 18, 6, 5, 15, 20, 25, 30, 25, 20, 15, 5, 4, 12, 16, 20, 24, 20, 16, 12, 4, 3, 9, 12, 15, 18, 15, 12, 9, 3, 1, 3, 4, 5, 6, 5, 4, 3, 1 }; for (i = 0; i < nFilterRowsFD * nFilterColsFD; i++) // ACCEPT_FORBID { FD[i] /= (1024.0); } srand (time (NULL)); STATS_INIT (); PRINT_STAT_STRING ("kernel", "2d_convolution"); PRINT_STAT_INT ("rows", N); PRINT_STAT_INT ("columns", M); PRINT_STAT_INT ("num_frames", BATCH_SIZE); frame = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); output = calloc (M * N * BATCH_SIZE, sizeof(algPixel_t)); if (!frame || !output) { fprintf(stderr, "ERROR: Allocation failed.\n"); exit(-1); } /* load image */ tic (); read_array_from_octave (ENDORSE(frame), N, M, FILENAME); PRINT_STAT_DOUBLE ("time_load_image", toc ()); /* Make BATCH_SIZE-1 copies */ tic (); for (i = 1; i < BATCH_SIZE; i++) // ACCEPT_FORBID { memcpy (&frame[i * M * N], frame, M * N * sizeof(algPixel_t)); } PRINT_STAT_DOUBLE ("time_copy", toc ()); /* Perform the 2D convolution */ tic (); accept_roi_begin(); for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { conv2d (&frame[i * M * N], &output[i * M * N], N, M, FD, 1.0, nFilterRowsFD, nFilterColsFD); } accept_roi_end(); PRINT_STAT_DOUBLE ("time_2d_convolution", toc ()); /* Write the results out to disk */ for (i = 0; i < BATCH_SIZE; i++) // ACCEPT_FORBID { char buffer [30]; sprintf (buffer, "2dconv_output.%d.mat", i); write_array_to_octave (ENDORSE(&output[i * M * N]), N, M, buffer, "output_" SIZE); } PRINT_STAT_STRING ("output_file", "2dconv_output." SIZE ".#.mat"); STATS_END (); free (output); free (frame); return 0; }