/** * QR-based solver with Givens rotations. * @param[in] argc ARGument Counter * @param[in] argv ARGument Vector * @retval EXIT_SUCCESS Normal termination of the program * @retval EXIT_FAILURE Some error occurred */ int main(const int argc, char * const argv[]) { st_matrix_t M = st_matrix_load(stdin); const unsigned int size = st_matrix_size(M); double *eigenvalues; unsigned int i; stopwatch_t stopwatch = stopwatch_create("QR_solver"); (void) argc; (void) argv; /* Allocates resources */ SAFE_MALLOC(eigenvalues, double *, size * sizeof(double)); /* Computes eigenvalues */ stopwatch_start(stopwatch, 0, "Compute eigenvalues"); qr_iterative(M, eigenvalues, NULL); stopwatch_stop(stopwatch, 0); /* Prints results */ printf("Eigenvalues:\n["); for (i = 0; i < size - 1; ++i) { printf("%g, ", eigenvalues[i]); } printf("%g]\n", eigenvalues[i]); /* Frees memory */ st_matrix_delete(&M); free(eigenvalues); stopwatch_delete(&stopwatch); return EXIT_SUCCESS; }
/**
 * Allocates a profiler and attaches a freshly created stopwatch to it.
 *
 * The name pointer is stored as-is (not copied), so the caller must keep the
 * string alive for as long as the profiler is used.
 *
 * @param name Label stored in the profiler.
 * @return The new profiler, or NULL if the profiler or its stopwatch could
 *         not be allocated.
 */
PROFILER* profiler_create(char* name)
{
    PROFILER *profiler = malloc(sizeof *profiler);

    /* Never dereference a failed allocation. */
    if (profiler == NULL) {
        return NULL;
    }

    profiler->name = name;
    profiler->stopwatch = stopwatch_create();

    /* Do not hand out a half-built profiler; release it instead. */
    if (!profiler->stopwatch) {
        free(profiler);
        return NULL;
    }

    return profiler;
}
/**
 * Builds a profiler that owns a new stopwatch.
 *
 * Only the pointer to `name` is kept; the string itself is not duplicated.
 *
 * @param name Label recorded in the profiler.
 * @return Pointer to the new profiler, or NULL when either the profiler or
 *         its stopwatch cannot be created (nothing is leaked in that case).
 */
PROFILER* profiler_create(char* name)
{
    PROFILER *self = (PROFILER *) malloc(sizeof(PROFILER));

    if (self != NULL) {
        self->name = name;
        self->stopwatch = stopwatch_create();

        if (!self->stopwatch) {
            /* Stopwatch creation failed: undo the allocation. */
            free(self);
            self = NULL;
        }
    }

    return self;
}
/*
 * Subversion delta editor callback.
 *
 * Prepares the application of a text delta to the file described by
 * file_baton: a new temporary file becomes the delta target, the previously
 * recorded file for the same path (if any) becomes the delta source, and the
 * window handler produced by svn_txdelta_apply() is handed back through
 * handler / handler_baton. The path -> temp-file mapping in delta_hash is
 * updated and the node is flagged as needing a dump.
 *
 * base_checksum is not used. Always returns SVN_NO_ERROR.
 */
static svn_error_t *de_apply_textdelta(void *file_baton, const char *base_checksum, apr_pool_t *pool, svn_txdelta_window_handler_t *handler, void **handler_baton)
{
    apr_file_t *base_fp = NULL;
    apr_file_t *target_fp = NULL;
    svn_stream_t *base_stream;
    svn_stream_t *target_stream;
    de_node_baton_t *node = (de_node_baton_t *)file_baton;
    dump_options_t *opts = node->de_baton->opts;
#ifdef USE_TIMING
    stopwatch_t watch = stopwatch_create();
#endif
    char *previous;

    DEBUG_MSG("de_apply_textdelta(%s)\n", node->path);

    /* Target of the delta: a fresh temporary file inside the temp dir. */
    node->filename = apr_psprintf(node->pool, "%s/XXXXXX", opts->temp_dir);
    /* NOTE(review): the status of apr_file_mktemp() is not checked. */
    apr_file_mktemp(&target_fp, node->filename, APR_CREATE | APR_READ | APR_WRITE | APR_EXCL, pool);
    target_stream = svn_stream_from_aprfile2(target_fp, FALSE, pool);

    /* Source of the delta: the file last recorded for this path, if any. */
    previous = rhash_get(delta_hash, node->path, APR_HASH_KEY_STRING);
    if (previous != NULL) {
        /* NOTE(review): the status of apr_file_open() is not checked. */
        apr_file_open(&base_fp, previous, APR_READ, 0600, pool);
        base_stream = svn_stream_from_aprfile2(base_fp, FALSE, pool);
    } else {
        base_stream = svn_stream_empty(pool);
    }

    svn_txdelta_apply(base_stream, target_stream, node->md5sum, node->path, pool, handler, handler_baton);

    /* Remember the old file and make the new one current for this path. */
    node->old_filename = apr_pstrdup(node->pool, previous);
    rhash_set(delta_hash, node->path, APR_HASH_KEY_STRING, node->filename, RHASH_VAL_STRING);

    DEBUG_MSG("applied delta: %s -> %s\n", node->old_filename, node->filename);

    node->applied_delta = 1;
    node->dump_needed = 1;

#ifdef USE_TIMING
    tm_de_apply_textdelta += stopwatch_elapsed(&watch);
#endif
    return SVN_NO_ERROR;
}
void hello_msg_handler(struct q_node* qn) { static int if_LLDP_timer_exist = 0; if(unlikely(qn == NULL)) { log_warn("qn is NULL"); return; } static uint8_t is_timer_added; if(is_timer_added == 0) { //struct stopwatch * spw = stopwatch_create(1.0, &hello_msg_stopwatch_callback, PERMANENT, (void*)qn); is_timer_added = 1; } uint32_t xid = ntoh_32bit(qn->rx_packet + 4); struct ofp_header oh = of13_hello_msg_constructor(xid); if(qn->sw->is_hello_sent == 0) { send(qn->sw->sockfd, &oh, sizeof(oh), MSG_DONTWAIT); qn->sw->is_hello_sent = 1; } if(qn->sw->is_feature_request_sent == 0) { send_switch_features_request(qn); qn->sw->is_feature_request_sent = 1; } if(qn->sw->is_port_desc_request_sent == 0) { send_multipart_port_desc_request(qn); qn->sw->is_port_desc_request_sent = 1; } if(if_LLDP_timer_exist == 0) { struct stopwatch * spw = stopwatch_create(1.0, &send_LLDP_packet, PERMANENT, (void*)(qn->sw)); if_LLDP_timer_exist = 1; } log_info("Hello msg handling"); }
/* * vecsum is a microbenchmark which measures the speed of various ways of * reading from HDFS. It creates a file containing floating-point 'doubles', * and computes the sum of all the doubles several times. For some CPUs, * assembly optimizations are used for the summation (SSE, etc). */ int main(void) { int ret = 1; struct options *opts = NULL; struct local_data *cdata = NULL; struct libhdfs_data *ldata = NULL; struct stopwatch *watch = NULL; if (check_byte_size(VECSUM_CHUNK_SIZE, "VECSUM_CHUNK_SIZE") || check_byte_size(ZCR_READ_CHUNK_SIZE, "ZCR_READ_CHUNK_SIZE") || check_byte_size(NORMAL_READ_CHUNK_SIZE, "NORMAL_READ_CHUNK_SIZE")) { goto done; } opts = options_create(); if (!opts) goto done; if (opts->ty == VECSUM_LOCAL) { cdata = local_data_create(opts); if (!cdata) goto done; } else { ldata = libhdfs_data_create(opts); if (!ldata) goto done; } watch = stopwatch_create(); if (!watch) goto done; switch (opts->ty) { case VECSUM_LOCAL: vecsum_local(cdata, opts); ret = 0; break; case VECSUM_LIBHDFS: ret = vecsum_libhdfs(ldata, opts); break; case VECSUM_ZCR: ret = vecsum_zcr(ldata, opts); break; } if (ret) { fprintf(stderr, "vecsum failed with error %d\n", ret); goto done; } ret = 0; done: fprintf(stderr, "cleaning up...\n"); if (watch && (ret == 0)) { long long length = vecsum_length(opts, ldata); if (length >= 0) { stopwatch_stop(watch, length * opts->passes); } } if (cdata) local_data_free(cdata); if (ldata) libhdfs_data_free(ldata); if (opts) options_free(opts); return ret; }
void * avx512_fma(void *args_in) { /* Thread input */ struct thread_args *args; const int n_avx512 = VFMAPS_LATENCY; const __m512 add0 = _mm512_set1_ps((float) 1e-6); const __m512 mul0 = _mm512_set1_ps((float) (1. + 1e-6)); __m512 r[n_avx512]; // Declare as volatile to prevent removal during optimisation volatile float result; long r_max, i; int j; double runtime, flops; Stopwatch *t; /* Read inputs */ args = (struct thread_args *) args_in; t = stopwatch_create(args->timer_type); for (j = 0; j < n_avx512; j++) { r[j] = _mm512_set1_ps((float) j); } /* Add over registers r0-r4, multiply over r5-r9, and rely on pipelining, * OOO execution, and latency difference (3 vs 5 cycles) for 2x FLOPs */ runtime_flag = 0; r_max = 1; do { pthread_barrier_wait(&timer_barrier); t->start(t); for (i = 0; i < r_max; i++) { #pragma unroll(n_avx512) for (j = 0; j < n_avx512; j++) r[j] = _mm512_fmadd_ps(r[j], mul0, add0); } t->stop(t); runtime = t->runtime(t); /* Set runtime flag if any thread exceeds runtime limit */ if (runtime > args->min_runtime) { pthread_mutex_lock(&runtime_mutex); runtime_flag = 1; pthread_mutex_unlock(&runtime_mutex); } pthread_barrier_wait(&timer_barrier); if (!runtime_flag) r_max *= 2; } while (!runtime_flag); /* In order to prevent removal of the prior loop by optimisers, * sum the register values and save the result as volatile. */ for (j = 0; j < n_avx512; j++) r[0] = _mm512_add_ps(r[0], r[j]); result = reduce_AVX512(r[0]); /* (iter) * (16 instr / reg) * (2 flops / instr) * (n_avx512 reg / iter) */ flops = r_max * 16 * 2 * n_avx512 / runtime; /* Cleanup */ t->destroy(t); /* Thread output */ args->runtime = runtime; args->flops = flops; args->bw_load = 0.; args->bw_store = 0.; pthread_exit(NULL); }
/*
 * Allocates `size` bytes or terminates the program with an error message.
 * At least one byte is requested so a zero-sized request never yields NULL.
 */
static void *bench_alloc(size_t size)
{
    void *mem = malloc(size != 0 ? size : 1);

    if (mem == NULL) {
        fprintf(stderr, "Failed to allocate %zu bytes\n", size);
        exit(1);
    }
    return mem;
}

/*
 * CRC benchmark driver.
 *
 * Reads INPUT_FILE into memory (at most DATA_SIZE - 1 characters), then
 * times four ways of computing its CRC and prints each result with the
 * elapsed time: crcSlow(), a block-parallel CrcHash() whose per-block results
 * are merged with crc32_combine(), crcFast() and a single CrcHash() call.
 *
 * Returns 0 on success; exits with status 1 if the input file cannot be
 * opened or memory cannot be allocated.
 */
int main(void)
{
    const size_t capacity = DATA_SIZE;
    char *input_data = bench_alloc(capacity);
    FILE *input_file;

    input_file = fopen(INPUT_FILE, "r");
    if (input_file == NULL) {
        fprintf(stderr, "Failed to open %s\n", INPUT_FILE);
        free(input_data);
        exit(1);
    }

    /*
     * Append each chunk right after the previous one, advancing by the
     * number of characters actually read (fgets stops early at a newline),
     * and never write past the end of the buffer.
     */
    size_t input_data_len = 0;
    char buffer[BLOCK_SIZE];

    input_data[0] = '\0'; /* keeps the buffer a valid string for empty files */
    while (fgets(buffer, BLOCK_SIZE, input_file)) {
        size_t chunk_len = strlen(buffer);

        if (chunk_len >= capacity - input_data_len) {
            fprintf(stderr, "%s does not fit in %zu bytes; input truncated\n",
                    INPUT_FILE, capacity);
            break;
        }
        memcpy(input_data + input_data_len, buffer, chunk_len + 1);
        input_data_len += chunk_len;
    }
    fclose(input_file);

    /* Replace the trailing line feed (only if there is one) with \0 */
    if (input_data_len > 0 && input_data[input_data_len - 1] == '\n') {
        input_data[--input_data_len] = '\0';
    }

    struct stopwatch_t *sw = stopwatch_create();

    /*--------------------------------------------------------------------------------------*/
    stopwatch_init();
    stopwatch_start(sw);
    printf("crcSlow() 0x%X ", crcSlow(input_data, input_data_len));
    stopwatch_stop(sw);
    printf(" Time: %Lg\n", stopwatch_elapsed(sw));

    /*--------------------------------------------------------------------------------------*/
    stopwatch_start(sw);

    /* Full blocks are hashed in parallel; a shorter tail block is hashed
     * separately afterwards. */
    int input_blocks = (int)(input_data_len / BLOCK_SIZE);
    int rem = (int)(input_data_len % BLOCK_SIZE);
    int extra_blocks = (rem != 0) ? 1 : 0;
    int total_blocks = input_blocks + extra_blocks;
    int *result = bench_alloc((size_t)total_blocks * sizeof(int));

    omp_set_num_threads(16);

    unsigned int ans = 0;
    char *block_data = bench_alloc((size_t)input_blocks * (BLOCK_SIZE + 1));
    char *block_addr;
    int i;

    #pragma omp parallel for default(none) shared(input_blocks, input_data, result, block_data) private (i, block_addr)
    for (i = 0; i < input_blocks; ++i) {
        /* Each block gets its own NUL-terminated copy. */
        block_addr = block_data + (size_t)(BLOCK_SIZE + 1) * i;
        memcpy(block_addr, input_data + (size_t)BLOCK_SIZE * i, BLOCK_SIZE);
        block_addr[BLOCK_SIZE] = '\0';
        result[i] = CrcHash(block_addr, BLOCK_SIZE);
    }

    char *last_block_data = bench_alloc((size_t)rem + 1);
    if (extra_blocks == 1) {
        memcpy(last_block_data, input_data + (size_t)BLOCK_SIZE * input_blocks, (size_t)rem);
        last_block_data[rem] = '\0';
        result[input_blocks] = CrcHash(last_block_data, rem);
    }

    /* Merge the per-block CRCs in order. */
    for (i = 0; i < input_blocks; ++i) {
        ans = crc32_combine(ans, result[i], BLOCK_SIZE);
    }
    if (extra_blocks == 1) {
        ans = crc32_combine(ans, result[input_blocks], rem);
    }

    stopwatch_stop(sw);
    printf("Parallel() 0x%X Time: %Lg \n", ans, stopwatch_elapsed(sw));

    /*--------------------------------------------------------------------------------------*/
    crcInit();
    stopwatch_start(sw);
    printf("crcFast() 0x%X ", crcFast(input_data, input_data_len));
    stopwatch_stop(sw);
    printf(" Time: %Lg\n", stopwatch_elapsed(sw));

    /*--------------------------------------------------------------------------------------*/
    stopwatch_start(sw);
    printf("crc_intel() 0x%X ", CrcHash((const void*)input_data, input_data_len));
    stopwatch_stop(sw);
    printf(" Time: %Lg\n", stopwatch_elapsed(sw));

    /*--------------------------------------------------------------------------------------*/
    /*Cleanup*/
    /* The stopwatch is destroyed only after its last use. */
    stopwatch_destroy(sw);
    free(last_block_data);
    free(block_data);
    free(result);
    free(input_data);

    return 0;
}