int integrity_terminal_worker() { int length; int sockfd; struct client_transaction_t client_data; extern int errno; /* Connect to the client program. */ sockfd = connect_to_client(hostname, client_port); if (sockfd < 1) { LOG_ERROR_MESSAGE("connect_to_client() failed, thread exiting..."); printf("connect_to_client() failed, thread exiting..."); pthread_exit(NULL); } client_data.transaction = INTEGRITY; generate_input_data(client_data.transaction, &client_data.transaction_data, table_cardinality.warehouses); #ifdef DEBUG printf("executing transaction %c\n", transaction_short_name[client_data.transaction]); fflush(stdout); LOG_ERROR_MESSAGE("executing transaction %c", transaction_short_name[client_data.transaction]); #endif /* DEBUG */ length = send_transaction_data(sockfd, &client_data); length = receive_transaction_data(sockfd, &client_data); close(sockfd); return client_data.status; }
/*
 * Demo driver: push 1024 generated samples through the "blue" FIR filter
 * and print each filtered sample on its own line.
 */
int main()
{
	fir_filter_t filter;
	uint32_t n = 0;

	generate_input_data();

	/* Tap count is derived from the coefficient buffer's element count. */
	fir_init(&filter, blue_fir, blue_fir_buffer,
			sizeof(blue_fir_buffer) / sizeof(int8_t));

	while (n < 1024) {
		int8_t filtered = fir_push_sample(&filter, data[n]);
		printf("%d\r\n", filtered);
		n++;
	}

	return 0;
}
/*
 * Part 2: brute-force MD5 hashes of input+salt, in chunks, until all 8
 * positions of the password are filled.  A hash qualifies when its hex
 * digest starts with five zeros; hex digit 6 names the position (0-7)
 * and hex digit 7 supplies the character.  `result` is expected to be
 * pre-filled with '*' markers so each position is only written once.
 */
void part02(char* input, char* result)
{
	int password_length = 0;
	char input_data[INPUT_MAX_LENGTH * PASSWORD_SEARCH_CHUNK];
	unsigned char md5_hashes[16 * PASSWORD_SEARCH_CHUNK];
	int input_salt = 0;

	while (password_length < 8) {
		/* Build and hash a whole chunk of candidates in one go. */
		generate_input_data(input, input_salt, PASSWORD_SEARCH_CHUNK,
				input_data);
		input_salt += PASSWORD_SEARCH_CHUNK;
		generate_md5_hashes(input_data, PASSWORD_SEARCH_CHUNK, md5_hashes);

		int idx;
		for (idx = 0; idx < PASSWORD_SEARCH_CHUNK; idx++) {
			unsigned char *digest = md5_hashes + (idx * 16);

			/* Five leading hex zeros == two zero bytes plus a zero
			 * high nibble in the third byte. */
			if (digest[0] != 0 || digest[1] != 0 || digest[2] > 0x0f)
				continue;

			char md5_digest[32 + 1];
			digest_to_hexadecimal(digest, md5_digest);

			int location = md5_digest[5] - '0';
			if (location >= 0 && location < 8
					&& result[location] == '*') {
				result[location] = md5_digest[6];
				password_length += 1;
			}
		}
		print_cinematic_decryption(input, result, md5_hashes);
	}
}
/*
 * Part 1: the password is the 6th hex digit of each qualifying hash, in
 * the order found.  A hash qualifies when its hex digest starts with
 * five zeros (first two bytes zero, third byte's high nibble zero).
 *
 * Fix: the inner loop now stops appending once 8 characters have been
 * collected.  Previously a chunk containing more than (8 -
 * password_length) qualifying hashes would keep executing
 * `*(result + password_length++) = ...` past index 7, overrunning the
 * caller's 8-character result buffer.
 *
 * `result` receives exactly 8 characters; the caller provides the
 * terminating NUL (result is printed with %s below, so it is assumed
 * NUL-terminated at index 8 -- TODO confirm against caller).
 */
void part01(char* input, char* result)
{
	int password_length = 0;
	char input_data[INPUT_MAX_LENGTH * PASSWORD_SEARCH_CHUNK];
	unsigned char md5_hashes[16 * PASSWORD_SEARCH_CHUNK];
	int input_salt = 0;

	while (password_length < 8) {
		/* Build and hash a whole chunk of candidates in one go. */
		generate_input_data(input, input_salt, PASSWORD_SEARCH_CHUNK,
				input_data);
		input_salt += PASSWORD_SEARCH_CHUNK;
		generate_md5_hashes(input_data, PASSWORD_SEARCH_CHUNK, md5_hashes);

		int i;
		/* Bound by password_length so we never write past result[7]. */
		for (i = 0; i < PASSWORD_SEARCH_CHUNK && password_length < 8; i++) {
			unsigned char byte_0 = *(md5_hashes + (i * 16));
			unsigned char byte_1 = *(md5_hashes + (i * 16) + 1);
			unsigned char byte_2 = *(md5_hashes + (i * 16) + 2);
			if (byte_0 == 0 && byte_1 == 0 && byte_2 <= 0x0f) {
				char md5_digest[32 + 1];
				digest_to_hexadecimal(md5_hashes + (i * 16),
						md5_digest);
				/* 6th hex digit is the next password char. */
				*(result + password_length++) = md5_digest[5];
			}
		}
		print_cinematic_decryption(input, result, md5_hashes);
	}
	printf("\nFOUND: %s!\n\n", result);
}
/**
 * Play
 *
 * ALSA playback thread entry point.  Opens the configured playback PCM,
 * applies the stream parameters, then loops: generate (or read) one
 * period of audio and write it to the PCM until the generator signals
 * end-of-data or the configured period limit is reached.
 *
 * @bat: global test state (device names, input file, period limits).
 *
 * Never returns normally: exits via pthread_exit() with the address of
 * the file-scope `retval_play` (0 on success, 1 on failure).
 */
void *playback_alsa(struct bat *bat)
{
	int err = 0;
	struct snd_pcm_container sndpcm;	/* PCM handle + period buffer state */
	int size, offset, count;

	printf("Entering playback thread (ALSA).\n");
	memset(&sndpcm, 0, sizeof(sndpcm));

	/* A playback device is mandatory; open it, or bail out. */
	if (NULL != bat->playback.device) {
		err = snd_pcm_open(&sndpcm.handle, bat->playback.device,
				SND_PCM_STREAM_PLAYBACK, 0);
		if (err < 0) {
			loge(E_OPENPCMP, "%s(%d)", snd_strerror(err), err);
			goto fail_exit;
		}
	} else {
		loge(E_NOPCMP, "exit");
		goto fail_exit;
	}

	err = set_snd_pcm_params(bat, &sndpcm);
	if (err != 0)
		goto fail_exit;

	/* No input file: the generator synthesizes a sine wave, endlessly
	 * when sinus_duration is 0.  Otherwise play from the wav file. */
	if (bat->playback.file == NULL) {
		printf("Playing generated audio sine wave");
		bat->sinus_duration == 0 ? printf(" endlessly\n") : printf("\n");
	} else {
		printf("Playing input audio file: %s\n", bat->playback.file);
		bat->fp = fopen(bat->playback.file, "rb");
		if (bat->fp == NULL) {
			loge(E_OPENFILEC, "%s", bat->playback.file);
			goto fail_exit;
		}
	}

	count = sndpcm.period_bytes; /* playback buffer size */

#ifdef DEBUG
	/* Debug tap: mirror the generated audio to a scratch file. */
	FILE *sin_file;
	sin_file = fopen("/tmp/sin.wav", "wb");
#endif
	while (1) {
		offset = 0;
		/* Convert the period size from bytes to frames. */
		size = count * 8 / sndpcm.frame_bits;

		/* <0: error; >0: end of input data; 0: a period was filled. */
		err = generate_input_data(sndpcm, count, bat);
		if (err < 0)
			goto fail_exit;
		else if (err > 0)
			break;
#ifdef DEBUG
		/* NOTE(review): element size 4 with a frame count looks like it
		 * assumes 4 bytes per frame -- confirm against frame_bits. */
		fwrite(sndpcm.buffer, count * 8 / sndpcm.frame_bits, 4, sin_file);
#endif
		if (bat->period_limit
				&& bat->periods_played >= bat->periods_total)
			break;

		err = write_to_pcm(size, &sndpcm, offset);
		if (err == -1)
			goto fail_exit;
	}
#ifdef DEBUG
	fclose(sin_file);
#endif
	/* Let the PCM play out everything that was queued before closing. */
	snd_pcm_drain(sndpcm.handle);
	if (bat->fp)
		fclose(bat->fp);
	free(sndpcm.buffer);
	snd_pcm_close(sndpcm.handle);
	retval_play = 0;
	pthread_exit(&retval_play);

fail_exit:
	/* Error path: release whatever was acquired before the failure. */
	if (bat->fp)
		fclose(bat->fp);
	if (sndpcm.buffer)
		free(sndpcm.buffer);
	if (sndpcm.handle)
		snd_pcm_close(sndpcm.handle);
	retval_play = 1;
	pthread_exit(&retval_play);
}
/*
 * TPC-C terminal emulator thread (dbt2).
 *
 * Runs the standard terminal loop until `stop_time`: pick a transaction
 * from the configured mix, simulate keying time, execute the transaction
 * (directly against the database when built STANDALONE, otherwise via a
 * socket to the client program), log the response time to the mix log,
 * then simulate thinking time.
 *
 * @data: struct terminal_context_t * carrying this terminal's w_id/d_id.
 *
 * Shared state: terminal_state[][] counters (guarded per-cell by
 * mutex_terminal_state), and log_mix (guarded by mutex_mix_log).
 * Returns NULL (result is unused; threads are tracked via the mix log).
 */
void *terminal_worker(void *data)
{
#ifndef STANDALONE
	int length;
	int sockfd;
#endif /* NOT STANDALONE */
	struct terminal_context_t *tc;
	struct client_transaction_t client_data;
	double threshold;
	int keying_time;
	struct timespec thinking_time, rem;
	int mean_think_time; /* In milliseconds. */
	struct timeval rt0, rt1;
	double response_time;
	/* NOTE(review): `extern int errno;` is nonstandard (errno is often a
	 * macro); prefer relying on <errno.h> alone. */
	extern int errno;
	int rc;
	int local_seed;
#ifdef STANDALONE
	struct db_context_t dbc;
	struct transaction_queue_node_t *node =
			(struct transaction_queue_node_t *)
			malloc(sizeof(struct transaction_queue_node_t));
	extern char sname[32];
	extern int exiting;
#ifdef LIBPQ
	extern char postmaster_port[32];
#endif /* LIBPQ */
#ifdef LIBMYSQL
	extern char dbt2_mysql_port[32];
#endif /* LIBMYSQL */
#endif /* STANDALONE */

	tc = (struct terminal_context_t *) data;

	/* Each thread needs its own random seed on Linux; when no seed was
	 * given (-1), derive one per-thread from pid/tid/time. */
	if (seed == -1) {
		struct timeval tv;
		unsigned long junk; /* Purposely used uninitialized */
		gettimeofday(&tv, NULL);
		local_seed = getpid() ^ (int) pthread_self() ^ tv.tv_sec ^
				tv.tv_usec ^ junk;
	} else {
		local_seed = seed;
	}
	printf("seed: %u\n", local_seed);
	fflush(stdout);
	srand(local_seed);

#ifdef STANDALONE
	/* Connect straight to the database; the flavor of db_init depends on
	 * which driver this binary was built against. */
#ifdef ODBC
	db_init(sname, DB_USER, DB_PASS);
#endif /* ODBC */
#ifdef LIBPQ
	db_init(DB_NAME, sname, postmaster_port);
#endif /* LIBPQ */
#ifdef LIBMYSQL
	printf("CONNECTED TO DB |%s| |%s| |%s|\n", DB_NAME, sname,
			dbt2_mysql_port);
	db_init(sname, "", dbt2_mysql_port);
#endif /* LIBMYSQL */
	if (!exiting && connect_to_db(&dbc) != OK) {
		LOG_ERROR_MESSAGE("db_connect() error, terminating program");
		printf("cannot connect to database, exiting...\n");
		exit(1);
	}
#else
	/* Connect to the client program. */
	sockfd = connect_to_client(hostname, client_port);
	if (sockfd < 1) {
		LOG_ERROR_MESSAGE(
				"connect_to_client() failed, thread exiting...");
		printf("connect_to_client() failed, thread exiting...");
		pthread_exit(NULL);
	}
#endif /* STANDALONE */

	do {
		if (mode_altered == 1) {
			/*
			 * Altered mode: pick a fresh w_id and d_id for every
			 * transaction instead of keeping the terminal's fixed
			 * assignment.
			 */
			tc->w_id = w_id_min + get_random(w_id_max - w_id_min + 1);
			tc->d_id = get_random(table_cardinality.districts) + 1;
		}

		/*
		 * Determine which transaction to execute, minimum keying time,
		 * and mean think time, by comparing a random percentage to the
		 * cumulative thresholds of the configured mix.
		 */
		threshold = get_percentage();
		if (threshold < transaction_mix.new_order_threshold) {
			client_data.transaction = NEW_ORDER;
			keying_time = key_time.new_order;
			mean_think_time = think_time.new_order;
		} else if (transaction_mix.payment_actual != 0 &&
				threshold < transaction_mix.payment_threshold) {
			client_data.transaction = PAYMENT;
			keying_time = key_time.payment;
			mean_think_time = think_time.payment;
		} else if (transaction_mix.order_status_actual != 0 &&
				threshold < transaction_mix.order_status_threshold) {
			client_data.transaction = ORDER_STATUS;
			keying_time = key_time.order_status;
			mean_think_time = think_time.order_status;
		} else if (transaction_mix.delivery_actual != 0 &&
				threshold < transaction_mix.delivery_threshold) {
			client_data.transaction = DELIVERY;
			keying_time = key_time.delivery;
			mean_think_time = think_time.delivery;
		} else {
			client_data.transaction = STOCK_LEVEL;
			keying_time = key_time.stock_level;
			mean_think_time = think_time.stock_level;
		}
#ifdef DEBUG
		printf("executing transaction %c\n",
				transaction_short_name[client_data.transaction]);
		fflush(stdout);
		LOG_ERROR_MESSAGE("executing transaction %c",
				transaction_short_name[client_data.transaction]);
#endif /* DEBUG */

		/* Generate the input data for the transaction.  STOCK_LEVEL
		 * additionally needs the district id. */
		if (client_data.transaction != STOCK_LEVEL) {
			generate_input_data(client_data.transaction,
					&client_data.transaction_data, tc->w_id);
		} else {
			generate_input_data2(client_data.transaction,
					&client_data.transaction_data, tc->w_id,
					tc->d_id);
		}

		/* Keying time: count this terminal as KEYING while it sleeps
		 * for the fixed keying duration. */
		pthread_mutex_lock(
				&mutex_terminal_state[KEYING][client_data.transaction]);
		++terminal_state[KEYING][client_data.transaction];
		pthread_mutex_unlock(
				&mutex_terminal_state[KEYING][client_data.transaction]);
		if (time(NULL) < stop_time) {
			sleep(keying_time);
		} else {
			break;
		}
		pthread_mutex_lock(
				&mutex_terminal_state[KEYING][client_data.transaction]);
		--terminal_state[KEYING][client_data.transaction];
		pthread_mutex_unlock(
				&mutex_terminal_state[KEYING][client_data.transaction]);

		/* Note this thread is executing a transaction. */
		pthread_mutex_lock(
				&mutex_terminal_state[EXECUTING][client_data.transaction]);
		++terminal_state[EXECUTING][client_data.transaction];
		pthread_mutex_unlock(
				&mutex_terminal_state[EXECUTING][client_data.transaction]);

		/* Execute transaction and record the response time. */
		if (gettimeofday(&rt0, NULL) == -1) {
			perror("gettimeofday");
		}
#ifdef STANDALONE
		memcpy(&node->client_data, &client_data, sizeof(client_data));
		/*
		enqueue_transaction(node);
		node = get_node();
		if (node == NULL) {
			LOG_ERROR_MESSAGE("Cannot get a transaction node.\n");
		}
		*/
		rc = process_transaction(node->client_data.transaction, &dbc,
				&node->client_data.transaction_data);
		if (rc == ERROR) {
			LOG_ERROR_MESSAGE("process_transaction() error on %s",
					transaction_name[node->client_data.transaction]);
		}
#else /* STANDALONE */
		/* Round-trip through the client program; the status field of the
		 * reply is the transaction result. */
		length = send_transaction_data(sockfd, &client_data);
		length = receive_transaction_data(sockfd, &client_data);
		rc = client_data.status;
#endif /* STANDALONE */
		if (gettimeofday(&rt1, NULL) == -1) {
			perror("gettimeofday");
		}
		response_time = difftimeval(rt1, rt0);

		/* Mix log line: time, txn code (uppercase == rollback, 'E' ==
		 * error), response time, thread id. */
		pthread_mutex_lock(&mutex_mix_log);
		if (rc == OK) {
			fprintf(log_mix, "%d,%c,%f,%d\n", (int) time(NULL),
					transaction_short_name[client_data.transaction],
					response_time, (int) pthread_self());
		} else if (rc == STATUS_ROLLBACK) {
			fprintf(log_mix, "%d,%c,%f,%d\n", (int) time(NULL),
					toupper(transaction_short_name[client_data.transaction]),
					response_time, (int) pthread_self());
		} else if (rc == ERROR) {
			fprintf(log_mix, "%d,%c,%f,%d\n", (int) time(NULL), 'E',
					response_time, (int) pthread_self());
		}
		fflush(log_mix);
		pthread_mutex_unlock(&mutex_mix_log);

		pthread_mutex_lock(
				&mutex_terminal_state[EXECUTING][client_data.transaction]);
		--terminal_state[EXECUTING][client_data.transaction];
		pthread_mutex_unlock(
				&mutex_terminal_state[EXECUTING][client_data.transaction]);

		/* Thinking time: sample a think time (in ms), split it into
		 * tv_sec/tv_nsec, and nanosleep, resuming after EINTR. */
		pthread_mutex_lock(
				&mutex_terminal_state[THINKING][client_data.transaction]);
		++terminal_state[THINKING][client_data.transaction];
		pthread_mutex_unlock(
				&mutex_terminal_state[THINKING][client_data.transaction]);
		if (time(NULL) < stop_time) {
			/* tv_nsec temporarily holds milliseconds here. */
			thinking_time.tv_nsec = (long) get_think_time(mean_think_time);
			thinking_time.tv_sec = (time_t) (thinking_time.tv_nsec / 1000);
			thinking_time.tv_nsec =
					(thinking_time.tv_nsec % 1000) * 1000000;
			while (nanosleep(&thinking_time, &rem) == -1) {
				if (errno == EINTR) {
					/* Interrupted: sleep for the remainder. */
					memcpy(&thinking_time, &rem,
							sizeof(struct timespec));
				} else {
					/* NOTE(review): %ls is a wide-string specifier;
					 * tv_nsec is a long, so this likely should be
					 * %ld (and %d for time_t is also suspect). */
					LOG_ERROR_MESSAGE(
							"sleep time invalid %d s %ls ns",
							thinking_time.tv_sec,
							thinking_time.tv_nsec);
					break;
				}
			}
		}
		pthread_mutex_lock(
				&mutex_terminal_state[THINKING][client_data.transaction]);
		--terminal_state[THINKING][client_data.transaction];
		pthread_mutex_unlock(
				&mutex_terminal_state[THINKING][client_data.transaction]);
	} while (time(NULL) < stop_time);

#ifdef STANDALONE
	/*recycle_node(node);*/
#endif /* STANDALONE */

	/* Note when each thread has exited. */
	pthread_mutex_lock(&mutex_mix_log);
	fprintf(log_mix, "%d,TERMINATED,%d\n", (int) time(NULL),
			(int) pthread_self());
	fflush(log_mix);
	pthread_mutex_unlock(&mutex_mix_log);

	return NULL; /* keep the compiler quiet */
}
static int write_to_pcm_loop(struct pcm_container *sndpcm, struct bat *bat) { int err; int bytes = sndpcm->period_bytes; /* playback buffer size */ int frames = bytes * 8 / sndpcm->frame_bits; /* frame count */ FILE *fp = NULL; struct wav_container wav; int bytes_total = 0; if (bat->debugplay) { fp = fopen(bat->debugplay, "wb"); if (fp == NULL) { fprintf(bat->err, _("Cannot open file for capture: ")); fprintf(bat->err, _("%s %d\n"), bat->debugplay, -errno); return -errno; } /* leave space for wav header */ err = fseek(fp, sizeof(wav), SEEK_SET); if (err != 0) { fprintf(bat->err, _("Seek file error: %d %d\n"), err, -errno); return -errno; } } while (1) { err = generate_input_data(sndpcm, bytes, bat); if (err < 0) return err; else if (err > 0) break; if (bat->debugplay) { err = fwrite(sndpcm->buffer, 1, bytes, fp); if (err != bytes) { fprintf(bat->err, _("Write file error: ")); fprintf(bat->err, _("%s(%d)\n"), snd_strerror(err), err); return -EIO; } bytes_total += bytes; } bat->periods_played++; if (bat->period_is_limited && bat->periods_played >= bat->periods_total) break; err = write_to_pcm(sndpcm, frames, bat); if (err != 0) return err; } if (bat->debugplay) { /* update wav header */ prepare_wav_info(&wav, bat); wav.chunk.length = bytes_total; wav.header.length = (wav.chunk.length) + sizeof(wav.chunk) + sizeof(wav.format) + sizeof(wav.header) - 8; rewind(fp); err = write_wav_header(fp, &wav, bat); if (err != 0) { fprintf(bat->err, _("Write file error: %s %s(%d)\n"), bat->debugplay, snd_strerror(err), err); return err; } fclose(fp); } snd_pcm_drain(sndpcm->handle); return 0; }
/**
 * Build a single convolution layer as a GPU workload, execute it through
 * the device interface, and compare the result with a CPU reference
 * implementation (convolve_ref).  Failures are recorded via gtest
 * EXPECT_* macros; the function itself always returns true.
 *
 * Output dimensions follow a "valid" (no-padding) convolution:
 * out = (in - kernel) / stride + 1 per axis.
 */
bool run_convolve_test(
    const nn_device_interface_0_t &di,
    uint_least32_t num_output_feature_maps,
    uint_least32_t num_input_feature_maps,
    uint_least32_t input_feature_map_width,
    uint_least32_t input_feature_map_height,
    uint_least32_t kernel_width,
    uint_least32_t kernel_height,
    uint_least32_t kernel_stride_x,
    uint_least32_t kernel_stride_y,
    uint_least32_t num_batches,
    NN_ACTIVATION_FUNCTION activation_function )
{
    // Input generation (allocated by the helper -- pointer passed by
    // reference, presumably; TODO confirm against generate_input_data).
    float *input = nullptr;
    generate_input_data( input, input_feature_map_width,
                         input_feature_map_height, num_input_feature_maps,
                         num_batches );

    // Generate Filter Data
    float *filters = nullptr;
    generate_filter_data( filters,
                          kernel_width,
                          kernel_height,
                          num_input_feature_maps,
                          num_output_feature_maps );

    uint_least32_t output_width = ( ( input_feature_map_width - kernel_width ) / kernel_stride_x + 1 );
    uint_least32_t output_height = ( ( input_feature_map_height - kernel_height ) / kernel_stride_y + 1 );
    uint_least32_t output_depth = num_output_feature_maps;

    // cpu_outputs and gpu_outputs are filled in with biases
    // so as such biases do not exist as separate entity
    // NOTE(review): init_output_val is never used below.
    float init_output_val = 0.0; //No biases in output then output is initialized with zeros

    float *biases = nullptr;
    float *cpu_outputs = nullptr;
    float *gpu_outputs = nullptr;

    // Biases exists as separate entity (each neuron got it own bias value)
    init_data( biases, output_width * output_height * output_depth, 1.0f );
    init_data( gpu_outputs, output_width * output_height * output_depth * num_batches, 0.0f );
    init_data( cpu_outputs, output_width * output_height * output_depth * num_batches, 0.0f );

    // Map the requested activation enum onto the CPU reference's
    // function pointer so both paths apply the same nonlinearity.
    fp_func_activ activ_func = nullptr;
    switch( activation_function )
    {
    case NN_ACTIVATION_FUNCTION_NONE:
        activ_func = none;
        break;
    case NN_ACTIVATION_FUNCTION_TANH:
        activ_func = mytanh;
        break;
    case NN_ACTIVATION_FUNCTION_RELU:
        activ_func = relu;
        break;
    case NN_ACTIVATION_FUNCTION_SOFTPLUS:
        activ_func = softplus;
        break;
    default:
        printf( "Error: Not supported activation function chosen: %d\n", activation_function );
        assert( 0 );
        break;
    }

    // Full-tensor views for the reference convolution (end coords are
    // inclusive, hence the -1s).
    nn_workload_data_coords_t conv_input_view_begin( 0, 0, 0, 0, 0, 0 );
    nn_workload_data_coords_t conv_input_view_end( num_batches - 1, input_feature_map_width - 1, input_feature_map_height - 1, num_input_feature_maps - 1, 0, 0 );
    nn_workload_data_coords_t conv_output_view_begin( 0, 0, 0, 0, 0, 0 );
    nn_workload_data_coords_t conv_output_view_end( num_batches - 1, output_width - 1, output_height - 1, output_depth - 1, 0, 0 );

    // Run reference convolving (needed for comparison)
    convolve_ref( activ_func,
                  cpu_outputs,
                  input,
                  filters,
                  biases,
                  conv_output_view_begin,
                  conv_output_view_end,
                  conv_input_view_begin,
                  conv_input_view_end,
                  output_width,
                  output_height,
                  output_depth,
                  input_feature_map_width,
                  input_feature_map_height,
                  num_input_feature_maps,
                  kernel_width,
                  kernel_height,
                  num_input_feature_maps,
                  kernel_stride_x,
                  kernel_stride_y,
                  0, // center offset x
                  0, // center offset y
                  num_batches );

    // First workload item is input one (entity producing input data)
    nn_gpu_workload_item *input_workload_item = nullptr;
    initialize_input_workload_item( input_workload_item);

    // Specify layout
    nn_workload_data_layout_t input_output_weights_layout = {
        { 0, 0, 0, 0, 0, 0 }, // tile in log2(size)
        { 0, 0, 0, 0, 0, 0 }, // alignment
        { NN_DATA_COORD_x, NN_DATA_COORD_y, NN_DATA_COORD_z, NN_DATA_COORD_p, NN_DATA_COORD_n, NN_DATA_COORD_q }, // ordering
        NN_DATATYPE_FLOAT
    };

    // specify dimensions of input, output and weights
    // NOTE(review): input_coords is not referenced below.
    nn_workload_data_coords_t input_coords = { num_batches, input_feature_map_width, input_feature_map_height, num_input_feature_maps, 1, 1 };
    nn_workload_data_coords_t output_coords = { num_batches, output_width, output_height, num_output_feature_maps, 1, 1 };
    nn_workload_data_coords_t weight_coords = { 1, kernel_width, kernel_height, num_input_feature_maps, num_output_feature_maps, 1 };

    // Now create convolution workload_item giving as input input_workload_item
    nn_gpu_workload_item *convolution_workload_item = nullptr;
    initialize_layer_workload_item( convolution_workload_item, input_workload_item, input_output_weights_layout, output_coords);
    convolution_workload_item->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
    convolution_workload_item->arguments.forward_convolution.padding = NN_PADDING_MODE_NONE;
    convolution_workload_item->arguments.forward_convolution.stride[0] = kernel_stride_x;
    convolution_workload_item->arguments.forward_convolution.stride[1] = kernel_stride_y;
    convolution_workload_item->arguments.forward_convolution.center_offset[0] = 0;
    convolution_workload_item->arguments.forward_convolution.center_offset[1] = 0;
    convolution_workload_item->arguments.forward_convolution.activation.function = activation_function;

    // Copy the generated filters into a freshly-allocated workload data
    // object owned by the convolution item, then drop the temporary.
    nn::nn_workload_data_t< float > *weight_data = new nn::nn_workload_data_t< float >( filters, weight_coords, input_output_weights_layout );
    convolution_workload_item->arguments.forward_convolution.weights = new nn::nn_workload_data_t< float >( weight_coords, input_output_weights_layout );
    nn_workload_data_copy( weight_data, convolution_workload_item->arguments.forward_convolution.weights );
    delete weight_data; //release temporary buffers

    // Same wrap-copy-release dance for the biases.
    nn_workload_data_coords_t bias_coords = { 1, 1, 1, 1, num_output_feature_maps, 1 };
    nn::nn_workload_data_t< float > *bias_data = new nn::nn_workload_data_t< float >(biases, bias_coords, input_output_weights_layout);
    convolution_workload_item->arguments.forward_convolution.biases = new nn::nn_workload_data_t< float >( bias_coords, input_output_weights_layout );
    nn_workload_data_copy( bias_data, convolution_workload_item->arguments.forward_convolution.biases );
    delete bias_data; //release temporary buffers

    // Now create output workload_item giving convolution workload item as precedessor
    nn_gpu_workload_item *output_workload_item = nullptr;
    initialize_output_workload_item( output_workload_item, convolution_workload_item );

    // Make a workload using two above created workload_items
    nn_gpu_workload *gpu_workload = nullptr;
    create_workload_using_workload_items( di, gpu_workload, num_batches, NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH, NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH, input_workload_item, convolution_workload_item, output_workload_item );

    using io_data = std::unique_ptr<nn::data<float, 0>>;
    io_data execute_inputs[1];
    io_data execute_outputs[1];

    // specify dimensions of input, output and weights
    size_t execution_input_size[4] = {input_feature_map_width, input_feature_map_height, num_input_feature_maps, num_batches};
    size_t execution_output_size[4] = {output_width, output_height, num_output_feature_maps, num_batches};

    execute_inputs[0] = io_data(new nn::data<float, 0>(input, execution_input_size, 4));
    execute_outputs[0] = io_data(new nn::data<float, 0>(gpu_outputs, execution_output_size, 4));

    EXPECT_EQ( NN_API_STATUS_OK, di.workload_execute_function( ( nn_workload * )gpu_workload, ( void ** )execute_inputs, ( void ** )execute_outputs, nullptr ) );

    // Compare CPU(reference) output with the one returned by GPU
    EXPECT_EQ( true, verify_output( execute_outputs[0], cpu_outputs ) );

    EXPECT_EQ( NN_API_STATUS_OK, di.workload_delete_function(( nn_workload * )gpu_workload));

    // Buffers were (presumably) allocated aligned by the generators, so
    // the platform-matching free is used.
#ifdef __linux__
    free( cpu_outputs );
    cpu_outputs = nullptr;
    free( gpu_outputs );
    gpu_outputs = nullptr;
    free( filters );
    filters = nullptr;
    free( biases );
    biases = nullptr;
    free( input );
    input = nullptr;
#else
    _aligned_free( cpu_outputs );
    cpu_outputs = nullptr;
    _aligned_free( gpu_outputs );
    gpu_outputs = nullptr;
    _aligned_free( filters );
    filters = nullptr;
    _aligned_free( biases );
    biases = nullptr;
    _aligned_free( input );
    input = nullptr;
#endif //__linux__

    return true;
}
/**
 * Build a softmax layer as a GPU workload, execute it through the device
 * interface, and compare the result with the CPU reference
 * (softmax_ref).  Failures are recorded via gtest EXPECT_* macros; the
 * function itself always returns true.
 */
bool run_softmax_test( const nn_device_interface_0_t &di,
                       uint_least32_t num_samples,
                       uint_least32_t num_batches) // length of input to be processed (softmax normalize)
{
    // Input generation (input feature maps to have pooling run on it).
    // The helper allocates the buffer -- pointer passed by reference,
    // presumably; TODO confirm against generate_input_data.
    float *input = nullptr;
    generate_input_data( input, num_samples, 1, 1, num_batches );

    // length of output is the same as input
    float *cpu_outputs;
    init_data( cpu_outputs, num_samples * num_batches, 0.0f );

    float *gpu_outputs;
    init_data( gpu_outputs, num_samples * num_batches, 0.0f );

    // CPU reference result to compare the GPU output against.
    softmax_ref( cpu_outputs, input, num_samples, num_batches );

    // First workload item is input one (entity producing input data)
    nn_gpu_workload_item *input_workload_item = nullptr;
    initialize_input_workload_item( input_workload_item);

    // Specify layout of softmax workload
    nn_workload_data_layout_t workload_layout = {
        { 0, 0, 0, 0, 0, 0 }, // tile in log2(size)
        { 0, 0, 0, 0, 0, 0 }, // alignment
        { NN_DATA_COORD_x, NN_DATA_COORD_y, NN_DATA_COORD_z, NN_DATA_COORD_p, NN_DATA_COORD_n, NN_DATA_COORD_q }, // ordering
        NN_DATATYPE_FLOAT
    };

    // specify dimensions of input, output
    nn_workload_data_coords_t workload_coords = { num_batches, num_samples, 1, 1, 1, 1 };

    size_t output_coords[2] = {num_samples, num_batches};

    // Now create softmax workload_item giving as input input_workload_item
    nn_gpu_workload_item *softmax_workload_item = nullptr;
    initialize_layer_workload_item( softmax_workload_item, input_workload_item, workload_layout, workload_coords );
    softmax_workload_item->type = NN_WORK_ITEM_TYPE_SOFTMAX;

    // Now create output workload_item giving softmax workload item as precedessor
    nn_gpu_workload_item *output_workload_item = nullptr;
    initialize_output_workload_item( output_workload_item, softmax_workload_item );

    // Make a workload using two above created workload_items
    nn_gpu_workload *gpu_workload = nullptr;
    create_workload_using_workload_items( di, gpu_workload, num_batches, NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH, NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH, input_workload_item, softmax_workload_item, output_workload_item );

    using io_data = std::unique_ptr<nn::data<float, 0>>;
    io_data execute_inputs[1];
    io_data execute_outputs[1];

    execute_inputs[0] = io_data(new nn::data<float, 0>(input, output_coords, 2));
    execute_outputs[0] = io_data(new nn::data<float, 0>(gpu_outputs, output_coords, 2));

    EXPECT_EQ( NN_API_STATUS_OK, di.workload_execute_function( ( nn_workload * )gpu_workload, ( void ** )execute_inputs, ( void ** )execute_outputs, nullptr ) );

    // NOTE(review): these two views are never used below.
    nn_workload_data_coords_t output_view_begin(0, 0, 0, 0, 0, 0);
    nn_workload_data_coords_t output_view_end(num_batches - 1, num_samples - 1, 0, 0, 0, 0);

    // Compare CPU(reference) output with the one returned by GPU
    EXPECT_EQ( true, verify_output( execute_outputs[0], cpu_outputs ) );

    EXPECT_EQ( NN_API_STATUS_OK, di.workload_delete_function(( nn_workload * )gpu_workload));

    // Buffers were (presumably) allocated aligned by the generators, so
    // the platform-matching free is used.
#ifdef __linux__
    free( cpu_outputs );
    cpu_outputs = nullptr;
    free( gpu_outputs );
    gpu_outputs = nullptr;
    free( input );
    input = nullptr;
#else
    _aligned_free( cpu_outputs );
    cpu_outputs = nullptr;
    _aligned_free( gpu_outputs );
    gpu_outputs = nullptr;
    _aligned_free( input );
    input = nullptr;
#endif //__linux__

    return true;
}