static int input_zvmmul_row_f(lwp_functions* pf, void* p_params, void* entries, unsigned int current_count, unsigned int total_count) { unsigned int const FP = 1; // Split-complex data: 1 floats per point. Vmmul_split_params* params = (Vmmul_split_params*)p_params; unsigned long length = params->length; unsigned long mult = params->mult; (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 0); add_vector_f(pf, entries, params->ea_input_matrix_re + current_count * FP * mult * params->input_stride * sizeof(float), length * mult); add_vector_f(pf, entries, params->ea_input_matrix_im + current_count * FP * mult * params->input_stride * sizeof(float), length * mult); if (current_count == 0) { unsigned long dma_size = VSIP_IMPL_INCREASE_TO_DMA_SIZE(length, float); add_vector_f(pf, entries, params->ea_input_vector_re, dma_size); add_vector_f(pf, entries, params->ea_input_vector_im, dma_size); }
/*
 * Describe the output transfers for one work block of the split-complex
 * vector-matrix multiply.
 *
 * A single transfer list is opened on the ALF_BUF_OVL_OUT buffer and two
 * entries are queued, real part first, then imaginary part.  Each entry
 * covers `length` floats and starts `current_count * output_stride` floats
 * past the corresponding output base address.
 *
 *   pf            - callback table; f_dtl_begin / f_dtl_end bracket the list.
 *   p_context     - not used by this function.
 *   p_params      - points to the Vmmul_split_params for this operation.
 *   entries       - opaque handle passed through to the callbacks.
 *   current_count - index of the block being written back.
 *   total_count   - not used by this function.
 *
 * Always returns 0.
 *
 * NOTE(review): add_vector_f is defined elsewhere; presumably it appends one
 * transfer entry of the given float count at the given address -- confirm.
 */
int
output(
  Plugin_functions* pf,
  void*             p_context,
  void*             p_params,
  void*             entries,
  unsigned int      current_count,
  unsigned int      total_count)
{
  /* Split-complex layout: one float per point in each of the two arrays. */
  unsigned int const floats_per_point = 1;
  Vmmul_split_params* vp = (Vmmul_split_params*)p_params;
  unsigned long n_floats = vp->length;

  (void)p_context;
  (void)total_count;

  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_OUT, 0);

  /* Real part of the block. */
  add_vector_f(
    pf, entries,
    vp->ea_output_matrix_re
      + current_count * floats_per_point * vp->output_stride * sizeof(float),
    n_floats);

  /* Imaginary part of the block. */
  add_vector_f(
    pf, entries,
    vp->ea_output_matrix_im
      + current_count * floats_per_point * vp->output_stride * sizeof(float),
    n_floats);

  (pf->f_dtl_end)(entries);

  return 0;
}
/*
 * Describe the input transfers for one work block of the split-complex
 * vector-matrix multiply.
 *
 * Every call queues the real and imaginary parts of one matrix block
 * (`length` floats each, starting `current_count * input_stride` floats past
 * the matrix base addresses).  The very first call (current_count == 0)
 * additionally queues the real and imaginary parts of the input vector,
 * also `length` floats each.
 *
 * Two layouts are compiled depending on PPU_IS_32BIT:
 *   - when it is non-zero, all entries go into one transfer list opened
 *     with offset 0;
 *   - otherwise each entry gets its own transfer list, opened at
 *     0, length, 2*length and 3*length floats (scaled by sizeof(float)).
 *     NOTE(review): the third f_dtl_begin argument looks like a byte offset
 *     into the local input buffer -- confirm against the ALF API.
 *
 *   pf            - callback table; f_dtl_begin / f_dtl_end bracket a list.
 *   p_context     - not used by this function.
 *   p_params      - points to the Vmmul_split_params for this operation.
 *   entries       - opaque handle passed through to the callbacks.
 *   current_count - index of the block being fetched.
 *   total_count   - not used by this function.
 *
 * Always returns 0.
 */
int
input(
  Plugin_functions* pf,
  void*             p_context,
  void*             p_params,
  void*             entries,
  unsigned int      current_count,
  unsigned int      total_count)
{
  /* Split-complex layout: one float per point in each of the two arrays. */
  unsigned int const floats_per_point = 1;
  Vmmul_split_params* vp = (Vmmul_split_params*)p_params;
  unsigned long n_floats = vp->length;

  (void)p_context;
  (void)total_count;

#if !(PPU_IS_32BIT)
  /* One transfer list per entry, each placed at an explicit offset. */

  /* Matrix block, real part. */
  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 0);
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_re
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);
  (pf->f_dtl_end)(entries);

  /* Matrix block, imaginary part. */
  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, n_floats*sizeof(float));
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_im
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);
  (pf->f_dtl_end)(entries);

  /* The vector is only requested together with the first block. */
  if (current_count == 0)
  {
    (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 2*n_floats*sizeof(float));
    add_vector_f(pf, entries, vp->ea_input_vector_re, n_floats);
    (pf->f_dtl_end)(entries);

    (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 3*n_floats*sizeof(float));
    add_vector_f(pf, entries, vp->ea_input_vector_im, n_floats);
    (pf->f_dtl_end)(entries);
  }
#else
  /* Everything goes into a single transfer list. */
  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 0);

  /* Matrix block, real part then imaginary part. */
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_re
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_im
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);

  /* The vector is only requested together with the first block. */
  if (current_count == 0)
  {
    add_vector_f(pf, entries, vp->ea_input_vector_re, n_floats);
    add_vector_f(pf, entries, vp->ea_input_vector_im, n_floats);
  }

  (pf->f_dtl_end)(entries);
#endif

  return 0;
}
/*
 * Describe the input transfers for one work block of the split-complex FFT.
 *
 * A work block normally carries `chunks_per_wb` chunks of `size` floats.
 * The last iteration carries only the remainder
 * (chunks_per_spe % chunks_per_wb) when iter_max full blocks would exceed
 * chunks_per_spe.  If the chunk size equals in_blk_stride, the chunks are
 * requested as a single entry of size * chunk-count floats instead of one
 * entry per chunk.
 *
 * All real-part entries are queued first, followed by all imaginary-part
 * entries, inside one transfer list on ALF_BUF_OVL_IN opened with offset 0.
 *
 *   pf       - callback table; f_dtl_begin / f_dtl_end bracket the list.
 *   params   - points to the Fft_split_params for this operation.
 *   entries  - opaque handle passed through to the callbacks.
 *   iter     - index of the current work block.
 *   iter_max - total number of work blocks.
 *
 * Always returns 0.
 *
 * NOTE(review): add_vector_f is defined elsewhere; presumably it appends one
 * transfer entry of the given float count at the given address -- confirm.
 */
static int
input_zfft_f(
  lwp_functions* pf,
  void*          params,
  void*          entries,
  unsigned int   iter,
  unsigned int   iter_max)
{
  Fft_split_params* fft = (Fft_split_params *)params;
  unsigned int const wb_chunks   = fft->chunks_per_wb;
  /* Index of the first chunk belonging to this work block. */
  unsigned int const first_chunk = iter * wb_chunks;
  unsigned int       n_floats    = fft->size;
  unsigned int       n_chunks;
  unsigned int       k;
  alf_data_addr64_t  ea;

  /* The final work block may be only partially filled. */
  n_chunks = (iter == iter_max-1 && iter_max * wb_chunks > fft->chunks_per_spe)
           ? fft->chunks_per_spe % wb_chunks
           : wb_chunks;

  /* Chunk size equal to the block stride: fold all chunks into one entry. */
  if (n_floats == fft->in_blk_stride)
  {
    n_floats *= n_chunks;
    n_chunks = 1;
  }

  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 0);

  /* Real parts of every chunk. */
  for (k = 0; k < n_chunks; ++k)
  {
    ea = fft->ea_input_re
       + (first_chunk + k) * fft->in_blk_stride * sizeof(float);
    add_vector_f(pf, entries, ea, n_floats);
  }

  /* Imaginary parts of every chunk. */
  for (k = 0; k < n_chunks; ++k)
  {
    ea = fft->ea_input_im
       + (first_chunk + k) * fft->in_blk_stride * sizeof(float);
    add_vector_f(pf, entries, ea, n_floats);
  }

  (pf->f_dtl_end)(entries);

  return 0;
}
/*
 * Describe the input transfers for one work block of the split-complex
 * vector-matrix multiply, with a padded vector transfer.
 *
 * Every call queues the real and imaginary parts of one matrix block
 * (`length` floats each, starting `current_count * input_stride` floats past
 * the matrix base addresses).  The very first call (current_count == 0)
 * additionally queues the real and imaginary parts of the input vector.
 *
 * The vector entries cover total_count + shift floats, rounded up to a
 * multiple of four and never less than four, because transfers must be a
 * multiple of 16 bytes.
 *
 * Two layouts are compiled depending on PPU_IS_32BIT:
 *   - when it is non-zero, all entries go into one transfer list opened
 *     with offset 0;
 *   - otherwise each entry gets its own transfer list, opened at
 *     0, length, 2*length and 2*length + vector-length floats (scaled by
 *     sizeof(float)).
 *     NOTE(review): the third f_dtl_begin argument looks like a byte offset
 *     into the local input buffer -- confirm against the ALF API.
 *
 *   pf            - callback table; f_dtl_begin / f_dtl_end bracket a list.
 *   p_context     - not used by this function.
 *   p_params      - points to the Vmmul_split_params for this operation.
 *   entries       - opaque handle passed through to the callbacks.
 *   current_count - index of the block being fetched.
 *   total_count   - contributes to the vector transfer length (see above).
 *
 * Always returns 0.
 */
int
input(
  Plugin_functions* pf,
  void*             p_context,
  void*             p_params,
  void*             entries,
  unsigned int      current_count,
  unsigned int      total_count)
{
  /* Split-complex layout: one float per point in each of the two arrays. */
  unsigned int const floats_per_point = 1;
  Vmmul_split_params* vp = (Vmmul_split_params*)p_params;
  unsigned long n_floats = vp->length;
  unsigned long vec_floats = total_count;
  unsigned long remainder;

  (void)p_context;

  /* Pad the vector length: at least 4 floats, always a multiple of 4. */
  vec_floats += vp->shift;
  remainder = vec_floats % 4;
  if (vec_floats == 0)
  {
    vec_floats = 4;
  }
  else if (remainder != 0)
  {
    vec_floats += 4 - remainder;
  }

#if !(PPU_IS_32BIT)
  /* One transfer list per entry, each placed at an explicit offset. */

  /* Matrix block, real part. */
  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 0);
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_re
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);
  (pf->f_dtl_end)(entries);

  /* Matrix block, imaginary part. */
  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, n_floats*sizeof(float));
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_im
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);
  (pf->f_dtl_end)(entries);

  /* The vector is only requested together with the first block. */
  if (current_count == 0)
  {
    (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 2*n_floats*sizeof(float));
    add_vector_f(pf, entries, vp->ea_input_vector_re, vec_floats);
    (pf->f_dtl_end)(entries);

    (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN,
                      (2*n_floats+vec_floats)*sizeof(float));
    add_vector_f(pf, entries, vp->ea_input_vector_im, vec_floats);
    (pf->f_dtl_end)(entries);
  }
#else
  /* Everything goes into a single transfer list. */
  (pf->f_dtl_begin)(entries, ALF_BUF_OVL_IN, 0);

  /* Matrix block, real part then imaginary part. */
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_re
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);
  add_vector_f(
    pf, entries,
    vp->ea_input_matrix_im
      + current_count * floats_per_point * vp->input_stride * sizeof(float),
    n_floats);

  /* The vector is only requested together with the first block. */
  if (current_count == 0)
  {
    add_vector_f(pf, entries, vp->ea_input_vector_re, vec_floats);
    add_vector_f(pf, entries, vp->ea_input_vector_im, vec_floats);
  }

  (pf->f_dtl_end)(entries);
#endif

  return 0;
}