void VectorMutable::set_sub_vector( const RTOpPack::SparseSubVector& sub_vec )
{
  // Translate the input sub-vector view into the C-struct form expected by
  // the RTOp C interface, then apply the "set sub-vector" transformation
  // operator to this vector over exactly the elements covered by sub_vec.
  RTOp_SparseSubVector c_spc_sub_vec;
  if (is_null(sub_vec.indices())) {
    // Dense input: build a dense C sub-vector view first, then wrap it as a
    // (trivially) sparse sub-vector.
    RTOp_SubVector c_dense_sub_vec;
    RTOp_sub_vector(
      sub_vec.globalOffset(), sub_vec.subDim()
      ,sub_vec.values().get(), sub_vec.valuesStride()
      ,&c_dense_sub_vec
      );
    RTOp_sparse_sub_vector_from_dense( &c_dense_sub_vec, &c_spc_sub_vec );
  }
  else {
    // Truly sparse input: pass the values and indices arrays through directly.
    RTOp_sparse_sub_vector(
      sub_vec.globalOffset(), sub_vec.subDim(), sub_vec.subNz()
      ,sub_vec.values().get(), sub_vec.valuesStride()
      ,sub_vec.indices().get(), sub_vec.indicesStride()
      ,sub_vec.localOffset(), sub_vec.isSorted()
      ,&c_spc_sub_vec
      );
  }
  // Construct the set-sub-vector transformation operator and apply it to
  // this vector only (no non-mutable input vectors, no reduction object).
  RTOpPack::RTOpC set_sub_vector_op;
  TEST_FOR_EXCEPT(0!=RTOp_TOp_set_sub_vector_construct(&c_spc_sub_vec,&set_sub_vector_op.op()));
  VectorMutable* targ_vecs[1] = { this };
  AbstractLinAlgPack::apply_op(
    set_sub_vector_op,0,NULL,1,targ_vecs,NULL
    ,sub_vec.globalOffset()+1,sub_vec.subDim(),sub_vec.globalOffset() // first_ele, sub_dim, global_offset
    );
}
/* Create the reduction target object that will accumulate the gathered
 * sub-vector for the element range stored in obj_data.
 *
 * obj_data points to a RTOp_ROp_get_sub_vector_rng_t giving the one-based
 * element range [l,u].  On success, *targ_obj holds a RTOp_SubVector whose
 * values[] storage is freshly allocated for (u-l+1) entries and then
 * zero-initialized via vtbl->obj_reinit().
 *
 * Returns 0 on success or -1 if memory allocation fails.  (The original
 * code did not check either malloc() result before using it.)
 */
static int targ_obj_create(
  const struct RTOp_obj_type_vtbl_t* vtbl, const void* obj_data
  ,RTOp_ReductTarget* targ_obj
  )
{
  const struct RTOp_ROp_get_sub_vector_rng_t *rng = NULL;
  struct RTOp_SubVector *sub_vec_targ = NULL;
  RTOp_value_type *values = NULL;
  RTOp_index_type sub_dim = 0;
  /* Get the range of the sub-vector */
  assert(obj_data);
  rng = (const struct RTOp_ROp_get_sub_vector_rng_t*)obj_data;
  sub_dim = rng->u - rng->l + 1;
  /* Allocate the sub-vector target object */
  *targ_obj = malloc(sizeof(struct RTOp_SubVector));
  if( *targ_obj == NULL )
    return -1; /* out of memory */
  sub_vec_targ = (struct RTOp_SubVector*)*targ_obj;
  /* Setup storage for the target sub-vector */
  values = (RTOp_value_type*)malloc(sub_dim*sizeof(RTOp_value_type));
  if( values == NULL ) {
    free(*targ_obj);
    *targ_obj = NULL;
    return -1; /* out of memory */
  }
  RTOp_sub_vector(
    rng->l - 1  /* global_offset */
    ,sub_dim    /* sub_dim */
    ,values     /* values[] */
    ,1          /* values_stride */
    ,sub_vec_targ
    );
  /* Initialize the sub-vector values to zero */
  vtbl->obj_reinit( vtbl, obj_data, *targ_obj );
  return 0;
}
/* Load the externalized (communicated) state of a "get sub-vector" reduction
 * target object back into the locally allocated target sub-vector.
 *
 * obj_data points to a RTOp_ROp_get_sub_vector_rng_t giving the one-based
 * element range [l,u] being gathered.  The serialized state is expected to
 * contain exactly sub_dim = u-l+1 values (the gathered elements), two
 * indexes (the one-based first and last global element numbers, used only
 * for the consistency asserts below), and no characters.
 *
 * *targ_obj must already point to a RTOp_SubVector with values[] storage
 * for sub_dim entries; the values are copied in and the view is re-stamped
 * with RTOp_sub_vector().  Always returns 0; precondition violations are
 * caught by assert().
 */
static int targ_load_state(
  const struct RTOp_obj_type_vtbl_t* vtbl
  ,const void* obj_data
  ,int num_values
  ,const RTOp_value_type value_data[]
  ,int num_indexes
  ,const RTOp_index_type index_data[]
  ,int num_chars
  ,const RTOp_char_type char_data[]
  ,void ** targ_obj
  )
{
  const struct RTOp_ROp_get_sub_vector_rng_t *rng = NULL;
  struct RTOp_SubVector *sub_vec_targ = NULL;
  RTOp_index_type sub_dim = 0;
  RTOp_value_type *values = NULL;
  register RTOp_index_type k;
  assert(obj_data);
  /* Get the range of the sub-vector */
  rng = (const struct RTOp_ROp_get_sub_vector_rng_t*)obj_data;
  sub_dim = rng->u - rng->l + 1;
  /* Get the target sub-vector */
  assert( *targ_obj );
  sub_vec_targ = (struct RTOp_SubVector*)*targ_obj;
  assert( sub_dim == sub_vec_targ->sub_dim );
  assert( sub_vec_targ->values );
  /* Load the state */
  assert( num_values == sub_dim );
  assert( num_indexes == 2 );
  assert( num_chars == 0 );
  /* The two serialized indexes are the one-based global element range;
     they must match the range this target object was created for. */
  assert( index_data[0] == sub_vec_targ->global_offset + 1 );
  assert( index_data[1] == sub_vec_targ->global_offset + sub_vec_targ->sub_dim );
  /* Copy the communicated values into the target's storage.  The const is
     cast away because this buffer is owned by the target object itself. */
  values = (RTOp_value_type*)sub_vec_targ->values;
  for( k = 0; k < sub_dim; ++k )
    *values++ = value_data[k];
  /* Re-stamp the sub-vector view over the (unchanged) storage */
  RTOp_sub_vector(
    rng->l - 1            /* global_offset */
    ,sub_dim              /* sub_dim */
    ,sub_vec_targ->values /* values[] */
    ,1                    /* values_stride */
    ,sub_vec_targ
    );
  return 0;
}
int RTOp_apply_op_serial( RTOp_index_type full_dim ,const int num_vecs, const RTOp_value_type* vec_ptrs[], const ptrdiff_t vec_strides[] ,const int num_targ_vecs, RTOp_value_type* targ_vec_ptrs[], const ptrdiff_t targ_vec_strides[] ,const RTOp_index_type first_ele_in, const RTOp_index_type sub_dim_in, const RTOp_index_type global_offset_in ,const struct RTOp_RTOp* op ,RTOp_ReductTarget reduct_obj ) { int err = 0; RTOp_index_type sub_dim = 0; struct RTOp_SubVector *sub_vecs = NULL; struct RTOp_MutableSubVector *targ_sub_vecs = NULL; int k; /* Sort out the input and get the number of vector elements to operator over */ #ifdef RTOp_DEBUG assert( num_vecs || num_targ_vecs ); if(num_vecs) assert( vec_ptrs != NULL ); if(num_targ_vecs) assert( targ_vec_ptrs != NULL ); assert( 0 <= sub_dim_in && sub_dim_in <= full_dim ); #endif sub_dim = sub_dim_in ? sub_dim_in : full_dim - (first_ele_in - 1); /* Dimension of logical vectors */ /* Create the sub-vector data structures */ if(num_vecs) { sub_vecs = malloc( sizeof(struct RTOp_SubVector) * num_vecs ); for( k = 0; k < num_vecs; ++k ) { #ifdef RTOp_DEBUG assert( vec_ptrs[k] != NULL ); #endif RTOp_sub_vector( global_offset_in ,sub_dim ,vec_ptrs[k] + (first_ele_in -1) * vec_strides[k] ,vec_strides[k] ,&sub_vecs[k] ); } } if(num_targ_vecs) { targ_sub_vecs = malloc( sizeof(struct RTOp_MutableSubVector) * num_targ_vecs ); for( k = 0; k < num_targ_vecs; ++k ) { #ifdef RTOp_DEBUG assert( targ_vec_ptrs[k] != NULL ); #endif RTOp_mutable_sub_vector( global_offset_in ,sub_dim ,targ_vec_ptrs[k] + (first_ele_in -1) * targ_vec_strides[k] ,targ_vec_strides[k] ,&targ_sub_vecs[k] ); } } /* Apply the reduction/transformation operator in one chunk */ err = RTOp_apply_op( op, num_vecs, sub_vecs, num_targ_vecs, targ_sub_vecs, reduct_obj ); /* Free the sub-vector data structures */ if( sub_vecs ) free( sub_vecs ); if( targ_sub_vecs ) free( targ_sub_vecs ); return err; /* This could be an error code! */ }
int RTOp_apply_op_mpi( MPI_Comm comm ,RTOp_index_type global_dim_in, RTOp_index_type local_sub_dim_in, RTOp_index_type local_offset_in ,const int num_cols ,const int num_vecs, const RTOp_value_type* l_vec_ptrs[], const ptrdiff_t l_vec_strides[], const ptrdiff_t l_vec_leading_dim[] ,const int num_targ_vecs, RTOp_value_type* l_targ_vec_ptrs[], const ptrdiff_t l_targ_vec_strides[], const ptrdiff_t l_targ_vec_leading_dim[] ,const RTOp_index_type first_ele_in, const RTOp_index_type sub_dim_in, const RTOp_index_type global_offset_in ,const struct RTOp_RTOp* op ,RTOp_ReductTarget reduct_objs[] ) { int err = 0; struct RTOp_SubVector *local_vecs = NULL; struct RTOp_MutableSubVector *local_targ_vecs = NULL; RTOp_index_type overlap_first_local_ele = 0; RTOp_index_type overalap_local_sub_dim = 0; RTOp_index_type overlap_global_offset = 0; int k; int kc; /* Validate the input */ #ifdef RTOp_DEBUG assert( num_vecs || num_targ_vecs ); if(num_vecs) assert( l_vec_ptrs != NULL ); if(num_targ_vecs) assert( l_targ_vec_ptrs != NULL ); assert( 0 <= sub_dim_in && sub_dim_in <= global_dim_in ); #endif /* Pre-initialize the local sub-vectors */ if(num_vecs) { local_vecs = malloc( sizeof(struct RTOp_SubVector) * num_vecs * num_cols ); for( kc = 0; kc < num_cols; ++kc ) { for( k = 0; k < num_vecs; ++k ) RTOp_sub_vector_null(&local_vecs[kc*num_cols+k]); } } if(num_targ_vecs) { local_targ_vecs = malloc( sizeof(struct RTOp_MutableSubVector) * num_targ_vecs ); for( kc = 0; kc < num_cols; ++kc ) { for( k = 0; k < num_targ_vecs; ++k ) RTOp_mutable_sub_vector_null(&local_targ_vecs[kc*num_cols+k]); } } /* Get the overlap in the current process with the input logical sub-vector */ /* from (first_ele_in,sub_dim_in,global_offset_in) */ RTOp_parallel_calc_overlap( global_dim_in, local_sub_dim_in, local_offset_in, first_ele_in, sub_dim_in, global_offset_in ,&overlap_first_local_ele, &overalap_local_sub_dim, &overlap_global_offset ); if( overlap_first_local_ele != 0 ) { /* Sub-vector structs for the local 
elements that are to participate in the */ /* reduction/transforamtion operation. */ for( kc = 0; kc < num_cols; ++kc ) { for(k = 0; k < num_vecs; ++k) { RTOp_sub_vector( overlap_global_offset /* global_offset */ ,overalap_local_sub_dim /* sub_dim */ ,l_vec_ptrs[k]+(overlap_first_local_ele-1)*l_vec_strides[k] + ( num_cols > 1 ? kc*l_vec_leading_dim[k] : 0 ) /* values */ ,l_vec_strides[k] /* values_stride */ ,&local_vecs[kc*num_cols+k] ); } for(k = 0; k < num_targ_vecs; ++k) { RTOp_mutable_sub_vector( overlap_global_offset /* global_offset */ ,overalap_local_sub_dim /* sub_dim */ ,l_targ_vec_ptrs[k]+(overlap_first_local_ele-1)*l_targ_vec_strides[k] + ( num_cols > 1 ? kc*l_targ_vec_leading_dim[k] : 0 ) /* values */ ,l_targ_vec_strides[k] /* values_stride */ ,&local_targ_vecs[kc*num_cols+k] ); } } } /* */ /* Apply the reduction operation over the sub-vectors in */ /* this process then collect the reductions over */ /* all the processes and return the result */ /* to all the processes (including this one of course). */ /* If all of the sub-svectors are empty then this will */ /* just call the reduction operation with NULL sub-vectors */ /* */ err = RTOp_MPI_apply_op( comm, op, -1 /* MPI_Allreduce(...) */ ,num_cols ,num_vecs, num_vecs && overlap_first_local_ele ? &local_vecs[0] : NULL ,num_targ_vecs, num_targ_vecs && overlap_first_local_ele ? &local_targ_vecs[0] : NULL ,reduct_objs ); if(local_vecs) free(local_vecs); if(local_targ_vecs) free(local_targ_vecs); /* Deallocate memory */ return err; }