/// Feeds a new set of input values to the optimizer and then refreshes the
/// copied results.
///
/// @param inputs Values forwarded unchanged to optimizer_.setInputValues().
///
/// The order matters: copy_results() runs only after the optimizer has
/// received the new inputs.
/// NOTE(review): copy_results() is defined outside this block; presumably it
/// publishes the optimizer's outputs -- confirm against its definition.
void update(const std::vector<double>& inputs)
{
    optimizer_.setInputValues(inputs);
    copy_results();
}
double matrix_multiply(void) { int oi, oj; int i, j, k, l, q, p; double start, end; // timer for the start of the computation // Reorganize the data but do not start multiplying elements before // the timer value is captured. start = omp_get_wtime(); /* //Single loop - makes no obvious difference #pragma omp parallel for for(i=0; i<SIZE_N*SIZE_M; i++){ oi = i / SIZE_M; oj = i % SIZE_M; // #pragma omp parallel for // for(oj=0; oj<SIZE_M; oj++){ //this is where we launch! //iterate over matrices: for(k=0; k<SIZE_P; k++){ //submatrix multiply: for(q=0; q<BLOCK_SIZE; q++){ for(l=0; l<BLOCK_SIZE; l++){ for(p=0; p<BLOCK_SIZE; p++){ SUB_C[oi][oj][q][l] += SUB_A[oi][k][q][p] * SUB_B[k][oj][p][l]; } } } } //end launch } */ #ifdef PARALLEL #pragma omp parallel for #endif for(oi=0; oi<SIZE_N; oi++){ #ifdef PARALLEL #pragma omp parallel for #endif for(oj=0; oj<SIZE_M; oj++){ //this is where we launch! //iterate over matrices: for(k=0; k<SIZE_P; k++){ //submatrix multiply: for(q=0; q<BLOCK_SIZE; q++){ for(l=0; l<BLOCK_SIZE; l++){ for(p=0; p<BLOCK_SIZE; p++){ SUB_C[oi][oj][q][l] += SUB_A[oi][k][q][p] * SUB_B[k][oj][p][l]; } } } } //end launch } } // timer for the end of the computation end = omp_get_wtime(); //copy the results from the streamlined matrix into the old matrix form (for... compatability?) copy_results(); // return the amount of high resolution time spent return end - start; }
/// Feeds a new set of input values, given as an Eigen vector, to the
/// optimizer and then refreshes the copied results.
///
/// @param inputs Values forwarded unchanged to optimizer_.setInputValues().
///
/// The order matters: copy_results() runs only after the optimizer has
/// received the new inputs.
/// NOTE(review): copy_results() is defined outside this block; presumably it
/// publishes the optimizer's outputs -- confirm against its definition.
void update(const Eigen::VectorXd& inputs)
{
    optimizer_.setInputValues(inputs);
    copy_results();
}