void doLsqr( int m, int n, double damp, void *UsrWrk, double u[], // len = m double v[], // len = n double w[], // len = n double x[], // len = n FILE *nout, // The remaining variables are output only. int *istop_out, int *itn_out, double *anorm_out, double *acond_out, double *rnorm_out, double *arnorm_out, double *xnorm_out ) { lsqr( m, n, sparseMatrixVectorProduct, damp, UsrWrk, u, // len = m v, // len = n w, // len = n x, // len = n NULL, // len = * 1.0e-12, 1.0e-12, 10e6, 1e7, nout, // The remaining variables are output only. istop_out, itn_out, anorm_out, acond_out, rnorm_out, arnorm_out, xnorm_out ); }
int main(int argc, char *argv[]) { struct matrix_t *matrixA; struct sparse_matrix_t *sparseA; struct vector_t *x, *b; lsqr_input *input; lsqr_output *output; lsqr_work *work; /* zone temoraire de travail */ lsqr_func *func; /* func->mat_vec_prod -> APROD */ /* cmd line arg */ char *matrix_filename = NULL; char *vector_filename = NULL; char *sol_filename = NULL; int max_iter = -1; float damping = 0; if (argc != 4) { fprintf(stderr, "%s matrixfile vectorfile solutionfile\n", argv[0]); exit(1); } matrix_filename = strdup(argv[1]); vector_filename = strdup(argv[2]); sol_filename = strdup(argv[3]); /* read the matrix */ matrixA = read_matrix(matrix_filename); fprintf(stderr, "read*matrix: ok (size=%ldx%ld, %ld elements)\n", matrixA->nb_line, matrixA->nb_col, matrixA->nb_line * matrixA->nb_col); sparseA = sparsify(matrixA, SPARSE_COL_LINK); b = read_simple_vector(vector_filename); /*************************************************/ /* check compatibility between matrix and vector */ /*************************************************/ if (sparseA->nb_line != b->length) { fprintf(stderr, "Error, check your matrix/vector sizes (%ld/%ld)\n", sparseA->nb_line, b->length); exit(1); } /* init vector solution to zero */ x = new_vector(sparseA->nb_col); /* catch Ctrl-C signal */ signal(SIGINT, emergency_halt); /*************************************************************/ /* solve A.x = B */ /*************************************************************/ /* LSQR alloc */ alloc_lsqr_mem(&input, &output, &work, &func, sparseA->nb_line, sparseA->nb_col); fprintf(stderr, "alloc_lsqr_mem : ok\n"); /* defines the routine Mat.Vect to use */ func->mat_vec_prod = sparseMATRIXxVECTOR; /* Set the input parameters for LSQR */ input->num_rows = sparseA->nb_line; input->num_cols = sparseA->nb_col; input->rel_mat_err = .0; input->rel_rhs_err = .0; input->cond_lim = .0; input->lsqr_fp_out = stdout; input->rhs_vec = (dvec *) b; input->sol_vec = (dvec *) x; /* initial guess */ input->damp_val = damping; if (max_iter == -1) { input->max_iter = 4 * (sparseA->nb_col); } else { input->max_iter = max_iter; } /* resolution du systeme Ax=b */ lsqr(input, output, work, func, sparseA); write_vector((struct vector_t *) output->sol_vec, sol_filename); free_lsqr_mem(input, output, work, func); free_matrix(matrixA); /* check A^t.A */ /* * { struct sparse_matrix_t *AtA; AtA = AtransA (sparseA); * write_sparse_matrix(AtA, "AtA"); write_sparse_matrix(sparseA, * "A"); free_sparse_matrix (AtA); * * } */ free_sparse_matrix(sparseA); return (1); }
int main(int argc, char* argv[]) { char *A_matrix_file = NULL; char *AT_matrix_file = NULL; char *vector_file = NULL; char *result_file = NULL; // input checks if (argc <= 4) { std::cout << "usage: " << argv[0] << " <A matrix file> <AT matrix file>" << " <rhs vector file> <result file>" << std::endl; return -1; } else { A_matrix_file = argv[1]; AT_matrix_file = argv[2]; vector_file = argv[3]; result_file = argv[4]; } // init the network agile::NetworkEnvironment environment(argc, argv); // allocate a GPU typedef agile::GPUCommunicator<unsigned, float, float> CommunicatorType; CommunicatorType com; com.allocateGPU(); bool success; typedef std::vector<std::complex<float> > cpu_vector_type; // read in crs matrix A //-------------------------------------------------------------------------- unsigned A_num_rows, A_num_columns; std::vector<unsigned> A_row_nnz; std::vector<unsigned> A_column_index; cpu_vector_type A_data; success = agile::readCSMatrixFile(A_matrix_file, A_num_rows, A_num_columns, A_row_nnz, A_column_index, A_data); if (!success) { std::cerr << "error: not able to load matrix A: " << A_matrix_file << std::endl; exit(-1); } // read in crs matrix A' //-------------------------------------------------------------------------- unsigned AT_num_rows, AT_num_columns; std::vector<unsigned> AT_row_nnz; std::vector<unsigned> AT_column_index; cpu_vector_type AT_data; success = agile::readCSMatrixFile(AT_matrix_file, AT_num_rows, AT_num_columns, AT_row_nnz, AT_column_index, AT_data); if (!success) { std::cerr << "error: not able to load matrix A': " << AT_matrix_file << std::endl; exit(-1); } // read in vector //-------------------------------------------------------------------------- cpu_vector_type y_host; success = agile::readVectorFile(vector_file, y_host); if (!success) { std::cerr << "error: not able to load vector: " << vector_file << std::endl; exit(-1); } // dimension check //-------------------------------------------------------------------------- if (A_num_rows != AT_num_columns || A_num_columns != AT_num_rows) { std::cerr << "error: incompatible matrix dimensions " << std::endl << " A: " << A_num_rows << "x" << A_num_columns << ", AT: " << AT_num_rows << "x" << AT_num_columns << std::endl; exit(-1); } if (y_host.size() != A_num_rows) { std::cerr << "error: incompatible dimensions" << std::endl; } // init gpu matrix and vector typedef agile::GPUCSMatrix<std::complex<float> > GPUCSMatrixType; GPUCSMatrixType A(A_row_nnz, A_column_index, A_data); typedef agile::GPUCSMatrix<std::complex<float>, true> GPUCSAdjointMatrixType; GPUCSAdjointMatrixType AT(AT_row_nnz, AT_column_index, AT_data); typedef agile::GPUVector<std::complex<float> > gpu_vector_type; gpu_vector_type y(A_num_rows); y.assignFromHost(y_host.begin(), y_host.end()); // generate a forward operator typedef agile::ForwardMatrixWithAdjoint<CommunicatorType, GPUCSMatrixType, GPUCSAdjointMatrixType> ForwardType; ForwardType forward(com, A, AT); // generate a binary measure typedef agile::ScalarProductMeasure<CommunicatorType> MeasureType; MeasureType scalar_product(com); // init lsqr operator agile::LSQR<CommunicatorType, ForwardType, MeasureType> lsqr( com, forward, scalar_product, LSQR_ABS_TOLERANCE, LSQR_MAX_ITERATIONS); // init result vector on gpu gpu_vector_type x(A_num_columns); #if WITH_TIMER struct timeval st, et; gettimeofday(&st, NULL); #endif // do lsqr inverse computation lsqr(y, x); #if WITH_TIMER cudaThreadSynchronize(); gettimeofday(&et, NULL); float elapsed_time = ((et.tv_sec-st.tv_sec)*1000.0 + (et.tv_usec - st.tv_usec)/1000.0); std::cout << "lsqr (gpu): " << std::setprecision(5.9) << elapsed_time << "ms" << std::endl; #endif #if SHOW_LSQR_DETAILS std::cout << "iterations: " << lsqr.getIteration() << std::endl; std::cout << "final residual: " << lsqr.getRho() << std::endl; #endif // transfer result from gpu to cpu cpu_vector_type x_host; x.copyToHost(x_host); // write result to file agile::writeVectorFile(result_file, x_host); return 0; }
int main(int argc, char *argv[]) { struct sparse_matrix_t *sparseA = NULL; struct vector_t *b = NULL; struct vector_t *x; struct mesh_t *mesh; char *xml_output; long int *compress2fat = NULL; struct vector_t *solution; struct vector_t *std_error_sol; long int fat_sol_nb_col; lsqr_input *input; lsqr_output *output; lsqr_work *work; /* zone temoraire de travail */ lsqr_func *func; /* func->mat_vec_prod -> APROD */ /* cmd line arg */ char *mesh_filename = NULL; char *importfilename = NULL; char *output_filename = NULL; char *sol_error_filename = NULL; char *log_filename = NULL; char *output_type = NULL; int max_iter; double damping, grad_damping; int use_ach = 0; /* ACH : tele-seismic inversion tomography */ int check_sparse = 0; /* check sparse matrix disable by default */ /* velocity model */ char *vmodel = NULL; struct velocity_model_t *vm = NULL; struct mesh_t **imported_mesh = NULL; char **xmlfilelist = NULL; int nb_xmlfile = 0; int i, j; int nb_irm = 0; struct irm_t **irm = NULL; int *nb_metacell = NULL; FILE *logfd; /*************************************************************/ parse_command_line(argc, argv, &mesh_filename, &vmodel, &importfilename, &log_filename, &output_filename, &output_type, &max_iter, &damping, &grad_damping, &use_ach, &check_sparse); if (use_ach) { fprintf(stderr, "Using ACH tomographic inversion\n"); } else { fprintf(stderr, "Using STANDARD tomographic inversion\n"); } /* load the velocity model */ if (vmodel) { char *myfile; vm = load_velocity_model(vmodel); if (!vm) { fprintf(stderr, "Can not initialize velocity model '%s'\n", vmodel); exit(1); } myfile = strdup(vmodel); fprintf(stderr, "Velocity model '%s' loaded\n", basename(myfile)); free(myfile); } else { vm = NULL; } /* Open log file */ if (!log_filename) { logfd = stdout; } else { if (!(logfd = fopen(log_filename, "w"))) { perror(log_filename); exit(1); } } /*check_write_access (output_filename); */ /**************************************/ /* test if we can open file to import */ /**************************************/ if (importfilename) { xmlfilelist = parse_separated_list(importfilename, ","); nb_xmlfile = 0; while (xmlfilelist[nb_xmlfile]) { if (access(xmlfilelist[nb_xmlfile], R_OK) == -1) { perror(xmlfilelist[nb_xmlfile]); exit(1); } nb_xmlfile++; } } else { fprintf(stderr, "No file to import ... exiting\n"); exit(0); } /****************************/ /* main mesh initialization */ /****************************/ mesh = mesh_init_from_file(mesh_filename); if (!mesh) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } fprintf(stderr, "read %s ok\n", mesh_filename); /*****************************************/ /* check and initialize slice xml files */ /*****************************************/ if (nb_xmlfile) { int nb_sparse = 0; int nb_res = 0; int f; imported_mesh = (struct mesh_t **) malloc(sizeof(struct mesh_t *) * nb_xmlfile); assert(imported_mesh); for (i = 0; i < nb_xmlfile; i++) { imported_mesh[i] = mesh_init_from_file(xmlfilelist[i]); if (!imported_mesh[i]) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } for (f = 0; f < NB_MESH_FILE_FORMAT; f++) { /* mandatory field : res, sparse, and irm if provided */ if (f == RES || f == SPARSE || f == IRM) { check_files_access(f, imported_mesh[i]->data[f], xmlfilelist[i]); } } if (imported_mesh[i]->data[SPARSE]) { nb_sparse += imported_mesh[i]->data[SPARSE]->ndatafile; } if (imported_mesh[i]->data[RES]) { nb_res += imported_mesh[i]->data[RES]->ndatafile; } if (imported_mesh[i]->data[IRM]) { nb_irm += imported_mesh[i]->data[IRM]->ndatafile; } } if (!nb_sparse || !nb_res) { fprintf(stderr, "Error no sparse or res file available !\n"); exit(0); } } /*********************************************/ /* read and import the sparse(s) matrix(ces) */ /*********************************************/ for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[SPARSE]) { continue; } for (j = 0; j < imported_mesh[i]->data[SPARSE]->ndatafile; j++) { sparseA = import_sparse_matrix(sparseA, imported_mesh[i]->data[SPARSE]-> filename[j]); } } if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } /*sparse_compute_length(sparseA, "length1.txt"); */ fat_sol_nb_col = sparseA->nb_col; show_sparse_stats(sparseA); /*********************************************/ /* read and import the residual time vector */ /*********************************************/ for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[RES]) { continue; } for (j = 0; j < imported_mesh[i]->data[RES]->ndatafile; j++) { b = import_vector(b, imported_mesh[i]->data[RES]->filename[j]); } } /*************************************************/ /* check compatibility between matrix and vector */ /*************************************************/ if (sparseA->nb_line != b->length) { fprintf(stderr, "Error, check your matrix/vector sizes (%ld/%ld)\n", sparseA->nb_line, b->length); exit(1); } /********************/ /* show memory used */ /********************/ #ifdef __APPLE__ { struct mstats memusage; memusage = mstats(); fprintf(stderr, "Memory used: %.2f MBytes\n", (float) (memusage.bytes_used) / (1024. * 1024)); } #else { struct mallinfo m_info; m_info = mallinfo(); fprintf(stderr, "Memory used: %.2f MBytes\n", (float) (m_info.uordblks + m_info.usmblks) / (1024. * 1024.)); } #endif /**************************************/ /* relative traveltime mode */ /**************************************/ if (use_ach) { int nb_evt_imported = 0; for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[EVT]) { continue; } for (j = 0; j < imported_mesh[i]->data[EVT]->ndatafile; j++) { relative_tt(sparseA, b, imported_mesh[i]->data[EVT]->filename[j]); nb_evt_imported++; } } if (!nb_evt_imported) { fprintf(stderr, "Error in ACH mode, can not import any .evt file !\n"); exit(1); } } /************************************************/ /* read the irregular mesh definition if needed */ /* one by layer */ /************************************************/ if (nb_irm) { int cpt = 0; struct mesh_offset_t **offset; int l; irm = (struct irm_t **) malloc(nb_irm * sizeof(struct irm_t *)); assert(irm); nb_metacell = (int *) calloc(nb_irm, sizeof(int)); assert(nb_metacell); make_mesh(mesh); for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[IRM]) { continue; } /* offset between meshes */ offset = compute_mesh_offset(mesh, imported_mesh[i]); for (l = 0; l < mesh->nlayers; l++) { if (!offset[l]) continue; fprintf(stderr, "\t%s, [%s] offset[layer=%d] : lat=%d lon=%d z=%d\n", xmlfilelist[i], MESH_FILE_FORMAT[IRM], l, offset[l]->lat, offset[l]->lon, offset[l]->z); } for (j = 0; j < imported_mesh[i]->data[IRM]->ndatafile; j++) { /* FIXME: read only once the irm file */ irm[cpt] = read_irm(imported_mesh[i]->data[IRM]->filename[j], &(nb_metacell[cpt])); import2mesh_irm_file(mesh, imported_mesh[i]->data[IRM]-> filename[j], offset); cpt++; } for (l = 0; l < mesh->nlayers; l++) { if (offset[l]) free(offset[l]); } free(offset); } metacell_find_neighbourhood(mesh); } /*sparse_compute_length(sparseA, "length1.txt"); */ fat_sol_nb_col = sparseA->nb_col; show_sparse_stats(sparseA); /***********************/ /* remove empty column */ /***********************/ fprintf(stderr, "starting compression ...\n"); sparse_compress_column(mesh, sparseA, &compress2fat); if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } show_sparse_stats(sparseA); /***************************************/ /* add gradient damping regularisation */ /***************************************/ if (fabs(grad_damping) > 1.e-6) { int nb_faces = 6; /* 1 cell may have 6 neighbours */ long int nb_lines = 0; char *regul_name; fprintf(stdout, "using gradient damping : %f\n", grad_damping); /* tmp file name */ regul_name = tempnam("/tmp", "regul"); if (!regul_name) { perror("lsqrsolve: "); exit(1); } if (nb_irm) { create_regul_DtD_irm(sparseA, compress2fat, mesh, regul_name, nb_faces, grad_damping, &nb_lines); } else { create_regul_DtD(sparseA, compress2fat, mesh, regul_name, nb_faces, grad_damping, &nb_lines); } sparse_matrix_resize(sparseA, sparseA->nb_line + sparseA->nb_col, sparseA->nb_col); sparseA = import_sparse_matrix(sparseA, regul_name); if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } vector_resize(b, sparseA->nb_line); unlink(regul_name); show_sparse_stats(sparseA); } /*********************************/ /* the real mesh is no more used */ /* keep only the light mesh */ /*********************************/ fprintf(stdout, "Time to free the real mesh and keep only the light structure\n"); free_mesh(mesh); mesh = mesh_init_from_file(mesh_filename); if (!mesh) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } fprintf(stderr, "read %s ok\n", mesh_filename); /********************************/ /* init vector solution to zero */ /********************************/ x = new_vector(sparseA->nb_col); /*************************************************************/ /* solve A.x = B */ /* A = ray length in the cells */ /* B = residual travel time observed - computed */ /* x solution to satisfy the lsqr problem */ /*************************************************************/ /* LSQR alloc */ alloc_lsqr_mem(&input, &output, &work, &func, sparseA->nb_line, sparseA->nb_col); fprintf(stderr, "alloc_lsqr_mem : ok\n"); /* defines the routine Mat.Vect to use */ func->mat_vec_prod = sparseMATRIXxVECTOR; /* Set the input parameters for LSQR */ input->num_rows = sparseA->nb_line; input->num_cols = sparseA->nb_col; input->rel_mat_err = 1.0e-3; /* in km */ input->rel_rhs_err = 1.0e-2; /* in seconde */ /*input->rel_mat_err = 0.; input->rel_rhs_err = 0.; */ input->cond_lim = .0; input->lsqr_fp_out = logfd; /* input->rhs_vec = (dvec *) b; */ dvec_copy((dvec *) b, input->rhs_vec); input->sol_vec = (dvec *) x; /* initial guess */ input->damp_val = damping; if (max_iter == -1) { input->max_iter = 4 * (sparseA->nb_col); } else { input->max_iter = max_iter; } /* catch Ctrl-C signal */ signal(SIGINT, emergency_halt); /******************************/ /* resolution du systeme Ax=b */ /******************************/ lsqr(input, output, work, func, sparseA); fprintf(stderr, "*** lsqr ended (%ld iter) : %s\n", output->num_iters, lsqr_msg[output->term_flag]); if (output->term_flag == 0) { /* solution x=x0 */ exit(0); } /* uncompress the solution */ solution = uncompress_column((struct vector_t *) output->sol_vec, compress2fat, fat_sol_nb_col); /* uncompress the standard error on solution */ std_error_sol = uncompress_column((struct vector_t *) output->std_err_vec, compress2fat, fat_sol_nb_col); /* if irm file was provided, set the right value to each cell * from a given metacell */ if (irm) { irm_update(solution, irm, nb_metacell, nb_irm, mesh); free_irm(irm, nb_irm); free(nb_metacell); } /* write solution */ if (strchr(output_type, 'm')) { export2matlab(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); } if (strchr(output_type, 's')) { export2sco(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); } if (strchr(output_type, 'g')) { /* solution */ export2gmt(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); /* error on solution */ sol_error_filename = (char *) malloc(sizeof(char) * (strlen(output_filename) + strlen(".err") + 1)); sprintf(sol_error_filename, "%s.err", output_filename); export2gmt(std_error_sol, sol_error_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); free(sol_error_filename); } /* save the xml enrichied with sections */ xml_output = (char *) malloc((strlen(output_filename) + strlen(".xml") + 1) * sizeof(char)); assert(xml_output); sprintf(xml_output, "%s.xml", output_filename); mesh2xml(mesh, xml_output); free(xml_output); /******************************************************/ /* variance reduction, ie how the model fits the data */ /* X = the final solution */ /* */ /* ||b-AX||² */ /* VR= 1 - -------- */ /* ||b||² */ /* */ /******************************************************/ { double norm_b; double norm_b_AX; double VR; /* variance reduction */ struct vector_t *rhs; /* right hand side */ rhs = new_vector(sparseA->nb_line); /* use copy */ dvec_copy((dvec *) b, (dvec *) rhs); norm_b = dvec_norm2((dvec *) rhs); /* does rhs = rhs + sparseA . output->sol_vec */ /* here rhs is overwritten */ dvec_scale((-1.0), (dvec *) rhs); sparseMATRIXxVECTOR(0, output->sol_vec, (dvec *) rhs, sparseA); dvec_scale((-1.0), (dvec *) rhs); norm_b_AX = dvec_norm2((dvec *) rhs); VR = 1 - (norm_b_AX * norm_b_AX) / (norm_b * norm_b); fprintf(stdout, "Variance reduction = %.2f%%\n", VR * 100); free_vector(rhs); } /********/ /* free */ /********/ if (vm) { free_velocity_model(vm); } free_mesh(mesh); free_sparse_matrix(sparseA); free_lsqr_mem(input, output, work, func); free_vector(solution); free_vector(std_error_sol); free(compress2fat); for (i = 0; i < nb_xmlfile; i++) { free(xmlfilelist[i]); free_mesh(imported_mesh[i]); } free(xmlfilelist); free(imported_mesh); return (0); }
float optimize_smoothness(WPt& worlds_pts, const IntensityPerImage& left_intensities, const IntensityPerImage& right_intensities) { // copy to globals //assert(fromVector.size() == toVector.size()); //assert(fromVector.size() >= 3); //_fromVector = fromVector; // worlds_pts.resize(3); if (worlds_pts.size() < 3) { // too little points to opitimize return -1; } double max_z = -DBL_MAX; for (auto w = 0; w < worlds_pts.size();++w) { max_z = std::max(max_z, worlds_pts[w][2]); // max_z = std::max(max_z, (worlds_pts[w][1])); } double adjustment_rate = 1; std::vector<double> lambdas; lambdas.resize(worlds_pts.size() - 2); for (auto w = 0; w < worlds_pts.size();++w) { if (((w - 1) >= 0) && ((w) < (worlds_pts.size() - 1))) { double z_top = worlds_pts[w - 1][2]; double z_bottom = worlds_pts[w + 1][2]; double z_0 = worlds_pts[w][2]; // double z_top = worlds_pts[w - 1][1]; // double z_bottom = worlds_pts[w + 1][1]; // double z_0 = worlds_pts[w][1]; // double delta_z = ((z_top - z_bottom) + (z_0 - z_top) - (z_bottom - z_0)) / max_z; double delta_z = ((z_0 - z_top) - (z_bottom - z_0)) / max_z; // double delta_z = 0; double omega_i = std::min(left_intensities[w], right_intensities[w]); omega_i /= 255.0; // omega_i = 1.0; // double lambda_i = (1.0 - delta_z) * omega_i * adjustment_rate; double lambda_i = 0.99; lambdas[w - 1] = lambda_i; double y_top = worlds_pts[w - 1][1]; double y_bottom = worlds_pts[w + 1][1]; double y_0 = worlds_pts[w][1]; double y = worlds_pts[w][1]; double y_prime = w; } } #ifdef DEBUG for (auto i = 0u; i < lambdas.size(); ++i) { std::cout << std::setprecision(15) << lambdas[i] << std::endl; } #endif // allocate to globals lambdas_g = lambdas; // allocate structures for sparse linear least squares //printf("\tallocating for sparse linear least squares " //"(%i vectors)...\n", fromVector.size()); int num_rows = worlds_pts.size() - 2; int num_cols = worlds_pts.size(); lsqr_input *input = NULL; lsqr_output *output = NULL; lsqr_work *work = NULL; lsqr_func *func = NULL; alloc_lsqr_mem(&input, &output, &work, &func, num_rows, num_cols); input->num_rows = num_rows; input->num_cols = num_cols; input->damp_val = 0.0; input->rel_mat_err = 0.0; input->rel_rhs_err = 0.0; input->cond_lim = 0.0; input->max_iter = 10*input->num_cols; input->lsqr_fp_out = NULL; func->mat_vec_prod = lsqr_eval_for_opt; // set rhs vec for (auto j = 0; j < num_rows; ++j) { // input->rhs_vec->elements[j] = (1.0 - lambdas[j]) * worlds_pts[j+1][1]; input->rhs_vec->elements[j] = (1.0 - lambdas[j]) * worlds_pts[j+1][2]; } // set initial sol vec for (auto i = 0u; i<num_cols; i++) { // input->sol_vec->elements[i] = worlds_pts[i][1]; input->sol_vec->elements[i] = 0; } // call sparse linear least squares! printf("\t\tstarting (rows=%i, cols=%i)...\n", num_rows, num_cols); lsqr(input, output, work, func, NULL); double error = output->resid_norm; printf("\t\ttermination reason = %i\n", output->term_flag); printf("\t\tnum function calls = %i\n", output->num_iters); printf("\t\tremaining error = %lf\n", error); if (worlds_pts.size() > 0) { // double y_prev = worlds_pts[0][1]; double y_prev = worlds_pts[0][2]; for (auto i = 1u; i < worlds_pts.size() - 1; ++i) { // auto& y = worlds_pts[i][1]; auto& y = worlds_pts[i][2]; y = output->sol_vec->elements[i]; double y_diff = y - y_prev; std::cout << "y difference " << i << " : "<< y_diff << std::endl; y_prev = y; } auto i = worlds_pts.size() - 1; // auto& y = worlds_pts[i][1]; auto& y = worlds_pts[i][2]; double y_diff = y - y_prev; std::cout << "y difference " << i << " : "<< y_diff << std::endl; } // free memory free_lsqr_mem(input, output, work, func); return (error); }
int LEVMAR( // functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n void (*func)( float* p, float* hx, int r, int c, void* adata ), // function to evaluate the Jacobian \part x / \part p void (*jacf)( float* p, SparseMatrix* j, int r, int c, void* adata ), float* p, // I/O: initial parameter estimates. On output has the estimated solution float* x, // I: measurement vector. NULL implies a zero vector int r, // I: measurement vector dimension int c, // I: parameter vector dimension (i.e. #unknowns) int itmax, // I: maximum number of iterations float opts[4], /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used. */ float info[LM_INFO_SZ], /* O: information regarding the minimization. Set to NULL if don't care info[0] = ||e||_2, at initial p. info[1] = ||e||_2, at estimated p. info[2] = ||J^T e||_inf, at estimated p. info[3] = ||Dp||_2, at estimated p. info[4] = mu/max[J^T * J]_ii, at estimated p. info[5] = # iterations, info[6] = reason for terminating: 1 - stopped by small gradient J^T e 2 - stopped by small Dp 3 - stopped by itmax 4 - singular matrix. Restart from current p with increased mu 5 - no further error reduction is possible. Restart with increased mu 6 - stopped by small ||e||_2 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error. info[7] = # function evaluations info[8] = # Jacobian evaluations info[9] = # linear systems solved, i.e. # attempts for reducing error. */ void* adata, // pointer to possibly additional data, passed uninterpreted to func & jacf. // Set to NULL if not needed. FILE* dout ) { SparseMatrix JAC; // sparse jac SparseMatrix JTJ; // sparse jac^T \times jac // temp work arrays float* epsilon_p; // r x 1 float* hx; // r x 1 \hat{x}_i float* jacTe; // c x 1 J^T * e_i float* Dp; // c x 1 float* diag_jacTjac; // c x 1 diagonal of [ J^T * J ] float* p_new; // c x 1 p + Dp float mu = 0.0f; // damping constant float tmp = 0.0f; // mainly used in matrix & vector multiplications float p_eL2 = 0.0f; // || e(p) ||_2 float jacTe_inf = 0.0f; // || J^T e ||_inf float pDp_eL2 = 0.0f; // || e(p+Dp) ||_2 float p_L2 = 0.0f; float Dp_L2 = FLT_MAX; float dF = 0.0f; float dL = 0.0f; float tau = LM_INIT_MU; float eps1 = LM_STOP_THRESH; float eps2 = LM_STOP_THRESH; float eps3 = LM_STOP_THRESH; float eps2_sq = LM_STOP_THRESH * LM_STOP_THRESH; float init_p_eL2= 0.0f; int i, k; int nu = 2, nu2 = 0, stop = 0; int nfev = 0, njev = 0, nlss = 0; gettimeofday( &startTime, NULL ); if ( r < c ) { fprintf( dout, "LEVMAR(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n", r, c ); return LM_ERROR*1; } if ( !jacf ) { fprintf( dout, "No function specified for computing the Jacobian in LEVMAR()\n" ); return LM_ERROR*2; } if ( opts ) { tau = opts[0]; eps1 = opts[1]; eps2 = opts[2]; eps3 = opts[3]; eps2_sq = eps2 * eps2; } // setup indices of JTJ, they're constant for all iterations. (*jacf)( p, &JAC, r, c, adata ); njev++; JAC.prepare_transpose_multiply( JTJ ); // allocate 2 * r + 4 * m floats; size_t total = LM_DER_WORKSZ( c, r ); if ( NULL == ( epsilon_p = (float*) malloc( total * sizeof(float) ) ) ) { fprintf( dout, "LEVMAR(): memory allocation request failed\n" ); return LM_ERROR*3; } /* Internal solver memory pointer pt, */ /* 32-bit: int pt[64]; 64-bit: long int pt[64] */ /* or void *pt[64] should be OK on both architectures */ void *pt[64]; /* Pardiso control parameters. */ double dparm[64]; int iparm[64]; int error, solver, mtype = -2; /* Real symmetric matrix */ /* -------------------------------------------------------------------- */ /* .. Setup Pardiso control parameters. */ /* -------------------------------------------------------------------- */ error = 0; solver = 0; /* use sparse direct solver */ F77_FUNC( pardisoinit )( pt, &mtype, &solver, iparm, dparm, &error ); if ( error != 0 ) { if (error == -10 ) fprintf( dout, "No license file found \n" ); if (error == -11 ) fprintf( dout, "License is expired \n" ); if (error == -12 ) fprintf( dout, "Wrong username or hostname \n"); return 0; } else fprintf( dout, "PARDISO license check was successful ... \n"); /* set up work arrays */ hx = epsilon_p + r; jacTe = hx + r; Dp = jacTe + c; diag_jacTjac = Dp + c; p_new = diag_jacTjac + c; /* compute epsilon_p = x - f(p) and its L2 norm */ (*func)( p, hx, r, c, adata ); nfev++; /* ### epsilon_p = x - hx, p_eL2 = ||epsilon_p|| */ for( i = 0, p_eL2 = 0.0f; i < r; ++i ) { tmp = -hx[i]; epsilon_p[i] = tmp; p_eL2 += tmp * tmp; } init_p_eL2 = p_eL2; if ( !finite( p_eL2 ) ) stop = 7; for ( k = 0; k < itmax && !stop; ++k ) { // Note that p and epsilon_p have been updated at a previous iteration if ( p_eL2 <= eps3 ) { // error is small stop = 6; break; } // Compute the Jacobian J at p, // [J^T \times J], // [J^T \times epsilon_p], // ||[J^T \times epsilon_p]||_inf and ||p||^2. (*jacf)( p, &JAC, r, c, adata ); njev++; #if DEBUG if ( c < 101 ) JAC.dump( true ); #endif // J^T \times J, J^T \times epsilon_p JAC.compute_transpose_multiply( JTJ ); #if DEBUG if ( c < 101 ) JTJ.dump(); #endif bzero( jacTe, c * sizeof( float ) ); for( i = 0; i < r; ++i ) { tmp = epsilon_p[ i ]; for ( int k = JAC.I_[ i ]; k < JAC.I_[ i + 1 ]; ++k ) { jacTe[ JAC.J_[ k ] ] += JAC.A_[ k ] * tmp; } } #if DEBUG if ( c < 101 ) { fprintf( dout, "VECTOR epsilon_p:\n" ); for ( i = 0; i < r; ++i ) { fprintf( dout, "%4d: %7.3f\n", i, epsilon_p[ i ] ); } fprintf( dout, "VECTOR jacTe:\n" ); for ( i = 0; i < c; ++i ) { fprintf( dout, "%4d: %7.3f\n", i, jacTe[ i ] ); } } #endif // Compute ||J^T \times epsilon_p||_inf and ||p||^2 for ( i = 0, p_L2 = jacTe_inf = 0.0f; i < c; ++i ) { tmp = FABS( jacTe[ i ] ); if ( jacTe_inf < tmp ) { jacTe_inf = tmp; } // save diagonal entries so that augmentation can be later canceled diag_jacTjac[ i ] = JTJ.A_[ JTJ.I_[ i ] ]; p_L2 += p[ i ] * p[ i ]; } // check for convergence if ( jacTe_inf <= eps1 ) { Dp_L2 = 0.0f; // no increment for p in this case stop = 1; break; } // compute initial damping factor if ( k == 0 ) { tmp = -FLT_MAX; // find max diagonal element for ( i = 0; i < c; ++i ) { if ( diag_jacTjac[i] > tmp ) { tmp = diag_jacTjac[ i ]; } } mu = tau * tmp; } // determine increment using adaptive damping while ( 1 ) { // augment normal equations for ( i = 0; i < c; ++i ) { JTJ.A_[ JTJ.I_[ i ] ] += mu; } #if 0 // solve augmented equations #if DEBUG float t1 = currentTime(); #endif pardiso_symmetric( JTJ, jacTe, Dp, dout, pt, iparm, dparm ); nlss++; #if DEBUG float t2 = currentTime(); fprintf( dout, "PARDISO time %f us\n", (t2 - t1) ); if ( c < 201 ) check_solution( JTJ, jacTe, Dp ); #endif #else { int istop, itn; float E_ = 1.0e-6f; float F_ = 1.0f / ( 10.0f * sqrtf( 1.0e-7f ) ); float an, ac, rn, ar, xn; float* v = (float*) malloc( c * 2 * sizeof(float) ); float* w = v + c; float t1 = currentTime(); lsqr( c, c, LSQRAPROD, 0, &JTJ, jacTe, v, w, Dp, 0, E_, E_, F_, 100, dout, &istop, &itn, &an, &ac, &rn, &ar, &xn ); nlss++; float t2 = currentTime(); fprintf( dout, "LSQR solver time %f us\n", (t2 - t1) ); if ( c < 201 ) check_solution( JTJ, jacTe, Dp ); free( v ); } #endif // compute p's new estimate and ||Dp||^2 for( i = 0, Dp_L2 = 0.0f; i < c; ++i ) { p_new[i] = p[i] + ( tmp = Dp[i] ); Dp_L2 += tmp * tmp; } // Dp_L2 = sqrt( Dp_L2 ); if ( Dp_L2 <= eps2_sq * p_L2 ) { // relative change in p is small stop = 2; break; } if ( Dp_L2 >= (p_L2 + eps2) / LM_EPSILON ) { // almost singular stop = 4; break; } // evaluate function at p + Dp (*func)( p_new, hx, r, c, adata ); nfev++; // compute ||e(p_new)||_2 // ### hx=x-hx, pDp_eL2=||hx|| for( i = 0, pDp_eL2 = 0.0; i < r; ++i ) { tmp = -hx[ i ]; hx[ i ] = tmp; pDp_eL2 += tmp * tmp; } if ( !finite( pDp_eL2 ) ) { // sum of squares is not finite, most probably due to a user error. // This check makes sure that the inner loop does not run indefinitely. stop = 7; break; } for ( i = 0, dL = 0.0f; i < c; ++i ) { dL += Dp[ i ] * ( mu * Dp[ i ] + jacTe[ i ] ); } dF = p_eL2 - pDp_eL2; // reduction in error, increment is accepted if ( dL > 0.0f && dF > 0.0f ) { tmp = ( 2.0f * dF / dL - 1.0f ); tmp = 1.0f - tmp * tmp * tmp; mu = mu * ( ( tmp >= LM_1_THIRD ) ? tmp : LM_1_THIRD ); nu = 2; // update p's estimate bcopy( p_new, p, c * sizeof(float) ); // update e and ||e||_2 bcopy( hx, epsilon_p, r * sizeof(float) ); p_eL2 = pDp_eL2; break; } // if this point is reached the error did not reduce; the increment must be rejected mu *= nu; nu2 = nu << 1; // 2 * nu; if( nu2 <= nu ) { // nu has wrapped around (overflown) stop = 5; break; } nu = nu2; // restore diagonal J^T J entries for ( i = 0; i < c; ++i ) { JTJ.A_[ JTJ.I_[ i ] ] = diag_jacTjac[ i ]; } } // inner loop } if ( k >= itmax ) stop = 3; if ( info ) { for ( i = 1, tmp = diag_jacTjac[ 0 ]; i < c; ++i ) { if ( tmp < diag_jacTjac[ i ] ) { tmp = diag_jacTjac[ i ]; } } info[0] = init_p_eL2; info[1] = p_eL2; info[2] = jacTe_inf; info[3] = Dp_L2; info[4] = mu / tmp; info[5] = (float)k; info[6] = (float)stop; info[7] = (float)nfev; info[8] = (float)njev; info[9] = (float)nlss; } free( epsilon_p ); return ( stop != 4 && stop != 7 ) ? k : LM_ERROR*5; }
double find_optimal_edge_zero_crossing(std::vector<cv::Point2f>& crossing_points) { // copy to globals //assert(fromVector.size() == toVector.size()); //assert(fromVector.size() >= 3); //_fromVector = fromVector; // worlds_pts.resize(3); if (crossing_points.size() < 3) { // too little points to opitimize return -1; } #ifdef DEBUG //for (auto i = 0u; i < lambdas.size(); ++i) { // std::cout << std::setprecision(15) << lambdas[i] << std::endl; //} #endif // allocate to globals crossing_points_g = crossing_points; // allocate structures for sparse linear least squares //printf("\tallocating for sparse linear least squares " //"(%i vectors)...\n", fromVector.size()); int num_rows = crossing_points.size(); int num_cols = 3; lsqr_input *input = NULL; lsqr_output *output = NULL; lsqr_work *work = NULL; lsqr_func *func = NULL; alloc_lsqr_mem(&input, &output, &work, &func, num_rows, num_cols); input->num_rows = num_rows; input->num_cols = num_cols; input->damp_val = 0.0; input->rel_mat_err = 0.0; input->rel_rhs_err = 0.0; input->cond_lim = 0.0; input->max_iter = 10*input->num_cols; input->lsqr_fp_out = NULL; func->mat_vec_prod = lsqr_eval_for_opt_; // set rhs vec for (auto j = 0; j < num_rows; ++j) { // input->rhs_vec->elements[j] = (1.0 - lambdas[j]) * worlds_pts[j+1][1]; input->rhs_vec->elements[j] = crossing_points[j].y; } // set initial sol vec for (auto i = 0u; i<num_cols; i++) { // input->sol_vec->elements[i] = worlds_pts[i][1]; input->sol_vec->elements[i] = 0; } // call sparse linear least squares! //printf("\t\tstarting (rows=%i, cols=%i)...\n", num_rows, num_cols); lsqr(input, output, work, func, NULL); double error = output->resid_norm; //printf("\t\ttermination reason = %i\n", output->term_flag); //printf("\t\tnum function calls = %i\n", output->num_iters); //printf("\t\tremaining error = %lf\n", error); double a = input->sol_vec->elements[0]; double b = input->sol_vec->elements[1]; double c = input->sol_vec->elements[2]; // solving for y = 0 double solution = 0.0; double discriminant = std::pow(b, 2) - (4 * a * c); if (discriminant < 0) { // something went wrong solution = -1; } else { double delta = std::sqrt(discriminant); double sol_1 = ((-1 * b) + delta) / (2 * a); double sol_2 = ((-1 * b) - delta) / (2 * a); if (sol_1 <= crossing_points[crossing_points.size() - 1].x && sol_1 >= crossing_points[0].x) { solution = sol_1; } else if (sol_2 <= crossing_points[crossing_points.size() - 1].x && sol_2 >= crossing_points[0].x) { solution = sol_2; } else { // something wrong happened solution = -1; } } // free memory free_lsqr_mem(input, output, work, func); return (solution); }