// abs(x'*y)/\sqrt(x'*x)/sqrt(y'*y) double dvec_dcos_abs(int n, const double *x, const double *y){ double norm_x,norm_y,dcos,dot; norm_x=dvec_norm2(n,x); norm_y=dvec_norm2(n,y); dot=dvec_dot(n,x,y); dcos=fabs(dot)/(norm_x*norm_y); return dcos; }
// x=x/sqrt(x'*x) where x[i]>0, abs(x[i]) is max void dvec_normalize_sgn(int n, double *x) { int k; double a=0; a=dvec_norm2(n,x); a=1.0/a; dvec_max_abs_index(n,x,&k); if(x[k]<0) a=-a; dvec_scale(n,x,a); }
int main(int argc, char *argv[]) { struct sparse_matrix_t *sparseA = NULL; struct vector_t *b = NULL; struct vector_t *x; struct mesh_t *mesh; char *xml_output; long int *compress2fat = NULL; struct vector_t *solution; struct vector_t *std_error_sol; long int fat_sol_nb_col; lsqr_input *input; lsqr_output *output; lsqr_work *work; /* zone temoraire de travail */ lsqr_func *func; /* func->mat_vec_prod -> APROD */ /* cmd line arg */ char *mesh_filename = NULL; char *importfilename = NULL; char *output_filename = NULL; char *sol_error_filename = NULL; char *log_filename = NULL; char *output_type = NULL; int max_iter; double damping, grad_damping; int use_ach = 0; /* ACH : tele-seismic inversion tomography */ int check_sparse = 0; /* check sparse matrix disable by default */ /* velocity model */ char *vmodel = NULL; struct velocity_model_t *vm = NULL; struct mesh_t **imported_mesh = NULL; char **xmlfilelist = NULL; int nb_xmlfile = 0; int i, j; int nb_irm = 0; struct irm_t **irm = NULL; int *nb_metacell = NULL; FILE *logfd; /*************************************************************/ parse_command_line(argc, argv, &mesh_filename, &vmodel, &importfilename, &log_filename, &output_filename, &output_type, &max_iter, &damping, &grad_damping, &use_ach, &check_sparse); if (use_ach) { fprintf(stderr, "Using ACH tomographic inversion\n"); } else { fprintf(stderr, "Using STANDARD tomographic inversion\n"); } /* load the velocity model */ if (vmodel) { char *myfile; vm = load_velocity_model(vmodel); if (!vm) { fprintf(stderr, "Can not initialize velocity model '%s'\n", vmodel); exit(1); } myfile = strdup(vmodel); fprintf(stderr, "Velocity model '%s' loaded\n", basename(myfile)); free(myfile); } else { vm = NULL; } /* Open log file */ if (!log_filename) { logfd = stdout; } else { if (!(logfd = fopen(log_filename, "w"))) { perror(log_filename); exit(1); } } /*check_write_access (output_filename); */ /**************************************/ /* test if we can open file to import */ /**************************************/ if (importfilename) { xmlfilelist = parse_separated_list(importfilename, ","); nb_xmlfile = 0; while (xmlfilelist[nb_xmlfile]) { if (access(xmlfilelist[nb_xmlfile], R_OK) == -1) { perror(xmlfilelist[nb_xmlfile]); exit(1); } nb_xmlfile++; } } else { fprintf(stderr, "No file to import ... exiting\n"); exit(0); } /****************************/ /* main mesh initialization */ /****************************/ mesh = mesh_init_from_file(mesh_filename); if (!mesh) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } fprintf(stderr, "read %s ok\n", mesh_filename); /*****************************************/ /* check and initialize slice xml files */ /*****************************************/ if (nb_xmlfile) { int nb_sparse = 0; int nb_res = 0; int f; imported_mesh = (struct mesh_t **) malloc(sizeof(struct mesh_t *) * nb_xmlfile); assert(imported_mesh); for (i = 0; i < nb_xmlfile; i++) { imported_mesh[i] = mesh_init_from_file(xmlfilelist[i]); if (!imported_mesh[i]) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } for (f = 0; f < NB_MESH_FILE_FORMAT; f++) { /* mandatory field : res, sparse, and irm if provided */ if (f == RES || f == SPARSE || f == IRM) { check_files_access(f, imported_mesh[i]->data[f], xmlfilelist[i]); } } if (imported_mesh[i]->data[SPARSE]) { nb_sparse += imported_mesh[i]->data[SPARSE]->ndatafile; } if (imported_mesh[i]->data[RES]) { nb_res += imported_mesh[i]->data[RES]->ndatafile; } if (imported_mesh[i]->data[IRM]) { nb_irm += imported_mesh[i]->data[IRM]->ndatafile; } } if (!nb_sparse || !nb_res) { fprintf(stderr, "Error no sparse or res file available !\n"); exit(0); } } /*********************************************/ /* read and import the sparse(s) matrix(ces) */ /*********************************************/ for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[SPARSE]) { continue; } for (j = 0; j < imported_mesh[i]->data[SPARSE]->ndatafile; j++) { sparseA = import_sparse_matrix(sparseA, imported_mesh[i]->data[SPARSE]-> filename[j]); } } if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } /*sparse_compute_length(sparseA, "length1.txt"); */ fat_sol_nb_col = sparseA->nb_col; show_sparse_stats(sparseA); /*********************************************/ /* read and import the residual time vector */ /*********************************************/ for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[RES]) { continue; } for (j = 0; j < imported_mesh[i]->data[RES]->ndatafile; j++) { b = import_vector(b, imported_mesh[i]->data[RES]->filename[j]); } } /*************************************************/ /* check compatibility between matrix and vector */ /*************************************************/ if (sparseA->nb_line != b->length) { fprintf(stderr, "Error, check your matrix/vector sizes (%ld/%ld)\n", sparseA->nb_line, b->length); exit(1); } /********************/ /* show memory used */ /********************/ #ifdef __APPLE__ { struct mstats memusage; memusage = mstats(); fprintf(stderr, "Memory used: %.2f MBytes\n", (float) (memusage.bytes_used) / (1024. * 1024)); } #else { struct mallinfo m_info; m_info = mallinfo(); fprintf(stderr, "Memory used: %.2f MBytes\n", (float) (m_info.uordblks + m_info.usmblks) / (1024. * 1024.)); } #endif /**************************************/ /* relative traveltime mode */ /**************************************/ if (use_ach) { int nb_evt_imported = 0; for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[EVT]) { continue; } for (j = 0; j < imported_mesh[i]->data[EVT]->ndatafile; j++) { relative_tt(sparseA, b, imported_mesh[i]->data[EVT]->filename[j]); nb_evt_imported++; } } if (!nb_evt_imported) { fprintf(stderr, "Error in ACH mode, can not import any .evt file !\n"); exit(1); } } /************************************************/ /* read the irregular mesh definition if needed */ /* one by layer */ /************************************************/ if (nb_irm) { int cpt = 0; struct mesh_offset_t **offset; int l; irm = (struct irm_t **) malloc(nb_irm * sizeof(struct irm_t *)); assert(irm); nb_metacell = (int *) calloc(nb_irm, sizeof(int)); assert(nb_metacell); make_mesh(mesh); for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[IRM]) { continue; } /* offset between meshes */ offset = compute_mesh_offset(mesh, imported_mesh[i]); for (l = 0; l < mesh->nlayers; l++) { if (!offset[l]) continue; fprintf(stderr, "\t%s, [%s] offset[layer=%d] : lat=%d lon=%d z=%d\n", xmlfilelist[i], MESH_FILE_FORMAT[IRM], l, offset[l]->lat, offset[l]->lon, offset[l]->z); } for (j = 0; j < imported_mesh[i]->data[IRM]->ndatafile; j++) { /* FIXME: read only once the irm file */ irm[cpt] = read_irm(imported_mesh[i]->data[IRM]->filename[j], &(nb_metacell[cpt])); import2mesh_irm_file(mesh, imported_mesh[i]->data[IRM]-> filename[j], offset); cpt++; } for (l = 0; l < mesh->nlayers; l++) { if (offset[l]) free(offset[l]); } free(offset); } metacell_find_neighbourhood(mesh); } /*sparse_compute_length(sparseA, "length1.txt"); */ fat_sol_nb_col = sparseA->nb_col; show_sparse_stats(sparseA); /***********************/ /* remove empty column */ /***********************/ fprintf(stderr, "starting compression ...\n"); sparse_compress_column(mesh, sparseA, &compress2fat); if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } show_sparse_stats(sparseA); /***************************************/ /* add gradient damping regularisation */ /***************************************/ if (fabs(grad_damping) > 1.e-6) { int nb_faces = 6; /* 1 cell may have 6 neighbours */ long int nb_lines = 0; char *regul_name; fprintf(stdout, "using gradient damping : %f\n", grad_damping); /* tmp file name */ regul_name = tempnam("/tmp", "regul"); if (!regul_name) { perror("lsqrsolve: "); exit(1); } if (nb_irm) { create_regul_DtD_irm(sparseA, compress2fat, mesh, regul_name, nb_faces, grad_damping, &nb_lines); } else { create_regul_DtD(sparseA, compress2fat, mesh, regul_name, nb_faces, grad_damping, &nb_lines); } sparse_matrix_resize(sparseA, sparseA->nb_line + sparseA->nb_col, sparseA->nb_col); sparseA = import_sparse_matrix(sparseA, regul_name); if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } vector_resize(b, sparseA->nb_line); unlink(regul_name); show_sparse_stats(sparseA); } /*********************************/ /* the real mesh is no more used */ /* keep only the light mesh */ /*********************************/ fprintf(stdout, "Time to free the real mesh and keep only the light structure\n"); free_mesh(mesh); mesh = mesh_init_from_file(mesh_filename); if (!mesh) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } fprintf(stderr, "read %s ok\n", mesh_filename); /********************************/ /* init vector solution to zero */ /********************************/ x = new_vector(sparseA->nb_col); /*************************************************************/ /* solve A.x = B */ /* A = ray length in the cells */ /* B = residual travel time observed - computed */ /* x solution to satisfy the lsqr problem */ /*************************************************************/ /* LSQR alloc */ alloc_lsqr_mem(&input, &output, &work, &func, sparseA->nb_line, sparseA->nb_col); fprintf(stderr, "alloc_lsqr_mem : ok\n"); /* defines the routine Mat.Vect to use */ func->mat_vec_prod = sparseMATRIXxVECTOR; /* Set the input parameters for LSQR */ input->num_rows = sparseA->nb_line; input->num_cols = sparseA->nb_col; input->rel_mat_err = 1.0e-3; /* in km */ input->rel_rhs_err = 1.0e-2; /* in seconde */ /*input->rel_mat_err = 0.; input->rel_rhs_err = 0.; */ input->cond_lim = .0; input->lsqr_fp_out = logfd; /* input->rhs_vec = (dvec *) b; */ dvec_copy((dvec *) b, input->rhs_vec); input->sol_vec = (dvec *) x; /* initial guess */ input->damp_val = damping; if (max_iter == -1) { input->max_iter = 4 * (sparseA->nb_col); } else { input->max_iter = max_iter; } /* catch Ctrl-C signal */ signal(SIGINT, emergency_halt); /******************************/ /* resolution du systeme Ax=b */ /******************************/ lsqr(input, output, work, func, sparseA); fprintf(stderr, "*** lsqr ended (%ld iter) : %s\n", output->num_iters, lsqr_msg[output->term_flag]); if (output->term_flag == 0) { /* solution x=x0 */ exit(0); } /* uncompress the solution */ solution = uncompress_column((struct vector_t *) output->sol_vec, compress2fat, fat_sol_nb_col); /* uncompress the standard error on solution */ std_error_sol = uncompress_column((struct vector_t *) output->std_err_vec, compress2fat, fat_sol_nb_col); /* if irm file was provided, set the right value to each cell * from a given metacell */ if (irm) { irm_update(solution, irm, nb_metacell, nb_irm, mesh); free_irm(irm, nb_irm); free(nb_metacell); } /* write solution */ if (strchr(output_type, 'm')) { export2matlab(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); } if (strchr(output_type, 's')) { export2sco(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); } if (strchr(output_type, 'g')) { /* solution */ export2gmt(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); /* error on solution */ sol_error_filename = (char *) malloc(sizeof(char) * (strlen(output_filename) + strlen(".err") + 1)); sprintf(sol_error_filename, "%s.err", output_filename); export2gmt(std_error_sol, sol_error_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); free(sol_error_filename); } /* save the xml enrichied with sections */ xml_output = (char *) malloc((strlen(output_filename) + strlen(".xml") + 1) * sizeof(char)); assert(xml_output); sprintf(xml_output, "%s.xml", output_filename); mesh2xml(mesh, xml_output); free(xml_output); /******************************************************/ /* variance reduction, ie how the model fits the data */ /* X = the final solution */ /* */ /* ||b-AX||² */ /* VR= 1 - -------- */ /* ||b||² */ /* */ /******************************************************/ { double norm_b; double norm_b_AX; double VR; /* variance reduction */ struct vector_t *rhs; /* right hand side */ rhs = new_vector(sparseA->nb_line); /* use copy */ dvec_copy((dvec *) b, (dvec *) rhs); norm_b = dvec_norm2((dvec *) rhs); /* does rhs = rhs + sparseA . output->sol_vec */ /* here rhs is overwritten */ dvec_scale((-1.0), (dvec *) rhs); sparseMATRIXxVECTOR(0, output->sol_vec, (dvec *) rhs, sparseA); dvec_scale((-1.0), (dvec *) rhs); norm_b_AX = dvec_norm2((dvec *) rhs); VR = 1 - (norm_b_AX * norm_b_AX) / (norm_b * norm_b); fprintf(stdout, "Variance reduction = %.2f%%\n", VR * 100); free_vector(rhs); } /********/ /* free */ /********/ if (vm) { free_velocity_model(vm); } free_mesh(mesh); free_sparse_matrix(sparseA); free_lsqr_mem(input, output, work, func); free_vector(solution); free_vector(std_error_sol); free(compress2fat); for (i = 0; i < nb_xmlfile; i++) { free(xmlfilelist[i]); free_mesh(imported_mesh[i]); } free(xmlfilelist); free(imported_mesh); return (0); }
// x=x/sqrt(x'*x) void dvec_normalize(int n, double *x) { double norm=0; norm=dvec_norm2(n,x); dvec_scale(n,x,(1.0/norm)); }
/* *------------------------------------------------------------------------------ * * LSQR finds a solution x to the following problems: * * 1. Unsymmetric equations -- solve A*x = b * * 2. Linear least squares -- solve A*x = b * in the least-squares sense * * 3. Damped least squares -- solve ( A )*x = ( b ) * ( damp*I ) ( 0 ) * in the least-squares sense * * where 'A' is a matrix with 'm' rows and 'n' columns, 'b' is an * 'm'-vector, and 'damp' is a scalar. (All quantities are real.) * The matrix 'A' is intended to be large and sparse. * * * Notation * -------- * * The following quantities are used in discussing the subroutine * parameters: * * 'Abar' = ( A ), 'bbar' = ( b ) * ( damp*I ) ( 0 ) * * 'r' = b - A*x, 'rbar' = bbar - Abar*x * * 'rnorm' = sqrt( norm(r)**2 + damp**2 * norm(x)**2 ) * = norm( rbar ) * * 'rel_prec' = the relative precision of floating-point arithmetic * on the machine being used. Typically 2.22e-16 * with 64-bit arithmetic. * * LSQR minimizes the function 'rnorm' with respect to 'x'. * * * References * ---------- * * C.C. Paige and M.A. Saunders, LSQR: An algorithm for sparse * linear equations and sparse least squares, * ACM Transactions on Mathematical Software 8, 1 (March 1982), * pp. 43-71. * * C.C. Paige and M.A. Saunders, Algorithm 583, LSQR: Sparse * linear equations and least-squares problems, * ACM Transactions on Mathematical Software 8, 2 (June 1982), * pp. 195-209. * * C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh, * Basic linear algebra subprograms for Fortran usage, * ACM Transactions on Mathematical Software 5, 3 (Sept 1979), * pp. 308-323 and 324-325. * *------------------------------------------------------------------------------ */ void lsqr( lsqr_input *input, lsqr_output *output, lsqr_work *work, lsqr_func *func, void *prod, int (*per_iteration_callback)(lsqr_input*, lsqr_output*, void* token), void* token) { double dvec_norm2( dvec * ); long indx, term_iter, term_iter_max; double alpha, beta, rhobar, phibar, bnorm, bbnorm, cs1, sn1, psi, rho, cs, sn, theta, phi, tau, ddnorm, delta, gammabar, zetabar, gamma, cs2, sn2, zeta, xxnorm, res, resid_tol, cond_tol, resid_tol_mach, temp, stop_crit_1, stop_crit_2, stop_crit_3; static char term_msg[8][80] = { "The exact solution is x = x0", "The residual Ax - b is small enough, given ATOL and BTOL", "The least squares error is small enough, given ATOL", "The estimated condition number has exceeded CONLIM", "The residual Ax - b is small enough, given machine precision", "The least squares error is small enough, given machine precision", "The estimated condition number has exceeded machine precision", "The iteration limit has been reached" }; if( input->lsqr_fp_out != NULL ) fprintf( input->lsqr_fp_out, " Least Squares Solution of A*x = b\n" " The matrix A has %7li rows and %7li columns\n" " The damping parameter is\tDAMP = %10.2e\n" " ATOL = %10.2e\t\tCONDLIM = %10.2e\n" " BTOL = %10.2e\t\tITERLIM = %10li\n\n", input->num_rows, input->num_cols, input->damp_val, input->rel_mat_err, input->cond_lim, input->rel_rhs_err, input->max_iter ); output->term_flag = 0; term_iter = 0; output->num_iters = 0; output->frob_mat_norm = 0.0; output->mat_cond_num = 0.0; output->sol_norm = 0.0; for(indx = 0; indx < input->num_cols; indx++) { work->bidiag_wrk_vec->elements[indx] = 0.0; work->srch_dir_vec->elements[indx] = 0.0; output->std_err_vec->elements[indx] = 0.0; output->sol_vec->elements[indx] = 0.0; } bbnorm = 0.0; ddnorm = 0.0; xxnorm = 0.0; cs2 = -1.0; sn2 = 0.0; zeta = 0.0; res = 0.0; if( input->cond_lim > 0.0 ) cond_tol = 1.0 / input->cond_lim; else cond_tol = DBL_EPSILON; alpha = 0.0; beta = 0.0; /* * Set up the initial vectors u and v for bidiagonalization. These satisfy * the relations * BETA*u = b - A*x0 * ALPHA*v = A^T*u */ /* Compute b - A*x0 and store in vector u which initially held vector b */ dvec_scale( (-1.0), input->rhs_vec ); func->mat_vec_prod( 0, input->sol_vec, input->rhs_vec, prod ); dvec_scale( (-1.0), input->rhs_vec ); /* compute Euclidean length of u and store as BETA */ beta = dvec_norm2( input->rhs_vec ); if( beta > 0.0 ) { /* scale vector u by the inverse of BETA */ dvec_scale( (1.0 / beta), input->rhs_vec ); /* Compute matrix-vector product A^T*u and store it in vector v */ func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod ); /* compute Euclidean length of v and store as ALPHA */ alpha = dvec_norm2( work->bidiag_wrk_vec ); } if( alpha > 0.0 ) { /* scale vector v by the inverse of ALPHA */ dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec ); /* copy vector v to vector w */ dvec_copy( work->bidiag_wrk_vec, work->srch_dir_vec ); } output->mat_resid_norm = alpha * beta; output->resid_norm = beta; bnorm = beta; /* * If the norm || A^T r || is zero, then the initial guess is the exact * solution. Exit and report this. */ if( (output->mat_resid_norm == 0.0) && (input->lsqr_fp_out != NULL) ) { fprintf( input->lsqr_fp_out, "\tISTOP = %3li\t\t\tITER = %9li\n" " || A ||_F = %13.5e\tcond( A ) = %13.5e\n" " || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n" " || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n", output->term_flag, output->num_iters, output->frob_mat_norm, output->mat_cond_num, output->resid_norm, output->mat_resid_norm, bnorm, output->sol_norm ); fprintf( input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]); return; } rhobar = alpha; phibar = beta; /* * If statistics are printed at each iteration, print a header and the initial * values for each quantity. */ if( input->lsqr_fp_out != NULL ) { fprintf( input->lsqr_fp_out, " ITER || r || Compatible " "||A^T r|| / ||A|| ||r|| || A || cond( A )\n\n" ); stop_crit_1 = 1.0; stop_crit_2 = alpha / beta; fprintf( input->lsqr_fp_out, "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n", output->num_iters, output->resid_norm, stop_crit_1, stop_crit_2, output->frob_mat_norm, output->mat_cond_num); } /* * The main iteration loop is continued as long as no stopping criteria * are satisfied and the number of total iterations is less than some upper * bound. */ while( output->term_flag == 0 ) { output->num_iters++; /* * Perform the next step of the bidiagonalization to obtain * the next vectors u and v, and the scalars ALPHA and BETA. * These satisfy the relations * BETA*u = A*v - ALPHA*u, * ALFA*v = A^T*u - BETA*v. */ /* scale vector u by the negative of ALPHA */ dvec_scale( (-alpha), input->rhs_vec ); /* compute A*v - ALPHA*u and store in vector u */ func->mat_vec_prod( 0, work->bidiag_wrk_vec, input->rhs_vec, prod ); /* compute Euclidean length of u and store as BETA */ beta = dvec_norm2( input->rhs_vec ); /* accumulate this quantity to estimate Frobenius norm of matrix A */ /* bbnorm += sqr(alpha) + sqr(beta) + sqr(input->damp_val);*/ bbnorm += alpha*alpha + beta*beta + input->damp_val*input->damp_val; if( beta > 0.0 ) { /* scale vector u by the inverse of BETA */ dvec_scale( (1.0 / beta), input->rhs_vec ); /* scale vector v by the negative of BETA */ dvec_scale( (-beta), work->bidiag_wrk_vec ); /* compute A^T*u - BETA*v and store in vector v */ func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod ); /* compute Euclidean length of v and store as ALPHA */ alpha = dvec_norm2( work->bidiag_wrk_vec ); if( alpha > 0.0 ) /* scale vector v by the inverse of ALPHA */ dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec ); } /* * Use a plane rotation to eliminate the damping parameter. * This alters the diagonal (RHOBAR) of the lower-bidiagonal matrix. */ cs1 = rhobar / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ); sn1 = input->damp_val / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ); psi = sn1 * phibar; phibar = cs1 * phibar; /* * Use a plane rotation to eliminate the subdiagonal element (BETA) * of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix. */ rho = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) + lsqr_sqr(beta) ); cs = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ) / rho; sn = beta / rho; theta = sn * alpha; rhobar = -cs * alpha; phi = cs * phibar; phibar = sn * phibar; tau = sn * phi; /* * Update the solution vector x, the search direction vector w, and the * standard error estimates vector se. */ for(indx = 0; indx < input->num_cols; indx++) { /* update the solution vector x */ output->sol_vec->elements[indx] += (phi / rho) * work->srch_dir_vec->elements[indx]; /* update the standard error estimates vector se */ output->std_err_vec->elements[indx] += lsqr_sqr( (1.0 / rho) * work->srch_dir_vec->elements[indx] ); /* accumulate this quantity to estimate condition number of A */ ddnorm += lsqr_sqr( (1.0 / rho) * work->srch_dir_vec->elements[indx] ); /* update the search direction vector w */ work->srch_dir_vec->elements[indx] = work->bidiag_wrk_vec->elements[indx] - (theta / rho) * work->srch_dir_vec->elements[indx]; } /* * Use a plane rotation on the right to eliminate the super-diagonal element * (THETA) of the upper-bidiagonal matrix. Then use the result to estimate * the solution norm || x ||. */ delta = sn2 * rho; gammabar = -cs2 * rho; zetabar = (phi - delta * zeta) / gammabar; /* compute an estimate of the solution norm || x || */ output->sol_norm = sqrt( xxnorm + lsqr_sqr(zetabar) ); gamma = sqrt( lsqr_sqr(gammabar) + lsqr_sqr(theta) ); cs2 = gammabar / gamma; sn2 = theta / gamma; zeta = (phi - delta * zeta) / gamma; /* accumulate this quantity to estimate solution norm || x || */ xxnorm += lsqr_sqr(zeta); /* * Estimate the Frobenius norm and condition of the matrix A, and the * Euclidean norms of the vectors r and A^T*r. */ output->frob_mat_norm = sqrt( bbnorm ); output->mat_cond_num = output->frob_mat_norm * sqrt( ddnorm ); res += lsqr_sqr(psi); output->resid_norm = sqrt( lsqr_sqr(phibar) + res ); output->mat_resid_norm = alpha * fabs( tau ); /* * Use these norms to estimate the values of the three stopping criteria. */ stop_crit_1 = output->resid_norm / bnorm; stop_crit_2 = 0.0; if( output->resid_norm > 0.0 ) stop_crit_2 = output->mat_resid_norm / ( output->frob_mat_norm * output->resid_norm ); stop_crit_3 = 1.0 / output->mat_cond_num; /* 05 Jul 2007: Bug reported by Joel Erickson <*****@*****.**>. */ resid_tol = input->rel_rhs_err + input->rel_mat_err * output->frob_mat_norm * /* (not output->mat_resid_norm *) */ output->sol_norm / bnorm; resid_tol_mach = DBL_EPSILON + DBL_EPSILON * output->frob_mat_norm * /* (not output->mat_resid_norm *) */ output->sol_norm / bnorm; /* * Check to see if any of the stopping criteria are satisfied. * First compare the computed criteria to the machine precision. * Second compare the computed criteria to the the user specified precision. */ /* iteration limit reached */ if( output->num_iters >= input->max_iter ) output->term_flag = 7; /* condition number greater than machine precision */ if( stop_crit_3 <= DBL_EPSILON ) output->term_flag = 6; /* least squares error less than machine precision */ if( stop_crit_2 <= DBL_EPSILON ) output->term_flag = 5; /* residual less than a function of machine precision */ if( stop_crit_1 <= resid_tol_mach ) output->term_flag = 4; /* condition number greater than CONLIM */ if( stop_crit_3 <= cond_tol ) output->term_flag = 3; /* least squares error less than ATOL */ if( stop_crit_2 <= input->rel_mat_err ) output->term_flag = 2; /* residual less than a function of ATOL and BTOL */ if( stop_crit_1 <= resid_tol ) output->term_flag = 1; /* * If statistics are printed at each iteration, print a header and the initial * values for each quantity. */ if( input->lsqr_fp_out != NULL ) { fprintf( input->lsqr_fp_out, "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n", output->num_iters, output->resid_norm, stop_crit_1, stop_crit_2, output->frob_mat_norm, output->mat_cond_num); } /* * The convergence criteria are required to be met on NCONV consecutive * iterations, where NCONV is set below. Suggested values are 1, 2, or 3. */ if( output->term_flag == 0 ) term_iter = -1; term_iter_max = 1; term_iter++; if( (term_iter < term_iter_max) && (output->num_iters < input->max_iter) ) output->term_flag = 0; if (per_iteration_callback) per_iteration_callback(input, output, token); } /* end while loop */ /* * Finish computing the standard error estimates vector se. */ temp = 1.0; if( input->num_rows > input->num_cols ) temp = ( double ) ( input->num_rows - input->num_cols ); if( lsqr_sqr(input->damp_val) > 0.0 ) temp = ( double ) ( input->num_rows ); temp = output->resid_norm / sqrt( temp ); for(indx = 0; indx < input->num_cols; indx++) /* update the standard error estimates vector se */ output->std_err_vec->elements[indx] = temp * sqrt( output->std_err_vec->elements[indx] ); /* * If statistics are printed at each iteration, print the statistics for the * stopping condition. */ if( input->lsqr_fp_out != NULL ) { fprintf( input->lsqr_fp_out, "\n\tISTOP = %3li\t\t\tITER = %9li\n" " || A ||_F = %13.5e\tcond( A ) = %13.5e\n" " || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n" " || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n", output->term_flag, output->num_iters, output->frob_mat_norm, output->mat_cond_num, output->resid_norm, output->mat_resid_norm, bnorm, output->sol_norm ); fprintf( input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]); } return; }