Exemple #1
0
// abs(x'*y)/\sqrt(x'*x)/sqrt(y'*y)
double dvec_dcos_abs(int n, const double *x, const double *y){
  double norm_x,norm_y,dcos,dot;
  norm_x=dvec_norm2(n,x);
  norm_y=dvec_norm2(n,y);
  dot=dvec_dot(n,x,y);
  dcos=fabs(dot)/(norm_x*norm_y);
  return dcos;
}
Exemple #2
0
// x=x/sqrt(x'*x) where x[i]>0, abs(x[i]) is max
void dvec_normalize_sgn(int n, double *x)
{
  int k;
  double a=0;
  a=dvec_norm2(n,x);
  a=1.0/a;
  dvec_max_abs_index(n,x,&k);
  if(x[k]<0) a=-a;
  dvec_scale(n,x,a);
}
Exemple #3
0
int main(int argc, char *argv[])
{
    struct sparse_matrix_t *sparseA = NULL;
    struct vector_t *b = NULL;
    struct vector_t *x;
    struct mesh_t *mesh;
    char *xml_output;

    long int *compress2fat = NULL;
    struct vector_t *solution;
    struct vector_t *std_error_sol;
    long int fat_sol_nb_col;

    lsqr_input *input;
    lsqr_output *output;
    lsqr_work *work;            /* zone temoraire de travail */
    lsqr_func *func;            /* func->mat_vec_prod -> APROD */

    /* cmd line arg */
    char *mesh_filename = NULL;
    char *importfilename = NULL;
    char *output_filename = NULL;
    char *sol_error_filename = NULL;
    char *log_filename = NULL;
    char *output_type = NULL;
    int max_iter;
    double damping, grad_damping;
    int use_ach = 0;            /* ACH : tele-seismic inversion tomography */
    int check_sparse = 0;       /* check sparse matrix disable by default  */

    /* velocity model */
    char *vmodel = NULL;
    struct velocity_model_t *vm = NULL;

    struct mesh_t **imported_mesh = NULL;
    char **xmlfilelist = NULL;
    int nb_xmlfile = 0;

    int i, j;

    int nb_irm = 0;
    struct irm_t **irm = NULL;
    int *nb_metacell = NULL;

    FILE *logfd;

    /*************************************************************/
    parse_command_line(argc, argv,
                       &mesh_filename,
                       &vmodel,
                       &importfilename,
                       &log_filename,
                       &output_filename, &output_type,
                       &max_iter,
                       &damping, &grad_damping, &use_ach, &check_sparse);

    if (use_ach) {
        fprintf(stderr, "Using ACH tomographic inversion\n");
    } else {
        fprintf(stderr, "Using STANDARD tomographic inversion\n");
    }

    /* load the velocity model */
    if (vmodel) {
        char *myfile;
        vm = load_velocity_model(vmodel);
        if (!vm) {
            fprintf(stderr, "Can not initialize velocity model '%s'\n",
                    vmodel);
            exit(1);
        }
        myfile = strdup(vmodel);
        fprintf(stderr, "Velocity model '%s' loaded\n", basename(myfile));
        free(myfile);
    } else {
        vm = NULL;
    }

    /* Open log file */
    if (!log_filename) {
        logfd = stdout;
    } else {
        if (!(logfd = fopen(log_filename, "w"))) {
            perror(log_filename);
            exit(1);
        }
    }

    /*check_write_access (output_filename); */

    /**************************************/
    /* test if we can open file to import */
    /**************************************/
    if (importfilename) {
        xmlfilelist = parse_separated_list(importfilename, ",");
        nb_xmlfile = 0;
        while (xmlfilelist[nb_xmlfile]) {
            if (access(xmlfilelist[nb_xmlfile], R_OK) == -1) {
                perror(xmlfilelist[nb_xmlfile]);
                exit(1);
            }
            nb_xmlfile++;
        }
    } else {
        fprintf(stderr, "No file to import ... exiting\n");
        exit(0);
    }

    /****************************/
    /* main mesh initialization */
    /****************************/
    mesh = mesh_init_from_file(mesh_filename);
    if (!mesh) {
        fprintf(stderr, "Error decoding %s.\n", mesh_filename);
        exit(1);
    }
    fprintf(stderr, "read %s ok\n", mesh_filename);

    /*****************************************/
    /* check and initialize slice xml files  */
    /*****************************************/
    if (nb_xmlfile) {
        int nb_sparse = 0;
        int nb_res = 0;
        int f;

        imported_mesh =
            (struct mesh_t **) malloc(sizeof(struct mesh_t *) *
                                      nb_xmlfile);
        assert(imported_mesh);

        for (i = 0; i < nb_xmlfile; i++) {
            imported_mesh[i] = mesh_init_from_file(xmlfilelist[i]);
            if (!imported_mesh[i]) {
                fprintf(stderr, "Error decoding %s.\n", mesh_filename);
                exit(1);
            }
            for (f = 0; f < NB_MESH_FILE_FORMAT; f++) {
                /* mandatory field : res, sparse, and irm if provided */
                if (f == RES || f == SPARSE || f == IRM) {
                    check_files_access(f, imported_mesh[i]->data[f],
                                       xmlfilelist[i]);
                }
            }
            if (imported_mesh[i]->data[SPARSE]) {
                nb_sparse += imported_mesh[i]->data[SPARSE]->ndatafile;
            }

            if (imported_mesh[i]->data[RES]) {
                nb_res += imported_mesh[i]->data[RES]->ndatafile;
            }

            if (imported_mesh[i]->data[IRM]) {
                nb_irm += imported_mesh[i]->data[IRM]->ndatafile;
            }

        }

        if (!nb_sparse || !nb_res) {
            fprintf(stderr, "Error no sparse or res file available !\n");
            exit(0);
        }
    }

    /*********************************************/
    /* read and import the sparse(s) matrix(ces) */
    /*********************************************/
    for (i = 0; i < nb_xmlfile; i++) {
        if (!imported_mesh[i]->data[SPARSE]) {
            continue;
        }

        for (j = 0; j < imported_mesh[i]->data[SPARSE]->ndatafile; j++) {
            sparseA = import_sparse_matrix(sparseA,
                                           imported_mesh[i]->data[SPARSE]->
                                           filename[j]);
        }
    }

    if (check_sparse) {
        if (check_sparse_matrix(sparseA)) {
            exit(1);
        }
    }

    /*sparse_compute_length(sparseA, "length1.txt"); */
    fat_sol_nb_col = sparseA->nb_col;
    show_sparse_stats(sparseA);

    /*********************************************/
    /* read and import the residual time vector  */
    /*********************************************/
    for (i = 0; i < nb_xmlfile; i++) {
        if (!imported_mesh[i]->data[RES]) {
            continue;
        }

        for (j = 0; j < imported_mesh[i]->data[RES]->ndatafile; j++) {
            b = import_vector(b, imported_mesh[i]->data[RES]->filename[j]);
        }
    }

    /*************************************************/
    /* check compatibility between matrix and vector */
    /*************************************************/
    if (sparseA->nb_line != b->length) {
        fprintf(stderr,
                "Error, check your matrix/vector sizes (%ld/%ld)\n",
                sparseA->nb_line, b->length);
        exit(1);
    }

    /********************/
    /* show memory used */
    /********************/
#ifdef __APPLE__
    {
        struct mstats memusage;

        memusage = mstats();
        fprintf(stderr, "Memory used: %.2f MBytes\n",
                (float) (memusage.bytes_used) / (1024. * 1024));
    }
#else
    {
        struct mallinfo m_info;

        m_info = mallinfo();
        fprintf(stderr, "Memory used: %.2f MBytes\n",
                (float) (m_info.uordblks +
                         m_info.usmblks) / (1024. * 1024.));
    }
#endif

    /**************************************/
    /* relative traveltime mode           */
    /**************************************/
    if (use_ach) {
        int nb_evt_imported = 0;

        for (i = 0; i < nb_xmlfile; i++) {
            if (!imported_mesh[i]->data[EVT]) {
                continue;
            }

            for (j = 0; j < imported_mesh[i]->data[EVT]->ndatafile; j++) {
                relative_tt(sparseA, b,
                            imported_mesh[i]->data[EVT]->filename[j]);
                nb_evt_imported++;
            }
        }

        if (!nb_evt_imported) {
            fprintf(stderr,
                    "Error in ACH mode, can not import any .evt file !\n");
            exit(1);
        }
    }

    /************************************************/
    /* read the irregular mesh definition if needed */
    /* one by layer                                 */
    /************************************************/
    if (nb_irm) {
        int cpt = 0;
        struct mesh_offset_t **offset;
        int l;

        irm = (struct irm_t **) malloc(nb_irm * sizeof(struct irm_t *));
        assert(irm);
        nb_metacell = (int *) calloc(nb_irm, sizeof(int));
        assert(nb_metacell);

        make_mesh(mesh);

        for (i = 0; i < nb_xmlfile; i++) {
            if (!imported_mesh[i]->data[IRM]) {
                continue;
            }

            /* offset between meshes */
            offset = compute_mesh_offset(mesh, imported_mesh[i]);
            for (l = 0; l < mesh->nlayers; l++) {
                if (!offset[l])
                    continue;
                fprintf(stderr,
                        "\t%s, [%s] offset[layer=%d] : lat=%d lon=%d z=%d\n",
                        xmlfilelist[i], MESH_FILE_FORMAT[IRM], l,
                        offset[l]->lat, offset[l]->lon, offset[l]->z);
            }

            for (j = 0; j < imported_mesh[i]->data[IRM]->ndatafile; j++) {
                /* FIXME: read only once the irm file */
                irm[cpt] =
                    read_irm(imported_mesh[i]->data[IRM]->filename[j],
                             &(nb_metacell[cpt]));
                import2mesh_irm_file(mesh,
                                     imported_mesh[i]->data[IRM]->
                                     filename[j], offset);
                cpt++;
            }

            for (l = 0; l < mesh->nlayers; l++) {
                if (offset[l])
                    free(offset[l]);
            }
            free(offset);
        }
        metacell_find_neighbourhood(mesh);
    }

    /*sparse_compute_length(sparseA, "length1.txt"); */
    fat_sol_nb_col = sparseA->nb_col;
    show_sparse_stats(sparseA);

    /***********************/
    /* remove empty column */
    /***********************/
    fprintf(stderr, "starting compression ...\n");
    sparse_compress_column(mesh, sparseA, &compress2fat);
    if (check_sparse) {
        if (check_sparse_matrix(sparseA)) {
            exit(1);
        }
    }
    show_sparse_stats(sparseA);

    /***************************************/
    /* add gradient damping regularisation */
    /***************************************/
    if (fabs(grad_damping) > 1.e-6) {
        int nb_faces = 6;       /* 1 cell may have 6 neighbours */
        long int nb_lines = 0;
        char *regul_name;

        fprintf(stdout, "using gradient damping : %f\n", grad_damping);

        /* tmp file name */
        regul_name = tempnam("/tmp", "regul");
        if (!regul_name) {
            perror("lsqrsolve: ");
            exit(1);
        }

        if (nb_irm) {
            create_regul_DtD_irm(sparseA, compress2fat,
                                 mesh, regul_name, nb_faces,
                                 grad_damping, &nb_lines);
        } else {
            create_regul_DtD(sparseA, compress2fat,
                             mesh, regul_name, nb_faces,
                             grad_damping, &nb_lines);
        }

        sparse_matrix_resize(sparseA,
                             sparseA->nb_line + sparseA->nb_col,
                             sparseA->nb_col);

        sparseA = import_sparse_matrix(sparseA, regul_name);
        if (check_sparse) {
            if (check_sparse_matrix(sparseA)) {
                exit(1);
            }
        }
        vector_resize(b, sparseA->nb_line);
        unlink(regul_name);

        show_sparse_stats(sparseA);
    }

    /*********************************/
    /* the real mesh is no more used */
    /* keep only the light mesh      */
    /*********************************/
    fprintf(stdout,
            "Time to free the real mesh and keep only the light structure\n");
    free_mesh(mesh);
    mesh = mesh_init_from_file(mesh_filename);
    if (!mesh) {
        fprintf(stderr, "Error decoding %s.\n", mesh_filename);
        exit(1);
    }
    fprintf(stderr, "read %s ok\n", mesh_filename);

    /********************************/
    /* init vector solution to zero */
    /********************************/
    x = new_vector(sparseA->nb_col);

    /*************************************************************/
    /* solve A.x = B                                             */
    /* A = ray length in the cells                               */
    /* B = residual travel time observed - computed              */
    /* x solution to satisfy the lsqr problem                    */
    /*************************************************************/

    /* LSQR alloc */
    alloc_lsqr_mem(&input, &output, &work, &func, sparseA->nb_line,
                   sparseA->nb_col);

    fprintf(stderr, "alloc_lsqr_mem : ok\n");

    /* defines the routine Mat.Vect to use */
    func->mat_vec_prod = sparseMATRIXxVECTOR;

    /* Set the input parameters for LSQR */
    input->num_rows = sparseA->nb_line;
    input->num_cols = sparseA->nb_col;
    input->rel_mat_err = 1.0e-3;        /* in km */
    input->rel_rhs_err = 1.0e-2;        /* in seconde */
    /*input->rel_mat_err = 0.;
       input->rel_rhs_err = 0.; */
    input->cond_lim = .0;
    input->lsqr_fp_out = logfd;
    /* input->rhs_vec = (dvec *) b; */
    dvec_copy((dvec *) b, input->rhs_vec);
    input->sol_vec = (dvec *) x;        /* initial guess */
    input->damp_val = damping;
    if (max_iter == -1) {
        input->max_iter = 4 * (sparseA->nb_col);
    } else {
        input->max_iter = max_iter;
    }

    /* catch Ctrl-C signal */
    signal(SIGINT, emergency_halt);

    /******************************/
    /* resolution du systeme Ax=b */

    /******************************/
    lsqr(input, output, work, func, sparseA);
    fprintf(stderr, "*** lsqr ended (%ld iter) : %s\n",
            output->num_iters, lsqr_msg[output->term_flag]);
    if (output->term_flag == 0) {       /* solution x=x0 */
        exit(0);
    }

    /* uncompress the solution  */
    solution = uncompress_column((struct vector_t *) output->sol_vec,
                                 compress2fat, fat_sol_nb_col);

    /* uncompress the standard error on solution  */
    std_error_sol =
        uncompress_column((struct vector_t *) output->std_err_vec,
                          compress2fat, fat_sol_nb_col);

    /* if irm file was provided, set the right value to each cell 
     * from a given metacell 
     */
    if (irm) {
        irm_update(solution, irm, nb_metacell, nb_irm, mesh);
        free_irm(irm, nb_irm);
        free(nb_metacell);
    }

    /* write solution */
    if (strchr(output_type, 'm')) {
        export2matlab(solution, output_filename, mesh, vm,
                      output->num_iters,
                      input->damp_val, grad_damping, use_ach);
    }

    if (strchr(output_type, 's')) {
        export2sco(solution, output_filename, mesh, vm,
                   output->num_iters,
                   input->damp_val, grad_damping, use_ach);
    }

    if (strchr(output_type, 'g')) {
        /* solution */
        export2gmt(solution, output_filename, mesh, vm,
                   output->num_iters,
                   input->damp_val, grad_damping, use_ach);

        /* error on solution */
        sol_error_filename = (char *)
            malloc(sizeof(char) *
                   (strlen(output_filename) + strlen(".err") + 1));
        sprintf(sol_error_filename, "%s.err", output_filename);
        export2gmt(std_error_sol, sol_error_filename, mesh, vm,
                   output->num_iters,
                   input->damp_val, grad_damping, use_ach);
        free(sol_error_filename);

    }

    /* save the xml enrichied with sections */
    xml_output = (char *)
        malloc((strlen(output_filename) + strlen(".xml") +
                1) * sizeof(char));
    assert(xml_output);
    sprintf(xml_output, "%s.xml", output_filename);
    mesh2xml(mesh, xml_output);
    free(xml_output);

    /******************************************************/
    /* variance reduction, ie how the model fits the data */
    /* X = the final solution                             */
    /*                                                    */
    /*                 ||b-AX||²                          */
    /*         VR= 1 - --------                           */
    /*                  ||b||²                            */
    /*                                                    */
    /******************************************************/
    {
        double norm_b;
        double norm_b_AX;
        double VR;              /* variance reduction */

        struct vector_t *rhs;   /* right hand side */
        rhs = new_vector(sparseA->nb_line);

        /* use copy */
        dvec_copy((dvec *) b, (dvec *) rhs);

        norm_b = dvec_norm2((dvec *) rhs);

        /* does rhs = rhs + sparseA . output->sol_vec */
        /* here  rhs is overwritten */
        dvec_scale((-1.0), (dvec *) rhs);
        sparseMATRIXxVECTOR(0, output->sol_vec, (dvec *) rhs, sparseA);
        dvec_scale((-1.0), (dvec *) rhs);

        norm_b_AX = dvec_norm2((dvec *) rhs);

        VR = 1 - (norm_b_AX * norm_b_AX) / (norm_b * norm_b);
        fprintf(stdout, "Variance reduction = %.2f%%\n", VR * 100);
        free_vector(rhs);
    }

    /********/
    /* free */
    /********/
    if (vm) {
        free_velocity_model(vm);
    }
    free_mesh(mesh);
    free_sparse_matrix(sparseA);
    free_lsqr_mem(input, output, work, func);

    free_vector(solution);
    free_vector(std_error_sol);
    free(compress2fat);

    for (i = 0; i < nb_xmlfile; i++) {
        free(xmlfilelist[i]);
        free_mesh(imported_mesh[i]);
    }
    free(xmlfilelist);
    free(imported_mesh);
    return (0);
}
Exemple #4
0
// x=x/sqrt(x'*x)
void dvec_normalize(int n, double *x)
{
  double norm=0;
  norm=dvec_norm2(n,x);
  dvec_scale(n,x,(1.0/norm));
}
Exemple #5
0
/*
*------------------------------------------------------------------------------
*
*     LSQR  finds a solution x to the following problems:
*
*     1. Unsymmetric equations --    solve  A*x = b
*
*     2. Linear least squares  --    solve  A*x = b
*                                    in the least-squares sense
*
*     3. Damped least squares  --    solve  (   A    )*x = ( b )
*                                           ( damp*I )     ( 0 )
*                                    in the least-squares sense
*
*     where 'A' is a matrix with 'm' rows and 'n' columns, 'b' is an
*     'm'-vector, and 'damp' is a scalar.  (All quantities are real.)
*     The matrix 'A' is intended to be large and sparse.
*
*
*     Notation
*     --------
*
*     The following quantities are used in discussing the subroutine
*     parameters:
*
*     'Abar'   =  (   A    ),          'bbar'  =  ( b )
*                 ( damp*I )                      ( 0 )
*
*     'r'      =  b  -  A*x,           'rbar'  =  bbar  -  Abar*x
*
*     'rnorm'  =  sqrt( norm(r)**2  +  damp**2 * norm(x)**2 )
*              =  norm( rbar )
*
*     'rel_prec'  =  the relative precision of floating-point arithmetic
*                    on the machine being used.  Typically 2.22e-16
*                    with 64-bit arithmetic.
*
*     LSQR  minimizes the function 'rnorm' with respect to 'x'.
*
*
*     References
*     ----------
*
*     C.C. Paige and M.A. Saunders,  LSQR: An algorithm for sparse
*          linear equations and sparse least squares,
*          ACM Transactions on Mathematical Software 8, 1 (March 1982),
*          pp. 43-71.
*
*     C.C. Paige and M.A. Saunders,  Algorithm 583, LSQR: Sparse
*          linear equations and least-squares problems,
*          ACM Transactions on Mathematical Software 8, 2 (June 1982),
*          pp. 195-209.
*
*     C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh,
*          Basic linear algebra subprograms for Fortran usage,
*          ACM Transactions on Mathematical Software 5, 3 (Sept 1979),
*          pp. 308-323 and 324-325.
*
*------------------------------------------------------------------------------
*/
void lsqr( lsqr_input *input, lsqr_output *output, lsqr_work *work,
           lsqr_func *func,
           void *prod,
           int (*per_iteration_callback)(lsqr_input*, lsqr_output*, void* token),
           void* token)
{
    double  dvec_norm2( dvec * );

    long    indx,
            term_iter,
            term_iter_max;

    double  alpha,
            beta,
            rhobar,
            phibar,
            bnorm,
            bbnorm,
            cs1,
            sn1,
            psi,
            rho,
            cs,
            sn,
            theta,
            phi,
            tau,
            ddnorm,
            delta,
            gammabar,
            zetabar,
            gamma,
            cs2,
            sn2,
            zeta,
            xxnorm,
            res,
            resid_tol,
            cond_tol,
            resid_tol_mach,
            temp,
            stop_crit_1,
            stop_crit_2,
            stop_crit_3;

    static char term_msg[8][80] =
    {
        "The exact solution is x = x0",
        "The residual Ax - b is small enough, given ATOL and BTOL",
        "The least squares error is small enough, given ATOL",
        "The estimated condition number has exceeded CONLIM",
        "The residual Ax - b is small enough, given machine precision",
        "The least squares error is small enough, given machine precision",
        "The estimated condition number has exceeded machine precision",
        "The iteration limit has been reached"
    };

    if( input->lsqr_fp_out != NULL )
        fprintf( input->lsqr_fp_out, "  Least Squares Solution of A*x = b\n"
                 "        The matrix A has %7li rows and %7li columns\n"
                 "        The damping parameter is\tDAMP = %10.2e\n"
                 "        ATOL = %10.2e\t\tCONDLIM = %10.2e\n"
                 "        BTOL = %10.2e\t\tITERLIM = %10li\n\n",
                 input->num_rows, input->num_cols, input->damp_val, input->rel_mat_err,
                 input->cond_lim, input->rel_rhs_err, input->max_iter );

    output->term_flag = 0;
    term_iter = 0;

    output->num_iters = 0;

    output->frob_mat_norm = 0.0;
    output->mat_cond_num = 0.0;
    output->sol_norm = 0.0;

    for(indx = 0; indx < input->num_cols; indx++)
    {
        work->bidiag_wrk_vec->elements[indx] = 0.0;
        work->srch_dir_vec->elements[indx] = 0.0;
        output->std_err_vec->elements[indx] = 0.0;
        output->sol_vec->elements[indx] = 0.0;
    }

    bbnorm = 0.0;
    ddnorm = 0.0;
    xxnorm = 0.0;

    cs2 = -1.0;
    sn2 = 0.0;
    zeta = 0.0;
    res = 0.0;

    if( input->cond_lim > 0.0 )
        cond_tol = 1.0 / input->cond_lim;
    else
        cond_tol = DBL_EPSILON;

    alpha = 0.0;
    beta = 0.0;
    /*
    *  Set up the initial vectors u and v for bidiagonalization.  These satisfy
    *  the relations
    *             BETA*u = b - A*x0
    *             ALPHA*v = A^T*u
    */
    /* Compute b - A*x0 and store in vector u which initially held vector b */
    dvec_scale( (-1.0), input->rhs_vec );
    func->mat_vec_prod( 0, input->sol_vec, input->rhs_vec, prod );
    dvec_scale( (-1.0), input->rhs_vec );

    /* compute Euclidean length of u and store as BETA */
    beta = dvec_norm2( input->rhs_vec );

    if( beta > 0.0 )
    {
        /* scale vector u by the inverse of BETA */
        dvec_scale( (1.0 / beta), input->rhs_vec );

        /* Compute matrix-vector product A^T*u and store it in vector v */
        func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod );

        /* compute Euclidean length of v and store as ALPHA */
        alpha = dvec_norm2( work->bidiag_wrk_vec );
    }

    if( alpha > 0.0 )
    {
        /* scale vector v by the inverse of ALPHA */
        dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec );

        /* copy vector v to vector w */
        dvec_copy( work->bidiag_wrk_vec, work->srch_dir_vec );
    }

    output->mat_resid_norm = alpha * beta;
    output->resid_norm = beta;
    bnorm = beta;
    /*
    *  If the norm || A^T r || is zero, then the initial guess is the exact
    *  solution.  Exit and report this.
    */
    if( (output->mat_resid_norm == 0.0) && (input->lsqr_fp_out != NULL) )
    {
        fprintf( input->lsqr_fp_out, "\tISTOP = %3li\t\t\tITER = %9li\n"
                 "        || A ||_F = %13.5e\tcond( A ) = %13.5e\n"
                 "        || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n"
                 "        || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n",
                 output->term_flag, output->num_iters, output->frob_mat_norm,
                 output->mat_cond_num, output->resid_norm, output->mat_resid_norm,
                 bnorm, output->sol_norm );

        fprintf( input->lsqr_fp_out, "  %s\n\n", term_msg[output->term_flag]);

        return;
    }

    rhobar = alpha;
    phibar = beta;
    /*
    *  If statistics are printed at each iteration, print a header and the initial
    *  values for each quantity.
    */
    if( input->lsqr_fp_out != NULL )
    {
        fprintf( input->lsqr_fp_out,
                 "  ITER     || r ||    Compatible  "
                 "||A^T r|| / ||A|| ||r||  || A ||    cond( A )\n\n" );

        stop_crit_1 = 1.0;
        stop_crit_2 = alpha / beta;

        fprintf( input->lsqr_fp_out,
                 "%6li %13.5e %10.2e \t%10.2e \t%10.2e  %10.2e\n",
                 output->num_iters, output->resid_norm, stop_crit_1, stop_crit_2,
                 output->frob_mat_norm, output->mat_cond_num);
    }

    /*
    *  The main iteration loop is continued as long as no stopping criteria
    *  are satisfied and the number of total iterations is less than some upper
    *  bound.
    */
    while( output->term_flag == 0 )
    {
        output->num_iters++;
        /*
        *     Perform the next step of the bidiagonalization to obtain
        *     the next vectors u and v, and the scalars ALPHA and BETA.
        *     These satisfy the relations
        *                BETA*u  =  A*v  -  ALPHA*u,
        *                ALFA*v  =  A^T*u  -  BETA*v.
        */
        /* scale vector u by the negative of ALPHA */
        dvec_scale( (-alpha), input->rhs_vec );

        /* compute A*v - ALPHA*u and store in vector u */
        func->mat_vec_prod( 0, work->bidiag_wrk_vec, input->rhs_vec, prod );

        /* compute Euclidean length of u and store as BETA */
        beta = dvec_norm2( input->rhs_vec );

        /* accumulate this quantity to estimate Frobenius norm of matrix A */
        /* bbnorm += sqr(alpha) + sqr(beta) + sqr(input->damp_val);*/
        bbnorm += alpha*alpha + beta*beta
                  + input->damp_val*input->damp_val;

        if( beta > 0.0 )
        {
            /* scale vector u by the inverse of BETA */
            dvec_scale( (1.0 / beta), input->rhs_vec );

            /* scale vector v by the negative of BETA */
            dvec_scale( (-beta), work->bidiag_wrk_vec );

            /* compute A^T*u - BETA*v and store in vector v */
            func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod );

            /* compute Euclidean length of v and store as ALPHA */
            alpha = dvec_norm2( work->bidiag_wrk_vec );

            if( alpha > 0.0 )
                /* scale vector v by the inverse of ALPHA */
                dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec );
        }
        /*
        *     Use a plane rotation to eliminate the damping parameter.
        *     This alters the diagonal (RHOBAR) of the lower-bidiagonal matrix.
        */
        cs1 = rhobar / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) );
        sn1 = input->damp_val
              / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) );

        psi = sn1 * phibar;
        phibar = cs1 * phibar;
        /*
        *     Use a plane rotation to eliminate the subdiagonal element (BETA)
        *     of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix.
        */
        rho = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val)
                    + lsqr_sqr(beta) );
        cs  = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ) / rho;
        sn  = beta / rho;

        theta = sn * alpha;
        rhobar = -cs * alpha;
        phi = cs * phibar;
        phibar = sn * phibar;
        tau = sn * phi;
        /*
        *     Update the solution vector x, the search direction vector w, and the
        *     standard error estimates vector se.
        */
        for(indx = 0; indx < input->num_cols; indx++)
        {
            /* update the solution vector x */
            output->sol_vec->elements[indx] += (phi / rho) *
                                               work->srch_dir_vec->elements[indx];

            /* update the standard error estimates vector se */
            output->std_err_vec->elements[indx] += lsqr_sqr( (1.0 / rho) *
                                                   work->srch_dir_vec->elements[indx] );

            /* accumulate this quantity to estimate condition number of A
            */
            ddnorm += lsqr_sqr( (1.0 / rho) *
                                work->srch_dir_vec->elements[indx] );

            /* update the search direction vector w */
            work->srch_dir_vec->elements[indx] =
                work->bidiag_wrk_vec->elements[indx] -
                (theta / rho) * work->srch_dir_vec->elements[indx];
        }
        /*
        *     Use a plane rotation on the right to eliminate the super-diagonal element
        *     (THETA) of the upper-bidiagonal matrix.  Then use the result to estimate
        *     the solution norm || x ||.
        */
        delta = sn2 * rho;
        gammabar = -cs2 * rho;
        zetabar = (phi - delta * zeta) / gammabar;

        /* compute an estimate of the solution norm || x || */
        output->sol_norm = sqrt( xxnorm + lsqr_sqr(zetabar) );

        gamma = sqrt( lsqr_sqr(gammabar) + lsqr_sqr(theta) );
        cs2 = gammabar / gamma;
        sn2 = theta / gamma;
        zeta = (phi - delta * zeta) / gamma;

        /* accumulate this quantity to estimate solution norm || x || */
        xxnorm += lsqr_sqr(zeta);
        /*
        *     Estimate the Frobenius norm and condition of the matrix A, and the
        *     Euclidean norms of the vectors r and A^T*r.
        */
        output->frob_mat_norm = sqrt( bbnorm );
        output->mat_cond_num = output->frob_mat_norm * sqrt( ddnorm );

        res += lsqr_sqr(psi);
        output->resid_norm = sqrt( lsqr_sqr(phibar) + res );

        output->mat_resid_norm = alpha * fabs( tau );
        /*
        *     Use these norms to estimate the values of the three stopping criteria.
        */
        stop_crit_1 = output->resid_norm / bnorm;

        stop_crit_2 = 0.0;
        if( output->resid_norm > 0.0 )
            stop_crit_2 = output->mat_resid_norm / ( output->frob_mat_norm *
                          output->resid_norm );

        stop_crit_3 = 1.0 / output->mat_cond_num;

        /*    05 Jul 2007: Bug reported by Joel Erickson <*****@*****.**>.
        */
        resid_tol = input->rel_rhs_err + input->rel_mat_err *
                    output->frob_mat_norm *         /* (not output->mat_resid_norm *) */
                    output->sol_norm / bnorm;

        resid_tol_mach = DBL_EPSILON + DBL_EPSILON *
                         output->frob_mat_norm *         /* (not output->mat_resid_norm *) */
                         output->sol_norm / bnorm;
        /*
        *     Check to see if any of the stopping criteria are satisfied.
        *     First compare the computed criteria to the machine precision.
        *     Second compare the computed criteria to the the user specified precision.
        */
        /* iteration limit reached */
        if( output->num_iters >= input->max_iter )
            output->term_flag = 7;

        /* condition number greater than machine precision */
        if( stop_crit_3 <= DBL_EPSILON )
            output->term_flag = 6;
        /* least squares error less than machine precision */
        if( stop_crit_2 <= DBL_EPSILON )
            output->term_flag = 5;
        /* residual less than a function of machine precision */
        if( stop_crit_1 <= resid_tol_mach )
            output->term_flag = 4;

        /* condition number greater than CONLIM */
        if( stop_crit_3 <= cond_tol )
            output->term_flag = 3;
        /* least squares error less than ATOL */
        if( stop_crit_2 <= input->rel_mat_err )
            output->term_flag = 2;
        /* residual less than a function of ATOL and BTOL */
        if( stop_crit_1 <= resid_tol )
            output->term_flag = 1;
        /*
        *  If statistics are printed at each iteration, print a header and the initial
        *  values for each quantity.
        */
        if( input->lsqr_fp_out != NULL )
        {
            fprintf( input->lsqr_fp_out,
                     "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n",
                     output->num_iters, output->resid_norm, stop_crit_1,
                     stop_crit_2,
                     output->frob_mat_norm, output->mat_cond_num);
        }
        /*
        *     The convergence criteria are required to be met on NCONV consecutive
        *     iterations, where NCONV is set below.  Suggested values are 1, 2, or 3.
        */
        if( output->term_flag == 0 )
            term_iter = -1;

        term_iter_max = 1;
        term_iter++;

        if( (term_iter < term_iter_max) &&
                (output->num_iters < input->max_iter) )
            output->term_flag = 0;

        if (per_iteration_callback)
            per_iteration_callback(input, output, token);

    } /* end while loop */
    /*
    *  Finish computing the standard error estimates vector se.
    */
    temp = 1.0;

    if( input->num_rows > input->num_cols )
        temp = ( double ) ( input->num_rows - input->num_cols );

    if( lsqr_sqr(input->damp_val) > 0.0 )
        temp = ( double ) ( input->num_rows );

    temp = output->resid_norm / sqrt( temp );

    for(indx = 0; indx < input->num_cols; indx++)
        /* update the standard error estimates vector se */
        output->std_err_vec->elements[indx] = temp *
                                              sqrt( output->std_err_vec->elements[indx] );
    /*
    *  If statistics are printed at each iteration, print the statistics for the
    *  stopping condition.
    */
    if( input->lsqr_fp_out != NULL )
    {
        fprintf( input->lsqr_fp_out, "\n\tISTOP = %3li\t\t\tITER = %9li\n"
                 "        || A ||_F = %13.5e\tcond( A ) = %13.5e\n"
                 "        || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n"
                 "        || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n",
                 output->term_flag, output->num_iters, output->frob_mat_norm,
                 output->mat_cond_num, output->resid_norm, output->mat_resid_norm,
                 bnorm, output->sol_norm );

        fprintf( input->lsqr_fp_out, "  %s\n\n", term_msg[output->term_flag]);

    }

    return;
}