// traj z part should be offset to ground, not abs z coord!! void LipmVarHeightPlanner::setZMPTraj(const Traj<3,3> &traj, const std::vector <double> &z0) { assert(traj.size() > 0 && traj.size() == z0.size()); _zmp_d = traj; _z0 = z0; // get last step Vxx and initial Ks const TrajPoint<3,3> &end = traj[traj.size()-1]; double x[6] = {0}; double u[3] = {0}; dvec_copy(x, end.x, 6); dvec_copy(u, end.u, 3); Eigen::Matrix<double,6,6> A; Eigen::Matrix<double,6,3> B; getAB(x, u, _z0[_z0.size()-1], A, B); _lqr.setQ(_Q); _lqr.setR(_R); _lqr.infTimeLQR(A, B); _Vxx = _lqr.getV(); if (traj.size() == _du.size()) { for (size_t i = 0; i < traj.size(); i++) { _du[i].setZero(); _K[i] = _lqr.getK(); } } else { _du.resize(traj.size(), Eigen::Matrix<double,3,1>::Zero()); _K.resize(traj.size(), _lqr.getK()); } // initial trajectory _traj0 = Traj<3,3>(); for (size_t i = 0; i < _zmp_d.size(); i++) { const TrajPoint<3,3> &end = traj[i]; dvec_copy(x, end.x, 6); dvec_copy(u, end.u, 3); _traj0.append(end.time, end.type, x, x+3, NULL, u); } Traj<3,3> tmpTraj; forwardPass(traj[0].x, tmpTraj); _traj0 = tmpTraj; assert(_zmp_d.size() == _du.size() && _zmp_d.size() == _K.size() && _zmp_d.size() == _traj0.size()); }
// store the list of indexes, x is not destroyed void dvec_sort_index(int *I, int n, double *X) { double *Y=NULL; Y=dvec_allocate(n); dvec_copy(n,Y,X); dvec_sort(n,Y,I); Y=dvec_free(Y); }
// x[i] <=> x[I[i]] void dvec_swap_index(int n, double *x, const int *I) { double *y=NULL; y=dvec_allocate(n); dvec_copy_index(n,y,x,I); dvec_copy(n,x,y); y=dvec_free(y); }
void fill_cmd(const RobotState &rs, Command &cmd) { for (int i = 0; i < N_JOINTS; i++) { cmd.joints_d[i] = Command::BDI_joints[i]; cmd.jointsd_d[i] = 0; cmd.jointsdd_d[i] = 0; } for (int i = 0; i < 3; i++) { cmd.com_d[i] = rs.com[i]; } cmd.com_d[ZZ] -= 0.5; cmd.utorsoq_d = rs.utorsoq; cmd.rootq_d = rs.rootq; for (int side = 0; side < LR; side++) { cmd.footq_d[side] = rs.feet[side].w_q; dvec_copy(cmd.foot_d[side], rs.feet[side].w_pos, 3); } }
double LipmVarHeightPlanner::forwardPass(const double *x0, Traj<3,3> &traj1) const { Eigen::Matrix<double,6,1> z; Eigen::Matrix<double,6,1> z1; Eigen::Matrix<double,3,1> u; traj1 = _zmp_d; //traj1 = Traj<3,3>(); //for (size_t i = 0; i < _zmp_d.size(); i++) // traj1.append(_zmp_d[i].time, _zmp_d[i].type, NULL, NULL, NULL, NULL); dvec_copy(traj1[0].x, x0, 6); double cost = 0; Eigen::Matrix<double,6,1> x_h; Eigen::Matrix<double,3,1> u_h; for (size_t t = 0; t < _zmp_d.size(); t++) { // x - xref for (int i = 0; i < 6; i++) { z(i) = traj1[t].x[i] - _traj0[t].x[i]; x_h(i) = traj1[t].x[i] - _zmp_d[t].x[i]; } // u = alpha*du + uref u = _alpha*_du[t]; for (int i = 0; i < 3; i++) u(i) += _traj0[t].u[i]; // u += K*z u += _K[t]*z; for (int i = 0; i < 3; i++) traj1[t].u[i] = u(i); for (int i = 0; i < 3; i++) u_h(i) = traj1[t].u[i] - _zmp_d[t].u[i]; // compute cost cost += 0.5 * x_h.transpose() * _Q * x_h; cost += 0.5 * u_h.transpose() * _R * u_h; // integrate if (t < _zmp_d.size()-1) integrate(traj1[t].x, traj1[t].u, _z0[t], traj1[t].acc, traj1[1+t].x); } /* FILE *out = fopen("tmp/lipmz_traj0", "w"); for (size_t t = 0; t < _traj0.size(); t++) { fprintf(out, "%g %g %g %g %g %g\n", _traj0[t].x[0], _traj0[t].x[1], _traj0[t].x[2], _traj0[t].u[0], _traj0[t].u[1], _traj0[t].u[2]); } fclose(out); out = fopen("tmp/lipmz_traj1", "w"); for (size_t t = 0; t < traj1.size(); t++) { fprintf(out, "%g %g %g %g %g %g\n", traj1[t].x[0], traj1[t].x[1], traj1[t].x[2], traj1[t].u[0], traj1[t].u[1], traj1[t].u[2]); } fclose(out); */ return cost; }
void build_source_vectors_1n_a(int n_quad, int n_stokes, double *qx_v, double F_0, double omega, double *omega_a, double as_0, double *as_0_a, double *P_q0_mm, double *P_q0_pm, double **tpr, double **tmr, double **gamma, double *F_p, double *F_m, double *P_q0_mm_a, double *P_q0_pm_a, double **tpr_a, double **tmr_a, double **gamma_a, double *F_p_a, double *F_m_a, save_tree_data save_tree, work_data work) { int i; int n_quad_v; double *v1; double *a_a; double *b_a; double *c_a; double *d_a; double *e_a; double *p_a; double *h_a; double **w1; forward_save_build_source_vectors_1n_data *save; n_quad_v = n_quad * n_stokes; /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ save_tree_encode_s(&save_tree, "build_source_vectors_1n"); save_tree_retrieve_data(&save_tree, forward_save_build_source_vectors_1n_data, &save); /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ v1 = get_work1(&work, WORK_DX); a_a = get_work1(&work, WORK_DX); b_a = get_work1(&work, WORK_DX); c_a = get_work1(&work, WORK_DX); d_a = get_work1(&work, WORK_DX); e_a = get_work1(&work, WORK_DX); p_a = get_work1(&work, WORK_DX); h_a = get_work1(&work, WORK_DX); w1 = get_work1(&work, WORK_DXX); /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ dm_v_mul_D_A(n_quad, n_stokes, F_m_a, F_m_a); dvec_add(F_p_a, F_m_a, F_p_a, n_quad_v); dvec_scale(-1., F_m_a, p_a, n_quad_v); *as_0_a += -dvec_dot(save->h, F_p_a, n_quad_v) / (2. * as_0 * as_0); dvec_scale(1. / (2. * as_0), F_p_a, h_a, n_quad_v); dvec_scale(1. / 2., F_p_a, v1, n_quad_v); dvec_add(p_a, v1, p_a, n_quad_v); dmat_vxvtmx(h_a, save->p, 1., tpr_a, 1., n_quad_v, n_quad_v); dmat_gxvxmx(1, tpr, h_a, 1., p_a, 1., n_quad_v, n_quad_v); dvec_copy(e_a, h_a, n_quad_v); dvec_copy(v1, p_a, n_quad_v); dmat_getrs2('t', save->f, &v1, n_quad_v, 1, save->ip); dmat_vxvtmx(v1, save->p, -1., w1, 0., n_quad_v, n_quad_v); dmat_add(gamma_a, w1, gamma_a, n_quad_v, n_quad_v); for (i = 0; i < n_quad_v; ++i) *as_0_a += -2. * as_0 * w1[i][i]; dmat_vxvtmx(v1, save->e, -1., tmr_a, 1., n_quad_v, n_quad_v); dmat_gxvxmx(1, tmr, v1, -1., e_a, 1., n_quad_v, n_quad_v); *as_0_a -= dvec_dot(v1, save->d, n_quad_v); dvec_scale(-as_0, v1, d_a, n_quad_v); dvec_copy(b_a, e_a, n_quad_v); dvec_scale(-1., e_a, c_a, n_quad_v); dvec_add(b_a, d_a, b_a, n_quad_v); dvec_add(c_a, d_a, c_a, n_quad_v); dm_v_mul_D_A(n_quad, n_stokes, c_a, c_a); for (i = 0; i < n_quad_v; ++i) a_a[i] = c_a[i] * P_q0_mm[i]; for (i = 0; i < n_quad_v; ++i) P_q0_mm_a[i] += F_0 * omega / (4. * PI) / qx_v[i] * c_a[i]; for (i = 0; i < n_quad_v; ++i) a_a[i] += b_a[i] * P_q0_pm[i]; for (i = 0; i < n_quad_v; ++i) P_q0_pm_a[i] += F_0 * omega / (4. * PI) / qx_v[i] * b_a[i]; for (i = 0; i < n_quad_v; ++i) *omega_a += F_0 / (4. * PI) / qx_v[i] * a_a[i]; dvec_zero(F_p_a, n_quad_v); dvec_zero(F_m_a, n_quad_v); }
int main(int argc, char *argv[]) { struct sparse_matrix_t *sparseA = NULL; struct vector_t *b = NULL; struct vector_t *x; struct mesh_t *mesh; char *xml_output; long int *compress2fat = NULL; struct vector_t *solution; struct vector_t *std_error_sol; long int fat_sol_nb_col; lsqr_input *input; lsqr_output *output; lsqr_work *work; /* zone temoraire de travail */ lsqr_func *func; /* func->mat_vec_prod -> APROD */ /* cmd line arg */ char *mesh_filename = NULL; char *importfilename = NULL; char *output_filename = NULL; char *sol_error_filename = NULL; char *log_filename = NULL; char *output_type = NULL; int max_iter; double damping, grad_damping; int use_ach = 0; /* ACH : tele-seismic inversion tomography */ int check_sparse = 0; /* check sparse matrix disable by default */ /* velocity model */ char *vmodel = NULL; struct velocity_model_t *vm = NULL; struct mesh_t **imported_mesh = NULL; char **xmlfilelist = NULL; int nb_xmlfile = 0; int i, j; int nb_irm = 0; struct irm_t **irm = NULL; int *nb_metacell = NULL; FILE *logfd; /*************************************************************/ parse_command_line(argc, argv, &mesh_filename, &vmodel, &importfilename, &log_filename, &output_filename, &output_type, &max_iter, &damping, &grad_damping, &use_ach, &check_sparse); if (use_ach) { fprintf(stderr, "Using ACH tomographic inversion\n"); } else { fprintf(stderr, "Using STANDARD tomographic inversion\n"); } /* load the velocity model */ if (vmodel) { char *myfile; vm = load_velocity_model(vmodel); if (!vm) { fprintf(stderr, "Can not initialize velocity model '%s'\n", vmodel); exit(1); } myfile = strdup(vmodel); fprintf(stderr, "Velocity model '%s' loaded\n", basename(myfile)); free(myfile); } else { vm = NULL; } /* Open log file */ if (!log_filename) { logfd = stdout; } else { if (!(logfd = fopen(log_filename, "w"))) { perror(log_filename); exit(1); } } /*check_write_access (output_filename); */ /**************************************/ /* test if we can open file to import */ /**************************************/ if (importfilename) { xmlfilelist = parse_separated_list(importfilename, ","); nb_xmlfile = 0; while (xmlfilelist[nb_xmlfile]) { if (access(xmlfilelist[nb_xmlfile], R_OK) == -1) { perror(xmlfilelist[nb_xmlfile]); exit(1); } nb_xmlfile++; } } else { fprintf(stderr, "No file to import ... exiting\n"); exit(0); } /****************************/ /* main mesh initialization */ /****************************/ mesh = mesh_init_from_file(mesh_filename); if (!mesh) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } fprintf(stderr, "read %s ok\n", mesh_filename); /*****************************************/ /* check and initialize slice xml files */ /*****************************************/ if (nb_xmlfile) { int nb_sparse = 0; int nb_res = 0; int f; imported_mesh = (struct mesh_t **) malloc(sizeof(struct mesh_t *) * nb_xmlfile); assert(imported_mesh); for (i = 0; i < nb_xmlfile; i++) { imported_mesh[i] = mesh_init_from_file(xmlfilelist[i]); if (!imported_mesh[i]) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } for (f = 0; f < NB_MESH_FILE_FORMAT; f++) { /* mandatory field : res, sparse, and irm if provided */ if (f == RES || f == SPARSE || f == IRM) { check_files_access(f, imported_mesh[i]->data[f], xmlfilelist[i]); } } if (imported_mesh[i]->data[SPARSE]) { nb_sparse += imported_mesh[i]->data[SPARSE]->ndatafile; } if (imported_mesh[i]->data[RES]) { nb_res += imported_mesh[i]->data[RES]->ndatafile; } if (imported_mesh[i]->data[IRM]) { nb_irm += imported_mesh[i]->data[IRM]->ndatafile; } } if (!nb_sparse || !nb_res) { fprintf(stderr, "Error no sparse or res file available !\n"); exit(0); } } /*********************************************/ /* read and import the sparse(s) matrix(ces) */ /*********************************************/ for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[SPARSE]) { continue; } for (j = 0; j < imported_mesh[i]->data[SPARSE]->ndatafile; j++) { sparseA = import_sparse_matrix(sparseA, imported_mesh[i]->data[SPARSE]-> filename[j]); } } if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } /*sparse_compute_length(sparseA, "length1.txt"); */ fat_sol_nb_col = sparseA->nb_col; show_sparse_stats(sparseA); /*********************************************/ /* read and import the residual time vector */ /*********************************************/ for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[RES]) { continue; } for (j = 0; j < imported_mesh[i]->data[RES]->ndatafile; j++) { b = import_vector(b, imported_mesh[i]->data[RES]->filename[j]); } } /*************************************************/ /* check compatibility between matrix and vector */ /*************************************************/ if (sparseA->nb_line != b->length) { fprintf(stderr, "Error, check your matrix/vector sizes (%ld/%ld)\n", sparseA->nb_line, b->length); exit(1); } /********************/ /* show memory used */ /********************/ #ifdef __APPLE__ { struct mstats memusage; memusage = mstats(); fprintf(stderr, "Memory used: %.2f MBytes\n", (float) (memusage.bytes_used) / (1024. * 1024)); } #else { struct mallinfo m_info; m_info = mallinfo(); fprintf(stderr, "Memory used: %.2f MBytes\n", (float) (m_info.uordblks + m_info.usmblks) / (1024. * 1024.)); } #endif /**************************************/ /* relative traveltime mode */ /**************************************/ if (use_ach) { int nb_evt_imported = 0; for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[EVT]) { continue; } for (j = 0; j < imported_mesh[i]->data[EVT]->ndatafile; j++) { relative_tt(sparseA, b, imported_mesh[i]->data[EVT]->filename[j]); nb_evt_imported++; } } if (!nb_evt_imported) { fprintf(stderr, "Error in ACH mode, can not import any .evt file !\n"); exit(1); } } /************************************************/ /* read the irregular mesh definition if needed */ /* one by layer */ /************************************************/ if (nb_irm) { int cpt = 0; struct mesh_offset_t **offset; int l; irm = (struct irm_t **) malloc(nb_irm * sizeof(struct irm_t *)); assert(irm); nb_metacell = (int *) calloc(nb_irm, sizeof(int)); assert(nb_metacell); make_mesh(mesh); for (i = 0; i < nb_xmlfile; i++) { if (!imported_mesh[i]->data[IRM]) { continue; } /* offset between meshes */ offset = compute_mesh_offset(mesh, imported_mesh[i]); for (l = 0; l < mesh->nlayers; l++) { if (!offset[l]) continue; fprintf(stderr, "\t%s, [%s] offset[layer=%d] : lat=%d lon=%d z=%d\n", xmlfilelist[i], MESH_FILE_FORMAT[IRM], l, offset[l]->lat, offset[l]->lon, offset[l]->z); } for (j = 0; j < imported_mesh[i]->data[IRM]->ndatafile; j++) { /* FIXME: read only once the irm file */ irm[cpt] = read_irm(imported_mesh[i]->data[IRM]->filename[j], &(nb_metacell[cpt])); import2mesh_irm_file(mesh, imported_mesh[i]->data[IRM]-> filename[j], offset); cpt++; } for (l = 0; l < mesh->nlayers; l++) { if (offset[l]) free(offset[l]); } free(offset); } metacell_find_neighbourhood(mesh); } /*sparse_compute_length(sparseA, "length1.txt"); */ fat_sol_nb_col = sparseA->nb_col; show_sparse_stats(sparseA); /***********************/ /* remove empty column */ /***********************/ fprintf(stderr, "starting compression ...\n"); sparse_compress_column(mesh, sparseA, &compress2fat); if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } show_sparse_stats(sparseA); /***************************************/ /* add gradient damping regularisation */ /***************************************/ if (fabs(grad_damping) > 1.e-6) { int nb_faces = 6; /* 1 cell may have 6 neighbours */ long int nb_lines = 0; char *regul_name; fprintf(stdout, "using gradient damping : %f\n", grad_damping); /* tmp file name */ regul_name = tempnam("/tmp", "regul"); if (!regul_name) { perror("lsqrsolve: "); exit(1); } if (nb_irm) { create_regul_DtD_irm(sparseA, compress2fat, mesh, regul_name, nb_faces, grad_damping, &nb_lines); } else { create_regul_DtD(sparseA, compress2fat, mesh, regul_name, nb_faces, grad_damping, &nb_lines); } sparse_matrix_resize(sparseA, sparseA->nb_line + sparseA->nb_col, sparseA->nb_col); sparseA = import_sparse_matrix(sparseA, regul_name); if (check_sparse) { if (check_sparse_matrix(sparseA)) { exit(1); } } vector_resize(b, sparseA->nb_line); unlink(regul_name); show_sparse_stats(sparseA); } /*********************************/ /* the real mesh is no more used */ /* keep only the light mesh */ /*********************************/ fprintf(stdout, "Time to free the real mesh and keep only the light structure\n"); free_mesh(mesh); mesh = mesh_init_from_file(mesh_filename); if (!mesh) { fprintf(stderr, "Error decoding %s.\n", mesh_filename); exit(1); } fprintf(stderr, "read %s ok\n", mesh_filename); /********************************/ /* init vector solution to zero */ /********************************/ x = new_vector(sparseA->nb_col); /*************************************************************/ /* solve A.x = B */ /* A = ray length in the cells */ /* B = residual travel time observed - computed */ /* x solution to satisfy the lsqr problem */ /*************************************************************/ /* LSQR alloc */ alloc_lsqr_mem(&input, &output, &work, &func, sparseA->nb_line, sparseA->nb_col); fprintf(stderr, "alloc_lsqr_mem : ok\n"); /* defines the routine Mat.Vect to use */ func->mat_vec_prod = sparseMATRIXxVECTOR; /* Set the input parameters for LSQR */ input->num_rows = sparseA->nb_line; input->num_cols = sparseA->nb_col; input->rel_mat_err = 1.0e-3; /* in km */ input->rel_rhs_err = 1.0e-2; /* in seconde */ /*input->rel_mat_err = 0.; input->rel_rhs_err = 0.; */ input->cond_lim = .0; input->lsqr_fp_out = logfd; /* input->rhs_vec = (dvec *) b; */ dvec_copy((dvec *) b, input->rhs_vec); input->sol_vec = (dvec *) x; /* initial guess */ input->damp_val = damping; if (max_iter == -1) { input->max_iter = 4 * (sparseA->nb_col); } else { input->max_iter = max_iter; } /* catch Ctrl-C signal */ signal(SIGINT, emergency_halt); /******************************/ /* resolution du systeme Ax=b */ /******************************/ lsqr(input, output, work, func, sparseA); fprintf(stderr, "*** lsqr ended (%ld iter) : %s\n", output->num_iters, lsqr_msg[output->term_flag]); if (output->term_flag == 0) { /* solution x=x0 */ exit(0); } /* uncompress the solution */ solution = uncompress_column((struct vector_t *) output->sol_vec, compress2fat, fat_sol_nb_col); /* uncompress the standard error on solution */ std_error_sol = uncompress_column((struct vector_t *) output->std_err_vec, compress2fat, fat_sol_nb_col); /* if irm file was provided, set the right value to each cell * from a given metacell */ if (irm) { irm_update(solution, irm, nb_metacell, nb_irm, mesh); free_irm(irm, nb_irm); free(nb_metacell); } /* write solution */ if (strchr(output_type, 'm')) { export2matlab(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); } if (strchr(output_type, 's')) { export2sco(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); } if (strchr(output_type, 'g')) { /* solution */ export2gmt(solution, output_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); /* error on solution */ sol_error_filename = (char *) malloc(sizeof(char) * (strlen(output_filename) + strlen(".err") + 1)); sprintf(sol_error_filename, "%s.err", output_filename); export2gmt(std_error_sol, sol_error_filename, mesh, vm, output->num_iters, input->damp_val, grad_damping, use_ach); free(sol_error_filename); } /* save the xml enrichied with sections */ xml_output = (char *) malloc((strlen(output_filename) + strlen(".xml") + 1) * sizeof(char)); assert(xml_output); sprintf(xml_output, "%s.xml", output_filename); mesh2xml(mesh, xml_output); free(xml_output); /******************************************************/ /* variance reduction, ie how the model fits the data */ /* X = the final solution */ /* */ /* ||b-AX||² */ /* VR= 1 - -------- */ /* ||b||² */ /* */ /******************************************************/ { double norm_b; double norm_b_AX; double VR; /* variance reduction */ struct vector_t *rhs; /* right hand side */ rhs = new_vector(sparseA->nb_line); /* use copy */ dvec_copy((dvec *) b, (dvec *) rhs); norm_b = dvec_norm2((dvec *) rhs); /* does rhs = rhs + sparseA . output->sol_vec */ /* here rhs is overwritten */ dvec_scale((-1.0), (dvec *) rhs); sparseMATRIXxVECTOR(0, output->sol_vec, (dvec *) rhs, sparseA); dvec_scale((-1.0), (dvec *) rhs); norm_b_AX = dvec_norm2((dvec *) rhs); VR = 1 - (norm_b_AX * norm_b_AX) / (norm_b * norm_b); fprintf(stdout, "Variance reduction = %.2f%%\n", VR * 100); free_vector(rhs); } /********/ /* free */ /********/ if (vm) { free_velocity_model(vm); } free_mesh(mesh); free_sparse_matrix(sparseA); free_lsqr_mem(input, output, work, func); free_vector(solution); free_vector(std_error_sol); free(compress2fat); for (i = 0; i < nb_xmlfile; i++) { free(xmlfilelist[i]); free_mesh(imported_mesh[i]); } free(xmlfilelist); free(imported_mesh); return (0); }
void single_scattered_radiance_dn(int n_stokes, int n_derivs, int n_layers, double F_0, int n_ulevels, int *ulevels, double *utaus, double *umus, int n_umus, double *omega, double **omega_l, double *ltau, double **ltau_l, double *btran, double **btran_l, double *as_0, double **as_0_l, double *atran, double **atran_l, double **P, double ***P_l, double *I_in, double **I_in_l, double **I_ss, double ***I_ss_l, int utau_output, uchar **derivs_h, uchar **derivs_p, save_tree_data save_tree, work_data work) { int i; int ii; int i1; int i2; int j; int jj; int k; int kk; int l; int n_umus_v; int i_ulevel; double a; double b; double *I; double **I_l; double **I_ss_l2; forward_save_single_scattered_radiance_data *save; n_umus_v = n_umus * n_stokes; /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ I = get_work_d1(&work, n_umus * n_stokes); if (n_derivs > 0) I_l = get_work_d2(&work, n_derivs, n_umus * n_stokes); /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ if (save_tree.t) { save_tree_encode_s(&save_tree, "single_scattered_radiance_dn"); if (save_tree_retrieve_data(&save_tree, forward_save_single_scattered_radiance_data, &save)) forward_save_single_scattered_radiance_alloc(save, n_layers, n_umus * n_stokes); } /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ a = F_0 / (4. * PI); for (i = 0; i < n_umus; ++i) { ii = i * n_stokes; b = 1. / umus[i] * a; for (j = 0; j < n_stokes; ++j) { jj = ii + j; I[jj] = I_in[jj] / b; } for (j = 0; j < n_derivs; ++j) for (k = 0; k < n_stokes; ++k) { kk = ii + k; I_l[j][kk] = I_in_l[j][kk] / b; } } /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ i1 = 0; if (! utau_output) i2 = ulevels[n_ulevels - 1]; else i2 = ulevels[n_ulevels - 1] + 1; i_ulevel = 0; /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ if ((! utau_output && i1 == ulevels[i_ulevel]) || (utau_output && i1 == ulevels[i_ulevel] && utaus[i_ulevel] == 0.)) { for (i = 0; i < n_umus_v; ++i) I_ss[i_ulevel][i] = I[i]; for (i = 0; i < n_derivs; ++i) { for (j = 0; j < n_umus_v; ++j) I_ss_l[i_ulevel][i][j] = I_l[i][j]; } i_ulevel++; } /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ for (i = i1; i < i2; ++i) { if ( utau_output && i_ulevel < n_ulevels && i == ulevels[i_ulevel]) { while (i_ulevel < n_ulevels && i == ulevels[i_ulevel]) { if (n_derivs > 0) I_ss_l2 = I_ss_l[i_ulevel]; ssr_dn_layer(i, n_stokes, n_derivs, utaus[i_ulevel], umus, n_umus, omega, omega_l, ltau, ltau_l, btran, btran_l, as_0, as_0_l, P, P_l, I, I_l, I_ss[i_ulevel], I_ss_l2, derivs_h, work); i_ulevel++; } } if (save_tree.t) dvec_copy(save->I[i], I, n_umus * n_stokes); ssr_dn_layer(i, n_stokes, n_derivs, ltau[i], umus, n_umus, omega, omega_l, ltau, ltau_l, btran, btran_l, as_0, as_0_l, P, P_l, I, I_l, I, I_l, derivs_h, work); if (! utau_output && i + 1 == ulevels[i_ulevel]) { for (j = 0; j < n_umus_v; ++j) I_ss[i_ulevel][j] = I[j]; for (j = 0; j < n_derivs; ++j) { for (k = 0; k < n_umus_v; ++k) I_ss_l[i_ulevel][j][k] = I_l[j][k]; } i_ulevel++; } } /*------------------------------------------------------------------------- * *-----------------------------------------------------------------------*/ for (i = 0; i < n_ulevels; ++i) { for (j = 0; j < n_umus; ++j) { ii = j * n_stokes; b = 1. / umus[j] * a; for (k = 0; k < n_stokes; ++k) I_ss[i][ii + k] *= b; for (k = 0; k < n_derivs; ++k) { for (l = 0; l < n_stokes; ++l) { I_ss_l[i][k][ii + l] *= b; } } } } #ifdef USE_AD_FOR_TL_SINGLE_SCATTERED_RADIANCE_DN single_scattered_radiance_dn_tl_with_ad(n_stokes, n_derivs, n_layers, F_0, n_ulevels, ulevels, utaus, umus, n_umus, omega, omega_l, ltau, ltau_l, btran, btran_l, as_0, as_0_l, atran, atran_l, P, P_l, I_in, I_in_l, I_ss, I_ss_l, utau_output, derivs_h, derivs_p, save_tree, work); #endif }
/* *------------------------------------------------------------------------------ * * LSQR finds a solution x to the following problems: * * 1. Unsymmetric equations -- solve A*x = b * * 2. Linear least squares -- solve A*x = b * in the least-squares sense * * 3. Damped least squares -- solve ( A )*x = ( b ) * ( damp*I ) ( 0 ) * in the least-squares sense * * where 'A' is a matrix with 'm' rows and 'n' columns, 'b' is an * 'm'-vector, and 'damp' is a scalar. (All quantities are real.) * The matrix 'A' is intended to be large and sparse. * * * Notation * -------- * * The following quantities are used in discussing the subroutine * parameters: * * 'Abar' = ( A ), 'bbar' = ( b ) * ( damp*I ) ( 0 ) * * 'r' = b - A*x, 'rbar' = bbar - Abar*x * * 'rnorm' = sqrt( norm(r)**2 + damp**2 * norm(x)**2 ) * = norm( rbar ) * * 'rel_prec' = the relative precision of floating-point arithmetic * on the machine being used. Typically 2.22e-16 * with 64-bit arithmetic. * * LSQR minimizes the function 'rnorm' with respect to 'x'. * * * References * ---------- * * C.C. Paige and M.A. Saunders, LSQR: An algorithm for sparse * linear equations and sparse least squares, * ACM Transactions on Mathematical Software 8, 1 (March 1982), * pp. 43-71. * * C.C. Paige and M.A. Saunders, Algorithm 583, LSQR: Sparse * linear equations and least-squares problems, * ACM Transactions on Mathematical Software 8, 2 (June 1982), * pp. 195-209. * * C.L. Lawson, R.J. Hanson, D.R. Kincaid and F.T. Krogh, * Basic linear algebra subprograms for Fortran usage, * ACM Transactions on Mathematical Software 5, 3 (Sept 1979), * pp. 308-323 and 324-325. * *------------------------------------------------------------------------------ */ void lsqr( lsqr_input *input, lsqr_output *output, lsqr_work *work, lsqr_func *func, void *prod, int (*per_iteration_callback)(lsqr_input*, lsqr_output*, void* token), void* token) { double dvec_norm2( dvec * ); long indx, term_iter, term_iter_max; double alpha, beta, rhobar, phibar, bnorm, bbnorm, cs1, sn1, psi, rho, cs, sn, theta, phi, tau, ddnorm, delta, gammabar, zetabar, gamma, cs2, sn2, zeta, xxnorm, res, resid_tol, cond_tol, resid_tol_mach, temp, stop_crit_1, stop_crit_2, stop_crit_3; static char term_msg[8][80] = { "The exact solution is x = x0", "The residual Ax - b is small enough, given ATOL and BTOL", "The least squares error is small enough, given ATOL", "The estimated condition number has exceeded CONLIM", "The residual Ax - b is small enough, given machine precision", "The least squares error is small enough, given machine precision", "The estimated condition number has exceeded machine precision", "The iteration limit has been reached" }; if( input->lsqr_fp_out != NULL ) fprintf( input->lsqr_fp_out, " Least Squares Solution of A*x = b\n" " The matrix A has %7li rows and %7li columns\n" " The damping parameter is\tDAMP = %10.2e\n" " ATOL = %10.2e\t\tCONDLIM = %10.2e\n" " BTOL = %10.2e\t\tITERLIM = %10li\n\n", input->num_rows, input->num_cols, input->damp_val, input->rel_mat_err, input->cond_lim, input->rel_rhs_err, input->max_iter ); output->term_flag = 0; term_iter = 0; output->num_iters = 0; output->frob_mat_norm = 0.0; output->mat_cond_num = 0.0; output->sol_norm = 0.0; for(indx = 0; indx < input->num_cols; indx++) { work->bidiag_wrk_vec->elements[indx] = 0.0; work->srch_dir_vec->elements[indx] = 0.0; output->std_err_vec->elements[indx] = 0.0; output->sol_vec->elements[indx] = 0.0; } bbnorm = 0.0; ddnorm = 0.0; xxnorm = 0.0; cs2 = -1.0; sn2 = 0.0; zeta = 0.0; res = 0.0; if( input->cond_lim > 0.0 ) cond_tol = 1.0 / input->cond_lim; else cond_tol = DBL_EPSILON; alpha = 0.0; beta = 0.0; /* * Set up the initial vectors u and v for bidiagonalization. These satisfy * the relations * BETA*u = b - A*x0 * ALPHA*v = A^T*u */ /* Compute b - A*x0 and store in vector u which initially held vector b */ dvec_scale( (-1.0), input->rhs_vec ); func->mat_vec_prod( 0, input->sol_vec, input->rhs_vec, prod ); dvec_scale( (-1.0), input->rhs_vec ); /* compute Euclidean length of u and store as BETA */ beta = dvec_norm2( input->rhs_vec ); if( beta > 0.0 ) { /* scale vector u by the inverse of BETA */ dvec_scale( (1.0 / beta), input->rhs_vec ); /* Compute matrix-vector product A^T*u and store it in vector v */ func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod ); /* compute Euclidean length of v and store as ALPHA */ alpha = dvec_norm2( work->bidiag_wrk_vec ); } if( alpha > 0.0 ) { /* scale vector v by the inverse of ALPHA */ dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec ); /* copy vector v to vector w */ dvec_copy( work->bidiag_wrk_vec, work->srch_dir_vec ); } output->mat_resid_norm = alpha * beta; output->resid_norm = beta; bnorm = beta; /* * If the norm || A^T r || is zero, then the initial guess is the exact * solution. Exit and report this. */ if( (output->mat_resid_norm == 0.0) && (input->lsqr_fp_out != NULL) ) { fprintf( input->lsqr_fp_out, "\tISTOP = %3li\t\t\tITER = %9li\n" " || A ||_F = %13.5e\tcond( A ) = %13.5e\n" " || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n" " || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n", output->term_flag, output->num_iters, output->frob_mat_norm, output->mat_cond_num, output->resid_norm, output->mat_resid_norm, bnorm, output->sol_norm ); fprintf( input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]); return; } rhobar = alpha; phibar = beta; /* * If statistics are printed at each iteration, print a header and the initial * values for each quantity. */ if( input->lsqr_fp_out != NULL ) { fprintf( input->lsqr_fp_out, " ITER || r || Compatible " "||A^T r|| / ||A|| ||r|| || A || cond( A )\n\n" ); stop_crit_1 = 1.0; stop_crit_2 = alpha / beta; fprintf( input->lsqr_fp_out, "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n", output->num_iters, output->resid_norm, stop_crit_1, stop_crit_2, output->frob_mat_norm, output->mat_cond_num); } /* * The main iteration loop is continued as long as no stopping criteria * are satisfied and the number of total iterations is less than some upper * bound. */ while( output->term_flag == 0 ) { output->num_iters++; /* * Perform the next step of the bidiagonalization to obtain * the next vectors u and v, and the scalars ALPHA and BETA. * These satisfy the relations * BETA*u = A*v - ALPHA*u, * ALFA*v = A^T*u - BETA*v. */ /* scale vector u by the negative of ALPHA */ dvec_scale( (-alpha), input->rhs_vec ); /* compute A*v - ALPHA*u and store in vector u */ func->mat_vec_prod( 0, work->bidiag_wrk_vec, input->rhs_vec, prod ); /* compute Euclidean length of u and store as BETA */ beta = dvec_norm2( input->rhs_vec ); /* accumulate this quantity to estimate Frobenius norm of matrix A */ /* bbnorm += sqr(alpha) + sqr(beta) + sqr(input->damp_val);*/ bbnorm += alpha*alpha + beta*beta + input->damp_val*input->damp_val; if( beta > 0.0 ) { /* scale vector u by the inverse of BETA */ dvec_scale( (1.0 / beta), input->rhs_vec ); /* scale vector v by the negative of BETA */ dvec_scale( (-beta), work->bidiag_wrk_vec ); /* compute A^T*u - BETA*v and store in vector v */ func->mat_vec_prod( 1, work->bidiag_wrk_vec, input->rhs_vec, prod ); /* compute Euclidean length of v and store as ALPHA */ alpha = dvec_norm2( work->bidiag_wrk_vec ); if( alpha > 0.0 ) /* scale vector v by the inverse of ALPHA */ dvec_scale( (1.0 / alpha), work->bidiag_wrk_vec ); } /* * Use a plane rotation to eliminate the damping parameter. * This alters the diagonal (RHOBAR) of the lower-bidiagonal matrix. */ cs1 = rhobar / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ); sn1 = input->damp_val / sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ); psi = sn1 * phibar; phibar = cs1 * phibar; /* * Use a plane rotation to eliminate the subdiagonal element (BETA) * of the lower-bidiagonal matrix, giving an upper-bidiagonal matrix. */ rho = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) + lsqr_sqr(beta) ); cs = sqrt( lsqr_sqr(rhobar) + lsqr_sqr(input->damp_val) ) / rho; sn = beta / rho; theta = sn * alpha; rhobar = -cs * alpha; phi = cs * phibar; phibar = sn * phibar; tau = sn * phi; /* * Update the solution vector x, the search direction vector w, and the * standard error estimates vector se. */ for(indx = 0; indx < input->num_cols; indx++) { /* update the solution vector x */ output->sol_vec->elements[indx] += (phi / rho) * work->srch_dir_vec->elements[indx]; /* update the standard error estimates vector se */ output->std_err_vec->elements[indx] += lsqr_sqr( (1.0 / rho) * work->srch_dir_vec->elements[indx] ); /* accumulate this quantity to estimate condition number of A */ ddnorm += lsqr_sqr( (1.0 / rho) * work->srch_dir_vec->elements[indx] ); /* update the search direction vector w */ work->srch_dir_vec->elements[indx] = work->bidiag_wrk_vec->elements[indx] - (theta / rho) * work->srch_dir_vec->elements[indx]; } /* * Use a plane rotation on the right to eliminate the super-diagonal element * (THETA) of the upper-bidiagonal matrix. Then use the result to estimate * the solution norm || x ||. */ delta = sn2 * rho; gammabar = -cs2 * rho; zetabar = (phi - delta * zeta) / gammabar; /* compute an estimate of the solution norm || x || */ output->sol_norm = sqrt( xxnorm + lsqr_sqr(zetabar) ); gamma = sqrt( lsqr_sqr(gammabar) + lsqr_sqr(theta) ); cs2 = gammabar / gamma; sn2 = theta / gamma; zeta = (phi - delta * zeta) / gamma; /* accumulate this quantity to estimate solution norm || x || */ xxnorm += lsqr_sqr(zeta); /* * Estimate the Frobenius norm and condition of the matrix A, and the * Euclidean norms of the vectors r and A^T*r. */ output->frob_mat_norm = sqrt( bbnorm ); output->mat_cond_num = output->frob_mat_norm * sqrt( ddnorm ); res += lsqr_sqr(psi); output->resid_norm = sqrt( lsqr_sqr(phibar) + res ); output->mat_resid_norm = alpha * fabs( tau ); /* * Use these norms to estimate the values of the three stopping criteria. */ stop_crit_1 = output->resid_norm / bnorm; stop_crit_2 = 0.0; if( output->resid_norm > 0.0 ) stop_crit_2 = output->mat_resid_norm / ( output->frob_mat_norm * output->resid_norm ); stop_crit_3 = 1.0 / output->mat_cond_num; /* 05 Jul 2007: Bug reported by Joel Erickson <*****@*****.**>. */ resid_tol = input->rel_rhs_err + input->rel_mat_err * output->frob_mat_norm * /* (not output->mat_resid_norm *) */ output->sol_norm / bnorm; resid_tol_mach = DBL_EPSILON + DBL_EPSILON * output->frob_mat_norm * /* (not output->mat_resid_norm *) */ output->sol_norm / bnorm; /* * Check to see if any of the stopping criteria are satisfied. * First compare the computed criteria to the machine precision. * Second compare the computed criteria to the the user specified precision. */ /* iteration limit reached */ if( output->num_iters >= input->max_iter ) output->term_flag = 7; /* condition number greater than machine precision */ if( stop_crit_3 <= DBL_EPSILON ) output->term_flag = 6; /* least squares error less than machine precision */ if( stop_crit_2 <= DBL_EPSILON ) output->term_flag = 5; /* residual less than a function of machine precision */ if( stop_crit_1 <= resid_tol_mach ) output->term_flag = 4; /* condition number greater than CONLIM */ if( stop_crit_3 <= cond_tol ) output->term_flag = 3; /* least squares error less than ATOL */ if( stop_crit_2 <= input->rel_mat_err ) output->term_flag = 2; /* residual less than a function of ATOL and BTOL */ if( stop_crit_1 <= resid_tol ) output->term_flag = 1; /* * If statistics are printed at each iteration, print a header and the initial * values for each quantity. */ if( input->lsqr_fp_out != NULL ) { fprintf( input->lsqr_fp_out, "%6li %13.5e %10.2e \t%10.2e \t%10.2e %10.2e\n", output->num_iters, output->resid_norm, stop_crit_1, stop_crit_2, output->frob_mat_norm, output->mat_cond_num); } /* * The convergence criteria are required to be met on NCONV consecutive * iterations, where NCONV is set below. Suggested values are 1, 2, or 3. */ if( output->term_flag == 0 ) term_iter = -1; term_iter_max = 1; term_iter++; if( (term_iter < term_iter_max) && (output->num_iters < input->max_iter) ) output->term_flag = 0; if (per_iteration_callback) per_iteration_callback(input, output, token); } /* end while loop */ /* * Finish computing the standard error estimates vector se. */ temp = 1.0; if( input->num_rows > input->num_cols ) temp = ( double ) ( input->num_rows - input->num_cols ); if( lsqr_sqr(input->damp_val) > 0.0 ) temp = ( double ) ( input->num_rows ); temp = output->resid_norm / sqrt( temp ); for(indx = 0; indx < input->num_cols; indx++) /* update the standard error estimates vector se */ output->std_err_vec->elements[indx] = temp * sqrt( output->std_err_vec->elements[indx] ); /* * If statistics are printed at each iteration, print the statistics for the * stopping condition. */ if( input->lsqr_fp_out != NULL ) { fprintf( input->lsqr_fp_out, "\n\tISTOP = %3li\t\t\tITER = %9li\n" " || A ||_F = %13.5e\tcond( A ) = %13.5e\n" " || r ||_2 = %13.5e\t|| A^T r ||_2 = %13.5e\n" " || b ||_2 = %13.5e\t|| x - x0 ||_2 = %13.5e\n\n", output->term_flag, output->num_iters, output->frob_mat_norm, output->mat_cond_num, output->resid_norm, output->mat_resid_norm, bnorm, output->sol_norm ); fprintf( input->lsqr_fp_out, " %s\n\n", term_msg[output->term_flag]); } return; }