VARIOGRAM *reml_sills(DATA *data, VARIOGRAM *vp) { int i, j, k; MAT **Vk = NULL, *X = MNULL; VEC *Y = VNULL, *init = VNULL; DPOINT *dpa, *dpb; double dx, dy = 0.0, dz = 0.0, dzero2; if (data == NULL || vp == NULL) ErrMsg(ER_NULL, "reml()"); select_at(data, (DPOINT *) NULL); if (vp->n_models <= 0) ErrMsg(ER_VARNOTSET, "reml: please define initial variogram model"); /* * create Y, X, Vk's only once: */ Y = get_y(&data, Y, 1); X = get_X(&data, X, 1); Vk = (MAT **) emalloc(vp->n_models * sizeof(MAT)); init = v_resize(init, vp->n_models); for (i = 0; i < vp->n_models; i++) { init->ve[i] = vp->part[i].sill; /* remember init. values for updating */ vp->part[i].sill = 1; Vk[i] = m_resize(MNULL, X->m, X->m); } dzero2 = gl_zero * gl_zero; for (i = 0; i < data->n_list; i++) { for (j = 0; j < vp->n_models; j++) /* fill diagonals */ Vk[j]->me[i][i] = Covariance(vp->part[j], 0.0, 0.0, 0.0); for (j = 0; j < i; j++) { /* off-diagonal elements: */ dpa = data->list[i]; dpb = data->list[j]; /* * if different points coincide on a locations, shift them, * or the covariance matrix will become singular */ dx = dpa->x - dpb->x; dy = dpa->y - dpb->y; dz = dpa->z - dpb->z; if (data->pp_norm2(dpa, dpb) < dzero2) { if (data->mode & X_BIT_SET) dx = (dx >= 0 ? gl_zero : -gl_zero); if (data->mode & Y_BIT_SET) dy = (dy >= 0 ? gl_zero : -gl_zero); if (data->mode & Z_BIT_SET) dz = (dz >= 0 ? gl_zero : -gl_zero); } for (k = 0; k < vp->n_models; k++) Vk[k]->me[i][j] = Vk[k]->me[j][i] = Covariance(vp->part[k], dx, dy, dz); } } if (reml(Y, X, Vk, vp->n_models, gl_iter, gl_fit_limit, init)) vp->ev->refit = 0; else /* on convergence */ pr_warning("no convergence while fitting variogram"); for (i = 0; i < vp->n_models; i++) vp->part[i].sill = init->ve[i]; update_variogram(vp); if (DEBUG_VGMFIT) logprint_variogram(vp, 1); for (i = 0; i < vp->n_models; i++) m_free(Vk[i]); efree(Vk); m_free(X); v_free(Y); v_free(init); return vp; }
void gcta::fit_reml(string grm_file, string phen_file, string qcovar_file, string covar_file, string qGE_file, string GE_file, string keep_indi_file, string remove_indi_file, string sex_file, int mphen, double grm_cutoff, double adj_grm_fac, int dosage_compen, bool m_grm_flag, bool pred_rand_eff, bool est_fix_eff, int reml_mtd, int MaxIter, vector<double> reml_priors, vector<double> reml_priors_var, vector<int> drop, bool no_lrt, double prevalence, bool no_constrain, bool mlmassoc) { _reml_mtd=reml_mtd; _reml_max_iter=MaxIter; int i=0, j=0, k=0; bool grm_flag=(!grm_file.empty()); bool qcovar_flag=(!qcovar_file.empty()); bool covar_flag=(!covar_file.empty()); bool GE_flag=(!GE_file.empty()); bool qGE_flag=(!qGE_file.empty()); if(m_grm_flag) grm_flag=false; // Read data stringstream errmsg; int qcovar_num=0, covar_num=0, qE_fac_num=0, E_fac_num=0; vector<string> phen_ID, phen_buf, qcovar_ID, covar_ID, qGE_ID, GE_ID, grm_id, grm_files; vector< vector<string> > qcovar, covar, GE, qGE; // save individuals by column if(grm_flag){ read_grm_gz(grm_file, grm_id); update_id_map_kp(grm_id, _id_map, _keep); grm_files.push_back(grm_file); } else if(m_grm_flag){ read_grm_filenames(grm_file, grm_files, false); for(i=0; i<grm_files.size(); i++){ read_grm_gz(grm_files[i], grm_id, false, true); update_id_map_kp(grm_id, _id_map, _keep); } } read_phen(phen_file, phen_ID, phen_buf, mphen); update_id_map_kp(phen_ID, _id_map, _keep); if(qcovar_flag){ qcovar_num=read_covar(qcovar_file, qcovar_ID, qcovar, true); update_id_map_kp(qcovar_ID, _id_map, _keep); } if(covar_flag){ covar_num=read_covar(covar_file, covar_ID, covar, false); update_id_map_kp(covar_ID, _id_map, _keep); } if(qGE_flag){ qE_fac_num=read_GE(qGE_file, qGE_ID, qGE, true); update_id_map_kp(qGE_ID, _id_map, _keep); } if(GE_flag){ E_fac_num=read_GE(GE_file, GE_ID, GE, false); update_id_map_kp(GE_ID, _id_map, _keep); } if(!mlmassoc){ if(!keep_indi_file.empty()) keep_indi(keep_indi_file); if(!remove_indi_file.empty()) remove_indi(remove_indi_file); } if(grm_flag){ if(grm_cutoff>-1.0) rm_cor_indi(grm_cutoff); if(!sex_file.empty()) update_sex(sex_file); if(adj_grm_fac>-1.0) adj_grm(adj_grm_fac); if(dosage_compen>-1) dc(dosage_compen); _grm_N.resize(1,1); } vector<string> uni_id; map<string, int> uni_id_map; map<string, int>::iterator iter; for(i=0; i<_keep.size(); i++){ uni_id.push_back(_fid[_keep[i]]+":"+_pid[_keep[i]]); uni_id_map.insert(pair<string,int>(_fid[_keep[i]]+":"+_pid[_keep[i]], i)); } _n=_keep.size(); if(_n<1) throw("Error: no individual is in common in the input files."); // construct model terms _y.setZero(_n); for(i=0; i<phen_ID.size(); i++){ iter=uni_id_map.find(phen_ID[i]); if(iter==uni_id_map.end()) continue; _y[iter->second]=atof(phen_buf[i].c_str()); } int pos=0; _r_indx.clear(); eigenMatrix A_N(_n, _n); vector<int> kp; if(grm_flag){ for(i=0; i<1+qE_fac_num+E_fac_num; i++) _r_indx.push_back(i); if(!no_lrt) drop_comp(drop); _A=eigenMatrix::Zero(_n, _r_indx.size()*_n); if(mlmassoc) StrFunc::match(uni_id, grm_id, kp); else kp=_keep; for(i=0; i<_n; i++){ for(j=0; j<=i; j++) (_A.block(0,0,_n,_n))(j,i)=(_A.block(0,0,_n,_n))(i,j)=_grm(kp[i],kp[j]); } pos++; _grm.resize(1,1); } else if(m_grm_flag){ if(!sex_file.empty()) update_sex(sex_file); for(i=0; i<(1+qE_fac_num+E_fac_num)*grm_files.size(); i++) _r_indx.push_back(i); if(!no_lrt) drop_comp(drop); _A=eigenMatrix::Zero(_n, _r_indx.size()*_n); string prev_file=grm_files[0]; vector<string> prev_grm_id(grm_id); cout<<"There are "<<grm_files.size()<<" GRM file names specified in the file ["+grm_file+"]."<<endl; for(i=0; i<grm_files.size(); i++, pos++){ cout<<"Reading the GRM from the "<<i+1<<"th file ..."<<endl; read_grm_gz(grm_files[i], grm_id); if(adj_grm_fac>-1.0) adj_grm(adj_grm_fac); if(dosage_compen>-1) dc(dosage_compen); StrFunc::match(uni_id, grm_id, kp); int pos_n=pos*_n; for(j=0; j<_n; j++){ for(k=0; k<=j; k++){ if(kp[j]>=kp[k]) (_A.block(0,pos_n,_n,_n))(k,j)=(_A.block(0,pos_n,_n,_n))(j,k)=_grm(kp[j],kp[k]); else (_A.block(0,pos_n,_n,_n))(k,j)=(_A.block(0,pos_n,_n,_n))(j,k)=_grm(kp[k],kp[j]); } } prev_file=grm_files[i]; prev_grm_id=grm_id; } _grm_N.resize(1,1); _grm.resize(1,1); } // GE interaction vector<eigenMatrix> E_float(E_fac_num); eigenMatrix qE_float, mbuf; if(qGE_flag){ qE_float.resize(_n, qE_fac_num); for(i=0; i<qGE_ID.size(); i++){ iter=uni_id_map.find(qGE_ID[i]); if(iter==uni_id_map.end()) continue; for(j=0; j<qE_fac_num; j++) qE_float(iter->second,j)=atof(qGE[i][j].c_str()); } for(j=0; j<qE_fac_num; j++){ mbuf=((qE_float.block(0,j,_n,1))*(qE_float.block(0,j,_n,1)).transpose()); for(i=0; i<grm_files.size(); i++, pos++) (_A.block(0,pos*_n,_n,_n))=(_A.block(0,i*_n,_n,_n)).array()*mbuf.array(); } } if(GE_flag){ vector< vector<string> > E_str(E_fac_num); for(i=0; i<E_fac_num; i++) E_str[i].resize(_n); for(i=0; i<GE_ID.size(); i++){ iter=uni_id_map.find(GE_ID[i]); if(iter!=uni_id_map.end()){ for(j=0; j<E_fac_num; j++) E_str[j][iter->second]=GE[i][j]; } } for(j=0; j<E_fac_num; j++){ stringstream errmsg; errmsg<<"Error: too many classes for the "<<j+1<<"th environmental factor. \nPlease make sure you input a discrete variable as the environmental factor."; string errmsg1=errmsg.str(); errmsg.str(""); errmsg<<"Error: the "<<j+1<<"th envronmental factor has only one class."; string errmsg2=errmsg.str(); coeff_mat(E_str[j], E_float[j], errmsg1, errmsg2); mbuf=((E_float[j])*(E_float[j]).transpose()); for(i=0; i<grm_files.size(); i++, pos++) (_A.block(0,pos*_n,_n,_n))=(_A.block(0,i*_n,_n,_n)).array()*mbuf.array(); } } // construct X matrix construct_X(uni_id_map, qcovar_flag, qcovar_num, qcovar_ID, qcovar, covar_flag, covar_num, covar_ID, covar, E_float, qE_float); // names of variance component for(i=0; i<grm_files.size(); i++){ stringstream strstrm; if(grm_files.size()==1) strstrm<<""; else strstrm<<i+1; _var_name.push_back("V(G"+strstrm.str()+")"); _hsq_name.push_back("V(G"+strstrm.str()+")/Vp"); } for(j=0; j<qE_fac_num; j++){ for(i=0; i<grm_files.size(); i++){ stringstream strstrm1,strstrm2; if(grm_files.size()==1) strstrm1<<""; else strstrm1<<i+1; if(qE_fac_num==1) strstrm2<<""; else strstrm2<<j+1; _var_name.push_back("V(G"+strstrm1.str()+"xqE"+strstrm2.str()+")"); _hsq_name.push_back("V(G"+strstrm1.str()+"xqE"+strstrm2.str()+")"+"/Vp"); } } for(j=0; j<E_fac_num; j++){ for(i=0; i<grm_files.size(); i++){ stringstream strstrm1,strstrm2; if(grm_files.size()==1) strstrm1<<""; else strstrm1<<i+1; if(E_fac_num==1) strstrm2<<""; else strstrm2<<j+1; _var_name.push_back("V(G"+strstrm1.str()+"xE"+strstrm2.str()+")"); _hsq_name.push_back("V(G"+strstrm1.str()+"xE"+strstrm2.str()+")"+"/Vp"); } } _var_name.push_back("V(e)"); // run REML algorithm cout<<_n<<" individuals are in common in these files."<<endl; reml(pred_rand_eff, est_fix_eff, reml_priors, reml_priors_var, prevalence, no_constrain, no_lrt, mlmassoc); }