Beispiel #1
0
VARIOGRAM *reml_sills(DATA *data, VARIOGRAM *vp) {
	int i, j, k;
	MAT **Vk = NULL, *X = MNULL;
	VEC *Y = VNULL, *init = VNULL;
	DPOINT *dpa, *dpb;
	double dx, dy = 0.0, dz = 0.0, dzero2;

	if (data == NULL  || vp == NULL)
		ErrMsg(ER_NULL, "reml()");
	select_at(data, (DPOINT *) NULL);
	if (vp->n_models <= 0)
		ErrMsg(ER_VARNOTSET, "reml: please define initial variogram model");
/*
 * create Y, X, Vk's only once:
 */
	Y = get_y(&data, Y, 1);
	X = get_X(&data, X, 1);
	Vk = (MAT **) emalloc(vp->n_models * sizeof(MAT));
	init = v_resize(init, vp->n_models);

	for (i = 0; i < vp->n_models; i++) {
		init->ve[i] = vp->part[i].sill; /* remember init. values for updating */
		vp->part[i].sill = 1;
		Vk[i] = m_resize(MNULL, X->m, X->m);
	}
	dzero2 = gl_zero * gl_zero;
	for (i = 0; i < data->n_list; i++) {
		for (j = 0; j < vp->n_models; j++) /* fill diagonals */
			Vk[j]->me[i][i] = Covariance(vp->part[j], 0.0, 0.0, 0.0);
		for (j = 0; j < i; j++) { /* off-diagonal elements: */
			dpa = data->list[i];
			dpb = data->list[j];
			/* 
		 	 * if different points coincide on a locations, shift them,
		 	 * or the covariance matrix will become singular
		 	 */
			dx = dpa->x - dpb->x;
			dy = dpa->y - dpb->y;
			dz = dpa->z - dpb->z;
			if (data->pp_norm2(dpa, dpb) < dzero2) {
				if (data->mode & X_BIT_SET)
					dx = (dx >= 0 ? gl_zero : -gl_zero);
				if (data->mode & Y_BIT_SET)
					dy = (dy >= 0 ? gl_zero : -gl_zero);
				if (data->mode & Z_BIT_SET)
					dz = (dz >= 0 ? gl_zero : -gl_zero);
			}
			for (k = 0; k < vp->n_models; k++)
				Vk[k]->me[i][j] = Vk[k]->me[j][i] = 
						Covariance(vp->part[k], dx, dy, dz);
		}
	}
	if (reml(Y, X, Vk, vp->n_models, gl_iter, gl_fit_limit, init))
		vp->ev->refit = 0;
	else /* on convergence */
		pr_warning("no convergence while fitting variogram");
	for (i = 0; i < vp->n_models; i++)
		vp->part[i].sill = init->ve[i];
	update_variogram(vp);
	if (DEBUG_VGMFIT)
		logprint_variogram(vp, 1);
	for (i = 0; i < vp->n_models; i++)
		m_free(Vk[i]); 
	efree(Vk);
	m_free(X);
	v_free(Y);
	v_free(init);
	return vp;
}
Beispiel #2
0
void gcta::fit_reml(string grm_file, string phen_file, string qcovar_file, string covar_file, string qGE_file, string GE_file, string keep_indi_file, string remove_indi_file, string sex_file, int mphen, double grm_cutoff, double adj_grm_fac, int dosage_compen, bool m_grm_flag, bool pred_rand_eff, bool est_fix_eff, int reml_mtd, int MaxIter, vector<double> reml_priors, vector<double> reml_priors_var, vector<int> drop, bool no_lrt, double prevalence, bool no_constrain, bool mlmassoc)
{
    _reml_mtd=reml_mtd;
    _reml_max_iter=MaxIter;
    int i=0, j=0, k=0;
    bool grm_flag=(!grm_file.empty());
    bool qcovar_flag=(!qcovar_file.empty());
    bool covar_flag=(!covar_file.empty());
    bool GE_flag=(!GE_file.empty());
    bool qGE_flag=(!qGE_file.empty());
    if(m_grm_flag) grm_flag=false;

    // Read data
    stringstream errmsg;
    int qcovar_num=0, covar_num=0, qE_fac_num=0, E_fac_num=0;
    vector<string> phen_ID, phen_buf, qcovar_ID, covar_ID, qGE_ID, GE_ID, grm_id, grm_files;
    vector< vector<string> > qcovar, covar, GE, qGE; // save individuals by column

    if(grm_flag){
        read_grm_gz(grm_file, grm_id);
        update_id_map_kp(grm_id, _id_map, _keep);
        grm_files.push_back(grm_file);
    }
    else if(m_grm_flag){
        read_grm_filenames(grm_file, grm_files, false);
        for(i=0; i<grm_files.size(); i++){
            read_grm_gz(grm_files[i], grm_id, false, true);
            update_id_map_kp(grm_id, _id_map, _keep);
        }
    }
    read_phen(phen_file, phen_ID, phen_buf, mphen);
    update_id_map_kp(phen_ID, _id_map, _keep);
    if(qcovar_flag){
        qcovar_num=read_covar(qcovar_file, qcovar_ID, qcovar, true);
        update_id_map_kp(qcovar_ID, _id_map, _keep);
    }
    if(covar_flag){
        covar_num=read_covar(covar_file, covar_ID, covar, false);
        update_id_map_kp(covar_ID, _id_map, _keep);
    }
    if(qGE_flag){
        qE_fac_num=read_GE(qGE_file, qGE_ID, qGE, true);
        update_id_map_kp(qGE_ID, _id_map, _keep);
    }
    if(GE_flag){
        E_fac_num=read_GE(GE_file, GE_ID, GE, false);
        update_id_map_kp(GE_ID, _id_map, _keep);
    }
    if(!mlmassoc){
        if(!keep_indi_file.empty()) keep_indi(keep_indi_file);
        if(!remove_indi_file.empty()) remove_indi(remove_indi_file);       
    }
    if(grm_flag){
        if(grm_cutoff>-1.0) rm_cor_indi(grm_cutoff);
        if(!sex_file.empty()) update_sex(sex_file);
        if(adj_grm_fac>-1.0) adj_grm(adj_grm_fac);
        if(dosage_compen>-1) dc(dosage_compen);
        _grm_N.resize(1,1);
    }

    vector<string> uni_id;
	map<string, int> uni_id_map;
    map<string, int>::iterator iter;
	for(i=0; i<_keep.size(); i++){
	    uni_id.push_back(_fid[_keep[i]]+":"+_pid[_keep[i]]);
	    uni_id_map.insert(pair<string,int>(_fid[_keep[i]]+":"+_pid[_keep[i]], i));
	}
    _n=_keep.size();
    if(_n<1) throw("Error: no individual is in common in the input files.");

    // construct model terms
    _y.setZero(_n);
    for(i=0; i<phen_ID.size(); i++){
        iter=uni_id_map.find(phen_ID[i]);
        if(iter==uni_id_map.end()) continue;
        _y[iter->second]=atof(phen_buf[i].c_str());
    }

    int pos=0;
    _r_indx.clear();
    eigenMatrix A_N(_n, _n);
    vector<int> kp;
    if(grm_flag){
        for(i=0; i<1+qE_fac_num+E_fac_num; i++) _r_indx.push_back(i);
        if(!no_lrt) drop_comp(drop);
        _A=eigenMatrix::Zero(_n, _r_indx.size()*_n);
        if(mlmassoc) StrFunc::match(uni_id, grm_id, kp);
        else kp=_keep;
        for(i=0; i<_n; i++){
            for(j=0; j<=i; j++) (_A.block(0,0,_n,_n))(j,i)=(_A.block(0,0,_n,_n))(i,j)=_grm(kp[i],kp[j]);
        }
        pos++;
        _grm.resize(1,1);
    }
    else if(m_grm_flag){
        if(!sex_file.empty()) update_sex(sex_file);
        for(i=0; i<(1+qE_fac_num+E_fac_num)*grm_files.size(); i++) _r_indx.push_back(i);
        if(!no_lrt) drop_comp(drop);
        _A=eigenMatrix::Zero(_n, _r_indx.size()*_n);
        string prev_file=grm_files[0];
        vector<string> prev_grm_id(grm_id);
        cout<<"There are "<<grm_files.size()<<" GRM file names specified in the file ["+grm_file+"]."<<endl;
        for(i=0; i<grm_files.size(); i++, pos++){
            cout<<"Reading the GRM from the "<<i+1<<"th file ..."<<endl;
            read_grm_gz(grm_files[i], grm_id);
            if(adj_grm_fac>-1.0) adj_grm(adj_grm_fac);
            if(dosage_compen>-1) dc(dosage_compen);
            StrFunc::match(uni_id, grm_id, kp);
            int pos_n=pos*_n;
            for(j=0; j<_n; j++){
                for(k=0; k<=j; k++){
                    if(kp[j]>=kp[k]) (_A.block(0,pos_n,_n,_n))(k,j)=(_A.block(0,pos_n,_n,_n))(j,k)=_grm(kp[j],kp[k]);
                    else (_A.block(0,pos_n,_n,_n))(k,j)=(_A.block(0,pos_n,_n,_n))(j,k)=_grm(kp[k],kp[j]);
                }
            }
            prev_file=grm_files[i];
            prev_grm_id=grm_id;
       }
        _grm_N.resize(1,1);
        _grm.resize(1,1);
    }

    // GE interaction
    vector<eigenMatrix> E_float(E_fac_num);
    eigenMatrix qE_float, mbuf;
    if(qGE_flag){
        qE_float.resize(_n, qE_fac_num);
        for(i=0; i<qGE_ID.size(); i++){
            iter=uni_id_map.find(qGE_ID[i]);
            if(iter==uni_id_map.end()) continue;
            for(j=0; j<qE_fac_num; j++) qE_float(iter->second,j)=atof(qGE[i][j].c_str());
        }
        for(j=0; j<qE_fac_num; j++){
            mbuf=((qE_float.block(0,j,_n,1))*(qE_float.block(0,j,_n,1)).transpose());
            for(i=0; i<grm_files.size(); i++, pos++) (_A.block(0,pos*_n,_n,_n))=(_A.block(0,i*_n,_n,_n)).array()*mbuf.array();
        }
    }
    if(GE_flag){
        vector< vector<string> > E_str(E_fac_num);
        for(i=0; i<E_fac_num; i++) E_str[i].resize(_n);
        for(i=0; i<GE_ID.size(); i++){
            iter=uni_id_map.find(GE_ID[i]);
            if(iter!=uni_id_map.end()){
                for(j=0; j<E_fac_num; j++) E_str[j][iter->second]=GE[i][j];
            }
        }
        for(j=0; j<E_fac_num; j++){
            stringstream errmsg;
            errmsg<<"Error: too many classes for the "<<j+1<<"th environmental factor. \nPlease make sure you input a discrete variable as the environmental factor.";
            string errmsg1=errmsg.str();
            errmsg.str("");
            errmsg<<"Error: the "<<j+1<<"th envronmental factor has only one class.";
            string errmsg2=errmsg.str();
            coeff_mat(E_str[j], E_float[j], errmsg1, errmsg2);
            mbuf=((E_float[j])*(E_float[j]).transpose());
            for(i=0; i<grm_files.size(); i++, pos++) (_A.block(0,pos*_n,_n,_n))=(_A.block(0,i*_n,_n,_n)).array()*mbuf.array();
        }
    }

    // construct X matrix
    construct_X(uni_id_map, qcovar_flag, qcovar_num, qcovar_ID, qcovar, covar_flag, covar_num, covar_ID, covar, E_float, qE_float);

    // names of variance component
    for(i=0; i<grm_files.size(); i++){
        stringstream strstrm;
        if(grm_files.size()==1) strstrm<<"";
        else strstrm<<i+1;
        _var_name.push_back("V(G"+strstrm.str()+")");
        _hsq_name.push_back("V(G"+strstrm.str()+")/Vp");
    }
    for(j=0; j<qE_fac_num; j++){
        for(i=0; i<grm_files.size(); i++){
            stringstream strstrm1,strstrm2;
            if(grm_files.size()==1) strstrm1<<"";
            else  strstrm1<<i+1;
            if(qE_fac_num==1) strstrm2<<"";
            else strstrm2<<j+1;
            _var_name.push_back("V(G"+strstrm1.str()+"xqE"+strstrm2.str()+")");
            _hsq_name.push_back("V(G"+strstrm1.str()+"xqE"+strstrm2.str()+")"+"/Vp");
        }
    }
    for(j=0; j<E_fac_num; j++){
        for(i=0; i<grm_files.size(); i++){
            stringstream strstrm1,strstrm2;
            if(grm_files.size()==1) strstrm1<<"";
            else  strstrm1<<i+1;
            if(E_fac_num==1) strstrm2<<"";
            else strstrm2<<j+1;
            _var_name.push_back("V(G"+strstrm1.str()+"xE"+strstrm2.str()+")");
            _hsq_name.push_back("V(G"+strstrm1.str()+"xE"+strstrm2.str()+")"+"/Vp");
        }
    }
    _var_name.push_back("V(e)");

    // run REML algorithm
    cout<<_n<<" individuals are in common in these files."<<endl;
	reml(pred_rand_eff, est_fix_eff, reml_priors, reml_priors_var, prevalence, no_constrain, no_lrt, mlmassoc);
}