void altRss2(double *tmppheno, double *pheno, int nphe, int n_ind, int n_gen1, int n_gen2, int *Draws1, int *Draws2, double **Addcov, int n_addcov, double **Intcov, int n_intcov, double *lrss, double *dwork_add, double *dwork_full, int multivar, double *weights, int n_col2drop, int *allcol2drop) { int i, j, k, s, nrss, lwork, rank, info; int n_col_a, n_col_f, n_gen_sq, ind_idx; double *x, *x_bk, *singular, *yfit, *work, *rss, *rss_det=0, *coef; double alpha=1.0, beta=0.0, tol=TOL, dtmp; if( (nphe==1) || (multivar==1) ) nrss = 1; else nrss = nphe; /* allocate memory */ rss = (double *)R_alloc(nrss, sizeof(double)); /* constants */ /* number of columns for Q1*Q2 */ n_gen_sq = n_gen1*n_gen2; /* number of columns of X for additive model */ n_col_a = (n_gen1+n_gen2-1) + n_addcov + n_intcov*(n_gen1+n_gen2-2); /* number of columns of X for full model */ n_col_f = n_gen_sq + n_addcov + n_intcov*(n_gen_sq-1); /************************************** * Now work on the additive model **************************************/ /* split the memory block */ lwork = 3*n_col_a + MAX(n_ind, nphe); singular = dwork_add; work = singular + n_col_a; x = work + lwork; x_bk = x + n_ind*n_col_a; yfit = x_bk + n_ind*n_col_a; coef = yfit + n_ind*nphe; if(multivar == 1) rss_det = yfit + n_col_a*nphe; /* zero out X matrix */ for(i=0; i<n_ind*n_col_a; i++) x[i] = 0.0; rank = n_col_a; /* fill up X matrix */ for(i=0; i<n_ind; i++) { x[i+(Draws1[i]-1)*n_ind] = weights[i]; /* QTL 1 */ s = n_gen1; if(Draws2[i] < n_gen2) /* QTL 2 */ x[i+(Draws2[i]-1+s)*n_ind] = weights[i]; s += (n_gen2-1); for(k=0; k<n_addcov; k++) /* add cov */ x[i+(k+s)*n_ind] = Addcov[k][i]; s += n_addcov; for(k=0; k<n_intcov; k++) { if(Draws1[i] < n_gen1) /* QTL1 x int cov */ x[i+(Draws1[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen1-1); if(Draws2[i] < n_gen2) /* QTL 2 x int cov*/ x[i+(Draws2[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen2-1); } } /* end loop over individuals */ /* drop cols */ if(n_col2drop) dropcol_x(&n_col_a, n_ind, allcol2drop, x); rank = n_col_a; /* make a copy of x matrix, we may need it */ memcpy(x_bk, x, n_ind*n_col_a*sizeof(double)); /* copy pheno to tmppheno, because dgelss will destroy the input pheno array */ memcpy(tmppheno, pheno, n_ind*nphe*sizeof(double)); /* Call LAPACK engine DGELSS to do linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss (&n_ind, &n_col_a, &nphe, x, x_bk, pheno, tmppheno, singular, &tol, &rank, work, &lwork, &info); /* calculate residual sum of squares */ if(nphe == 1) { /*only one phenotype */ /* if the design matrix is full rank */ if(rank == n_col_a) for (i=rank, rss[0]=0.0; i<n_ind; i++) rss[0] += tmppheno[i]*tmppheno[i]; else { /* the desigm matrix is not full rank, this is trouble */ /* calculate the fitted value */ matmult(yfit, x_bk, n_ind, n_col_a, tmppheno, 1); /* calculate rss */ for (i=0, rss[0]=0.0; i<n_ind; i++) rss[0] += (pheno[i]-yfit[i]) * (pheno[i]-yfit[i]); } } else { /* multiple phenotypes */ if(multivar == 1) { /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_a, tmppheno+i*n_ind, n_col_a*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_a, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calcualte rss_det = tmppheno'*tmppheno. */ /* clear rss_det */ for (i=0; i<nphe*nphe; i++) rss_det[i] = 0.0; /* Call BLAS routine dgemm. Note that the result rss_det is a symemetric positive definite matrix */ /* the dimension of tmppheno is n_ind x nphe */ mydgemm(&nphe, &n_ind, &alpha, tmppheno, &beta, rss_det); /* calculate the determinant of rss */ /* do Cholesky factorization on rss_det */ mydpotrf(&nphe, rss_det, &info); for(i=0, rss[0]=1.0;i<nphe; i++) rss[0] *= rss_det[i*nphe+i]*rss_det[i*nphe+i]; } else { /* return rss as a vector */ if(rank == n_col_a) { /* design matrix is of full rank, this is easier */ for(i=0; i<nrss; i++) { ind_idx = i*n_ind; for(j=rank, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } else { /* design matrix is singular, this is troubler */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_a, tmppheno+i*n_ind, n_col_a*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_a, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; for(i=0; i<nrss; i++) { ind_idx = i*n_ind; for(j=0, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } } } /* take log10 */ for(i=0; i<nrss; i++) lrss[i] = log10(rss[i]); /************************************** * Finish additive model **************************************/ /******************* * INTERACTIVE MODEL *******************/ /* split the memory block */ lwork = 3*n_col_f + MAX(n_ind, nphe); singular = dwork_full; work = singular + n_col_f; x = work + lwork; x_bk = x + n_ind*n_col_f; yfit = x_bk + n_ind*n_col_f; coef = yfit + n_ind*nphe; if(multivar == 1) rss_det = coef + n_col_f*nphe; /* zero out X matrix */ for(i=0; i<n_ind*n_col_f; i++) x[i] = 0.0; rank = n_col_f; /* fill up X matrix */ for(i=0; i<n_ind; i++) { x[i+(Draws1[i]-1)*n_ind] = weights[i]; /* QTL 1 */ s = n_gen1; if(Draws2[i] < n_gen2) /* QTL 2 */ x[i+(Draws2[i]-1+s)*n_ind] = weights[i]; s += (n_gen2-1); for(k=0; k<n_addcov; k++) /* add cov */ x[i+(k+s)*n_ind] = Addcov[k][i]; s += n_addcov; for(k=0; k<n_intcov; k++) { if(Draws1[i] < n_gen1) /* QTL1 x int cov */ x[i+(Draws1[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen1-1); if(Draws2[i] < n_gen2) /* QTL 2 x int cov */ x[i+(Draws2[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen2-1); } if(Draws1[i] < n_gen1 && Draws2[i] < n_gen2) /* QTL x QTL */ x[i+((Draws1[i]-1)*(n_gen2-1)+Draws2[i]-1+s)*n_ind] = weights[i]; s += ((n_gen1-1)*(n_gen2-1)); for(k=0; k<n_intcov; k++) { /* QTL x QTL x int cov */ if(Draws1[i] < n_gen1 && Draws2[i] < n_gen2) x[i+((Draws1[i]-1)*(n_gen2-1)+Draws2[i]-1+s)*n_ind] = Intcov[k][i]; s += ((n_gen1-1)*(n_gen2-1)); } } /* end loop over individuals */ /* drop x's */ if(n_col2drop) dropcol_x(&n_col_f, n_ind, allcol2drop, x); rank = n_col_f; /* make a copy of x matrix, we may need it */ memcpy(x_bk, x, n_ind*n_col_f*sizeof(double)); /* copy pheno to tmppheno, because dgelss will destroy the input pheno array */ memcpy(tmppheno, pheno, n_ind*nphe*sizeof(double)); /* Call LAPACK engine DGELSS to do linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss (&n_ind, &n_col_f, &nphe, x, x_bk, pheno, tmppheno, singular, &tol, &rank, work, &lwork, &info); /* calculate residual sum of squares */ if(nphe == 1) { /* one phenotype */ /* if the design matrix is full rank */ if(rank == n_col_f) for (i=rank, rss[0]=0.0; i<n_ind; i++) rss[0] += tmppheno[i]*tmppheno[i]; else { /* the desigm matrix is not full rank, this is trouble */ /* calculate the fitted value */ matmult(yfit, x_bk, n_ind, n_col_f, tmppheno, 1); /* calculate rss */ for (i=0, rss[0]=0.0; i<n_ind; i++) rss[0] += (pheno[i]-yfit[i]) * (pheno[i]-yfit[i]); } } else { /* mutlple phenotypes */ if(multivar == 1) { /* multivariate model, rss=det(rss) */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_f, tmppheno+i*n_ind, n_col_f*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_f, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calcualte rss_det = tmppheno'*tmppheno. */ /* clear rss_det */ for (i=0; i<nphe*nphe; i++) rss_det[i] = 0.0; /* Call BLAS routine dgemm. Note that the result rss_det is a symemetric positive definite matrix */ /* the dimension of tmppheno is n_ind x nphe */ mydgemm(&nphe, &n_ind, &alpha, tmppheno, &beta, rss_det); /* calculate the determinant of rss */ /* do Cholesky factorization on rss_det */ mydpotrf(&nphe, rss_det, &info); for(i=0, rss[0]=1.0;i<nphe; i++) rss[0] *= rss_det[i*nphe+i]*rss_det[i*nphe+i]; } else { /* return rss as a vector */ if(rank == n_col_f) { /*design matrix is of full rank, this is easier */ for(i=0; i<nrss; i++) { ind_idx = i * n_ind; for(j=rank, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } else { /* sigular design matrix */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_f, tmppheno+i*n_ind, n_col_f*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_f, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; for(i=0; i<nrss; i++) { ind_idx = i * n_ind; for(j=0, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } } } /* take log10 */ for(i=0; i<nrss; i++) lrss[i+nrss] = log10(rss[i]); }
void scantwo_1chr_hk(int n_ind, int n_pos, int n_gen, double ***Genoprob, double *****Pairprob, double **Addcov, int n_addcov, double **Intcov, int n_intcov, double *pheno, int nphe, double *weights, double ***Result, int n_col2drop, int *col2drop) { int n_col_a, n_col_f, n_gen_sq, multivar=0, rank=0, n_col_a_temp, n_col_f_temp; int itmp, i, i2, j, k, k2, k3, s, nrss, lwork, info, ind_idx; /* additive model working arrays */ /* double *dwork_add, *x_add, *x_bk_add, *singular_add, *work_add, *yfit_add, *coef_add; */ /* full model working arrays */ /* double *dwork_full, *x_full, *x_bk_full, *singular_full, *work_full, *yfit_full, *coef_full;*/ double *dwork, *x, *x_bk, *singular, *work, *yfit, *coef, *tmppheno; double tol=TOL, dtmp=0; int *allcol2drop; /* number of rss */ if( (nphe==1) || (multivar==1) ) nrss = 1; else nrss = nphe; /* tolerance for linear regression */ tol = TOL; n_gen_sq = n_gen*n_gen; /* no. param in additive QTL model */ n_col_a = (n_gen*2-1)+n_addcov+n_intcov*(n_gen-1)*2; /* no. param full model */ n_col_f = n_gen_sq+n_addcov+n_intcov*(n_gen_sq-1); /* expand col2drop */ if(n_col2drop) { allocate_int(n_col_f, &allcol2drop); expand_col2drop(n_gen, n_addcov, n_intcov, col2drop, allcol2drop); } /* allocate space and set things up - I will leave multivariate model at this time */ tmppheno = (double *)R_alloc(n_ind*nphe, sizeof(double)); /* for full model */ lwork = 3*n_col_f + MAX(n_ind, nphe);; if(multivar == 1) /* request to do multivariate normal model */ dwork = (double *)R_alloc(n_col_f+lwork+2*n_ind*n_col_f+n_ind*nphe+nphe*nphe+n_col_f*nphe, sizeof(double)); else dwork = (double *)R_alloc(n_col_f + lwork + 2*n_ind*n_col_f + n_ind*nphe + n_col_f*nphe, sizeof(double)); /* split memory block */ lwork = 3*n_col_f + MAX(n_ind, nphe); singular = dwork; work = singular + n_col_f; x = work + lwork; x_bk = x + n_ind*n_col_f; yfit = x_bk + n_ind*n_col_f; coef = yfit + n_ind*nphe; /*************************** * finish memory allocation ***************************/ /* modify pheno, Addcov and Intcov with weights */ for(i=0; i<n_ind; i++) { for(j=0; j<nphe; j++) pheno[i+j*n_ind] *= weights[i]; for(j=0; j<n_addcov; j++) Addcov[j][i] *= weights[i]; for(j=0; j<n_intcov; j++) Intcov[j][i] *= weights[i]; } for(i=0; i<n_pos-1; i++) { for(i2=i+1; i2<n_pos; i2++) { /* loop over pairs of positions */ R_CheckUserInterrupt(); /* check for ^C */ /* ADDITIVE MODEL */ rank = n_col_a; /* fill up X matrix */ for(j=0; j<n_ind; j++) { for(k=0, s=0; k<n_gen; k++, s++) /* QTL 1 */ x[j+s*n_ind] = Genoprob[k][i][j]*weights[j]; /* s keeps track of column */ for(k=0; k<n_gen-1; k++,s++) /* QTL 2 */ x[j+s*n_ind] = Genoprob[k][i2][j]*weights[j]; for(k=0; k<n_addcov; k++, s++) /* additive covariates */ x[j+s*n_ind] = Addcov[k][j]; for(k2=0; k2<n_intcov; k2++) { for(k=0; k<n_gen-1; k++, s++) /* interactive x QTL 1 */ x[j+s*n_ind] = Genoprob[k][i][j]*Intcov[k2][j]; for(k=0; k<n_gen-1; k++, s++) /* interactive x QTL 2 */ x[j+s*n_ind] = Genoprob[k][i2][j]*Intcov[k2][j]; } } /* drop cols */ n_col_a_temp = n_col_a; if(n_col2drop) dropcol_x(&n_col_a_temp, n_ind, allcol2drop, x); rank = n_col_a_temp; /* linear regression of phenotype on QTL genotype probabilities */ /* make a copy of x matrix, we may need it */ memcpy(x_bk, x, n_ind*n_col_a_temp*sizeof(double)); /* copy pheno to tmppheno, because dgelss will destroy the input pheno array */ memcpy(tmppheno, pheno, n_ind*nphe*sizeof(double)); /* Call LAPACK engine DGELSS to do linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss (&n_ind, &n_col_a_temp, &nphe, x, x_bk, pheno, tmppheno, singular, &tol, &rank, work, &lwork, &info); /* F77_CALL(dqrls)(x, &n_ind, &n_col_a, pheno, &ny, &tol, coef, resid, qty, &k, jpvt, qraux, work);*/ /* RSS */ /* calculate residual sum of squares */ if(nphe == 1) { /*only one phenotype */ /* if the design matrix is full rank */ if(rank == n_col_a_temp) { for (itmp=rank, dtmp=0.0; itmp<n_ind; itmp++) dtmp += tmppheno[itmp]*tmppheno[itmp]; } else { /* the design matrix is not full rank, this is trouble */ /* calculate the fitted value */ matmult(yfit, x_bk, n_ind, n_col_a_temp, tmppheno, 1); /* calculate rss */ for (itmp=0, dtmp=0.0; itmp<n_ind; itmp++) dtmp += (pheno[itmp]-yfit[itmp]) * (pheno[itmp]-yfit[itmp]); } Result[0][i2][i] = log10(dtmp); } else { /* multiple phenotypes */ if(multivar == 1) { /* multivariate model, I will leave it now */ } else{ if(rank == n_col_a_temp) { /* design matrix is of full rank, this is easier */ for(itmp=0, ind_idx=0; itmp<nrss; itmp++, ind_idx+=n_ind) { /* loop thru phenotypes */ for(j=rank, dtmp=0.0; j<n_ind; j++) dtmp += tmppheno[ind_idx+j] * tmppheno[ind_idx+j]; Result[itmp][i2][i] = log10(dtmp); } } else { /* design matrix is singular, this is troubler */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (itmp=0; itmp<nphe; itmp++) memcpy(coef+itmp*n_col_a_temp, tmppheno+itmp*n_ind, n_col_a_temp*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_a_temp, coef, nphe); /* calculate residual, put the result in tmppheno */ for (itmp=0; itmp<n_ind*nphe; itmp++) tmppheno[itmp] = pheno[itmp] - yfit[itmp]; for(itmp=0; itmp<nrss; itmp++) { /* loop thru phenotypes */ ind_idx = itmp*n_ind; for(j=0, dtmp=0.0; j<n_ind; j++) dtmp += tmppheno[ind_idx+j] * tmppheno[ind_idx+j]; Result[itmp][i2][i] = log10(dtmp); } } } } /* INTERACTIVE MODEL */ rank = n_col_f; /* fill up X matrix */ for(j=0; j<n_ind; j++) { for(k=0, s=0; k<n_gen; k++, s++) /* QTL 1 */ x[j+s*n_ind] = Genoprob[k][i][j]*weights[j]; /* s keeps track of column */ for(k=0; k<n_gen-1; k++,s++) /* QTL 2 */ x[j+s*n_ind] = Genoprob[k][i2][j]*weights[j]; for(k=0; k<n_addcov; k++, s++) /* additive covariates */ x[j+s*n_ind] = Addcov[k][j]; for(k2=0; k2<n_intcov; k2++) { for(k=0; k<n_gen-1; k++,s++) /* interactive x QTL 1 */ x[j+s*n_ind] = Genoprob[k][i][j]*Intcov[k2][j]; for(k=0; k<n_gen-1; k++,s++) /* interactive x QTL 2 */ x[j+s*n_ind] = Genoprob[k][i2][j]*Intcov[k2][j]; } for(k=0; k<n_gen-1; k++) for(k2=0; k2<n_gen-1; k2++,s++) /* QTL 1 x QTL 2 */ x[j+s*n_ind] = Pairprob[k][k2][i][i2][j]*weights[j]; for(k3=0; k3<n_intcov; k3++) for(k=0; k<n_gen-1; k++) /* interactive x QTL 1 x QTL 2 */ for(k2=0; k2<n_gen-1; k2++,s++) x[j+s*n_ind] = Pairprob[k][k2][i][i2][j]*Intcov[k3][j]; } /* drop x's */ n_col_f_temp = n_col_f; if(n_col2drop) dropcol_x(&n_col_f_temp, n_ind, allcol2drop, x); rank = n_col_f_temp; /* linear regression of phenotype on QTL genotype probabilities */ /* make a copy of x matrix, we may need it */ memcpy(x_bk, x, n_ind*n_col_f_temp*sizeof(double)); /* copy pheno to tmppheno, because dgelss will destroy the input pheno array */ memcpy(tmppheno, pheno, n_ind*nphe*sizeof(double)); /* Call LAPACK engine DGELSS to do linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss (&n_ind, &n_col_f_temp, &nphe, x, x_bk, pheno, tmppheno, singular, &tol, &rank, work, &lwork, &info); /* calculate residual sum of squares */ if(nphe == 1) { /*only one phenotype */ /* if the design matrix is full rank */ if(rank == n_col_f_temp) { for (itmp=rank, dtmp=0.0; itmp<n_ind; itmp++) dtmp += tmppheno[itmp]*tmppheno[itmp]; } else { /* the design matrix is not full rank, this is trouble */ /* calculate the fitted value */ /* matmult(yfit, x_bk, n_ind, n_col_f_temp, tmppheno, 1); */ matmult(yfit, x_bk, n_ind, n_col_f_temp, tmppheno, 1); /* calculate rss */ for (itmp=0, dtmp=0.0; itmp<n_ind; itmp++) dtmp += (pheno[itmp]-yfit[itmp]) * (pheno[itmp]-yfit[itmp]); } Result[0][i][i2] = log10(dtmp); } else { /* multiple phenotypes */ if(multivar == 1) { /* multivariate model */ } else{ if(rank == n_col_f_temp) { /* design matrix is of full rank, this is easier */ for(itmp=0; itmp<nrss; itmp++) { /* loop thru phenotypes */ ind_idx = itmp*n_ind; for(j=rank, dtmp=0.0; j<n_ind; j++) dtmp += tmppheno[ind_idx+j] * tmppheno[ind_idx+j]; Result[itmp][i][i2] = log10(dtmp); } } else { /* design matrix is singular, this is troubler */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (itmp=0; itmp<nphe; itmp++) memcpy(coef+itmp*n_col_f_temp, tmppheno+itmp*n_ind, n_col_f_temp*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_f_temp, coef, nphe); /* calculate residual, put the result in tmppheno */ for (itmp=0; itmp<n_ind*nphe; itmp++) tmppheno[itmp] = pheno[itmp] - yfit[itmp]; for(itmp=0; itmp<nrss; itmp++) { /* loop thru phenotypes */ ind_idx = itmp*n_ind; for(j=0, dtmp=0.0; j<n_ind; j++) dtmp += tmppheno[ind_idx+j] * tmppheno[ind_idx+j]; Result[itmp][i][i2] = log10(dtmp); } } } } /* convert to LODs */ for(itmp=0; itmp < nphe; itmp++) { Result[itmp][i2][i] = (double)n_ind/2.0*Result[itmp][i2][i]; Result[itmp][i][i2] = (double)n_ind/2.0* Result[itmp][i][i2]; } } /* end loop over positions */ } }