void altRss1(double *tmppheno, double *pheno, int nphe, int n_ind, int n_gen, int *Draws, double **Addcov, int n_addcov, double **Intcov, int n_intcov, double *dwork, int multivar, double *rss, double *weights, int *ind_noqtl) { /* create local variables */ int i, j, s, s2, ncolx, lwork, rank, info, nrss, ind_idx; double *x, *x_bk, *singular, *yfit, *work, *coef, *rss_det=0; double alpha=1.0, beta=0.0, tol=TOL, dtmp; /* for lapack dgelss */ if( (nphe==1) || (multivar==1) ) nrss = 1; else nrss = nphe; /* number of columns in design matrix X */ ncolx = n_gen + n_addcov + n_intcov*(n_gen-1); /* init rank to be ncolx, which means X is of full rank. If it's not, the value of rank will be changed by dgelss */ rank = ncolx; /* split the memory block */ lwork = 3*ncolx+ MAX(n_ind, nphe); /*lwork = 3*ncolx + n_ind;*/ singular = dwork; work = singular + ncolx; x = work + lwork; x_bk = x + n_ind*ncolx; yfit = x_bk + n_ind*ncolx; coef = yfit + n_ind*nphe; if(multivar == 1) rss_det = coef + ncolx*nphe; /* zero out X matrix */ for(i=0; i<n_ind*ncolx; i++) x[i] = 0.0; /* fill up design matrix */ for(i=0; i<n_ind; i++) { /* QTL genotypes */ if(!ind_noqtl[i]) x[i+(Draws[i]-1)*n_ind] = weights[i]; /* Additive covariates */ for(s=0, s2=n_gen; s<n_addcov; s++, s2++) x[i+s2*n_ind] = Addcov[s][i]; /* Interactive covariates */ if(!ind_noqtl[i]) { for(s=0; s<n_intcov; s++) for(j=0; j<n_gen-1; j++, s2++) if(Draws[i] == j+1) x[i+n_ind*s2] = Intcov[s][i]; } } /* end loop over individuals */ /* Done filling up X matrix */ /* make a copy of x matrix, we may need it */ memcpy(x_bk, x, n_ind*ncolx*sizeof(double)); /* Call LAPACK engine DGELSS to do linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss(&n_ind, &ncolx, &nphe, x, x_bk, pheno, tmppheno, singular, &tol, &rank, work, &lwork, &info); /* calculate residual sum of squares */ if(nphe == 1) { /* only one phenotype, this is easier */ /* if the design matrix is full rank */ if(rank == ncolx) for (i=rank, rss[0]=0.0; i<n_ind; i++) rss[0] += tmppheno[i]*tmppheno[i]; else { /* the desigm matrix is not full rank, this is trouble */ /* calculate the fitted value */ matmult(yfit, x_bk, n_ind, ncolx, tmppheno, 1); /* calculate rss */ for (i=0, rss[0]=0.0; i<n_ind; i++) rss[0] += (pheno[i]-yfit[i]) * (pheno[i]-yfit[i]); } } else { /* multiple phenotypes */ if(multivar == 1) { /* multivariate normal model, this is troubler */ /* multivariate model, rss=det(rss) */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*ncolx, tmppheno+i*n_ind, ncolx*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, ncolx, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calcualte rss_det = tmppheno'*tmppheno. */ /* clear rss_det */ for (i=0; i<nphe*nphe; i++) rss_det[i] = 0.0; /* Call BLAS routine dgemm. Note that the result rss_det is a symemetric positive definite matrix */ /* the dimension of tmppheno is n_ind x nphe */ mydgemm(&nphe, &n_ind, &alpha, tmppheno, &beta, rss_det); /* calculate the determinant of rss */ /* do Cholesky factorization on rss_det */ mydpotrf(&nphe, rss_det, &info); for(i=0, rss[0]=1.0;i<nphe; i++) rss[0] *= rss_det[i*nphe+i]*rss_det[i*nphe+i]; } else { /* return rss as a vector */ if(rank == ncolx) { /* design matrix is of full rank, this is easier */ for(i=0; i<nrss; i++) { rss[i] = 0.0; ind_idx = i*n_ind; for (j=rank; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp*dtmp; } } } else { /* design matrix is singular, this is troubler */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*ncolx, tmppheno+i*n_ind, ncolx*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, ncolx, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calculate rss */ for(i=0; i<nrss; i++) { rss[i] = 0.0; ind_idx = i*n_ind; for(j=0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } } } /* take log10 */ for(i=0; i<nrss; i++) rss[i] = log10(rss[i]); } /* end of function */
void altRss2(double *tmppheno, double *pheno, int nphe, int n_ind, int n_gen1, int n_gen2, int *Draws1, int *Draws2, double **Addcov, int n_addcov, double **Intcov, int n_intcov, double *lrss, double *dwork_add, double *dwork_full, int multivar, double *weights, int n_col2drop, int *allcol2drop) { int i, j, k, s, nrss, lwork, rank, info; int n_col_a, n_col_f, n_gen_sq, ind_idx; double *x, *x_bk, *singular, *yfit, *work, *rss, *rss_det=0, *coef; double alpha=1.0, beta=0.0, tol=TOL, dtmp; if( (nphe==1) || (multivar==1) ) nrss = 1; else nrss = nphe; /* allocate memory */ rss = (double *)R_alloc(nrss, sizeof(double)); /* constants */ /* number of columns for Q1*Q2 */ n_gen_sq = n_gen1*n_gen2; /* number of columns of X for additive model */ n_col_a = (n_gen1+n_gen2-1) + n_addcov + n_intcov*(n_gen1+n_gen2-2); /* number of columns of X for full model */ n_col_f = n_gen_sq + n_addcov + n_intcov*(n_gen_sq-1); /************************************** * Now work on the additive model **************************************/ /* split the memory block */ lwork = 3*n_col_a + MAX(n_ind, nphe); singular = dwork_add; work = singular + n_col_a; x = work + lwork; x_bk = x + n_ind*n_col_a; yfit = x_bk + n_ind*n_col_a; coef = yfit + n_ind*nphe; if(multivar == 1) rss_det = yfit + n_col_a*nphe; /* zero out X matrix */ for(i=0; i<n_ind*n_col_a; i++) x[i] = 0.0; rank = n_col_a; /* fill up X matrix */ for(i=0; i<n_ind; i++) { x[i+(Draws1[i]-1)*n_ind] = weights[i]; /* QTL 1 */ s = n_gen1; if(Draws2[i] < n_gen2) /* QTL 2 */ x[i+(Draws2[i]-1+s)*n_ind] = weights[i]; s += (n_gen2-1); for(k=0; k<n_addcov; k++) /* add cov */ x[i+(k+s)*n_ind] = Addcov[k][i]; s += n_addcov; for(k=0; k<n_intcov; k++) { if(Draws1[i] < n_gen1) /* QTL1 x int cov */ x[i+(Draws1[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen1-1); if(Draws2[i] < n_gen2) /* QTL 2 x int cov*/ x[i+(Draws2[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen2-1); } } /* end loop over individuals */ /* drop cols */ if(n_col2drop) dropcol_x(&n_col_a, n_ind, allcol2drop, x); rank = n_col_a; /* make a copy of x matrix, we may need it */ memcpy(x_bk, x, n_ind*n_col_a*sizeof(double)); /* copy pheno to tmppheno, because dgelss will destroy the input pheno array */ memcpy(tmppheno, pheno, n_ind*nphe*sizeof(double)); /* Call LAPACK engine DGELSS to do linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss (&n_ind, &n_col_a, &nphe, x, x_bk, pheno, tmppheno, singular, &tol, &rank, work, &lwork, &info); /* calculate residual sum of squares */ if(nphe == 1) { /*only one phenotype */ /* if the design matrix is full rank */ if(rank == n_col_a) for (i=rank, rss[0]=0.0; i<n_ind; i++) rss[0] += tmppheno[i]*tmppheno[i]; else { /* the desigm matrix is not full rank, this is trouble */ /* calculate the fitted value */ matmult(yfit, x_bk, n_ind, n_col_a, tmppheno, 1); /* calculate rss */ for (i=0, rss[0]=0.0; i<n_ind; i++) rss[0] += (pheno[i]-yfit[i]) * (pheno[i]-yfit[i]); } } else { /* multiple phenotypes */ if(multivar == 1) { /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_a, tmppheno+i*n_ind, n_col_a*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_a, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calcualte rss_det = tmppheno'*tmppheno. */ /* clear rss_det */ for (i=0; i<nphe*nphe; i++) rss_det[i] = 0.0; /* Call BLAS routine dgemm. Note that the result rss_det is a symemetric positive definite matrix */ /* the dimension of tmppheno is n_ind x nphe */ mydgemm(&nphe, &n_ind, &alpha, tmppheno, &beta, rss_det); /* calculate the determinant of rss */ /* do Cholesky factorization on rss_det */ mydpotrf(&nphe, rss_det, &info); for(i=0, rss[0]=1.0;i<nphe; i++) rss[0] *= rss_det[i*nphe+i]*rss_det[i*nphe+i]; } else { /* return rss as a vector */ if(rank == n_col_a) { /* design matrix is of full rank, this is easier */ for(i=0; i<nrss; i++) { ind_idx = i*n_ind; for(j=rank, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } else { /* design matrix is singular, this is troubler */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_a, tmppheno+i*n_ind, n_col_a*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_a, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; for(i=0; i<nrss; i++) { ind_idx = i*n_ind; for(j=0, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } } } /* take log10 */ for(i=0; i<nrss; i++) lrss[i] = log10(rss[i]); /************************************** * Finish additive model **************************************/ /******************* * INTERACTIVE MODEL *******************/ /* split the memory block */ lwork = 3*n_col_f + MAX(n_ind, nphe); singular = dwork_full; work = singular + n_col_f; x = work + lwork; x_bk = x + n_ind*n_col_f; yfit = x_bk + n_ind*n_col_f; coef = yfit + n_ind*nphe; if(multivar == 1) rss_det = coef + n_col_f*nphe; /* zero out X matrix */ for(i=0; i<n_ind*n_col_f; i++) x[i] = 0.0; rank = n_col_f; /* fill up X matrix */ for(i=0; i<n_ind; i++) { x[i+(Draws1[i]-1)*n_ind] = weights[i]; /* QTL 1 */ s = n_gen1; if(Draws2[i] < n_gen2) /* QTL 2 */ x[i+(Draws2[i]-1+s)*n_ind] = weights[i]; s += (n_gen2-1); for(k=0; k<n_addcov; k++) /* add cov */ x[i+(k+s)*n_ind] = Addcov[k][i]; s += n_addcov; for(k=0; k<n_intcov; k++) { if(Draws1[i] < n_gen1) /* QTL1 x int cov */ x[i+(Draws1[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen1-1); if(Draws2[i] < n_gen2) /* QTL 2 x int cov */ x[i+(Draws2[i]-1+s)*n_ind] = Intcov[k][i]; s += (n_gen2-1); } if(Draws1[i] < n_gen1 && Draws2[i] < n_gen2) /* QTL x QTL */ x[i+((Draws1[i]-1)*(n_gen2-1)+Draws2[i]-1+s)*n_ind] = weights[i]; s += ((n_gen1-1)*(n_gen2-1)); for(k=0; k<n_intcov; k++) { /* QTL x QTL x int cov */ if(Draws1[i] < n_gen1 && Draws2[i] < n_gen2) x[i+((Draws1[i]-1)*(n_gen2-1)+Draws2[i]-1+s)*n_ind] = Intcov[k][i]; s += ((n_gen1-1)*(n_gen2-1)); } } /* end loop over individuals */ /* drop x's */ if(n_col2drop) dropcol_x(&n_col_f, n_ind, allcol2drop, x); rank = n_col_f; /* make a copy of x matrix, we may need it */ memcpy(x_bk, x, n_ind*n_col_f*sizeof(double)); /* copy pheno to tmppheno, because dgelss will destroy the input pheno array */ memcpy(tmppheno, pheno, n_ind*nphe*sizeof(double)); /* Call LAPACK engine DGELSS to do linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss (&n_ind, &n_col_f, &nphe, x, x_bk, pheno, tmppheno, singular, &tol, &rank, work, &lwork, &info); /* calculate residual sum of squares */ if(nphe == 1) { /* one phenotype */ /* if the design matrix is full rank */ if(rank == n_col_f) for (i=rank, rss[0]=0.0; i<n_ind; i++) rss[0] += tmppheno[i]*tmppheno[i]; else { /* the desigm matrix is not full rank, this is trouble */ /* calculate the fitted value */ matmult(yfit, x_bk, n_ind, n_col_f, tmppheno, 1); /* calculate rss */ for (i=0, rss[0]=0.0; i<n_ind; i++) rss[0] += (pheno[i]-yfit[i]) * (pheno[i]-yfit[i]); } } else { /* mutlple phenotypes */ if(multivar == 1) { /* multivariate model, rss=det(rss) */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_f, tmppheno+i*n_ind, n_col_f*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_f, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calcualte rss_det = tmppheno'*tmppheno. */ /* clear rss_det */ for (i=0; i<nphe*nphe; i++) rss_det[i] = 0.0; /* Call BLAS routine dgemm. Note that the result rss_det is a symemetric positive definite matrix */ /* the dimension of tmppheno is n_ind x nphe */ mydgemm(&nphe, &n_ind, &alpha, tmppheno, &beta, rss_det); /* calculate the determinant of rss */ /* do Cholesky factorization on rss_det */ mydpotrf(&nphe, rss_det, &info); for(i=0, rss[0]=1.0;i<nphe; i++) rss[0] *= rss_det[i*nphe+i]*rss_det[i*nphe+i]; } else { /* return rss as a vector */ if(rank == n_col_f) { /*design matrix is of full rank, this is easier */ for(i=0; i<nrss; i++) { ind_idx = i * n_ind; for(j=rank, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } else { /* sigular design matrix */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*n_col_f, tmppheno+i*n_ind, n_col_f*sizeof(double)); /* calculate yfit */ matmult(yfit, x_bk, n_ind, n_col_f, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; for(i=0; i<nrss; i++) { ind_idx = i * n_ind; for(j=0, rss[i]=0.0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss[i] += dtmp * dtmp; } } } } } /* take log10 */ for(i=0; i<nrss; i++) lrss[i+nrss] = log10(rss[i]); }
/********************************************************** * * function to calculate the residual sum of squares (RSS) * for the null model: pheno ~ u + addcov * This function is used by scanone_imp and scantwo_imp * * Note that if the input pheno is a matrix (multiple columns), * a multivariate normal model will be applied, which means * the result rss should be det((y-yfit)'*(y-yfit)) * **********************************************************/ void nullRss(double *tmppheno, double *pheno, int nphe, int n_ind, double **Addcov, int n_addcov, double *dwork_null, int multivar, double *rss0, double *weights) { /* create local variables */ int i, j, ncolx0, lwork, info, rank, nrss, ind_idx; double alpha=1.0, beta=0.0, tol=TOL, dtmp; double *work, *x0, *x02, *s, *yfit, *rss_det=0, *coef; if( (nphe==1) || (multivar==1) ) nrss = 1; else nrss = nphe; /*rss0 = mxCalloc(nrss, sizeof(double));*/ /* split the memory block */ ncolx0 = 1 + n_addcov; /* number of columns in x0 matrix */ /* init rank to be ncolx, which means X is of full rank. If it's not, the value of rank will be changed by dgelss */ rank = ncolx0; /* lwork = 3*ncolx0 + n_ind;*/ lwork = 3*ncolx0 + MAX(n_ind,nphe); /* allocate memory */ s = dwork_null; work = s + ncolx0; x0 = work + lwork; x02 = x0 + n_ind*ncolx0; yfit = x02 + n_ind*ncolx0; coef = yfit + n_ind*nphe; if(multivar == 1) rss_det = coef + ncolx0*nphe; /* fill up x0 matrix */ for (i=0; i<n_ind; i++) { x0[i] = weights[i]; /* the first row (column in Fortran) are all 1s */ for(j=0; j<n_addcov; j++) x0[(j+1)*n_ind+i] = Addcov[j][i]; } /* make a copy of x0 matrix, we may need it */ memcpy(x02, x0, n_ind*ncolx0*sizeof(double)); /* Now we have the design matrix x, the model is pheno = x*b call LAPACK routine DGELSS to do the linear regression. Note that DGELSS doesn't have the assumption that X is full rank. */ /* Pass all arguments to Fortran by reference */ mydgelss (&n_ind, &ncolx0, &nphe, x0, x02, pheno, tmppheno, s, &tol, &rank, work, &lwork, &info); /* calculate residual sum of squares */ if(nphe == 1) { /* if there are only one phenotype, this is easier */ /* if the design matrix is full rank */ if(rank == ncolx0) { rss0[0] = 0.0; for (i=rank; i<n_ind; i++) rss0[0] += tmppheno[i]*tmppheno[i]; } else { /* the design matrix is not full rank, this is trouble */ /* calculate the fitted value using yfit=x02*tmppheno(1:ncolx0) */ matmult(yfit, x02, n_ind, ncolx0, tmppheno, 1); /* calculate rss */ for (i=0; i<n_ind; i++) rss0[0] += (pheno[i]-yfit[i]) * (pheno[i]-yfit[i]); } } else { /* multiple phenotypes, this is troubler */ if(multivar == 1) { /* multivariate model */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx0 rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*ncolx0, tmppheno+i*n_ind, ncolx0*sizeof(double)); /* calculate yfit */ matmult(yfit, x02, n_ind, ncolx0, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calcualte rss_det = tmppheno'*tmppheno. */ /* Call BLAS routine dgemm. Note that the result rss_det is a symemetric positive definite matrix */ /* the dimension of tmppheno is n_ind x nphe */ mydgemm(&nphe, &n_ind, &alpha, tmppheno, &beta, rss_det); /* calculate the determinant of rss */ /* do Cholesky factorization on rss_det */ mydpotrf(&nphe, rss_det, &info); for(i=0, rss0[0]=1.0;i<nphe; i++) rss0[0] *= rss_det[i*nphe+i]*rss_det[i*nphe+i]; } else { /* return on rss for each phenotype */ if(rank == ncolx0) { /* if the design matrix is of full rank, it's easier */ for(i=0; i<nrss; i++) { rss0[i] = 0.0; ind_idx = i*n_ind; for (j=rank; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss0[i] += dtmp * dtmp; } } } else { /* not full rank, this is trouble */ /* note that the result tmppheno has dimension n_ind x nphe, the first ncolx0 rows contains the estimates. */ for (i=0; i<nphe; i++) memcpy(coef+i*ncolx0, tmppheno+i*n_ind, ncolx0*sizeof(double)); /* calculate yfit */ matmult(yfit, x02, n_ind, ncolx0, coef, nphe); /* calculate residual, put the result in tmppheno */ for (i=0; i<n_ind*nphe; i++) tmppheno[i] = pheno[i] - yfit[i]; /* calculate rss */ for(i=0; i<nrss; i++) { rss0[i] = 0.0; ind_idx = i*n_ind; for(j=0; j<n_ind; j++) { dtmp = tmppheno[ind_idx+j]; rss0[i] += dtmp * dtmp; } } } } } /* take log10 */ for(i=0; i<nrss; i++) rss0[i] = log10(rss0[i]); }