Example #1
// [[Rcpp::depends(RcppArmadillo)]]
#include <RcppArmadillo.h>
using namespace arma;
using namespace Rcpp;

// CSumI, Vec2Mat, and myPmin are package-internal helpers defined elsewhere;
// a sketch of two of them follows the function.

// [[Rcpp::export]]
Rcpp::List getWEXPcutoff(arma::mat data, arma::mat subdata, arma::mat Y, arma::mat subY, int N, arma::mat RT_out, double predictTime, arma::vec resid_sco, double fitvar, arma::colvec cutoffs){

   
   //create dataD: event (status == 1) rows of data, sorted by event time
   arma::mat dataDuns = data.rows(find(data.col(1)==1));
   arma::mat dataD = dataDuns.rows(sort_index(dataDuns.col(0)));


   int n = data.n_rows, nD = dataD.n_rows, np = Y.n_cols, ncuts = subdata.n_rows; 
   // column guide:
   //   times    = data.col(0)     status   = data.col(1)
   //   weights  = data.col(4)     linearY  = data.col(6)
   //   Dtimes   = dataD.col(0)    Dweights = dataD.col(4)
   colvec times = data.col(0);

   uvec tmpind = find((data.col(0) <= predictTime)%data.col(1)); // indices of (data$times<=predict.time)&(data$status==1)
   mat myempty(1,1); 
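   // tmpindT (via CSumI) appears to map each uncensored event time to its
   // 1-based rank among the ordered death times; used below to index s0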
   uvec tmpindT = conv_to<uvec>::from(CSumI(times.elem(tmpind), 4, dataD.col(0), myempty,  FALSE)); 


   colvec rrk =  exp(data.col(6)); //data.col(6)  is data$linearY
   
   // build riskmat: riskmat(i, j) = 1 if subject i is still at risk at the
   // j-th ordered death time, i.e. data$times[i] >= dataD$times[j];
   // the iterator fill matches riskmat's column-major layout
   mat riskmat(n, nD);
   mat::iterator riskmat_it = riskmat.begin();

   for(colvec::iterator j = dataD.begin_col(0); j != dataD.end_col(0); j++){
      for(colvec::iterator i = data.begin_col(0); i != data.end_col(0); i++){
         *riskmat_it = *i >= *j; riskmat_it++;
      }
   }
   // s0, s1: weighted at-risk sums at each death time (s1 additionally weighted by Y)
   colvec s0 = riskmat.t()*(rrk%data.col(4)); 
   colvec s1 = riskmat.t()*trans(Vec2Mat(rrk%data.col(4), np)%Y.t()); 

   // haz0 and cumhaz0: Breslow-type baseline hazard increments at each death
   // time (weighted event mass over the weighted at-risk sum, i.e. s0)
   colvec haz0 = dataD.col(4)/sum(riskmat%trans(Vec2Mat(rrk%data.col(4), nD))).t();
   colvec cumhaz0 = cumsum(haz0); 
   colvec ptvec(1); ptvec(0) = predictTime; 
   colvec cumhaz_t0 = CSumI(ptvec, 4, dataD.col(0), haz0, TRUE); 

   // Wexp: influence-function expansions; the beta-hat contribution is the
   // score residuals scaled by the model-based variance and the sample size
   colvec Wexp_beta = resid_sco*fitvar*N;

   colvec WexpLam1(n, fill::zeros);
   WexpLam1(tmpind) =  N/s0(tmpindT - 1);     
   WexpLam1 = WexpLam1 - CSumI( myPmin(data.col(0), predictTime), 4, dataD.col(0), haz0/s0, TRUE)%rrk*N; 
 
   colvec WexpLam2 = Wexp_beta*CSumI(ptvec, 4, dataD.col(0), haz0%s1/trans(Vec2Mat(s0, np)), TRUE); 
   colvec WexpLam = WexpLam1 - WexpLam2; 

   //Fyk = Pr(Sy < c)
   colvec Fyk = CSumI( cutoffs, 4, data.col(6), data.col(4), TRUE)/sum(data.col(4)); 

   // dFyk: jump sizes (first differences) of Fyk at each cutoff
   colvec dFyk(Fyk.n_elem); dFyk(0) = 0;
   dFyk(span(1, Fyk.n_elem - 1)) = Fyk(span(0, Fyk.n_elem-2));
   dFyk = Fyk - dFyk;
   
   colvec Sy = subdata.col(5);
   colvec Syall = data.col(5);
   colvec St0_Fyk = cumsum(Sy%dFyk);  // running sum of S(t0|y) dF(y)
   double St0 = max(St0_Fyk);         // marginal survival at t0 (the final cumulative sum)
   colvec St0_Syk = St0 - St0_Fyk;
  // Wexp_Cond_Stc: expansion for the conditional survival at t0 given the marker, at each cutoff
  mat Wexp_Cond_Stc =  -Vec2Mat(Sy%exp(subdata.col(6)), n)%(trans(Vec2Mat(WexpLam, ncuts)) +as_scalar(cumhaz_t0)*Wexp_beta*subY.t()); 

  mat tmpmat = conv_to<mat>::from(trans(Vec2Mat(data.col(6), ncuts)) > Vec2Mat(cutoffs, n)); // indicator: linearY_i > cutoff_k
  mat Wexp_Stc = trans(CSumI(cutoffs, 0, subdata.col(6), Wexp_Cond_Stc.t()%Vec2Mat(dFyk, n).t(), TRUE)) + trans(Vec2Mat(Syall,ncuts))%tmpmat - Vec2Mat(St0_Syk, n); 

  colvec Wexp_St = sum(trans(Wexp_Cond_Stc)%trans(Vec2Mat(dFyk, n))).t() + Syall - St0; 

   mat Wexp_Fc = 1-tmpmat - Vec2Mat(Fyk, n); 

   //assemble for classic performance measures, given linear predictor

   List out(8); 

   out[0] = -Wexp_Cond_Stc; 
   out[1] =  Wexp_Fc;
   mat Wexp_St_mat = Vec2Mat(Wexp_St, ncuts).t(); 

   out[2] = (-Wexp_St_mat%Vec2Mat(RT_out.col(3), n) + Wexp_Stc)/St0; 
   out[3] = (Wexp_St_mat%Vec2Mat(RT_out.col(4), n) -Wexp_Fc - Wexp_Stc)/(1-St0); 

   out[4] = -Wexp_St; 
   out[5] = (Wexp_St_mat - Wexp_Stc - Vec2Mat(RT_out.col(6), n)%Wexp_Fc)/Vec2Mat(Fyk, n); 
   out[6] = (Vec2Mat(RT_out.col(5)-1, n)%Wexp_Fc - Wexp_Stc)/Vec2Mat(1-Fyk, n);
   out[7] = Wexp_beta;

   return out; 
}
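
The function above leans on three package-internal helpers (CSumI, Vec2Mat, myPmin) whose definitions are not shown. The sketch below reconstructs Vec2Mat and myPmin from how they are used: Vec2Mat(v, k) must return a k x v.n_elem matrix whose rows are all copies of v, and myPmin behaves like R's pmin against a scalar. These are hypothetical reconstructions, not the package's actual code; CSumI is omitted because its semantics cannot be pinned down from these call sites alone.

// Hypothetical reconstructions inferred from usage -- not the original definitions.
arma::mat Vec2Mat(const arma::colvec& v, int k){
   // replicate v' as k identical rows -> k x v.n_elem matrix
   return arma::repmat(v.t(), k, 1);
}

arma::colvec myPmin(const arma::colvec& v, double c){
   // elementwise minimum of v and the scalar c, like R's pmin(v, c)
   return arma::min(v, c*arma::ones<arma::colvec>(v.n_elem));
}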
Example #2

#include <algorithm>
#include <armadillo>
#include <climits>
#include <random>
#include <tuple>
#include <utility>
#include <vector>
using namespace std;

// Declarations inferred from the call sites below; the definitions live elsewhere.
pair<double, double> calculateBetaPValue(const arma::mat &Z, const arma::vec &Y,
                                         double sigma, bool varianceKnown);
double testBalance(const arma::vec &covariate, const arma::vec &treatment);

tuple<double, double, int, int, double, double> simulate(const arma::Col<double> &Y, const vector<int> &X,
                                                         double sigma, bool varianceKnown,
                                                         arma::mat &Z, mt19937_64 &rng,
                                                         bool interceptTerm) {
  bernoulli_distribution bernoulli(0.5);
  int N = X.size();
  Z.fill(0);
  // bestColumns[k] tracks the k + 1 (or, with an intercept, k + 2) columns that currently give the smallest p-value
  vector<arma::uvec> bestColumns; bestColumns.reserve(N - 1); 
  if (interceptTerm) { // make intercept term last column of Z
    fill(Z.begin_col(N - 1), Z.end_col(N - 1), 1);
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0, static_cast<arma::uword>(N - 1)});
  } else {
    copy(X.begin(), X.end(), Z.begin_col(0));
    bestColumns.push_back(arma::uvec{0});     
  }  
  // bestPValues[k] corresponds to p-value if the columns bestColumns[k] are used
  vector<pair<double, double>> bestPValues; bestPValues.reserve(N - 1);
  bestPValues.push_back(calculateBetaPValue(Z.cols(bestColumns.front()), Y, sigma, varianceKnown));
  if (bestPValues.front().first <= 0.05) {
    return make_tuple(bestPValues.front().first, bestPValues.front().second, 0, 0, -1, bestPValues.front().first);
  } else {                    // need more covariates
    bool done = false;
    int smallestSubsetSize = INT_MAX;
    /* Add covariates one-by-one; the treatment column is always included,
     * and with an intercept two columns (treatment + intercept) are taken by
     * default, leaving one fewer free column for candidate covariates.
     */
    for (int j = 1; j < N - 2 || (j == N - 2 && !interceptTerm); ++j) {
      for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
      if (!interceptTerm) {
        while (arma::rank(Z) <= j) {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }        
      } else { // offset rank by 1 for intercept term
        while (arma::rank(Z) <= j + 1) {
          for (int k = 0; k < N; ++k) Z(k, j) = bernoulli(rng);
        }        
      }
      for (int k = j; k >= 1; --k) { // loop through subset sizes, k is the number of additional covariates
        pair<double, double> newPValue;
        if (k == j) {           // use all available covariates
          bestColumns.emplace_back(bestColumns.back().n_rows + 1); // add one more to biggest subset
          for (arma::uword l = 0; l + 1 < bestColumns.back().n_rows; ++l) {
            bestColumns.back()(l) = bestColumns[j - 1](l); // copy over from original subset
          }
          bestColumns.back()(bestColumns.back().n_rows - 1) = j; // add new covariate
          newPValue = calculateBetaPValue(Z.cols(bestColumns.back()), Y, sigma, varianceKnown);
          bestPValues.push_back(newPValue);
        } else {                // make a new subset of same size with new covariate
          arma::uvec columnSubset(bestColumns[k].n_rows); 
          for (arma::uword l = 0; l + 1 < columnSubset.n_rows; ++l)
            columnSubset(l) = bestColumns[k - 1](l); // copy over from smaller subset
          columnSubset(columnSubset.n_rows - 1) = j; // add new covariate
          newPValue = calculateBetaPValue(Z.cols(columnSubset), Y, sigma, varianceKnown);
          if (bestPValues[k].first > newPValue.first) { // if better subset replace
            bestPValues[k] = newPValue;
            bestColumns[k] = columnSubset;
          }
        }
        if (newPValue.first <= 0.05) { // stop when we reach significance
          done = true;
          smallestSubsetSize = k;
        }
      }
      if (done) {
        // compute balance p value in special case that only 1 covariate was needed
        double balancePValue = -1;
        if (smallestSubsetSize == 1 && !interceptTerm) {
          balancePValue = testBalance(Z.col(bestColumns[1](1)), Z.col(0));
        } else if (smallestSubsetSize == 1 && interceptTerm) {
          balancePValue = testBalance(Z.col(bestColumns[1](2)), Z.col(0));
        }
        return make_tuple(bestPValues.front().first, bestPValues[smallestSubsetSize].second, 
                          j, smallestSubsetSize, balancePValue, bestPValues[smallestSubsetSize].first); 
      }
    }    
  }  
  return make_tuple(bestPValues.front().first, bestPValues.front().second, -1, -1, -1, bestPValues.front().first);
}
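
A minimal driver for simulate might look like the sketch below. The sample size, seed, and data here are illustrative only, and calculateBetaPValue / testBalance are assumed to be provided at link time, so treat this as a sketch of the calling convention rather than a complete program.

// Hypothetical driver; assumes calculateBetaPValue and testBalance are linked in.
#include <iostream>

int main() {
  const int N = 20;                      // illustrative sample size
  mt19937_64 rng(42);                    // fixed seed for reproducibility
  bernoulli_distribution coin(0.5);

  vector<int> X(N);                      // randomized treatment assignment
  for (int i = 0; i < N; ++i) X[i] = coin(rng);

  arma::vec Y(N, arma::fill::randn);     // placeholder response
  arma::mat Z(N, N, arma::fill::zeros);  // covariate matrix, filled in by simulate

  auto res = simulate(Y, X, /*sigma=*/1.0, /*varianceKnown=*/true, Z, rng,
                      /*interceptTerm=*/false);
  std::cout << "initial p-value: " << std::get<0>(res)
            << ", candidate covariates generated: " << std::get<2>(res) << "\n";
  return 0;
}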