// Reports whether this vector differs from rhs.
// - Sizes that do not match: the vectors differ.
// - Sizes match and this vector is N/A: the vectors do not differ.
// - Otherwise: the element storage is compared byte by byte.
bool unmatch(const Vector &rhs) const {
  if (size_.unmatch(rhs.size_)) {
    return true;
  }
  if (is_na()) {
    return false;
  }
  const size_t byte_count = sizeof(Int) * raw_size();
  const int difference = std::memcmp(data_, rhs.data_, byte_count);
  return difference != 0;
}
// Element access that never reads outside the stored range.
// Returns Text::na() when the vector is N/A or when i, reinterpreted as an
// unsigned index, is not below raw_size() (this also rejects negative raw
// values). Otherwise the element comes either straight from data_ (direct
// storage) or is assembled from its header's offset/size into bodies_.
Text operator[](Int i) const {
  const auto raw_index = i.raw();
  const bool in_range =
      !is_na() && (static_cast<size_t>(raw_index) < raw_size());
  if (!in_range) {
    return Text::na();
  }
  if (!is_direct_) {
    const auto &header = headers_[raw_index];
    return Text(&bodies_[header.offset], header.size.raw());
  }
  return data_[raw_index];
}
bool unmatch(const Vector &rhs) const { if (size_.unmatch(rhs.size_)) { return true; } if (is_na()) { return false; } // TODO: This is because raw values are not normalized. size_t size = raw_size(); for (size_t i = 0; i < size; ++i) { if (data_[i].unmatch(rhs.data_[i])) { return true; } } return false; }
bool match(const Vector &rhs) const { if (size_.unmatch(rhs.size_)) { return false; } if (is_na()) { return true; } // TODO: This is because raw values are not normalized. size_t size = raw_size(); for (size_t i = 0; i < size; ++i) { // TODO: This can be improved. if (operator[](grnxx::Int(i)).unmatch(rhs[grnxx::Int(i)])) { return false; } } return true; }
/*
 * Collect the candidate neighbour rows of `array`.
 *
 * Every row index i in [0, *nb_row) for which is_na() returns 0 is appended
 * to n_position (per the original comment: rows without missing values).
 * The number of collected rows is written to *nb_neighboors.
 *
 * array         : row pointers, *nb_row rows
 * nb_row        : pointer to the number of rows
 * nb_col        : pointer to the number of columns (forwarded to is_na())
 * n_position    : output, receives the selected row indices in ascending order
 * nb_neighboors : output, receives how many indices were stored
 */
void neighboors(double **array, int *nb_row, int *nb_col, int *n_position, int *nb_neighboors)
{
  int row;
  int found = 0;
  /* Scratch buffer required by is_na(); its contents are not used here. */
  int *scratch = ivector(*nb_col, code_miss);

  for (row = 0; row < *nb_row; row++)
    {
      if (is_na(array[row], nb_col, scratch) == 0)
        {
          n_position[found] = row;
          found++;
        }
    }

  *nb_neighboors = found;
  Free(scratch);
}
// Bitwise complement of the raw value; N/A stays N/A.
constexpr Int operator~() const {
  return !is_na() ? Int(~raw_) : na();
}
// Element access that never reads outside the stored range.
// Returns Int::na() when the vector is N/A or when i, reinterpreted as an
// unsigned index, is not below raw_size() (this also rejects negative raw
// values); otherwise returns the stored element.
Int operator[](Int i) const {
  const auto raw_index = i.raw();
  const bool in_range =
      !is_na() && (static_cast<size_t>(raw_index) < raw_size());
  if (in_range) {
    return data_[raw_index];
  }
  return Int::na();
}
void knnc(double *array_vec,int *nb_col,int *nb_row, int*k, int *corre_flag, double *dist, double *dist_bound) { int missing,i,j,ii; int count; double value; double *temp; double *row_nb; double ** array; int *miss_pos; int index; int *n_position; int* nb_neighboors; int min=0; int max=*k-1; array=dmatrix(*nb_row,*nb_col); /** contain the row numbers of the missing values **/ miss_pos=ivector(*nb_col, code_miss); /** contains the distances of the neighboors **/ temp=dvector(*k,code_miss); /** contains the row numbers of the neighboors **/ row_nb=dvector(*k,code_miss); /** initilize all the distances with the missing codes **/ init_dvector(dist, nb_row, code_miss); n_position=ivector(*nb_row, code_miss); /** positions of potential neighboors **/ nb_neighboors=ivector(1, code_miss); /** number of neighboors **/ /** coerce the vector into a two dimmensional array **/ vec_mat(array_vec,nb_row,nb_col,array); neighboors(array, nb_row, nb_col, n_position, nb_neighboors); if(*nb_neighboors==0) /** Stop if no neighboors **/ { error("No rows without missing values"); } else { if(*nb_neighboors<*k) /** If less than k neighboors give a warning **/ warning("Only %d neighboors could be used", *nb_neighboors); for(i=0;i<*nb_row;i++) { /** Check for missing values **/ missing=is_na(array[i],nb_col,miss_pos); if (missing==1 && miss_pos[*nb_col-1]==code_miss) /**at least one missing value at most nb_col**/ { if(*corre_flag==1 && miss_pos[*nb_col-2]!=code_miss) /** Give a warning if based on correlation and only one observation **/ warning("Could not estimate the missing values for the row %d\n One observation is not enough to compute the sample correlation", i+1); else { count=0; for(j=0;j<*nb_neighboors;j++) /** loop on the neighboors only **/ { index=n_position[j]; if(*corre_flag==0) value=distance(array[i],array[index],nb_col); /** compute the distance **/ else value=-correlation(array[i],array[index],nb_col); /** compute the correlation **/ if(value!=code_miss) { if (count<*k) 
/** store the first k **/ { temp[count]=value; row_nb[count]=index; count++; } else { quicksort2(temp,row_nb,&min,&max); /** sort the neighboors to keep the kth nearest **/ if (temp[*k-1]>value) /** keep it if the distance is shorter **/ { temp[*k-1]=value; row_nb[*k-1]=index; } } } } if(*corre_flag==0) { fill_up(array,row_nb,nb_col,k,i,miss_pos,temp, dist_bound); /** fill up the missing values by the averaging the distance**/ dist[i]=mean_vec(temp, k); /** Compute the average distances **/ } else { fill_up_corr(array,row_nb, nb_col, k,i, miss_pos, temp, dist_bound); /** fill up the missing values based on correlations**/ dist[i]=-mean_vec(temp, k); /** Compute the average distances **/ } init_dvector(row_nb, k, code_miss); /** initialize row_nb with missing codes **/ init_dvector(temp, k, code_miss); /** initialize temp with missing codes **/ } } else if(missing==1 && miss_pos[*nb_col-1]!=code_miss) warning("Could not estimate the missing values for the row %d\n The row only contains missing values", i+1); } } mat_vec(array_vec, nb_row, nb_col,array); /** recoerce the matrix into a vector **/ /** free the memory **/ free_dmatrix(array,*nb_row); Free(miss_pos); Free(temp); Free(row_nb); Free(n_position); Free(nb_neighboors); }
// Post-decrement. An N/A value is left untouched and N/A is returned;
// otherwise the raw value is decremented and the previous value is returned.
Int operator--(int) & {
  if (!is_na()) {
    Int previous(raw_);
    --raw_;
    return previous;
  }
  return na();
}
// Bitwise XOR of the raw values; the result is N/A if either operand is N/A.
constexpr Int operator^(Int rhs) const {
  return (!is_na() && !rhs.is_na()) ? Int(raw_ ^ rhs.raw_) : na();
}
// Post-increment. An N/A value is left untouched and N/A is returned;
// otherwise the raw value is incremented and the previous value is returned.
Int operator++(int) & {
  if (!is_na()) {
    Int previous(raw_);
    ++raw_;
    return previous;
  }
  return na();
}
// Pre-decrement. An N/A value is left as it is; any other value has its raw
// value decremented. Returns *this in both cases.
Int &operator--() & {
  if (is_na()) {
    return *this;
  }
  --raw_;
  return *this;
}
//[[Rcpp::export]] bool any_naC (Rcpp::NumericVector x) { return is_true(any(is_na(x))); }
//[[Rcpp::export]] bool all_naC (Rcpp::NumericVector x) { return is_true(all(is_na(x))); }
// Left shift of the raw value by rhs bits. The result is N/A if either
// operand is N/A or if the shift count, viewed as unsigned, is 64 or more
// (which also covers negative counts).
constexpr Int operator<<(Int rhs) const {
  return (!is_na() && !rhs.is_na() &&
          (static_cast<uint64_t>(rhs.raw_) < 64))
             ? Int(raw_ << rhs.raw_)
             : na();
}
// In-place bitwise XOR. An N/A left operand is left as it is; an N/A right
// operand turns this value into N/A; otherwise the raw values are XOR-ed.
// Returns *this.
Int &operator^=(Int rhs) & {
  if (is_na()) {
    return *this;
  }
  if (rhs.is_na()) {
    raw_ = raw_na();
  } else {
    raw_ ^= rhs.raw_;
  }
  return *this;
}
// Inequality computed as the XOR of the two raw values; the result is N/A
// if either operand is N/A.
constexpr Bool operator!=(Bool rhs) const {
  return (!is_na() && !rhs.is_na())
             ? Bool(static_cast<uint8_t>(raw_ ^ rhs.raw_))
             : na();
}
// Logical negation computed by XOR-ing the raw value with raw_true();
// N/A stays N/A.
constexpr Bool operator!() const {
  return !is_na() ? Bool(static_cast<uint8_t>(raw_ ^ raw_true())) : na();
}
//[[Rcpp::export]] int nb_naC (Rcpp::NumericVector x) { return sum(is_na(x)); }
// Pre-increment. An N/A value is left as it is; any other value has its raw
// value incremented. Returns *this in both cases.
Int &operator++() & {
  if (is_na()) {
    return *this;
  }
  ++raw_;
  return *this;
}
// [[Rcpp::export]] List buildCellList( CharacterVector r, CharacterVector t, CharacterVector v) { //Valid combinations // r t v // T F F // T T T // F F F // T F T (must be a formula) int n = r.size(); List cells(n); LogicalVector hasV = !is_na(v); LogicalVector hasR = !is_na(r); LogicalVector hasT = !is_na(t); for(int i=0; i < n; i++){ if(hasR[i]){ if(hasV[i]){ if(hasT[i]){ // r t v // T T T (2) cells[i] = CharacterVector::create( Named("r") = r[i], Named("t") = t[i], Named("v") = v[i], Named("f") = NA_STRING); }else{ // r t f // T T T (4 - formula) cells[i] = CharacterVector::create( Named("r") = r[i], Named("t") = "str", Named("v") = NA_STRING, Named("f") = "<f>" + v[i] + "</f>"); } }else{ // r t v // T F F (1) cells[i] = CharacterVector::create( Named("r") = r[i], Named("t") = NA_STRING, Named("v") = NA_STRING, Named("f") = NA_STRING); } }else{ // r t v // F F F (3) cells[i] = CharacterVector::create( Named("r") = NA_STRING, Named("t") = NA_STRING, Named("v") = NA_STRING, Named("f") = NA_STRING); } } // end of for loop return wrap(cells) ; }