Beispiel #1
0
/*
 * rcpp_segInbreeding
 *
 * Reads marker lines from one or two text files in blocks of K markers and,
 * for every pair of consecutive selected columns (2*i0 and 2*i0+1), tracks
 * runs of markers on which both columns carry the same allele.  When such a
 * run ends, contains at least minSNP markers and spans at least minL on the
 * ArmacM scale, its span on the Armakb scale, weighted by L*L/(a+L*L), is
 * added to fROH[i0].
 *
 * Parameters (as used in the visible code):
 *   path1, path2    files opened for reading; path2 is opened only if N2 > 0.
 *   NFile1, NFile2  size the line buffer (2*(NFile1+NFile2) chars) and bound
 *                   each fgets call (2*NFile1 resp. 2*NFile2); presumably the
 *                   number of allele columns per file - TODO confirm.
 *   ArmaIndex1/2    column selectors; column c is read from character
 *                   offset 2*c of the remainder of a marker line.
 *   N1, N2          number of columns selected from file 1 / file 2.
 *   M               ArmacM and Armakb are copied for indices 0..M.
 *   minSNP          minimum run length in markers; also fixes the block size K.
 *   minL            minimum run length measured with ArmacM.
 *   ArmacM, Armakb  marker positions; the names suggest centimorgan and
 *                   kilobase units - TODO confirm.
 *   a               constant in the weight L*L/(a+L*L).
 *   stdsymB         its first character is the allele symbol that sets a bit.
 *   skip            skip+1 leading lines of each file are discarded.
 *   cskip           number of leading whitespace-delimited tokens discarded
 *                   on every marker line.
 *
 * NOTE(review): this excerpt stops inside the main loop; the update of m,
 * the filling of ArmasegInbr, the release of buffers/files and the return
 * statement are not visible here.
 */
Rcpp::NumericVector rcpp_segInbreeding(std::string path1, std::string path2, int NFile1, int NFile2, const arma::ivec& ArmaIndex1, const arma::ivec& ArmaIndex2, int N1, int N2, int M, int minSNP, double minL, const arma::vec& ArmacM, const arma::vec& Armakb, double a, std::string stdsymB, int skip, int cskip) {
  int m, i, i0, j, rK, r, endoffile, gleich;
  double L;
  /* Scratch buffer for the leading tokens that are read and thrown away. */
  char str1[100];
  char symB = stdsymB.at(0);
  FILE *f1, *f2;
  int N  = N1 + N2;
  /* Block size: half of minSNP, capped at 30 markers, one bit per marker. */
  int K  = (minSNP<=60)?(minSNP/2):(30);
  Rcpp::NumericVector ArmasegInbr(N/2);
  
  /* One shared line buffer, large enough for a line of either file. */
  size_t bufsize = 2*(NFile1+NFile2);  
  char* Line = (char*)malloc(bufsize*sizeof(char));
  /* error_return is defined outside this excerpt. */
  /* NOTE(review): whether it releases earlier allocations cannot be seen here. */
  if(Line == NULL){error_return("Memory allocation failed.");};
  
  
  /* Per-pair state (N/2 pairs) and per-column allele bit patterns (N columns). */
  double* fROH   = (double*)calloc(N/2,sizeof(double));
  int* thisROH   = (int*)calloc(N/2,sizeof(int));
  int* thisAllel = (int*)calloc(N,sizeof(int));
  int* prevAllel = (int*)calloc(N,sizeof(int));
  double* cM     = (double*)calloc(M+1,sizeof(double));
  double* kb     = (double*)calloc(M+1,sizeof(double));
  int* index1    = (int*)calloc(N1,sizeof(int));          /*     N1 - vector */
  int* index2    = (int*)calloc(N2,sizeof(int));          /*     N2 - vector */
  if(fROH      == NULL){error_return("Memory allocation failed.");};
  if(thisROH   == NULL){error_return("Memory allocation failed.");};
  if(thisAllel == NULL){error_return("Memory allocation failed.");};
  if(prevAllel == NULL){error_return("Memory allocation failed.");};
  if(cM        == NULL){error_return("Memory allocation failed.");};
  if(kb        == NULL){error_return("Memory allocation failed.");};
  if(index1    == NULL){error_return("Memory allocation failed.");};
  if(index2    == NULL){error_return("Memory allocation failed.");};
  
  /* Copy the column selectors into plain C arrays. */
  for(i=0;i<N1;i++){index1[i]=ArmaIndex1.at(i);}
  for(i=0;i<N2;i++){index2[i]=ArmaIndex2.at(i);}
  
  /* Copy the M+1 marker positions of both scales. */
  for(m=0;m<M+1;m++){
    cM[m]=ArmacM.at(m);
    kb[m]=Armakb.at(m);
  }
  
  f1 = fopen(path1.c_str(),"r");
  if(f1 == NULL){error_return("File opening failed.");};
  /* Discard the first skip+1 lines of file 1. */
  /* NOTE(review): fgetc returns EOF at end of file, which never equals '\n',
     so this loop does not terminate if the file has fewer lines. */
  for(i=0;i<skip+1;i++){
    while(fgetc(f1)!='\n'){}
  }
  
  if(N2>0){
    f2 = fopen(path2.c_str(),"r");
    if(f2 == NULL){error_return("File opening failed.");};
    /* Discard the first skip+1 lines of file 2 (same EOF caveat as above). */
    for(i=0;i<skip+1;i++){
      while(fgetc(f2)!='\n'){}
    }
  }else{f2 = f1; /* avoid warnings */}
  
  endoffile=0;
  /* m is used below as the index of the first marker of the current block. */
  m=0;
  while(!endoffile){
    /* The block read in the previous iteration becomes the "previous" block. */
    for(i=0; i<N;i++){
      prevAllel[i] = thisAllel[i];
      thisAllel[i] = 0;
      }
    /* Read up to K marker lines from file 1; bit rK of thisAllel[i] is set
       when column index1[i] shows symB at the rK-th marker of this block. */
    rK=0;
    while(rK<K){
      /* Skip the cskip leading tokens of the marker line. */
      /* NOTE(review): "%s" has no field width; a token of 100 or more
         characters overflows str1. */
      for(i=0; i<cskip; i++){
        endoffile = fscanf(f1, "%s ", str1)<1;
        if(endoffile){break;}
      }
      if(endoffile){break;}
      endoffile = fgets(Line, 2*NFile1, f1)==NULL;
      if(endoffile){break;}
      for(i=0; i<N1;i++){
        if(Line[2*index1[i]]==symB){thisAllel[i]= thisAllel[i] | (1u<<rK);}
      }
      rK++;
    }
    /* Same for file 2; its columns are stored behind the N1 columns of file 1. */
    /* NOTE(review): endoffile and rK from file 1 are overwritten here, so both
       files are presumably expected to hold the same number of marker lines
       - confirm. */
    if(N2>0){
      rK=0;
      while(rK<K){
        for(i=0; i<cskip; i++){
          endoffile = fscanf(f2, "%s ", str1)<1;
          if(endoffile){break;}
        }
        if(endoffile){break;}
        endoffile = fgets(Line,2*NFile2,f2)==NULL;
        if(endoffile){break;}
        for(i=0; i<N2;i++){
          if(Line[2*index2[i]]==symB){thisAllel[i+N1]= thisAllel[i+N1] | (1u<<rK);}
        }
        rK++;
      }
    }
    /* At end of input, report the number of markers read so far. */
    if(endoffile){Rprintf("M=%d\n",m+rK);}
    /* Nothing was read for this block: leave the main loop. */
    if(rK==0){break;}
    
    /* Compare the two columns of every pair (2*i0, 2*i0+1). */
    for(i0=0; i0<N/2;i0++){
      i = 2*i0;
      j = 2*i0+1;
      if(thisAllel[i]==thisAllel[j]){
          if(prevAllel[i]==prevAllel[j] && m>0){ /* extend the ROH */
            thisROH[i0] += rK;
          }else{  /* new ROH */
            thisROH[i0] = rK;
            if(m>0){
              /* Bit r of gleich is 1 where both columns agreed at marker r of
                 the previous block; count the agreeing markers at its end. */
              gleich = ~(prevAllel[i] ^ prevAllel[j]);
              r = K-1;
              while(r>=0 && ((gleich>>r)&1u)){
                thisROH[i0] += 1;
                r--;
              }
            }
          }
        }else{
          if(prevAllel[i]==prevAllel[j] && m>0){ /* end the ROH */
            /* Count the agreeing markers at the start of the current block. */
            gleich = ~(thisAllel[i] ^ thisAllel[j]);
            r = 0;
            while(r<rK && ((gleich>>r)&1u)){
              thisROH[i0] += 1;
              r++;
            }
            
            /* The run covers markers m+r-thisROH[i0] .. m+r; it only counts
               if it is long enough in markers and on the cM scale. */
            if(thisROH[i0]>=minSNP){
              L = cM[m+r]-cM[m+r-thisROH[i0]];
              if(L>=minL){fROH[i0] += (L*L/(a+L*L))*(kb[m+r]-kb[m+r-thisROH[i0]]);}
              }
            thisROH[i0] = 0;
          }
        }
/* NOTE(review): the excerpt ends here; the enclosing loops and the rest of
   the function are not visible. */
Beispiel #2
0
/*
 * rcpp_segIBDandN
 *
 * Reads marker lines from two text files in parallel, in blocks of K markers.
 * For every pair of selected columns (i, j<=i) of pathThisBreed it tracks runs
 * of markers on which both columns carry the same allele and, within such a
 * run, sums the Armakb intervals of the markers at which both columns are
 * flagged '1' in pathNative (lSEG).  When a run ends, contains at least minSNP
 * markers and spans at least minL on the ArmaPos scale, L*L/(a+L*L) times
 * lSEG is added to fROH[i][j].
 *
 * Parameters (as used in the visible code):
 *   pathThisBreed, pathNative  files opened for reading.
 *   NFileC, NFileN  size the line buffer and bound the fgets calls; presumably
 *                   the number of allele columns per file - TODO confirm.
 *   ArmaIndexC/N    column selectors (NC entries each are read); column c is
 *                   taken from character offset 2*c of the line remainder.
 *   NC              number of selected columns.
 *   minSNP          minimum run length in markers; also fixes the block size K.
 *   minL            minimum run length measured with ArmaPos.
 *   ArmaPos, Armakb marker positions; M is Armakb.n_elem - 1 and ArmaPos is
 *                   read for indices 0..M as well.
 *   a               constant in the weight L*L/(a+L*L).
 *   stdsymB         its first character is the allele symbol that sets a bit.
 *   skip            skip+1 leading lines of pathThisBreed are discarded
 *                   (exactly one line of pathNative).
 *   cskip           number of leading whitespace-delimited tokens discarded on
 *                   every marker line of pathThisBreed (one token for
 *                   pathNative).
 *
 * NOTE(review): this excerpt stops inside the main loop; the update of m,
 * the filling of confROH, the release of buffers/files and the return
 * statement are not visible here.
 */
Rcpp::NumericMatrix rcpp_segIBDandN(std::string pathThisBreed, std::string pathNative, int NFileC, int NFileN, const arma::ivec& ArmaIndexC, const arma::ivec& ArmaIndexN, int NC, int minSNP, double minL, const arma::vec& ArmaPos, const arma::vec& Armakb, double a, std::string stdsymB, int skip, int cskip) {
  /* ***** initialize variables ****** */
  int m, i, j, r, r2, rK, endoffile, gleich;
  double L;
  /* Scratch buffer for the leading tokens that are read and thrown away. */
  char str1[100];
  FILE *fC, *fN;
  char symB = stdsymB.at(0);
  Rcpp::NumericMatrix confROH(NC, NC);
  /* Block size: half of minSNP, capped at 30 markers, one bit per marker. */
  int K  = (minSNP<=60)?(minSNP/2):(30);
  int M  = Armakb.n_elem - 1;
  
  /* One shared line buffer, large enough for a line of either file. */
  size_t bufsize = 2*(NFileC+NFileN);  
  char* Line = (char*)malloc(bufsize*sizeof(char));
  /* error_return is defined outside this excerpt. */
  /* NOTE(review): whether it releases earlier allocations cannot be seen here. */
  if(Line == NULL){error_return("Memory allocation failed.");};
  
  /* Nat holds M row pointers that start out NULL; a row of NC flags is
     allocated per marker while reading.  fROH, thisROH and lSEG are
     lower-triangular: row i gets i+1 entries further below. */
  int** Nat         = (int**)calloc(M,sizeof(int*));                   /*  M xNC - matrix */
  double** fROH     = (double**)calloc(NC,sizeof(double*));            /*  NCxNC - matrix */
  int** thisROH     = (int**)calloc(NC,sizeof(int*));                  /*  NCxNC - matrix */
  double** lSEG     = (double**)calloc(NC,sizeof(double*));            /*  NCxNC - matrix */
  int* currAllelesC = (int*)calloc(NC,sizeof(int));                    /*     NC - vector */
  int* prevAllelesC = (int*)calloc(NC,sizeof(int));                    /*     NC - vector */
  int* indexC       = (int*)calloc(NC,sizeof(int));                    /*     NC - vector */
  int* indexN       = (int*)calloc(NC,sizeof(int));                    /*     NC - vector */
  double* Pos       = (double*)calloc(ArmaPos.n_elem, sizeof(double)); /*    M+1 - vector */
  double* kb        = (double*)calloc(Armakb.n_elem, sizeof(double));  /*    M+1 - vector */
  if(fROH        == NULL){error_return("Memory allocation failed.");};
  if(Nat         == NULL){error_return("Memory allocation failed.");};
  if(thisROH     == NULL){error_return("Memory allocation failed.");};
  if(lSEG        == NULL){error_return("Memory allocation failed.");};
  if(currAllelesC== NULL){error_return("Memory allocation failed.");};
  if(prevAllelesC== NULL){error_return("Memory allocation failed.");};
  if(indexC      == NULL){error_return("Memory allocation failed.");};
  if(indexN      == NULL){error_return("Memory allocation failed.");};
  if(Pos         == NULL){error_return("Memory allocation failed.");};
  if(kb          == NULL){error_return("Memory allocation failed.");};
  
  /* Copy the M+1 marker positions of both scales. */
  /* NOTE(review): Pos is sized by ArmaPos.n_elem but filled up to index M,
     which is derived from Armakb; both vectors are presumably of equal
     length - confirm. */
  for(m=0;m<M+1;m++){
    Pos[m] = ArmaPos.at(m);
    kb[m]  = Armakb.at(m);
  }
  
  /* Copy the column selectors and allocate the triangular rows. */
  for(i=0; i<NC;i++){
    indexC[i] = ArmaIndexC.at(i);
    indexN[i] = ArmaIndexN.at(i);
    fROH[i]   = (double*)calloc(i+1,sizeof(double));
    thisROH[i]= (int*)calloc(i+1,sizeof(int));
    lSEG[i]   = (double*)calloc(i+1,sizeof(double));
    if(fROH[i]   == NULL){error_return("Memory allocation failed.");};
    if(thisROH[i]== NULL){error_return("Memory allocation failed.");};
    if(lSEG[i]   == NULL){error_return("Memory allocation failed.");};
  }
  
  /* ******* Main part ******** */
  fC = fopen(pathThisBreed.c_str(),"r");
  fN = fopen(pathNative.c_str(),"r");
  if(fC == NULL){error_return("File opening failed.");};
  if(fN == NULL){error_return("File opening failed.");};
  /* Discard one line of fN and skip+1 lines of fC. */
  /* NOTE(review): fgetc returns EOF at end of file, which never equals '\n',
     so these loops do not terminate if a file has fewer lines. */
  while(fgetc(fN)!='\n'){}
  for(i=0;i<skip+1;i++){
    while(fgetc(fC)!='\n'){}
  }
  endoffile=0;
  /* m is used below as the index of the first marker of the current block. */
  m=0;
  while(!endoffile){
    /* *** Determine previous alleles and current alleles (K at a time) *** */
    /* ***           and native alleles for candidates                  *** */
    for(i=0; i<NC;i++){
      prevAllelesC[i] = currAllelesC[i];
      currAllelesC[i] = 0;
    }
    rK=0;
    while(rK<K){
      /* Skip cskip leading tokens in fC and one leading token in fN. */
      /* NOTE(review): "%s" has no field width; a token of 100 or more
         characters overflows str1. */
      for(i=0; i<cskip; i++){
        endoffile = fscanf(fC, "%s ", str1)<1;
        if(endoffile){break;}
      }
      if(endoffile){break;}
      endoffile = fscanf(fN, "%s ", str1)<1;
      if(endoffile){break;}
      endoffile = fgets(Line,2*NFileC,fC)==NULL;
      if(endoffile){break;}
      /* Bit rK of currAllelesC[i] is set when column indexC[i] shows symB. */
      for(i=0; i<NC;i++){
        if(Line[2*indexC[i]]==symB){currAllelesC[i]= currAllelesC[i] | (1u<<rK);}
      }
      endoffile = fgets(Line, 2*NFileN, fN)==NULL;
      if(endoffile){break;}
      /* Flag row for marker m+rK: 1 where column indexN[i] shows '1'. */
      /* NOTE(review): this calloc result is not checked, and Nat has only M
         rows, so the files must not hold more than M marker lines. */
      Nat[m+rK] = (int*)calloc(NC,sizeof(int));
      for(i=0; i<NC;i++){
        Nat[m+rK][i] = ((Line[2*indexN[i]]=='1')?1:0);
      }
      rK++;
    }
    /* At end of input, report the number of markers read so far. */
    if(endoffile){Rprintf("M=%d\n",m+rK);}
    /* Nothing was read for this block: leave the main loop. */
    if(rK==0){break;}
    
    /* Compare every pair of selected columns (lower triangle, diagonal included). */
    for(i=0; i<NC;i++){
      for(j=0; j<i+1; j++){
        if(currAllelesC[i]==currAllelesC[j]){
          if(prevAllelesC[i]==prevAllelesC[j] && m>0){ /* extend the ROH */
            thisROH[i][j] += rK;
            /* Add the kb interval of each marker flagged in both columns. */
            for(r2=0;r2<rK;r2++){if(Nat[m+r2][i]*Nat[m+r2][j]>0){lSEG[i][j] += kb[m+r2+1]-kb[m+r2];}} /* !!!!! */
          }else{  /* new ROH */
            thisROH[i][j] = rK;
            for(r2=0;r2<rK;r2++){if(Nat[m+r2][i]*Nat[m+r2][j]>0){lSEG[i][j] += kb[m+r2+1]-kb[m+r2];}} /* !!!!! */
            if(m>0){
              /* Bit r of gleich is 1 where both columns agreed at marker r of
                 the previous block (marker index m-K+r); count the agreeing
                 markers at its end. */
              gleich = ~(prevAllelesC[i] ^ prevAllelesC[j]);
              r = K-1;
              while(r>=0 && ((gleich>>r)&1u)){
                thisROH[i][j] += 1;
                if(Nat[m-K+r][i]*Nat[m-K+r][j]>0){lSEG[i][j] += kb[m-K+r+1]-kb[m-K+r];} /* !!!!! */
                r--;
              }
            }
          }
        }else{
          if(prevAllelesC[i]==prevAllelesC[j] && m>0){ /* end the ROH */
            /* Count the agreeing markers at the start of the current block. */
            /* NOTE(review): the bound is K, whereas rcpp_segInbreeding uses rK
               at this point.  In a short final block (rK<K) the bits from rK
               upward are 0 in both columns and therefore count as agreeing,
               so Nat[m+r] can be a row that was never allocated - verify. */
            gleich = ~(currAllelesC[i] ^ currAllelesC[j]);
            r = 0;
            while(r<K && ((gleich>>r)&1u)){
              thisROH[i][j] += 1;
              if(Nat[m+r][i]*Nat[m+r][j]>0){lSEG[i][j] += kb[m+r+1]-kb[m+r];} /* !!!!! */
              r++;
            }
            
            /* The run covers markers m+r-thisROH[i][j] .. m+r; it only counts
               if it is long enough in markers and on the Pos scale. */
            if(thisROH[i][j]>=minSNP){
              L = Pos[m+r]-Pos[m+r-thisROH[i][j]];
              if(L>=minL){
                fROH[i][j] += (L*L/(a+L*L))*lSEG[i][j];
                }
              }
            /* Reset the run state of this pair. */
            thisROH[i][j] = 0;
            lSEG[i][j] = 0.0;
          }
        }
      }
/* NOTE(review): the excerpt ends here; the enclosing loops and the rest of
   the function are not visible. */
Rcpp::NumericMatrix rcpp_segIBDandNVersion2(std::string pathThisBreed, int NFileC, int NC, const arma::ivec& ArmaIndexC, const arma::mat& ArmaNat, int minSNP, double minL, const arma::vec& ArmaPos, const arma::vec& Armakb, double a, std::string stdsymB, int skip, int cskip) {
  int m, m2, i, j, r, rK, endoffile, gleich;
  double L, w, lSEG ;
  char str1[100];
  char symB = stdsymB.at(0);
  FILE *fC;
  Rcpp::NumericMatrix confROH(NC, NC);
  int K  = (minSNP<=60)?(minSNP/2):(30);
  int M  = Armakb.n_elem - 1;
  
  size_t bufsize = 2*NFileC;  
  char* Line = (char*)malloc(bufsize*sizeof(char));
  if(Line == NULL){error_return("Memory allocation failed.");};
  
  int** Nat         = (int**)calloc(NC,sizeof(int*));
  double** fROH     = (double**)calloc(NC,sizeof(double*));
  int** thisROH     = (int**)calloc(NC,sizeof(int*));
  int* currAllelesC = (int*)calloc(NC,sizeof(int));
  int* prevAllelesC = (int*)calloc(NC,sizeof(int));
  int* indexC       = (int*)calloc(NC,sizeof(int));
  double* Pos       = (double*)calloc(ArmaPos.n_elem, sizeof(double));
  double* kb        = (double*)calloc(Armakb.n_elem, sizeof(double));
  
  if(Nat          == NULL){error_return("Memory allocation failed.");};
  if(fROH         == NULL){error_return("Memory allocation failed.");};
  if(thisROH      == NULL){error_return("Memory allocation failed.");};
  if(currAllelesC == NULL){error_return("Memory allocation failed.");};
  if(prevAllelesC == NULL){error_return("Memory allocation failed.");};
  if(indexC       == NULL){error_return("Memory allocation failed.");};
  if(Pos          == NULL){error_return("Memory allocation failed.");};
  if(kb           == NULL){error_return("Memory allocation failed.");};
  
  for(m=0;m<M+1;m++){
    Pos[m] = ArmaPos.at(m);
    kb[m]  = Armakb.at(m);
  }
  
  for(i=0; i<NC;i++){
    indexC[i] = ArmaIndexC.at(i);
    fROH[i]   = (double*)calloc(i+1, sizeof(double));
    thisROH[i]=    (int*)calloc(i+1, sizeof(int));
    Nat[i]    =    (int*)calloc(M,   sizeof(int));
    if(fROH[i]    == NULL){error_return("Memory allocation failed.");};
    if(thisROH[i] == NULL){error_return("Memory allocation failed.");};
    if(Nat[i]     == NULL){error_return("Memory allocation failed.");};
    for(m=0; m<M;m++){
      Nat[i][m] = ArmaNat.at(m,i);
    }
  }
  
  
  fC = fopen(pathThisBreed.c_str(),"r");
  if(fC == NULL){error_return("File opening failed.");}; 
  for(i=0;i<skip+1;i++){
    while(fgetc(fC)!='\n'){}
  }
  
  endoffile=0;
  m=0;
  while(!endoffile){
    for(i=0; i<NC;i++){
      prevAllelesC[i] = currAllelesC[i];
      currAllelesC[i] = 0;
    }
    rK=0;
    while(rK<K){
      for(i=0; i<cskip; i++){
        endoffile = fscanf(fC, "%s ", str1)<1;
        if(endoffile){break;}
      }
      if(endoffile){break;}
      endoffile = fgets(Line,2*NFileC,fC)==NULL;
      if(endoffile){break;}
      for(i=0; i<NC;i++){
        if(Line[2*indexC[i]]==symB){currAllelesC[i]= currAllelesC[i] | (1u<<rK);}
      }
      rK++;
    }
    if(endoffile){Rprintf("M=%d\n",m+rK);}
    if(rK==0){break;}

    for(i=0; i<NC;i++){
      for(j=0; j<i+1; j++){
        if(currAllelesC[i]==currAllelesC[j]){
          if(prevAllelesC[i]==prevAllelesC[j] && m>0){ /* ROH verlängern */
            thisROH[i][j] += rK;
          }else{  /* neuer ROH */
            thisROH[i][j] = rK;
            if(m>0){
              gleich = ~(prevAllelesC[i] ^ prevAllelesC[j]);
              r = K-1;
              while(r>=0 && ((gleich>>r)&1u)){
                thisROH[i][j] += 1;
                r--;
              }
            }
          }
        }else{
          if(prevAllelesC[i]==prevAllelesC[j] && m>0){ /* ROH beenden */
            gleich = ~(currAllelesC[i] ^ currAllelesC[j]);
            r = 0;
            while(r<K && ((gleich>>r)&1u)){
              thisROH[i][j] += 1;
              r++;
            }
            
            if(thisROH[i][j]>=minSNP){
              L = Pos[m+r]-Pos[m+r-thisROH[i][j]];
              if(L>=minL){
                w = L*L/(a+L*L);
                lSEG = 0.0;
                for(m2=m+r-thisROH[i][j];m2<m+r;m2++){
                  if(Nat[i][m2]*Nat[j][m2]>0){lSEG += kb[m2+1]-kb[m2];}
                }
                fROH[i][j] += w*lSEG;
                }
              }
            thisROH[i][j] = 0;
          }
        }
      }