Ejemplo n.º 1
0
Archivo: seq.c Proyecto: kalibera/rexp
/* Replicate the first argument (a vector or NULL) so that the result has
   exactly the length given by the second argument ('length.out').

   args: (x, length.out); length.out must have length one and be a
         non-negative, finite value that is a valid vector length.
   Returns the replicated vector.  If x inherits from "factor" the result
   is given class "factor" (or c("ordered", "factor")) and the levels of x.
   Errors: x is neither a vector nor NULL; invalid length.out; NULL asked
   to be replicated to a non-zero length. */
SEXP attribute_hidden do_rep_len(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    R_xlen_t ns, na;
    SEXP a, s, len;

    checkArity(op, args);
    s = CAR(args);

    if (!isVector(s) && s != R_NilValue)
        error(_("attempt to replicate non-vector"));

    len = CADR(args);
    if(length(len) != 1)
        error(_("invalid '%s' value"), "length.out");
#ifdef LONG_VECTOR_SUPPORT
    double sna = asReal(len);
    /* Converting a double that does not fit into R_xlen_t is undefined
       behaviour, so values beyond the largest vector length are rejected
       here instead of being cast blindly. */
    if (!R_FINITE(sna) || sna < 0 || sna > R_XLEN_T_MAX)
        error(_("invalid '%s' value"), "length.out");
    na = (R_xlen_t) sna;
#else
    if ((na = asInteger(len)) == NA_INTEGER || na < 0) /* na = 0 ok */
        error(_("invalid '%s' value"), "length.out");
#endif

    if (TYPEOF(s) == NILSXP && na > 0)
        error(_("cannot replicate NULL to a non-zero length"));
    ns = xlength(s);
    if (ns == 0) {
        /* Nothing to recycle: return a copy of x, lengthened if needed. */
        PROTECT(a = duplicate(s));
        if(na > 0) a = xlengthgets(a, na);
        UNPROTECT(1);
        return a;
    }
    PROTECT(a = rep3(s, ns, na));

#ifdef _S4_rep_keepClass
    if(IS_S4_OBJECT(s)) { /* e.g. contains = "list" */
        setAttrib(a, R_ClassSymbol, getClassAttrib(s));
        SET_S4_OBJECT(a);
    }
#endif

    if (inheritsCharSXP(s, R_FactorCharSXP)) {
        SEXP tmp;
        if(inheritsCharSXP(s, R_OrderedCharSXP)) {
            PROTECT(tmp = allocVector(STRSXP, 2));
            SET_STRING_ELT(tmp, 0, R_OrderedCharSXP);
            SET_STRING_ELT(tmp, 1, R_FactorCharSXP);
        } else PROTECT(tmp = mkString("factor"));
        setAttrib(a, R_ClassSymbol, tmp);
        UNPROTECT(1);
        setAttrib(a, R_LevelsSymbol, getLevelsAttrib(s));
    }
    UNPROTECT(1);
    return a;
}
Ejemplo n.º 2
0
/* Make the second argument ('out') carry the attribute list, the S4 flag
   and the object flag of the first argument ('in').  The attribute list
   is installed as-is (not duplicated) and 'out' is modified in place,
   then returned. */
attribute_hidden
SEXP do_copyDFattr(SEXP call, SEXP op, SEXP args, SEXP env)
{
    SEXP src, dst;

    checkArity(op, args);
    src = CAR(args);
    dst = CADR(args);

    SET_ATTRIB(dst, ATTRIB(src));
    if (IS_S4_OBJECT(src))
        SET_S4_OBJECT(dst);
    else
        UNSET_S4_OBJECT(dst);
    SET_OBJECT(dst, OBJECT(src));

    return dst;
}
Ejemplo n.º 3
0
Archivo: seq.c Proyecto: kalibera/rexp
/* Replicate the first argument (a vector or NULL) according to the second
   argument ('times').

   args: (x, times).  If 'times' has the same length as x, element i of x
         is handled by rep2() using times[i]; otherwise 'times' must be a
         single non-negative, finite value and x is repeated that many
         times as a whole.
   Returns the replicated vector.  If x inherits from "factor" the result
   is given class "factor" (or c("ordered", "factor")) and the levels of x.
   Errors: 'times' is not a vector; x is neither a vector nor NULL;
   invalid 'times' (including a result longer than a vector may be). */
SEXP attribute_hidden do_rep_int(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    checkArity(op, args);
    SEXP s = CAR(args), ncopy = CADR(args);
    R_xlen_t nc;
    SEXP a;

    if (!isVector(ncopy))
        error(_("incorrect type for second argument"));

    if (!isVector(s) && s != R_NilValue)
        error(_("attempt to replicate an object of type '%s'"),
              type2char(TYPEOF(s)));

    nc = xlength(ncopy); // might be 0
    if (nc == xlength(s))
        PROTECT(a = rep2(s, ncopy));
    else {
        if (nc != 1) error(_("invalid '%s' value"), "times");

#ifdef LONG_VECTOR_SUPPORT
        double snc = asReal(ncopy);
        /* A double outside the R_xlen_t range must not be cast: the
           conversion would be undefined behaviour. */
        if (!R_FINITE(snc) || snc < 0 || snc > R_XLEN_T_MAX)
            error(_("invalid '%s' value"), "times");
        nc = (R_xlen_t) snc;
#else
        if ((nc = asInteger(ncopy)) == NA_INTEGER || nc < 0)/* nc = 0 ok */
            error(_("invalid '%s' value"), "times");
#endif
        R_xlen_t ns = xlength(s);
        /* The total length nc * ns must itself be a valid vector length;
           check before multiplying so the product cannot overflow. */
        if (ns > 0 && nc > R_XLEN_T_MAX / ns)
            error(_("invalid '%s' value"), "times");
        PROTECT(a = rep3(s, ns, nc * ns));
    }

#ifdef _S4_rep_keepClass
    if(IS_S4_OBJECT(s)) { /* e.g. contains = "list" */
        setAttrib(a, R_ClassSymbol, getClassAttrib(s));
        SET_S4_OBJECT(a);
    }
#endif

    if (inheritsCharSXP(s, R_FactorCharSXP)) {
        SEXP tmp;
        if(inheritsCharSXP(s, R_OrderedCharSXP)) {
            PROTECT(tmp = allocVector(STRSXP, 2));
            SET_STRING_ELT(tmp, 0, R_OrderedCharSXP);
            SET_STRING_ELT(tmp, 1, R_FactorCharSXP);
        } else PROTECT(tmp = mkString("factor"));
        setAttrib(a, R_ClassSymbol, tmp);
        UNPROTECT(1);
        setAttrib(a, R_LevelsSymbol, getLevelsAttrib(s));
    }
    UNPROTECT(1);
    return a;
}
Ejemplo n.º 4
0
/* Truncate every component of the list 'x' to at most n = R_n[0] elements.

   x    list whose components must all be character vectors
   R_n  integer vector; its first element is the maximum length (>= 0)

   Components that are already short enough are reused unchanged.  A
   component that is too long is replaced by a new character vector holding
   its first n elements, its attributes (via copyMostAttrib) and the first
   n of its names.  If no component had to be truncated, 'x' itself is
   returned; otherwise a new list with the attributes, object flag and S4
   flag of 'x'. */
SEXP R_copyTruncate(SEXP x, SEXP R_n) {
    if (isNull(x) || TYPEOF(x) != VECSXP)
        error("'x' not of type list");
    if (isNull(R_n) || TYPEOF(R_n) != INTSXP)
        error("'n' not of type integer");
    /* INTEGER(R_n)[0] below needs at least one element */
    if (LENGTH(R_n) < 1)
        error("'n' invalid value");
    int i, k, n;
    int truncated = 0;
    SEXP s, r, t;

    n = INTEGER(R_n)[0];
    if (n < 0)
        error("'n' invalid value");

    r = PROTECT(allocVector(VECSXP, LENGTH(x)));

    for (i = 0; i < LENGTH(x); i++) {
        s = VECTOR_ELT(x, i);
        if (TYPEOF(s) != STRSXP)
            error("component not of type character");
        if (LENGTH(s) <= n) {
            SET_VECTOR_ELT(r, i, s);
            continue;
        }

        truncated = 1;
        /* t is protected from here on by being an element of r */
        SET_VECTOR_ELT(r, i, (t = allocVector(STRSXP, n)));
        for (k = 0; k < n; k++)
            SET_STRING_ELT(t, k, STRING_ELT(s, k));
        /* copyMostAttrib(inp, ans) copies from its first argument to its
           second: the source is the original component, the target the
           truncated copy.  (The arguments used to be swapped, which
           touched the input instead of the result.) */
        copyMostAttrib(s, t);

        SEXP nms = getAttrib(s, R_NamesSymbol);
        if (nms != R_NilValue) {
            /* build the truncated names completely before attaching them */
            SEXP v = PROTECT(allocVector(STRSXP, n));
            for (k = 0; k < n; k++)
                SET_STRING_ELT(v, k, STRING_ELT(nms, k));
            setAttrib(t, R_NamesSymbol, v);
            UNPROTECT(1);
        }
    }

    if (!truncated) {
        UNPROTECT(1);
        return x;
    }

    SET_ATTRIB(r, ATTRIB(x));
    SET_OBJECT(r, OBJECT(x));
    if (IS_S4_OBJECT(x))
        SET_S4_OBJECT(r);

    UNPROTECT(1);
    return r;
}
Ejemplo n.º 5
0
/* Variant of copyMostAttrib() for subsetting: time-series information is
   not carried over.

   Every attribute of 'inp' except names, tsp, dim and dimnames is
   installed on 'ans'.  The class attribute is copied with any "ts" entry
   left out; a class that consists of nothing but "ts" is not copied at
   all.  Finally the object flag and the S4 flag of 'inp' are transferred.
   It is an error for 'ans' to be NULL. */
void copyMostAttribNoTs(SEXP inp, SEXP ans)
{
    SEXP node;

    if (ans == R_NilValue)
        error(_("attempt to set an attribute on NULL"));

    PROTECT(ans);
    PROTECT(inp);
    for (node = ATTRIB(inp); node != R_NilValue; node = CDR(node)) {
        SEXP tag = TAG(node);

        /* attributes that are never copied */
        if (tag == R_NamesSymbol || tag == R_TspSymbol ||
            tag == R_DimSymbol || tag == R_DimNamesSymbol)
            continue;

        /* everything but the class is copied verbatim */
        if (tag != R_ClassSymbol) {
            installAttrib(ans, tag, CAR(node));
            continue;
        }

        /* class attribute: look for a "ts" entry */
        SEXP klass = CAR(node);
        int ncl = LENGTH(klass);
        int k, has_ts = 0;
        for (k = 0; k < ncl; k++) {
            if (strcmp(CHAR(STRING_ELT(klass, k)), "ts") == 0) { /* ASCII */
                has_ts = 1;
                break;
            }
        }

        if (!has_ts) {
            installAttrib(ans, tag, klass);
            continue;
        }
        if (ncl <= 1)
            continue;		/* the class was just "ts": drop it */

        /* copy the class vector without "ts" (one element shorter) */
        SEXP stripped;
        int out = 0;
        PROTECT(stripped = allocVector(STRSXP, ncl - 1));
        for (k = 0; k < ncl; k++) {
            if (strcmp(CHAR(STRING_ELT(klass, k)), "ts") != 0) /* ASCII */
                SET_STRING_ELT(stripped, out++, STRING_ELT(klass, k));
        }
        installAttrib(ans, tag, stripped);
        UNPROTECT(1);
    }
    SET_OBJECT(ans, OBJECT(inp));
    if (IS_S4_OBJECT(inp))
        SET_S4_OBJECT(ans);
    else
        UNSET_S4_OBJECT(ans);
    UNPROTECT(2);
}
Ejemplo n.º 6
0
/* Install every attribute of 'inp' on 'ans', except names, dim and
   dimnames, and then transfer the object flag and the S4 flag of 'inp'.
   It is an error for 'ans' to be NULL. */
void copyMostAttrib(SEXP inp, SEXP ans)
{
    SEXP node;

    if (ans == R_NilValue)
        error(_("attempt to set an attribute on NULL"));

    PROTECT(ans);
    PROTECT(inp);
    for (node = ATTRIB(inp); node != R_NilValue; node = CDR(node)) {
        SEXP tag = TAG(node);

        if (tag == R_NamesSymbol || tag == R_DimSymbol ||
            tag == R_DimNamesSymbol)
            continue;		/* shape-related attributes stay behind */
        installAttrib(ans, tag, CAR(node));
    }
    SET_OBJECT(ans, OBJECT(inp));
    if (IS_S4_OBJECT(inp))
        SET_S4_OBJECT(ans);
    else
        UNSET_S4_OBJECT(ans);
    UNPROTECT(2);
}
Ejemplo n.º 7
0
/*
 * Read genotype calls, one call per input line, from one or more text
 * files (opened with gzopen) into a raw matrix with one row per sample
 * and one column per SNP.
 *
 * Arguments (all R objects):
 *   Filenames  character vector of input file names
 *   Sample_id  character vector of sample IDs, or NULL; if NULL each file
 *              is taken to hold a single sample
 *   Snp_id     character vector of SNP IDs, or NULL; if NULL each file is
 *              taken to hold a single SNP
 *   Diploid    NULL, or a logical vector with one element per sample; when
 *              given, the result is an "XSnpMatrix" with a "diploid" slot
 *   Fields     named integer vector of field positions (counted from 1);
 *              recognised names are sample, snp, genotype, allele1,
 *              allele2 and confidence
 *   Codes      "nucleotide", or the genotype codes (three, or five when
 *              Diploid is given) or the two allele codes
 *   Threshold  numeric confidence threshold
 *   Lower      logical; if true, calls whose confidence is below the
 *              threshold are rejected, otherwise those above it
 *   Sep        field separator (first character used), NULL for a blank
 *   Comment    comment character (first character used), NULL for none
 *   Skip       integer number of lines to skip at the top of each file
 *   Simplify   logical of length one or two: whether simplify_names() is
 *              applied to the row names and to the column names
 *   Verbose    logical; print progress information
 *   In_order   logical; if true the input is matched sequentially against
 *              the ID vectors, otherwise IDs are looked up in name indexes
 *   Every      integer; with Verbose, report progress every so many lines
 *
 * Returns the raw matrix, with dimnames, class "SnpMatrix" or
 * "XSnpMatrix" (package "snpStats") and the S4 flag set.
 *
 * NOTE(review): error() calls made while a file is open do not close the
 * gzFile or destroy the name indexes first -- confirm whether that matters.
 */
SEXP insnp_new(const SEXP Filenames, const SEXP Sample_id, const SEXP Snp_id,
               const SEXP Diploid, const SEXP Fields,
               const SEXP Codes, const SEXP Threshold, const SEXP Lower,
               const SEXP Sep, const SEXP Comment, const SEXP Skip,
               const SEXP Simplify, const SEXP Verbose,
               const SEXP In_order, const SEXP Every){

  /* Process arguments */

  if (TYPEOF(Verbose)!=LGLSXP)
    error("Argument type error: Verbose");
  if (length(Verbose)>1)
    warning("Only first element of argument used: Verbose");
  int verbose = *INTEGER(Verbose);

  if (TYPEOF(In_order)!=LGLSXP)
    error("Argument type error: In_order");
  if (length(In_order)>1)
    warning("Only first element of argument used: In_order");
  int in_order = *INTEGER(In_order);

  if (TYPEOF(Filenames)!=STRSXP)
    error("Argument type error: Filenames");
  int Nfile = length(Filenames);

  int Nsample=0;
  if (TYPEOF(Sample_id)==STRSXP)
    Nsample = length(Sample_id);
  else if (TYPEOF(Sample_id)!=NILSXP)
    error("Argument type error: Sample_id");

  int Nsnp=0;
  if (TYPEOF(Snp_id)==STRSXP)
    Nsnp = length(Snp_id);
  else if (TYPEOF(Snp_id)!=NILSXP)
    error("Argument type error: Snp_id");

  /* file is 1 = sample, 2 = snp, 0 = irrelevant */

  int file_is = 0;
  if (!Nsample) {
    if (Nsnp) {
      Nsample = Nfile;
      Rprintf("Each file is assumed to concern a single sample\n");
      Rprintf("(Sample IDs are assumed to be included in filenames)\n");
      file_is = 1;
    }
    else
      error("No sample or SNP IDs specified");
  }
  else if (!Nsnp) {
    Nsnp = Nfile;
    Rprintf("Each file is assumed to concern a single SNP\n");
    Rprintf("(SNP IDs are assumed to be included in filenames)\n");
    file_is = 2;
  }

  /* If not in order, set up hash tables */

  index_db sample_index = NULL;
  index_db snp_index = NULL;
  if (!in_order) {
    if (file_is != 1)
      sample_index = create_name_index(Sample_id);
    if (file_is != 2)
      snp_index = create_name_index(Snp_id);
  }

  int *diploid=NULL;
  if (TYPEOF(Diploid)==LGLSXP) {
    if (length(Diploid)!=Nsample)
      error("Argument length error: diploid argument");
    diploid = LOGICAL(Diploid);
  }
  else if (TYPEOF(Diploid)!=NILSXP)
    error("Argument type error: diploid argument");

  if (TYPEOF(Fields)!=INTSXP)
    error("Argument type error: Fields");
  int *fields = INTEGER(Fields);
  /* Field positions; zero means "field not present" */
  int fsamp=0, fsnp=0, fgt=0, fa1=0, fa2=0, fconf=0;
  SEXP Fnames = getAttrib(Fields, R_NamesSymbol);
  if (TYPEOF(Fnames)==NILSXP)
    error("Argument error: Fields argument has no names");
  int fmax = 0;
  int Nfield = length(Fields);
  for (int i=0; i<Nfield; i++) {
    const char *fname = CHAR(STRING_ELT(Fnames, i));
    int fi = fields[i];
    if (!strcmp(fname, "sample"))
      fsamp = fi;
    else if (!strcmp(fname, "snp"))
      fsnp = fi;
    else if (!strcmp(fname, "genotype"))
      fgt = fi;
    else if (!strcmp(fname, "allele1"))
      fa1 = fi;
    else if (!strcmp(fname, "allele2"))
      fa2 = fi;
    else if (!strcmp(fname, "confidence"))
      fconf = fi;
    else
      error("Unrecognized input field name: %s", fname);
    if (fi>fmax)
      fmax = fi;
  }
  if (verbose) {
    Rprintf("Reading one call per input line\n");
    if (fsamp) {
      Rprintf("   Sample id is in field %d", fsamp);
      if (file_is==1)
        Rprintf(" (ignored)\n");
      else
        Rprintf("\n");
    }
    if (fsnp) {
      Rprintf("   SNP id is in field %d", fsnp);
      if (file_is==2)
        Rprintf(" (ignored)\n");
      else
        Rprintf("\n");
    }
    if (fgt)
      Rprintf("   Genotype is in field %d\n", fgt);
    if (fa1)
      Rprintf("   Allele 1 is in field %d\n", fa1);
    if (fa2)
      Rprintf("   Allele 2 is in field %d\n", fa2);
    if (fconf)
      Rprintf("   Confidence score is in field %d\n", fconf);
  }

  if (file_is==1)
    fsamp = 0;
  else if (file_is==2)
    fsnp = 0;


  /* Allele or genotype coding? */

  int gcoding;
  if (fgt) {
    if (fa1 || fa2)
      error("Coding must be by genotype OR allele, not both");
    gcoding = 1;
  }
  else {
    /* Neither allele field given: nothing to read genotypes from.  (This
       test used to repeat the next one, so the second message could
       never be issued.) */
    if (!(fa1 || fa2))
      error("No genotype or allele field(s) specified");
    if (!(fa1 && fa2))
      error("Field positions for both alleles must be specified");
    gcoding = 0;
  }

  int nuc = 0;
  if (TYPEOF(Codes)!=STRSXP)
    error("Argument type error: Codes");
  if (length(Codes)==1) {
    SEXP Code = STRING_ELT(Codes, 0);
    const char *code = CHAR(Code);
    if (!strcmp(code, "nucleotide"))
      nuc = 1;
    else
      error("Unrecognized coding: %s", code);
  }
  else {
    int ncode = length(Codes);
    if (gcoding) {
      if (diploid) {
        if (ncode!=5) {
          if (ncode==3)
            warning("Genotype coding for X: haploid genotypes are assumed to be coded as homozygous");
          else
            error("Genotype coding for X.snp: three or five genotype codes must be specified");
        }
      }
      else {
        if (ncode!=3)
          error("Genotype coding: three genotype codes must be specified");
      }
    }
    else {
      if (ncode!=2)
        error("Allele coding: two allele codes must be specified");
    }
  }

  if (TYPEOF(Threshold)==NILSXP && !fconf)
    error("Argument type error: no threshold argument");
  if (TYPEOF(Threshold)!=REALSXP)
    error("Argument type error: Threshold");
  double threshold = *REAL(Threshold);
  /* NA is a NaN, so it never compares equal to anything (itself
     included); it has to be detected with ISNA() */
  if (fconf && ISNA(threshold))
    error("Confidence score is read but no threshold is set");

  if (TYPEOF(Lower)!=LGLSXP)
    error("Argument type error: Lower");
  if (length(Lower)>1)
    warning("Only first element of argument used: Lower");
  int lower = *INTEGER(Lower);

  char sep = ' ';
  if (TYPEOF(Sep)==STRSXP) {
    if (length(Sep)>1)
      warning("Only first element of argument used: Sep");
    const char *c = CHAR(STRING_ELT(Sep, 0));
    if (strlen(c)>1)
      warning("Only first character used: Sep");
    sep = c[0];
  }
  else if (TYPEOF(Sep)!=NILSXP)
    error("Argument type error: Sep");

  char comment = (char) 0;
  if (TYPEOF(Comment)==STRSXP) {
    if (length(Comment)>1)
      warning("Only first element of argument used: Comment");
    const char *c = CHAR(STRING_ELT(Comment, 0));
    if (strlen(c)>1)
      warning("Only first character used: Comment");
    comment = c[0];
  }
  else if (TYPEOF(Comment)!=NILSXP)
    error("Argument type error: Comment");

  int skip = 0;
  if (TYPEOF(Skip)==INTSXP) {
    if (length(Skip)>1)
      warning("Only first element used: Skip");
    skip = INTEGER(Skip)[0];
  }
  else if (TYPEOF(Skip)!=NILSXP)
    error("Argument type error: Skip");

  if (TYPEOF(Simplify)!=LGLSXP)
    error("Argument type error: Simplify");
  int nsimplify = length(Simplify);
  if (nsimplify<1 || nsimplify>2)
    error("Argument length error: Simplify");
  /* Work on a local copy: a length-one argument has no second element to
     write to, and the caller's vector should not be modified anyway */
  int simplify[2];
  simplify[0] = LOGICAL(Simplify)[0];
  simplify[1] = (nsimplify==2)? LOGICAL(Simplify)[1]: simplify[0];

  int every=0;
  if (TYPEOF(Every)==INTSXP) {
    if (length(Every)>1)
      warning("Only first element used: Every");
    every = INTEGER(Every)[0];
  }
  else if (TYPEOF(Every)!=NILSXP)
    error("Argument type error: Every");


  /* Create output object and initialise to zero */

  if (verbose) {
    if (diploid)
      Rprintf("Reading XSnpMatrix with %d rows and %d columns\n",
              Nsample, Nsnp);
    else
      Rprintf("Reading SnpMatrix with %d rows and %d columns\n",
              Nsample, Nsnp);
  }
  SEXP Result, Dimnames, Package, Class;
  PROTECT(Result = allocMatrix(RAWSXP, Nsample, Nsnp));
  PROTECT(Dimnames = allocVector(VECSXP, 2));
  if (simplify[0]) {
    SET_VECTOR_ELT(Dimnames, 0,
                   simplify_names(file_is==1? Filenames: Sample_id));
  }
  else {
    SET_VECTOR_ELT(Dimnames, 0,
                   duplicate(file_is==1? Filenames: Sample_id));
  }
  if (simplify[1]) {
    SET_VECTOR_ELT(Dimnames, 1,
                   simplify_names(file_is==2? Filenames: Snp_id));
  }
  else {
    SET_VECTOR_ELT(Dimnames, 1,
                   duplicate(file_is==2? Filenames: Snp_id));
  }
  setAttrib(Result, R_DimNamesSymbol, Dimnames);

  /* Class */

  PROTECT(Class = allocVector(STRSXP, 1));
  if (diploid) {
    R_do_slot_assign(Result, mkString("diploid"), Diploid);
    SET_STRING_ELT(Class, 0, mkChar("XSnpMatrix"));
  }
  else {
    SET_STRING_ELT(Class, 0, mkChar("SnpMatrix"));
  }
  PROTECT(Package = allocVector(STRSXP, 1));
  SET_STRING_ELT(Package, 0, mkChar("snpStats"));
  setAttrib(Class, install("package"), Package);
  classgets(Result, Class);
  SET_S4_OBJECT(Result);
  unsigned char *result = RAW(Result);
  memset(result, 0x00, Nsample*Nsnp);

  /* Read in data */

  char field[MAX_FLD];
  int Naccept = 0, Nreject = 0, Nocall = 0, Nskipped = 0, Nxerror = 0;
  /* Row (sample) and column (SNP) of the call currently being sought */
  int i_this = 0, j_this = 0;
  const char *this_sample=NULL, *this_snp=NULL;
  if (fsamp) {
    this_sample = CHAR(STRING_ELT(Sample_id, 0));
  }
  if (fsnp) {
    this_snp = CHAR(STRING_ELT(Snp_id, 0));
  }
  if (verbose) {
    Rprintf("                             Cumulative totals\n");
    Rprintf("                    -----------------------------------\n");
    Rprintf("    File     Line   Accepted Rejected  No call  Skipped    File name\n");
  }

  /* slowest varying 0 = don't know, 1 = sample, 2 = snp */

  int slowest = file_is, last=Nsample*Nsnp-1;
  int advance = 0, finished=0;

  for (int f=0; f<Nfile; f++) {
    /* Open input file */
    const char *filename = CHAR(STRING_ELT(Filenames, f));
    if (verbose) {
      int lfn = strlen(filename);
      if (lfn > 20) {
        Rprintf("%59s...%-17s\r", "", filename+lfn-17);
      }
      else
        Rprintf("%59s%-20s\r", "", filename);
    }
    gzFile infile = gzopen(filename, "rb");
    if (!infile) {
      warning("Failure to open input file: %s", filename);
      continue;
    }
    int fterm = 2, line = 0, found_in_file = 0;
    /* Skip any header lines */
    for (int i=0; i<skip; i++) {
      line++;
      if (skip_to_eol(infile)==3)
        error("End-of-file reached on line %d", line);
    }
    Nskipped += skip;
    /* Read data lines */

    while (fterm!=3) {

      /* Read a line */

      line++;
      if (verbose && every && !(line % every))
        Rprintf("%8d %8d %10d %8d %8d %8d\r",
                f+1, line, Naccept, Nreject, Nocall, Nskipped);
      int genotype=0, allele1=0, allele2=0;
      /* wanted is coded as:
         1  if this call is to be accepted
         0  if it is not wanted (or a comment line)
         -1 if rejected due to insufficient confidence
         -2 coded as no-call
      */
      int wanted = 1;
      char sampid[MAX_FLD], snpid[MAX_FLD];
      char gtype1[MAX_FLD], gtype2[MAX_FLD];
      char cscore[MAX_FLD];
      sampid[0] = snpid[0] = cscore[0] = (char) 0;
      gtype1[0] = gtype2[0] = (char) 0;
      for (int r=1; (r<=fmax); r++) {
        fterm = next_field(infile, sep, comment, '_', field, MAX_FLD);
        if (!fterm)
          error("Field overflow: line %d, field %d", line, r);
        if ((fterm>1) && (r<fmax)) {
          if (r==1 && !field[0]) {
            /* Empty line or comment line: abandon it.  Leaving the field
               loop here (rather than continuing it) ensures that no
               fields of the following line are consumed as part of
               this one. */
            wanted = 0;
            if (fterm==2)
              Nskipped++;
            break;
          }
          error("Incomplete line: %d (last field read: %d = %s)",
                line, r, field);
        }
        /* Save fields; strncpy() does not terminate the copy when the
           source fills the buffer, so terminate explicitly */
        if (r==fsamp) {
          strncpy(sampid, field, MAX_FLD-1);
          sampid[MAX_FLD-1] = (char) 0;
        }
        else if (r==fsnp) {
          strncpy(snpid, field, MAX_FLD-1);
          snpid[MAX_FLD-1] = (char) 0;
        }
        else if (r==fconf) {
          strncpy(cscore, field, MAX_FLD-1);
          cscore[MAX_FLD-1] = (char) 0;
        }
        else if (r==fgt || r==fa1) {
          strncpy(gtype1, field, MAX_FLD-1);
          gtype1[MAX_FLD-1] = (char) 0;
        }
        else if (r==fa2) {
          strncpy(gtype2, field, MAX_FLD-1);
          gtype2[MAX_FLD-1] = (char) 0;
        }
        else {
          /* skip field */
        }
      } /* Matches: for (int r=1; (r<=fmax); r++) { */

      if (!wanted)
        continue;

      /* Discard any further fields */

      if (fterm<2) {
        fterm = skip_to_eol(infile);
      }

      /* Find next target and check matches */

      int match_sample, match_snp;
      if (in_order) {

        /* Advance to next target read */

        if (advance) {

          /*
             If unknown, determine sort order by seeing which indicator
             has changed
          */

          if (!slowest) {
            if (strcmp(this_sample, sampid))
              slowest = 2; /* sample fastest, SNP slowest */
            else if (strcmp(this_snp, snpid))
              slowest = 1; /* SNP fastest, sample slowest */
            else
              error("Error in input file sort order");
          }

          /* Now advance fastest varying indicator */

          if (slowest==1) {
            j_this++;
            if (j_this==Nsnp) {
              j_this = 0;
              if (fsamp) {
                i_this++;
                if (i_this==Nsample) {
                  finished = 1;
                  break;
                }
                else
                  this_sample =  CHAR(STRING_ELT(Sample_id, i_this));
              }
            }
            this_snp =  CHAR(STRING_ELT(Snp_id, j_this));
          }
          else {
            i_this++;
            if (i_this==Nsample) {
              i_this = 0;
              if (fsnp) {
                j_this++;
                if (j_this==Nsnp) {
                  finished = 1;
                  break;
                }
                else
                  this_snp =  CHAR(STRING_ELT(Snp_id, j_this));
              }
            }
            this_sample =  CHAR(STRING_ELT(Sample_id, i_this));
          }
        }

        /* Does current line match current target? */

        match_sample = file_is==1 || !strcmp(this_sample, sampid);
        match_snp = file_is==2 || !strcmp(this_snp, snpid);
      }

      else { /* Not in order */

        /* NOTE(review): after a failed lookup this_sample (this_snp) is
           NULL and i_this (j_this) stays negative, so the lookup is not
           repeated for later lines -- confirm this is intended. */

        if (file_is!=1) {
          if (this_sample && strcmp(this_sample, sampid))
            i_this = index_lookup(sample_index, sampid);
          if (i_this >= 0) {
            this_sample =  CHAR(STRING_ELT(Sample_id, i_this));
            match_sample = 1;
          }
          else {
            this_sample = NULL;
            match_sample = 0;
          }
        }
        else {
          match_sample = 1;
          i_this = f;
        }
        if (file_is!=2) {
          if (this_snp && strcmp(this_snp, snpid))
            j_this = index_lookup(snp_index, snpid);
          if (j_this >= 0) {
            this_snp = CHAR(STRING_ELT(Snp_id, j_this));
            match_snp = 1;
          }
          else {
            this_snp = NULL;
            match_snp = 0;
          }
        }
        else {
          match_snp = 1;
          j_this = f;
        }
      }

      if (match_sample && match_snp) {

        /* Next target read found in file(s) */

        found_in_file++;
        /* Position in the result matrix, which is stored by column */
        int ij_this = j_this*Nsample + i_this;
        finished = (ij_this==last);

        /* Check confidence score */

        if (fconf) {
          double conf;
          if (sscanf(cscore, "%lf", &conf)!=1)
            error("Failure to read confidence score: line %d", line);
          if ((lower && conf<threshold) || (!lower && conf>threshold)) {
            wanted = -1;
            Nreject++;
            if (finished)
              break;
            else
              advance = 1;
            continue;
          }
        }

        /* Decode genotype */

        int which;
        if (gcoding) {
          if (nuc) {
            switch (strlen(gtype1)) {
            case 0:
              allele1 = allele2 = 0;
              break;
            case 1:
              allele1 = allele2 = nucleotide(gtype1[0]);
              break;
            case 2:
              allele1 = nucleotide(gtype1[0]);
              allele2 = nucleotide(gtype1[1]);
              break;
            default:
              error("Nucleotide coded genotype should be 2 character string: line %d", line);
            }
          }
          else {
            which = str_inlist(Codes, gtype1);
            if (!which)
              genotype = 0;
            else if (which>3)
              genotype = 2*which - 7;
            else
              genotype = which;
          }
        }
        else {
          if (nuc) {
            allele1 = nucleotide(gtype1[0]);
            allele2 = nucleotide(gtype2[0]);
          }
          else {
            allele1 = str_inlist(Codes, gtype1);
            allele2 = str_inlist(Codes, gtype2);
          }
        }

        /* Successful read, store genotype in result[i_this, j_this] */

        if (nuc || !gcoding) {
          /* Order the two alleles, then combine them into one code */
          if (allele2 < allele1) {
            genotype = allele2;
            allele2 = allele1;
            allele1 = genotype;
          }
          if (allele1 && allele2)
            genotype = allele1 + (allele2*(allele2-1))/2;
          else
            genotype = 0;
        }
        if (genotype) {
          if (diploid && !diploid[i_this] && (genotype==2))
            Nxerror++;
          else {
            Naccept++;
            result[ij_this] = (unsigned char) genotype;
          }
        }
        else {
          wanted = -2;
          Nocall++;
        }

        /* Flag need to advance to next target */

        advance = 1;
      } /* matches if (match_sample && match_snp) { */
      else {
        Nskipped++;

        /* Flag no advance to next target */

        advance = 0;
      }
      if (finished)
        break;
    } /* matches: while (fterm!=3) { */
    if (file_is==1)
      i_this++;
    if (file_is==2)
      j_this++;
    if (verbose) {
      Rprintf("%8d %8d %10d %8d %8d %8d\r", f+1, line, Naccept, Nreject, Nocall, Nskipped);
    }
    if(!found_in_file)
      warning("No calls found in file %s", filename);
    gzclose(infile);
    if (finished)
      break;
  }

  /* Warnings */

  if (in_order && !finished)
    warning("End of data reached before search completed");
  if (Nxerror)
    warning("%d haploid genotypes were coded as heterozygous; set to NA", Nxerror);
  if (Nskipped)
    warning("%d lines of input file(s) were skipped", Nskipped);

  /* Report */

  if (verbose)
    Rprintf("\n");
  Rprintf("%d genotypes successfully read\n", Naccept);
  if (Nreject)
    Rprintf("%d genotypes were rejected due to low confidence\n", Nreject);
  if (Nocall)
    Rprintf("%d genotypes were not called\n", Nocall);
  if (Nsample*Nsnp > Naccept+Nxerror+Nreject+Nocall)
    Rprintf("%d genotypes could not be found on input file(s)\n",
            Nsample*Nsnp - Naccept - Nreject - Nxerror-Nocall);
  if (nuc) {
    if (verbose)
      Rprintf("Recasting and checking nucleotide coding\n");
    int none_snps = recode_snp(result, Nsample, Nsnp);
    if (none_snps) {
      Rprintf("%d polymorphisms were not SNPs and have been set to NA ",
              none_snps);
      Rprintf("(see warnings for details)\n");
    }
  }
  UNPROTECT(4);

  /* Destroy hash indexes */

  if (sample_index)
    index_destroy(sample_index);
  if (snp_index)
    index_destroy(snp_index);

  return Result;
}
Ejemplo n.º 8
0
/* Subsetting with a single index; this also covers 1D arrays and matrix
   indexing of arrays.

   x     the object being subsetted
   s     the subscript; R_MissingArg yields a duplicate of x
   call  the call, passed on to the helpers that report errors

   The result holds the selected elements together with the matching
   names (for a 1D array its only dimnames component) and, if x carries a
   list of srcrefs, the matching srcrefs. */
static SEXP VectorSubset(SEXP x, SEXP s, SEXP call)
{
    R_xlen_t stretch = 1;
    SEXP dims, indx, result, src, sub;

    if (s == R_MissingArg)
        return duplicate(x);

    PROTECT(s);
    dims = getAttrib(x, R_DimSymbol);

    /* Matrix subscripting of an array: turn the subscript matrix into an
       ordinary subscript vector, keeping it protected. */
    if (isMatrix(s) && isArray(x) && ncols(s) == length(dims)) {
        if (isString(s)) {
            s = strmat2intmat(s, GetArrayDimnames(x), call);
            UNPROTECT(1);
            PROTECT(s);
        }
        if (isInteger(s) || isReal(s)) {
            s = mat2indsub(dims, s, call);
            UNPROTECT(1);
            PROTECT(s);
        }
    }

    /* Integer subscripts in the range 1:length(x) */
    PROTECT(indx = makeSubscript(x, s, &stretch, call));
    const R_xlen_t n = XLENGTH(indx);

    /* Allocate the result.  It is not protected here because
       ExtractSubset does not allocate. */
    const int mode = TYPEOF(x);
    result = allocVector(mode, n);
    if (mode == VECSXP || mode == EXPRSXP) {
        /* the elements are shared with x rather than duplicated, so be
           conservative and mark the result as NAMED = 2 */
        SET_NAMED(result, 2);
    }
    PROTECT(result = ExtractSubset(x, result, indx, call));

    if (result == R_NilValue) {
        UNPROTECT(3);
        return result;
    }

    /* Names: the names attribute, or for a 1D array its row names */
    src = getAttrib(x, R_NamesSymbol);
    if (src == R_NilValue && isArray(x) &&
        LENGTH(getAttrib(x, R_DimNamesSymbol)) == 1) {
        src = getAttrib(x, R_DimNamesSymbol);
        if (src != R_NilValue)
            src = GetRowNames(src);
    }
    if (src != R_NilValue) {
        PROTECT(src);
        sub = allocVector(TYPEOF(src), n);
        PROTECT(sub); /* seems unneeded */
        sub = ExtractSubset(src, sub, indx, call);
        setAttrib(result, R_NamesSymbol, sub);
        UNPROTECT(2); /* src, sub */
    }

    /* Source references, when stored as a list */
    src = getAttrib(x, R_SrcrefSymbol);
    if (src != R_NilValue && TYPEOF(src) == VECSXP) {
        sub = allocVector(VECSXP, n);
        PROTECT(sub); /* seems unneeded */
        sub = ExtractSubset(src, sub, indx, call);
        setAttrib(result, R_SrcrefSymbol, sub);
        UNPROTECT(1);
    }

    /* FIXME:  this is wrong, because the slots are gone, so result is an invalid object of the S4 class! JMC 3/3/09 */
#ifdef _S4_subsettable
    if(IS_S4_OBJECT(x)) { /* e.g. contains = "list" */
        setAttrib(result, R_ClassSymbol, getAttrib(x, R_ClassSymbol));
        SET_S4_OBJECT(result);
    }
#endif

    UNPROTECT(3);
    return result;
}
Ejemplo n.º 9
0
Archivo: seq.c Proyecto: kalibera/rexp
/* This is a primitive SPECIALSXP with internal argument matching.

   Implements rep(x, times, length.out, each, ...) for the case that no
   method is dispatched.  'length.out', when given, takes precedence over
   'times'.  A zero-length x is returned as a copy (lengthened to
   length.out if that is positive).  Names of x are replicated along with
   the values.

   Errors: x is a pairlist or not a vector; negative length.out or each;
   invalid 'times' (NA, negative, of the wrong length, or leading to a
   result longer than a vector may be). */
SEXP attribute_hidden do_rep(SEXP call, SEXP op, SEXP args, SEXP rho)
{
    SEXP ans, x, times = R_NilValue /* -Wall */;
    /* nprotect counts both versions of args plus the result */
    int each = 1, nprotect = 3;
    R_xlen_t i, lx, len = NA_INTEGER, nt;
    static SEXP do_rep_formals = NULL;

    /* includes factors, POSIX[cl]t, Date */
    if (DispatchOrEval(call, op, R_RepCharSXP, args, rho, &ans, 0, 0))
        return(ans);

    /* This has evaluated all the non-missing arguments into ans */
    PROTECT(args = ans);

    /* This is a primitive, and we have not dispatched to a method
       so we manage the argument matching ourselves.  We pretend this is
       rep(x, times, length.out, each, ...)
    */
    if (do_rep_formals == NULL) {
        do_rep_formals = CONS(R_NilValue, list4(R_NilValue, R_NilValue, R_NilValue, R_NilValue));
        R_PreserveObject(do_rep_formals);
        SET_TAG(do_rep_formals, R_XSymbol);
        SET_TAG(CDR(do_rep_formals), install("times"));
        SET_TAG(CDDR(do_rep_formals), R_LengthOutSymbol);
        SET_TAG(CDR(CDDR(do_rep_formals)), install("each"));
        SET_TAG(CDDR(CDDR(do_rep_formals)), R_DotsSymbol);
    }
    PROTECT(args = matchArgs(do_rep_formals, args, call));

    x = CAR(args);
    /* supported in R 2.15.x */
    if (TYPEOF(x) == LISTSXP)
        errorcall(call, "replication of pairlists is defunct");

    lx = xlength(x);

    double slen = asReal(CADDR(args));
    if (R_FINITE(slen)) {
        if(slen < 0)
            errorcall(call, _("invalid '%s' argument"), "length.out");
        len = (R_xlen_t) slen;
    } else {
        len = asInteger(CADDR(args));
        if(len != NA_INTEGER && len < 0)
            errorcall(call, _("invalid '%s' argument"), "length.out");
    }
    if(length(CADDR(args)) != 1)
        warningcall(call, _("first element used of '%s' argument"),
                    "length.out");

    each = asInteger(CADDDR(args));
    if(each != NA_INTEGER && each < 0)
        errorcall(call, _("invalid '%s' argument"), "each");
    if(length(CADDDR(args)) != 1)
        warningcall(call, _("first element used of '%s' argument"), "each");
    if(each == NA_INTEGER) each = 1;

    if(lx == 0) {
        if(len > 0 && x == R_NilValue)
            warningcall(call, "'x' is NULL so the result will be NULL");
        SEXP a;
        PROTECT(a = duplicate(x));
        if(len != NA_INTEGER && len > 0) a = xlengthgets(a, len);
        UNPROTECT(3);
        return a;
    }
    if (!isVector(x))
        errorcall(call, "attempt to replicate an object of type '%s'",
                  type2char(TYPEOF(x)));

    /* So now we know x is a vector of positive length.  We need to
       replicate it, and its names if it has them. */

    /* First find the final length using 'times' and 'each' */
    if(len != NA_INTEGER) { /* takes precedence over times */
        nt = 1;
    } else {
        /* The result length is accumulated in double and range-checked
           before it is stored, so that neither the product nor the sum
           below can overflow R_xlen_t. */
        double dlen = 0;
        if(CADR(args) == R_MissingArg) PROTECT(times = ScalarInteger(1));
        else PROTECT(times = coerceVector(CADR(args), INTSXP));
        nprotect++;
        nt = XLENGTH(times);
        if(nt != 1 && (double) nt != (double) lx * each)
            errorcall(call, _("invalid '%s' argument"), "times");
        if(nt == 1) {
            int it = INTEGER(times)[0];
            if (it == NA_INTEGER || it < 0)
                errorcall(call, _("invalid '%s' argument"), "times");
            dlen = (double) lx * it * each;
        } else {
            for(i = 0; i < nt; i++) {
                int it = INTEGER(times)[i];
                if (it == NA_INTEGER || it < 0)
                    errorcall(call, _("invalid '%s' argument"), "times");
                dlen += it;
            }
        }
        if (dlen > R_XLEN_T_MAX)
            errorcall(call, _("invalid '%s' argument"), "times");
        len = (R_xlen_t) dlen;
    }

    if(len > 0 && each == 0)
        errorcall(call, _("invalid '%s' argument"), "each");

    SEXP xn = getNamesAttrib(x);

    PROTECT(ans = rep4(x, times, len, each, nt));
    if (length(xn) > 0)
        setAttrib(ans, R_NamesSymbol, rep4(xn, times, len, each, nt));

#ifdef _S4_rep_keepClass
    if(IS_S4_OBJECT(x)) { /* e.g. contains = "list" */
        setAttrib(ans, R_ClassSymbol, getClassAttrib(x));
        SET_S4_OBJECT(ans);
    }
#endif
    UNPROTECT(nprotect);
    return ans;
}
Ejemplo n.º 10
0
/* Duplicate a single object; 'deep' is handed on to the helpers and
   macros that copy list elements, vector elements and attributes.

   Objects of type NULL, symbol, environment, special, builtin, external
   pointer, byte code, weak reference, CHARSXP and promise are returned
   as they are.  For every other supported type a new object is built and
   the object and S4 flags of 's' are transferred to it.  Both duplication
   counters are incremented on every call. */
static SEXP duplicate1(SEXP s, Rboolean deep)
{
    SEXP t;

    duplicate1_elts++;
    duplicate_elts++;

    switch (TYPEOF(s)) {
    /* types that are never copied */
    case NILSXP:
    case SYMSXP:
    case ENVSXP:
    case SPECIALSXP:
    case BUILTINSXP:
    case EXTPTRSXP:
    case BCODESXP:
    case WEAKREFSXP:
    case CHARSXP:
    case PROMSXP:
        return s;

    case CLOSXP:
        PROTECT(s);
        PROTECT(t = allocSExp(CLOSXP));
        SET_FORMALS(t, FORMALS(s));
        SET_BODY(t, BODY(s));
        SET_CLOENV(t, CLOENV(s));
        DUPLICATE_ATTRIB(t, s, deep);
        if (NOJIT(s)) SET_NOJIT(t);
        if (MAYBEJIT(s)) SET_MAYBEJIT(t);
        UNPROTECT(2);
        break;

    case LISTSXP:
        PROTECT(s);
        t = duplicate_list(s, deep);
        UNPROTECT(1);
        break;

    /* copied as a list, then given the type of s again */
    case LANGSXP:
    case DOTSXP:
        PROTECT(s);
        PROTECT(t = duplicate_list(s, deep));
        SET_TYPEOF(t, TYPEOF(s));
        DUPLICATE_ATTRIB(t, s, deep);
        UNPROTECT(2);
        break;

    case EXPRSXP:
    case VECSXP:
    {
        const R_xlen_t len = XLENGTH(s);
        R_xlen_t k;

        PROTECT(s);
        PROTECT(t = allocVector(TYPEOF(s), len));
        for (k = 0; k < len; k++)
            SET_VECTOR_ELT(t, k, duplicate_child(VECTOR_ELT(s, k), deep));
        DUPLICATE_ATTRIB(t, s, deep);
        COPY_TRUELENGTH(t, s);
        UNPROTECT(2);
        break;
    }

    case LGLSXP:
        DUPLICATE_ATOMIC_VECTOR(int, LOGICAL, t, s, deep);
        break;
    case INTSXP:
        DUPLICATE_ATOMIC_VECTOR(int, INTEGER, t, s, deep);
        break;
    case REALSXP:
        DUPLICATE_ATOMIC_VECTOR(double, REAL, t, s, deep);
        break;
    case CPLXSXP:
        DUPLICATE_ATOMIC_VECTOR(Rcomplex, COMPLEX, t, s, deep);
        break;
    case RAWSXP:
        DUPLICATE_ATOMIC_VECTOR(Rbyte, RAW, t, s, deep);
        break;
    case STRSXP:
        /* direct copying and bypassing the write barrier is OK since
           t was just allocated and so it cannot be older than any of
           the elements in s.  LT */
        DUPLICATE_ATOMIC_VECTOR(SEXP, STRING_PTR, t, s, deep);
        break;

    case S4SXP:
        PROTECT(s);
        PROTECT(t = allocS4Object());
        DUPLICATE_ATTRIB(t, s, deep);
        UNPROTECT(2);
        break;

    default:
        UNIMPLEMENTED_TYPE("duplicate", s);
        t = s;/* for -Wall */
    }

    /* surely transferring the flags only makes sense for equal types */
    if (TYPEOF(t) == TYPEOF(s)) {
        SET_OBJECT(t, OBJECT(s));
        if (IS_S4_OBJECT(s))
            SET_S4_OBJECT(t);
        else
            UNSET_S4_OBJECT(t);
    }
    return t;
}