Exemplo n.º 1
0
Arquivo: read.c Projeto: bitursa/maos
/**
 * MEX entry point: open a file with zfopen() and return what readdata()
 * reads from it.
 *
 * Right-hand side (inputs):
 *   prhs[0]  file name (converted to a C string with mx2str()).
 *   prhs[1]  optional `howmany`, passed through to readdata()
 *            (presumably the number of blocks to read -- confirm
 *            against readdata()).
 *   prhs[2]  optional `start`, starting block to read, given as a MATLAB
 *            index. It is converted to a C index only when howmany > 0.
 *
 * Left-hand side (outputs):
 *   plhs[0]  value returned by readdata().
 *   plhs[1]  optional second output, filled in by readdata() through its
 *            second argument.
 *
 * Any other argument count prints the usage text and returns without
 * touching the file.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]){
    file_t *fp;
    char *fn=NULL;
    int start=0, howmany=0;
    /* Reject an unsupported number of outputs before acquiring anything,
       so there is nothing to release on this path. */
    if(nlhs<0 || nlhs>2){
        usage();
        return;
    }
    switch(nrhs){
    case 3:
        start=(long)mxGetScalar(prhs[2]);//starting block to read. matlab index.
        /* fall through */
    case 2:
        howmany=(long)mxGetScalar(prhs[1]);
        /* fall through */
    case 1:
        fn=mx2str(prhs[0]);
        break;
    default:
        usage();
        /* Without a file name there is nothing to open. */
        return;
    }
    if(howmany>0){
        if(start>0){
            start--;//convert to C index.
        }
        if(start<0){
            start=0;
        }
    }
    fp=zfopen(fn,"rb");
    if(!fp){
        perror("zfopen");
        error("Unable to open file: %s\n", fn);
        /* Only reached if error() returns; release the name in that case. */
        free(fn);
        return;
    }
    free(fn);
    if(nlhs==2){
        plhs[0]=readdata(fp, &plhs[1], start, howmany);
    }else{
        /* nlhs is 0 or 1: only the primary output is requested. */
        plhs[0]=readdata(fp, NULL, start, howmany);
    }
    /* Only when the whole file was requested: warn if zfeof() reports a
       nonzero status afterwards. */
    if(start==0 && howmany==0){
        int res=zfeof(fp);
        if(res){
            warning("There is unread data: res=%d\n", res);
        }
    }
    zfclose(fp);
}
Exemplo n.º 2
0
/*
 * Reads the next FASTA or FASTQ entry from fp into e.
 *
 * Input is consumed with zfgets() in chunks of at most BLK_SIZE bytes.
 * To detect a line longer than one chunk, line[BLK_SIZE - 1] is set to
 * the sentinel '*' before a read: if the sentinel survives, the read did
 * not fill the buffer (this assumes zfgets() has fgets()-like semantics).
 *
 * If e->header is 0 the header character (HDR_FASTA or HDR_FASTQ) is
 * taken from the first entry and stored in e->header together with the
 * matching e->seq_delimiter; otherwise the entry must start with
 * e->header.
 *
 * On success e->name holds the first whitespace-delimited word of the
 * header line, e->seq / e->seq_len the sequence with whitespace removed
 * and, for FASTQ, e->qual the quality string of the same length.
 * e->line is advanced as lines are consumed. Buffers are enlarged with
 * grow_char_string() as needed.
 *
 * Returns 0 on success,
 *         1 if no line could be read while looking for the next entry,
 *        -1 on a malformed entry or if grow_char_string() fails.
 */
int fastaq_next(zfp *fp, fastq_entry_t *e) {
    char line[BLK_SIZE];
    size_t l, pos = 0;
    int discard = 0, more;

    /* Skip blank lines */
    line[BLK_SIZE - 1] = '*';
    do {
        if (NULL == zfgets(line, BLK_SIZE, fp)) return 1;
        e->line++;
    } while (*line == '\n');

    /* Read name; the header line may span several BLK_SIZE chunks */
    for (;;) {
        if (!discard) {
            char *start = line, *cp;
            if (0 == pos) { /* Check first char */
                if (e->header) {
                    if (*line != e->header) {
                        verror(ERR_WARN, "fastaq_next",
                               "Error: sequence name does not start with '%c' "
                               "at %s line %lu.",
                               e->header, e->fn, e->line);
                        return -1;
                    }
                } else {
                    if (*line != HDR_FASTA && *line != HDR_FASTQ) {
                        verror(ERR_WARN, "fastaq_next",
                               "Error: sequence name does not start with "
                               "'%c' or '%c' at %s line %lu.",
                               HDR_FASTA, HDR_FASTQ, e->fn, e->line);
                        return -1;
                    } else {
                        e->header = *line;
                        e->seq_delimiter = (*line == HDR_FASTA
                                            ? DELIM_FASTA : DELIM_FASTQ);
                    }
                }
                start++;
            }
            cp = start;
            /* <ctype.h> functions require a value representable as
               unsigned char; a plain (possibly negative) char is UB. */
            while (*cp && !isspace((unsigned char)*cp)) cp++;
            if (e->max_name_len < pos + cp - start + 1) {
                if (0 != grow_char_string(&e->name, &e->max_name_len,
                                          pos + cp - start + 1)) {
                    return -1;
                }
            }
            memcpy(e->name + pos, start, cp - start);
            pos += cp - start;
            e->name[pos] = '\0';
            /* Name ends at the first whitespace; skip the rest of the line */
            if (isspace((unsigned char)*cp)) discard = 1;
        }
        /* Sentinel intact or newline in the last slot: whole line consumed */
        if (line[BLK_SIZE - 1] == '*' || line[BLK_SIZE - 2] == '\n') break;
        line[BLK_SIZE - 1] = '*';
        if (NULL == zfgets(line, BLK_SIZE, fp)) {
            verror(ERR_WARN, "fastaq_next",
                   "Error: Unexpected end-of-file while reading sequence name "
                   "at %s line %lu", e->fn, e->line);
            return -1;
        }
    }
    if (*e->name == '\0') {
        verror(ERR_WARN, "fastaq_next",
               "Error: Sequence entry with no name at %s line %lu",
               e->fn, e->line);
        return -1;
    }

    /* Sequence: read until the next line starts with the delimiter.
       `more` is set while in the middle of an over-long line, where the
       next character is not a line start and must not be tested. */
    e->seq_len = 0;
    more = 0;
    while (!zfeof(fp) && (zfpeek(fp) != e->seq_delimiter || more)) {
        char *src = line, *dest;
        if (NULL == zfgets(line, BLK_SIZE, fp)) break;
        if (!more) e->line++;
        l = strlen(line);
        if (0 == l) {
            verror(ERR_WARN, "fastaq_next",
                   "Error: Unexpected NUL byte at %s line %lu\n",
                   e->fn, e->line);
            return -1;
        }
        if (e->max_seq_len < e->seq_len + l + 1) {
            if (0 != grow_char_string(&e->seq, &e->max_seq_len,
                                      e->seq_len + l + 1)) {
                return -1;
            }
        }
        for (dest = e->seq + e->seq_len; *src; src++) {
            if (!isspace((unsigned char)*src)) *dest++ = *src;
        }
        e->seq_len = dest - e->seq;
        more = line[l - 1] != '\n';
    }
    if (e->seq) e->seq[e->seq_len] = '\0';

    /* Done if fasta */
    if (e->header != HDR_FASTQ) return 0;

    /* + line: skip */
    line[BLK_SIZE - 1] = '*'; /* Sentinel */
    if (NULL == zfgets(line, BLK_SIZE, fp) || *line != DELIM_FASTQ) {
        verror(ERR_WARN, "fastaq_next",
               "Error: Expected '%c' got '%c' reading fastq entry %.1000s",
               DELIM_FASTQ, *line, e->name);
        return -1;
    }
    e->line++;
    while (line[BLK_SIZE - 1] != '*' && line[BLK_SIZE - 2] != '\n') {
        line[BLK_SIZE - 1] = '*';
        if (NULL == zfgets(line, BLK_SIZE, fp))
            return -1; /* eof */
    }

    /* Read quality, no more than e->seq_len chars */
    pos = 0;
    more = 0;
    while (pos < e->seq_len && !zfeof(fp)) {
        char *src = line, *dest;
        /* Re-arm the sentinel before every read, as the name and '+'
           loops do; otherwise a stale value from an earlier full-buffer
           read makes `more` wrong and e->line stops advancing. */
        line[BLK_SIZE - 1] = '*';
        if (NULL == zfgets(line, BLK_SIZE, fp))
            break;
        if (!more) e->line++;
        more = (line[BLK_SIZE - 1] != '*' && line[BLK_SIZE - 2] != '\n');
        l = strlen(line);
        if (e->max_qual_len < pos + l + 1) {
            if (0 != grow_char_string(&e->qual, &e->max_qual_len, pos + l + 1))
                return -1;
        }

        for (dest = e->qual + pos; *src; src++) {
            if (!isspace((unsigned char)*src)) *dest++ = *src;
        }
        pos = dest - e->qual;
    }
    if (e->qual)
        e->qual[pos] = '\0';
    if (pos != e->seq_len) {
        verror(ERR_WARN, "fastaq_next",
               "Error: differing number of sequence and quality "
               "characters for entry '%.1000s' at %s line %lu",
               e->name, e->fn, e->line);
        return -1;
    }

    return 0;
}