Esempio n. 1
0
/*
 * Count the lines of a gzip stream.
 *
 * The stream is rewound before the scan and again afterwards, so the caller
 * finds it positioned at the start. Lines longer than BUFSIZ-1 bytes arrive
 * in several pieces; only the piece carrying the '\n' is counted. A final
 * line without a trailing newline is counted when the last piece read is
 * longer than one character. Progress messages go to stderr.
 *
 * fd is handed unchanged to the zlib calls, i.e. it is used as the stream
 * handle itself.
 */
static cmph_uint32 count_nlfile_keys(gzFile *fd)
{
	cmph_uint32 count = 0;
	char buf[BUFSIZ];
	void *gz = fd; /* converts implicitly to whatever handle type zlib declares */

	gzrewind(gz);

	fprintf(stderr,"\nCounting Lines in File\n");

	while (!gzeof(gz))
	{
		size_t len;

		/* NULL means end of file or a read error: buf holds no new data,
		 * so stop instead of inspecting stale contents (which could count
		 * the last line twice or spin forever on an error). */
		if (gzgets(gz, buf, BUFSIZ) == NULL) break;

		len = strlen(buf);
		if (len == 0) continue; /* nothing to index; avoids buf[-1] */

		if (buf[len - 1] != '\n') {
			if (gzeof(gz) && len > 1) {++count; break;} /* last line without a \n */
			continue; /* partial piece of a long line */
		}
		++count;
	}
	gzrewind(gz);

	fprintf(stderr,"\nDone counting.  File is %u lines long\n",count);
	return count;
}
Esempio n. 2
0
u32 GZIPROMReaderSize(void * file)
{
	char useless[1024];
	u32 size = 0;

	/* FIXME this function should first save the current
	 * position and restore it after size calculation */
	gzrewind(file);
	while (gzeof (file) == 0)
		size += gzread(file, useless, 1024);
	gzrewind(file);

	return size;
}
Esempio n. 3
0
/*
    Measures the uncompressed length of a GZ stream by reading it from the
    first byte to the last. The stream is rewound before and after the scan.
*/
int gzsize(gzFile *gd)
{
    unsigned char chunk[0x10000];
    int total = 0;

    gzrewind(gd);
    for (;;) {
        int got = gzread(gd, chunk, 0x10000);

        if (got <= 0)
            break;              /* read error or nothing left */
        total += got;
        if (gzeof(gd))
            break;              /* that was the final chunk */
    }
    gzrewind(gd);

    return total;
}
Esempio n. 4
0
/* Rewind a compressed file; print a message and terminate the program if
   gzrewind() reports failure (-1). */
void gt_xgzrewind(gzFile file)
{
  int status = gzrewind(file);

  if (status != -1)
    return;

  fprintf(stderr, "cannot rewind compressed file\n");
  exit(EXIT_FAILURE);
}
Esempio n. 5
0
File: c_gz.c Progetto: amnh/poy5
/* Rewind the gzip stream extracted from `chan`. mlgz_error() is invoked with
   the stream when gzrewind() returns a negative status; the function itself
   returns Val_unit. */
value mlgz_gzrewind(value chan)
{
  gzFile stream = Gzfile_val(chan);
  int status = gzrewind(stream);

  if (status < 0)
    mlgz_error(stream);

  return Val_unit;
}
Esempio n. 6
0
File: parse.c Progetto: dcjones/cbgb
/* Put a FASTQ reader back at the beginning of its input: the underlying gzip
   stream is rewound, the text buffer is emptied, the cursor points at the
   start of that buffer and the parser state returns to STATE_ID1. */
void fastq_rewind(fastq_t* fqf)
{
    gzrewind(fqf->file);

    fqf->buf[0] = '\0';
    fqf->c      = fqf->buf;
    fqf->state  = STATE_ID1;
}
Esempio n. 7
0
/**
 * Opens a file for reading text lines. 
 * @param name File name
 * @return number of lines or -1 on error
 */
int input_lines_open(char *name)
{
    assert(name);
    const char *pattern;

    in = gzopen(name, "r");
    if (!in) {
        error("Could not open '%s' for reading", name);
        return -1;
    }

    /* Compile regular expression for label */
    config_lookup_string(&cfg, "input.lines_regex", &pattern);
    if (regcomp(&re, pattern, REG_EXTENDED) != 0) {
        error("Could not compile regex for label");
        return -1;
    }

    /* Count lines in file (I hope this is buffered)*/
    int c = -1, prev, num_lines = 0;
    do {
        prev = c;
        c = gzgetc(in);
        if (c == '\n')
            num_lines++;
    } while(c != -1);

    if (prev >= 0 && prev != '\n') num_lines++;

    /* Prepare reading */
    gzrewind(in);
    line_num = 0;

    return num_lines;
}
Esempio n. 8
0
/*
 * Program entry point.
 *
 * Prints the copyright banner, allocates the global target (initialised to
 * match every MAC, IP and port), parses the command line and reads the
 * global header of the log file. Depending on the GBL.analyze,
 * GBL.connections and GBL.decode flags it then analyzes the log, builds and
 * prints the connection table, decodes the connections, or displays the log
 * content.
 *
 * Returns 0 on every path that returns; an invalid log file is reported
 * through FATAL_ERROR.
 */
int main(int argc, char *argv[])
{
   int ret;

   /* etterlog copyright */
   fprintf(stdout, "\n" EC_COLOR_BOLD "%s %s" EC_COLOR_END " copyright %s %s\n\n", 
                      GBL_PROGRAM, EC_VERSION, EC_COPYRIGHT, EC_AUTHORS);
  
  
   /* allocate the global target */
   SAFE_CALLOC(GBL_TARGET, 1, sizeof(struct target_env));
  
   /* initialize to all target */
   GBL_TARGET->all_mac = 1;
   GBL_TARGET->all_ip = 1;
   GBL_TARGET->all_port = 1;
   
   /* getopt related parsing...  */
   parse_options(argc, argv);

   /* get the global header */
   ret = get_header(&GBL.hdr);
   /* only -EINVALID is treated as fatal here; other return values are not inspected */
   if (ret == -EINVALID)
      FATAL_ERROR("Invalid log file");
   
   /* header summary goes to stderr; ctime() supplies the trailing newline of the timestamp line */
   fprintf(stderr, "Log file version    : %s\n", GBL.hdr.version);
   fprintf(stderr, "Timestamp           : %s", ctime((time_t *)&GBL.hdr.tv.tv_sec));
   fprintf(stderr, "Type                : %s\n\n", (GBL.hdr.type == LOG_PACKET) ? "LOG_PACKET" : "LOG_INFO" );
  
   
   /* analyze the logfile */
   if (GBL.analyze)
      analyze();

   /* rewind the log file and skip the global header */
   /* (the return value of this second get_header() call is not checked) */
   gzrewind(GBL_LOG_FD);
   get_header(&GBL.hdr);
   
   /* create the connection table (respecting the filters) */
   if (GBL.connections)
      conn_table_create();

   /* display the connection table */
   if (GBL.connections && !GBL.decode)
      conn_table_display();

   /* extract files from the connections */
   if (GBL.decode)
      conn_decode();
   
   /* not interested in the content... only analysis */
   if (GBL.analyze || GBL.connections)
      return 0;
   
   /* display the content of the logfile */
   display();
   
   return 0;
}
Esempio n. 9
0
 // Rewinds the underlying gzip stream of a buffer that is not in write mode.
 // On success the get area is marked as fully consumed (all three pointers
 // derived from buf + bufsize); on failure the error flag is raised.
 void gzstreambuf::Reset()
 {
   Assert(!write);

   if (gzrewind(gz) == Z_OK)
   {
     setg(buf, buf + bufsize, buf + bufsize);
     return;
   }

   error = true;
 }
Esempio n. 10
0
File: bin.c Progetto: bitursa/maos
/**
   Reposition the file at its beginning. Plain files go through rewind();
   gzip files go through gzrewind(), and a non-zero result from it is
   reported with error().
*/
void zfrewind(file_t *fp) {
    if(!fp->isgzip) {
        rewind((FILE*)fp->p);
        return;
    }

    if(gzrewind((voidp)fp->p) != 0) {
        error("Failed to rewind\n");
    }
}
Esempio n. 11
0
/*
 * Opens the read file(s), selects the plain-text or gzip line readers and
 * counts the lines of the first file that do not begin with '#', '>', a
 * space, '\r' or '\n'. The first file is rewound afterwards.
 *
 * Returns twice the number of counted lines, or 0 when a file cannot be
 * opened. When no separate second file is used, the second handle aliases
 * the first one.
 */
int countAllReads(char *fileName1, char *fileName2, int compressed, 
		unsigned char pairedEnd) {
	char line[SEQ_MAX_LENGTH];
	int reads = 0;
	const int haveSecondFile = pairedEnd && fileName2 != NULL;

	if (compressed) {
		_r_gzfp1 = fileOpenGZ (fileName1, "r");
		if (_r_gzfp1 == NULL)
			return 0;

		if (haveSecondFile) {
			_r_gzfp2 = fileOpenGZ ( fileName2, "r" );
			if (_r_gzfp2 == NULL)
				return 0;
		} else {
			_r_gzfp2 = _r_gzfp1;
		}

		readFirstSeq = &readFirstSeqGZ;
		readSecondSeq = &readSecondSeqGZ;
	} else {
		_r_fp1 = fileOpen( fileName1, "r");
		if (_r_fp1 == NULL)
			return 0;

		if (haveSecondFile) {
			_r_fp2 = fileOpen ( fileName2, "r" );
			if (_r_fp2 == NULL)
				return 0;
		} else {
			_r_fp2 = _r_fp1;
		}

		readFirstSeq = &readFirstSeqTXT;
		readSecondSeq = &readSecondSeqTXT;
	}

	/* One pass over the first file, skipping marker and blank lines */
	while (readFirstSeq(line)) {
		switch (line[0]) {
		case '#':
		case '>':
		case ' ':
		case '\r':
		case '\n':
			break;
		default:
			reads++;
			break;
		}
	}

	if (compressed)
		gzrewind(_r_gzfp1);
	else
		rewind(_r_fp1);

	/* Upper bound on the number of sequences */
	return reads * 2;
}
Esempio n. 12
0
// Moves the stream back to its beginning: the bookkeeping positions and the
// EOF flag are reset, then the underlying gzip stream is rewound.
// Returns true on success and false when gzrewind() reports an error.
bool ZipFile::rewind() {
  assert(m_gzFile);
  seek(0);
  setWritePosition(0);
  setReadPosition(0);
  setPosition(0);
  setEof(false);
  // gzrewind() yields 0 on success and -1 on failure; pass that on instead
  // of unconditionally claiming success.
  return gzrewind(m_gzFile) == 0;
}
Esempio n. 13
0
// Moves the stream back to its beginning: the cached read/write positions and
// the EOF flag are reset, then the underlying gzip stream is rewound.
// Returns true on success and false when gzrewind() reports an error.
bool ZipFile::rewind() {
  assert(m_gzFile);
  seek(0);
  m_writepos = 0;
  m_readpos = 0;
  m_position = 0;
  m_eof = false;
  // gzrewind() yields 0 on success and -1 on failure; pass that on instead
  // of unconditionally claiming success.
  return gzrewind(m_gzFile) == 0;
}
Esempio n. 14
0
/*
 * Scan a FASTA/FASTQ file and pass records to size_filter() depending on
 * whether their name or comment matches the given regular expression.
 *
 * With `exclude` unset, matching records are handed to size_filter() and the
 * others are tallied as excluded; with `exclude` set the roles are swapped.
 * The format is detected from the first record (no quality string means
 * FASTA). When `just_count` is set the two totals are printed to stdout.
 *
 * Returns the sum of the values returned by size_filter(). The program exits
 * if the input file cannot be opened.
 */
int pull_by_re(char *input_file, pcre *re, pcre_extra *re_extra, int min, int max, int length, int exclude, int convert, int just_count) {
	gzFile fp;
	kseq_t *seq;
	int kept = 0;
	int dropped = 0;
	int is_fasta; /* 0 means fastq */

	/* open fasta file */
	fp = gzopen(input_file,"r");
	if (fp == NULL) {
		fprintf(stderr,"%s - Couldn't open fasta file %s\n",progname,input_file);
		exit(EXIT_FAILURE);
	}

	seq = kseq_init(fp);

	/* peek at the first record to determine the file type, then start over */
	kseq_read(seq);
	is_fasta = (seq->qual.s == NULL);
	gzrewind(fp);
	kseq_rewind(seq);

	if (verbose_flag) {
		if (!is_fasta)
			fprintf(stderr, "Input is FASTQ format\n");
		else
			fprintf(stderr, "Input is FASTA format\n");
	}

	/* walk every record and test its header against the regex */
	while (kseq_read(seq) >= 0) {
		int hit = search_header(re, re_extra, seq->name.s) || search_header(re, re_extra, seq->comment.s);

		/* a record is kept when the match result disagrees with `exclude` */
		if (hit != (exclude != 0))
			kept += size_filter(seq, is_fasta, min, max, length, convert, just_count);
		else
			dropped++;
	}
	kseq_destroy(seq);
	gzclose(fp); /* done reading file so close */

	if (just_count) {
		fprintf(stdout, "Total output: %i\n", kept);
		fprintf(stdout, "Total excluded: %i\n", dropped);
	}
	return kept;
}
Esempio n. 15
0
/**
 * Repositions an opened file at its beginning. Nothing happens when the
 * file number is smaller than 1.
 *
 * \param[in]  nFileIn  File number (1 .. maxNumFiles)
 */
void resetC(int *nFileIn) {
	const int slot = *nFileIn - 1; /* zero-based position in files[] */

	if (slot >= 0) {
#ifdef USE_ZLIB
		gzrewind(files[slot]);
#else
		/* rewind() does not work with rfio, so perform its two halves by hand */
		fseek(files[slot], 0L, SEEK_SET);
		clearerr(files[slot]);
#endif
	}
}
Esempio n. 16
0
//
// Determine the uncompressed length of a (possibly) gzipped file by reading
// it from start to end. The stream is rewound before and after the scan.
//
static int gzfilelength(gzFile gd)
{
   unsigned char chunk[0x10000];
   int total = 0;

   gzrewind(gd);

   for (;;)
   {
      int got = gzread(gd, chunk, 0x10000);

      // Error or no more data
      if (got <= 0)
         break;

      total += got;

      // The chunk just read reached the end of the file
      if (gzeof(gd))
         break;
   }

   gzrewind(gd);
   return total;
}
Esempio n. 17
0
/* Reposition an FCEUFILE at its start. The backend is selected by fp->type:
   1 uses gzrewind(), 2 and above reset the MEMWRAP location, anything else
   is handled with fseek(). */
void FCEU_rewind(FCEUFILE *fp)
{
 if(fp->type>=2)
 {
  MEMWRAP *mem=(MEMWRAP *)(fp->fp);
  mem->location=0;
  return;
 }

 if(fp->type==1)
 {
  gzrewind(fp->fp);
  return;
 }

 /* Rewind */
 fseek(fp->fp,0,SEEK_SET);
}
Esempio n. 18
0
/* Reposition a ZOLTAN_FILE at its beginning. STANDARD files use rewind();
   GZIP files (only when built with ZOLTAN_GZIP) use gzrewind() and have
   their pos field set to -1. Other file types are left untouched. */
void ZOLTAN_FILE_rewind(ZOLTAN_FILE* file)
{
  if (file->type == STANDARD) {
    rewind(file->strm.fileunc);
    return;
  }
#ifdef ZOLTAN_GZIP
  if (file->type == GZIP) {
    gzrewind(file->strm.filegz);
    file->pos = -1;
    return;
  }
#endif
}
Esempio n. 19
0
// Loads the whole content of `filename` into a buffer allocated with new[]
// and terminated with a zero byte.
//
// The file is first opened through zlib: the data is read once to find its
// uncompressed size, then the stream is rewound and read into the result
// buffer. If zlib cannot open the file, plain stdio is used instead.
//
// Returns NULL when the file cannot be opened or a read fails.
char *XMLwrapper::doloadfile(const char *filename){
    char *xmldata=NULL;
    int filesize=-1;

    //try get filesize as gzip data (first)
    gzFile gzfile=gzopen(filename,"rb");
    if (gzfile!=NULL){
	// first check its size
	const int bufsize=1024;
	char* tmpbuf=new char[bufsize];
	bool failed=false;
	filesize=0;
	while(!gzeof(gzfile)) {
	    int got=gzread(gzfile,tmpbuf,bufsize);
	    // a negative result is an error: adding it would corrupt the size
	    // and, since EOF is never reached, the loop would not end
	    if (got<0) { failed=true; break; }
	    if (got==0) break;
	    filesize+=got;
	}
	delete []tmpbuf;
	if (failed){
	    gzclose(gzfile);
	    return(NULL);
	}

	//rewind the file and load the data
	xmldata=new char[filesize+1];
	ZERO(xmldata,filesize+1);

	gzrewind(gzfile);
	if (gzread(gzfile,xmldata,filesize)<0){
	    delete []xmldata;
	    gzclose(gzfile);
	    return(NULL);
	}

	gzclose(gzfile);
	return (xmldata);
    }

    //zlib could not open the file: read it with stdio
    FILE *file=fopen(filename,"rb");
    if (file==NULL) return(NULL);
    fseek(file,0,SEEK_END);
    long length=ftell(file);
    if (length<0){//position unavailable, the size is unknown
	fclose(file);
	return(NULL);
    }
    filesize=(int)length;

    xmldata=new char [filesize+1];
    ZERO(xmldata,filesize+1);

    rewind(file);
    // an empty file legitimately reads zero items; anything else must yield one
    if (filesize>0 && fread(xmldata,filesize,1,file)!=1){
	delete []xmldata;
	fclose(file);
	return(NULL);
    }

    fclose(file);
    return(xmldata);
}
Esempio n. 20
0
/* Read the header portion of the icp file, returning the raw text and
   the number of points recorded in the header.

   The file is rewound first. Lines are appended to header until the line
   following the " Mot:" / "   Q (hkl scan center)" line has been copied, or
   the end of the file is reached. *linenum is set to the number of lines
   read.

   Returns ICP_GOOD on success, ICP_INVALID_FORMAT if the second line does
   not start with "  Filename", or ICP_READ_ERROR if there was a problem
   reading the file or if the header text area (headersize bytes, including
   the terminating zero) is too small.
 */
int icp_readheader(gzFile infile, int headersize, char header[], int *pts,
		   int *linenum)
{
  char line[MAX_LINE];
  int offset,len,seen_motor_line;

  /* Return to the start of the file */
  gzrewind(infile);
  *linenum = 0;

  /* Start from an empty header so that numpoints() below never looks at
     whatever the caller's buffer contained before */
  if (headersize < 1) return ICP_READ_ERROR;
  header[0] = '\0';

  /* Copy lines until one after the motor/qscan line */
  seen_motor_line = 0;
  offset = 0;
  while (!gzeof(infile)) {
    /* Read the next line */
    (*linenum)++;
    if (gzgets(infile,line,sizeof(line)-1) == NULL) return ICP_READ_ERROR;
    line[sizeof(line)-1] = '\0'; /* Guarantee zero terminator */

    /* Check that the second line contains the ICP signature */
    if ((*linenum) == 2) {
      if (strncmp(line,"  Filename",10)!=0) return ICP_INVALID_FORMAT;
    }

    /* Append the next line to the header. strcpy writes len+1 bytes (text
       plus terminator), so the text must end strictly before headersize. */
    len = (int)strlen(line);
    if (len+offset >= headersize) return ICP_READ_ERROR;
    strcpy(header+offset, line);
    offset += len;

    /* Stop if the previous line contained " Mot:" */
    if (seen_motor_line) break;
    seen_motor_line = (strncmp(line," Mot:",5) == 0
		       || strncmp(line,"   Q (hkl scan center)",22) == 0 );
  }

  /* Peek in the header for the stored number of points */
  *pts = numpoints(header);
  return ICP_GOOD;
}
Esempio n. 21
0
/*
 * Inspect the first BUFSIZ bytes of a gzip stream with isbinary().
 *
 * Returns 1 when isbinary() reports the data as binary and 0 otherwise,
 * including when the stream cannot be positioned at its start or yields no
 * data. After a successful check the stream is rewound; a failing rewind
 * terminates the program through err().
 */
int
gzbin_file(gzFile *f)
{
	char		chunk[BUFSIZ];
	int		nread;
	int		binary;

	if (gzseek(f, (z_off_t)0, SEEK_SET) == -1)
		return 0;

	nread = gzread(f, chunk, BUFSIZ);
	if (nread <= 0)
		return 0;

	binary = isbinary(chunk, nread) ? 1 : 0;

	if (gzrewind(f) != 0)
		err(1, "gzbin_file");
	return binary;
}
Esempio n. 22
0
/**
 * Opens a file for reading text fasta and counts its entries.
 *
 * The label regex configured under "input.fasta_regex" is compiled, the
 * file is opened and scanned once, and the stream is rewound so that
 * reading starts at the first entry. A line starting with '>' or ';' is
 * counted as an entry unless the line before it was counted as well.
 *
 * @param name File name
 * @return number of fasta or -1 on error
 */
int input_fasta_open(char *name) 
{
    assert(name);    
    size_t read, size;
    char *line = NULL;
    const char *pattern = NULL;

    /* Compile regular expression for label */
    config_lookup_string(&cfg, "input.fasta_regex", &pattern);    
    if (!pattern || regcomp(&re, pattern, REG_EXTENDED) != 0) {
        error("Could not compile regex for label");
        return -1;
    }

    in = gzopen(name, "r");
    if (!in) {
        error("Could not open '%s' for reading", name);
        return -1;
    }

    /* The counter must start at zero: it is incremented and returned below */
    int num = 0, cont = FALSE;
    while(!gzeof(in)) {
        line = NULL;
        read = gzgetline(&line, &size, in);
        if (read > 0)
            strtrim(line);
        if (read > 1 && !cont && (line[0] == '>' || line[0] == ';')) {
            num++;
            cont = TRUE;
        } else {
            cont = FALSE;
        }
        free(line);
    }

    /* Prepare reading */
    gzrewind(in);    
    return num;
}
Esempio n. 23
0
/* Command handler for gzrewind(): reads the stream handle from the request,
   calls gzrewind() on it between START_TARGET_OPERATION and
   END_TARGET_OPERATION, then writes back the error number obtained from
   gzerror() followed by gzrewind()'s return value. */
static TACommandVerdict gzrewind_cmd(TAThread thread,TAInputStream stream)
{
    void* handle = readPointer(&stream);
    int rc;
    int zerr;

    START_TARGET_OPERATION(thread);
    rc = gzrewind(handle);
    END_TARGET_OPERATION(thread);

    gzerror(handle, &zerr);

    writeInt(thread, zerr);
    writeInt(thread, rc);
    sendResponse(thread);

    return taDefaultVerdict;
}
Esempio n. 24
0
/* Return the size of an FCEUFILE. The backend is selected by fp->type:
   2 and above report the MEMWRAP size; 1 counts the bytes of the gzip
   stream one at a time and then seeks back to the previous position;
   anything else uses ftell() at the end of the stdio file and restores the
   previous position. */
uint64 FCEU_fgetsize(FCEUFILE *fp)
{
 if(fp->type>=2)
  return ((MEMWRAP*)(fp->fp))->size;

 if(fp->type==1)
 {
  int saved,count;
  saved=gztell(fp->fp);
  gzrewind(fp->fp);
  count=0;
  while(gzgetc(fp->fp) != EOF)
   count++;
  gzseek(fp->fp,saved,SEEK_SET);
  return(count);
 }

 {
  FILE *plain=(FILE *)fp->fp;
  long saved,end;
  saved=ftell(plain);
  fseek(plain,0,SEEK_END);
  end=ftell(plain);
  fseek(plain,saved,SEEK_SET);
  return end;
 }
}
Esempio n. 25
0
// Rewinds the underlying gzip stream.
// Returns true on success and false when gzrewind() reports an error.
bool ZipFile::rewind() {
  assert(m_gzFile);
  // gzrewind() yields 0 on success and -1 on failure; pass that on instead
  // of unconditionally claiming success.
  return gzrewind(m_gzFile) == 0;
}
Esempio n. 26
0
/*
 * Rewind callback: `data` is the stream handle that gets passed to
 * gzrewind().
 *
 * The handle is forwarded as the untyped pointer it arrived as. Casting it
 * to FILE * first, as was done before, hands gzrewind() a pointer of an
 * unrelated type.
 */
static void key_nlfile_rewind(void *data)
{
	gzrewind(data);
}
Esempio n. 27
0
/*
 * Prepare the read input: allocate the read buffers, open the sequence
 * file(s) as plain text or gzip (selected by seqCompressed), detect the
 * format from the first character ('>' clears _r_fastq, anything else sets
 * it), derive SEQ_LENGTH / QUAL_LENGTH / CMP_SEQ_LENGTH from the first
 * record, size and allocate the _r_seq array from MAX_MEMORY, settle
 * errThreshold and checkSumLength, and set up the alphabet index table.
 *
 * Returns 1 on success and 0 when a file cannot be opened. The program
 * exits when the read length reaches SEQ_MAX_LENGTH.
 */
int initRead(char *fileName1, char *fileName2)
{
	char dummy[SEQ_MAX_LENGTH];
	char ch;
	/* NOTE(review): maxCnt is never used in this function */
	int i, maxCnt=0;

	_r_buf1 = getMem(10000000);
	_r_buf1_pos = getMem(sizeof(int));
	_r_buf1_size = getMem(sizeof(int));
	*_r_buf1_size = *_r_buf1_pos = 0; 
	if ( pairedEndMode && fileName2 != NULL )
	{
		/* NOTE(review): the second buffer's size/position counters are not
		 * zeroed here, unlike the first pair above - confirm that getMem()
		 * returns zero-filled memory */
		_r_buf2 = getMem(10000000);
		_r_buf2_pos = getMem(sizeof(int));
		_r_buf2_size = getMem(sizeof(int));
	}
	else
	{
		/* no separate second file: the second buffer aliases the first */
		_r_buf2 = _r_buf1;
		_r_buf2_pos = _r_buf1_pos;
		_r_buf2_size = _r_buf1_size;
	}


	if (!seqCompressed)
	{
		_r_fp1 = fileOpen( fileName1, "r");

		if (_r_fp1 == NULL)
			return 0;

		/* first character of the file, used below to detect the format */
		ch = fgetc(_r_fp1);

		if ( pairedEndMode) 
		{
			if ( fileName2 == NULL )
			{
				_r_fp2 = _r_fp1;
			}
			else
			{
				_r_fp2 = fileOpen ( fileName2, "r" );
				if (_r_fp2 == NULL)
					return 0;
			}
		}

		readBuffer1 = &readBufferTxT1;
		readBuffer2 = &readBufferTxT2;
	}
	else
	{

		_r_gzfp1 = fileOpenGZ (fileName1, "r");

		if (_r_gzfp1 == NULL)
		{
			return 0;
		}

		/* first character of the stream, used below to detect the format */
		ch = gzgetc(_r_gzfp1);

		if ( pairedEndMode && fileName2 != NULL )
		{
			_r_gzfp2 = fileOpenGZ ( fileName2, "r" );
			if (_r_gzfp2 == NULL)
			{
				return 0;
			}
		}
		else
		{
			_r_gzfp2 = _r_gzfp1;
		}

		readBuffer1 = &readBufferGZ1;
		readBuffer2 = &readBufferGZ2;
	}

	/* put back the character consumed for format detection */
	if (!seqCompressed)
		rewind(_r_fp1);
	else
		gzrewind(_r_gzfp1);

	if (ch == '>')
		_r_fastq = 0;
	else
		_r_fastq = 1;
	
	/* presumably the first call yields the record name and the second the
	 * sequence line - TODO confirm against readFirstSeq() */
	readFirstSeq(dummy,1);
	int nameLen = strlen(dummy);
	readFirstSeq(dummy,2);
	*_r_buf1_pos = 0;
	int seqLen = strlen(dummy);
	/* SEQ_LENGTH = number of leading non-whitespace characters in dummy */
	SEQ_LENGTH = 0;
	i = 0;
	while (i<seqLen && !isspace(dummy[i]))
	{
		i++;
		SEQ_LENGTH++;
	}
	
	/* a positive cropSize overrides the measured length */
	if (cropSize > 0)
		SEQ_LENGTH = cropSize;

	if ( SEQ_LENGTH >= SEQ_MAX_LENGTH )
	{
		fprintf(stdout, "ERR: Read Length is greater than the MAX length we can process (Current Max: %d).\n", SEQ_MAX_LENGTH);
		exit(EXIT_FAILURE);
	}

	if (_r_fastq)
	{
		QUAL_LENGTH = SEQ_LENGTH;
	}
	else
	{
		QUAL_LENGTH = 1;
	}

	CMP_SEQ_LENGTH = calculateCompressedLen(SEQ_LENGTH);

	//TODO MEMORY CALCULATION FIX
	/* estimated bytes needed per read, used to size _r_seq below */
	double readMem = sizeof(Read) + (2 + (SEQ_LENGTH * 2) + QUAL_LENGTH + 3 + (CMP_SEQ_LENGTH * 2 * 8) + (nameLen+10) + 4);
	readMem += ((bestMappingMode) ?(sizeof(FullMappingInfo)) :0);
	if (pairedEndMode)
		readMem += sizeof(MappingInfo) + sizeof(MappingLocations);

	/* (MAX_MEMORY-1.2) is scaled by 2^30 before dividing by the per-read
	 * estimate; the count is then made even in paired-end mode and rounded
	 * down to a multiple of THREAD_COUNT */
	_r_maxSeqCnt = (int)(((MAX_MEMORY-1.2) * (1 << 30))/readMem);
	if ( pairedEndMode && _r_maxSeqCnt % 2 )
		_r_maxSeqCnt ++;
	_r_maxSeqCnt -= _r_maxSeqCnt % THREAD_COUNT;

//_r_maxSeqCnt = 500000;

	_r_seq = getMem(sizeof(Read)*_r_maxSeqCnt);

	int maxErrThreshold = (SEQ_LENGTH/WINDOW_SIZE) - 1;
	/* -1 means no threshold was chosen: default to 6% of the read length */
	if (errThreshold == -1)
	{
		errThreshold = SEQ_LENGTH*6/100;
		fprintf(stdout, "# Errors: %d\n", errThreshold);
	}
	if (errThreshold > maxErrThreshold && SEQ_LENGTH>0)
	{
		errThreshold = maxErrThreshold;
		fprintf(stdout, "# Error: %d (full sensitivity)\n", errThreshold);
	}


	/* NOTE(review): if checkSumLength is a signed type, a negative value is
	 * converted to unsigned for the comparison with sizeof(...) below and is
	 * therefore clamped as well - confirm this is intended */
	checkSumLength = (SEQ_LENGTH / (errThreshold+1)) - WINDOW_SIZE;
	if (checkSumLength > sizeof(CheckSumType)*4)
		checkSumLength = sizeof(CheckSumType)*4;

	calculateSamplingLocations();


	if (!nohitDisabled)
	{
		_r_umfp = fileOpen(unmappedOutput, "w");
	}

	/* lookup table indexed by character code; only the five entries below
	 * are assigned here */
	_r_alphIndex = getMem(128);		// used in readChunk()
	_r_alphIndex['A'] = 0;
	_r_alphIndex['C'] = 1;
	_r_alphIndex['G'] = 2;
	_r_alphIndex['T'] = 3;
	_r_alphIndex['N'] = 4;

	return 1;
}
Esempio n. 28
0
/* -- see zlib.h -- */
/*
 * Set the position, counted in uncompressed bytes, for the next operation on
 * file. whence must be SEEK_SET or SEEK_CUR. Returns the resulting position,
 * or -1 when the handle or its mode is invalid, the stream is in an error
 * state, whence is unsupported, a write stream is asked to go backwards, the
 * target lies before the start of the file, or a low-level seek/rewind fails.
 *
 * A seek is not necessarily carried out here: unless the target can be
 * reached directly, the remaining distance is recorded in state->skip with
 * state->seek set, presumably to be honoured by the next read or write.
 */
z_off64_t ZEXPORT gzseek64(
    gzFile file,
    z_off64_t offset,
    int whence)
{
    unsigned n;
    z_off64_t ret;
    gz_statep state;

    /* get internal structure and check integrity */
    if (file == NULL)
        return -1;
    state = (gz_statep)file;
    if (state->mode != GZ_READ && state->mode != GZ_WRITE)
        return -1;

    /* check that there's no error */
    if (state->err != Z_OK)
        return -1;

    /* can only seek from start or relative to current position */
    if (whence != SEEK_SET && whence != SEEK_CUR)
        return -1;

    /* normalize offset to a SEEK_CUR specification */
    /* (a relative seek also absorbs any skip still pending from an earlier
       call, which is then cancelled) */
    if (whence == SEEK_SET)
        offset -= state->pos;
    else if (state->seek)
        offset += state->skip;
    state->seek = 0;

    /* if within raw area while reading, just go there */
    /* (offset - state->have accounts for bytes already read from the
       descriptor but still waiting in the output buffer) */
    if (state->mode == GZ_READ && state->how == COPY &&
        state->pos + offset >= state->raw) {
        ret = LSEEK(state->fd, offset - state->have, SEEK_CUR);
        if (ret == -1)
            return -1;
        state->have = 0;
        state->eof = 0;
        state->seek = 0;
        gz_error(state, Z_OK, NULL);
        state->strm.avail_in = 0;
        state->pos += offset;
        return state->pos;
    }

    /* calculate skip amount, rewinding if needed for back seek when reading */
    if (offset < 0) {
        if (state->mode != GZ_READ)         /* writing -- can't go backwards */
            return -1;
        offset += state->pos;               /* offset is now absolute */
        if (offset < 0)                     /* before start of file! */
            return -1;
        if (gzrewind(file) == -1)           /* rewind, then skip to offset */
            return -1;
    }

    /* if reading, skip what's in output buffer (one less gzgetc() check) */
    /* n = min(have, offset); GT_OFF guards the case where have does not fit
       in the offset type */
    if (state->mode == GZ_READ) {
        n = GT_OFF(state->have) || (z_off64_t)state->have > offset ?
            (unsigned)offset : state->have;
        state->have -= n;
        state->next += n;
        state->pos += n;
        offset -= n;
    }

    /* request skip (if not zero) */
    if (offset) {
        state->seek = 1;
        state->skip = offset;
    }
    /* position the stream will have once the pending skip is done */
    return state->pos + offset;
}
Esempio n. 29
0
/* int readFile()
 * Parses the input file. Produces the output file(s).
 *
 * The input is read record by record: a header line (into hline) followed by
 * a sequence line (into line), plus two further lines per record when
 * fastaOrQ() reported a non-zero value (fastq). Header lines starting with
 * '#' are skipped. For each record findPrim() looks for a primer in the
 * sequence; on a hit the reverse primer is searched with checkRevEnd(),
 * then checkRevInt() (if revLen is set), then checkRevLen() (if the primer
 * has a length and it fits inside the line).
 *
 * Records with a primer are written to `out` with the primer name appended
 * to the header and the sequence cut to [st, end); with corrOpt the same
 * record is also written to `corr` with the primer sequences re-attached.
 * Records without a primer, or lacking a reverse primer while revOpt is
 * set, go to `waste` when wasteOpt is set. `gz` selects the zlib or stdio
 * variant of every read and write.
 *
 * *match is incremented per record with a primer, *rcmatch per written
 * record whose reverse primer was also found. Returns the number of records
 * read. The program exits when a record is truncated.
 */
int readFile(File in, File out, int misAllow, int* match,
    int* rcmatch, int fwdSt, int fwdEnd, int revSt, int revEnd,
    int bedSt, int bedEnd, File waste, int wasteOpt, int revMis,
    int revLen, int revLMis, int revOpt, File corr, int corrOpt,
    int gz) {
  // determine if input is fasta or fastq
  int aorq = fastaOrQ(in, gz);
  gz ? gzrewind(in.gzf) : rewind(in.f);

  int count = 0;
  while (getLine(hline, MAX_SIZE, in, gz) != NULL) {
    if (hline[0] == '#')
      continue;
    count++;
    if (getLine(line, MAX_SIZE, in, gz) == NULL)
      exit(error("", ERRSEQ));
    // strip the trailing newline; len is the index of the last character
    // NOTE(review): an empty line would make len -1 - confirm getLine()
    // never returns an empty string
    int len = strlen(line) - 1;
    if (line[len] == '\n')
      line[len] = '\0';

    // st: where the kept sequence starts; end: where it stops (0 = reverse
    // primer not found); f: set by findPrim(), selects the reverse-complement
    // fields and counters of the primer below
    int st = 0, end = 0, f = 0;
    Primer* p = findPrim(line, misAllow, fwdSt, fwdEnd, &st, &f);
    if (p != NULL) {
      (*match)++;
      f ? p->rcount++ : p->fcount++;

      // search for reverse primer
      // first, check 3' end
      char* rev = (f ? p->frc : p->rev);
      end = checkRevEnd(line, rev, revMis, revSt, revEnd);

      // check internal sequence
      if (!end && revLen) {
        int setLen = strlen(rev);
        if (setLen > revLen)
          setLen = revLen;
        end = checkRevInt(line, rev, st, revLMis, setLen);
      }

      // check based on amplicon length
      if (!end && p->len && st + p->len < strlen(line))
        end = checkRevLen(line, rev, st + p->len, bedSt, bedEnd);

      // evaluate outcome, produce output
      // (an end at or before the start is treated as "not found")
      if (end <= st)
        end = 0;
      if (end)
        f ? p->rcountr++ : p->fcountr++;
      if (revOpt && !end) {
        // rev primer not found (and was required [revOpt])
        if (wasteOpt)
          gz ? gzprintf(waste.gzf, "%s%s\n", hline, line)
            : fprintf(waste.f, "%s%s\n", hline, line);
      } else {
        // print header
        for (int i = 0; hline[i] != '\0' && hline[i] != '\n'; i++)
          gz ? gzputc(out.gzf, hline[i]) : putc(hline[i], out.f);
        gz ? gzprintf(out.gzf, " %s%s%s\n", p->name,
          f ? REV : FWD, end ? BOTH : "")
          : fprintf(out.f, " %s%s%s\n", p->name,
          f ? REV : FWD, end ? BOTH : "");
        if (corrOpt) {
          for (int i = 0; hline[i] != '\0' && hline[i] != '\n'; i++)
            gz ? gzputc(corr.gzf, hline[i]) : putc(hline[i], corr.f);
          gz ? gzprintf(corr.gzf, " %s%s%s\n", p->name,
            f ? REV : FWD, end ? BOTH : "")
            : fprintf(corr.f, " %s%s%s\n", p->name,
            f ? REV : FWD, end ? BOTH : "");
        }
        // print sequence
        // (without a reverse primer the sequence runs to the end of the line)
        if (!end)
          end = len;
        else
          (*rcmatch)++;
        for (int i = st; i < end; i++)
          gz ? gzputc(out.gzf, line[i]) : putc(line[i], out.f);
        gz ? gzputc(out.gzf, '\n') : putc('\n', out.f);
        // reattach primers
        if (corrOpt) {
          gz ? gzprintf(corr.gzf, "%s", f ? p->rrc : p->fwd)
            : fprintf(corr.f, "%s", f ? p->rrc : p->fwd);
          for (int i = st; i < end; i++)
            gz ? gzputc(corr.gzf, line[i]) : putc(line[i], corr.f);
          gz ? gzprintf(corr.gzf, "%s\n", f ? p->frc : p->rev)
            : fprintf(corr.f, "%s\n", f ? p->frc : p->rev);
        }
      }
    } else if (wasteOpt)
      gz ? gzprintf(waste.gzf, "%s%s\n", hline, line)
        : fprintf(waste.f, "%s%s\n", hline, line);

    // read next 2 lines if fastq
    // (i == 0 is copied unchanged; i == 1 is cut to [st, end) like the
    // sequence, and in corr it is padded with 'I' for each re-attached
    // primer character)
    if (aorq) {
      for (int i = 0; i < 2; i++)
        if (getLine(line, MAX_SIZE, in, gz) == NULL)
          exit(error("", ERRSEQ));
        else if (p != NULL) {
          if (revOpt && !end) {
            if (wasteOpt)
              gz ? gzprintf(waste.gzf, "%s", line)
                : fprintf(waste.f, "%s", line);
          } else if (i) {
            for (int j = st; j < end; j++)
              gz ? gzputc(out.gzf, line[j]) : putc(line[j], out.f);
            gz ? gzputc(out.gzf, '\n') : putc('\n', out.f);
            if (corrOpt) {
              for (int j = 0; j < strlen(f ? p->rrc : p->fwd); j++)
                gz ? gzputc(corr.gzf, 'I') : putc('I', corr.f);
              for (int j = st; j < end; j++)
                gz ? gzputc(corr.gzf, line[j]) : putc(line[j], corr.f);
              for (int j = 0; j < strlen(f ? p->frc : p->rev); j++)
                gz ? gzputc(corr.gzf, 'I') : putc('I', corr.f);
              gz ? gzputc(corr.gzf, '\n') : putc('\n', corr.f);
            }
          } else {
            gz ? gzprintf(out.gzf, "%s", line)
              : fprintf(out.f, "%s", line);
            if (corrOpt)
              gz ? gzprintf(corr.gzf, "%s", line)
                : fprintf(corr.f, "%s", line);
          }
        } else if (wasteOpt)
          gz ? gzprintf(waste.gzf, "%s", line)
            : fprintf(waste.f, "%s", line);

    }
  }
  return count;
}
/* The gateway function
 *
 * Inputs  (prhs): filename, max_gap_fraction.
 * Outputs (plhs): N, M (1x1 doubles) and Z (an M-by-N double matrix).
 *
 * The file is read in two passes through the same kseq reader: the first
 * pass (parse_seq_pass1) determines N and M and fills inds/zinds, the second
 * pass (parse_seq_pass2) fills Z after the stream has been rewound.
 */
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
{
    char * filename;
    double max_gap_fraction;
    gzFile fp;
    kseq_t *seq;
    int N, M;
    double * N_ptr;
    double * M_ptr;
    double * Z_ptr;
    double ** Z;
    int * inds;
    int * zinds;

    /* check for proper number of arguments */
    if (nrhs != 2) {
        mexErrMsgIdAndTxt("read_alignemnt_fasta:nrhs", "Two inputs required: filename, max_gap_fraction.");
    }
    if (nlhs != 3) {
        mexErrMsgIdAndTxt("read_alignemnt_fasta:nlhs", "Three outputs required: N, M, Z.");
    }

    /* get the file name */
    filename = mxArrayToString(prhs[0]);

    fp = gzopen(filename, "r");

    if (fp == Z_NULL) {
        mexErrMsgIdAndTxt("read_alignemnt_fasta:open_file", "Error opening file");
    }

    seq = kseq_init(fp);

    /* get the max_gap_fraction value */
    max_gap_fraction = mxGetScalar(prhs[1]);

    /* create the outputs N, M */
    plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
    plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);

    N_ptr = mxGetPr(plhs[0]);
    M_ptr = mxGetPr(plhs[1]);

    parse_seq_pass1(seq, &inds, &zinds, &N, &M, max_gap_fraction);

    *N_ptr = (double) N;
    *M_ptr = (double) M;

    /* create the output matrix Z */
    plhs[2] = mxCreateDoubleMatrix(M, N, mxREAL);

    /* Z[i] points at the i-th run of M consecutive doubles inside the
     * output matrix. Z holds pointers, so its elements are sized with
     * sizeof *Z rather than sizeof(double). */
    Z_ptr = mxGetPr(plhs[2]);
    Z = malloc((size_t) N * sizeof *Z);
    if (Z == NULL && N > 0) {
        /* release what this function acquired before reporting the error */
        kseq_destroy(seq);
        gzclose(fp);
        mxFree(filename);
        free(inds);
        mexErrMsgIdAndTxt("read_alignemnt_fasta:alloc", "Out of memory");
    }
    {
        int i;
        for (i = 0; i < N; ++i) {
            Z[i] = Z_ptr;
            Z_ptr += M;
        }
    }

    /* start over for the second pass */
    gzrewind(fp);
    kseq_rewind(seq);

    parse_seq_pass2(seq, Z, inds, zinds);

    /* release memory */
    kseq_destroy(seq);
    gzclose(fp);

    mxFree(filename);

    free(Z);

    /* NOTE(review): zinds is filled in by parse_seq_pass1() alongside inds
     * but is not released here - confirm who owns it */
    free(inds);
}