Esempio n. 1
0
File: filter.c Progetto: Pency/BSPT
/*
 * Builds "<dir>/<base><suffix>" into dst.
 * Returns 0 on success, or -1 (after reporting on stderr) when the result
 * would not fit into dst_size bytes.
 */
static int flt_build_path(char *dst, size_t dst_size, const char *dir, const char *base, const char *suffix){
	int n=snprintf(dst,dst_size,"%s/%s%s",dir,base,suffix);
	if(n < 0 || (size_t)n >= dst_size){
		fprintf(stderr,"Output path is too long: %s/%s%s\n",dir,base,suffix);
		return -1;
	}
	return 0;
}

/*
 * Filters a pair of FASTQ files read in lock-step from opts->r1 and opts->r2.
 *
 * Each read of a pair is passed to filter_all(). When both reads return 1 they
 * are written to "<output>/<name>.flt" for their respective input file; when
 * only one returns 1 it is written to the shared "<output>/<name of r2>.flt.s"
 * file. A summary is printed on stdout once either input is exhausted.
 *
 * Returns 0 on success and -1 when an input/output file cannot be opened or an
 * output path does not fit into the local path buffer. Everything acquired so
 * far is released on every exit path.
 */
int filter_pe_fastq(FLT_OPTS *opts){
	int left1=0,left2=0;
	int stat_single1 = 0;
	int stat_single2 = 0;
	int stat_paired = 0;
	int index=1;
	int total=0;
	int status=-1;
	float denom;
	char fn[128];
	char outfile[128];
	gzFile fp1=NULL;
	gzFile fp2=NULL;
	FILE *fo1=NULL;
	FILE *fo2=NULL;
	FILE *fos=NULL;
	SEQ_QUAL item1=init_read();
	SEQ_QUAL item2=init_read();

	fp1=gzopen_report(opts->r1,"r");
	if(!fp1)	goto cleanup;
	fp2=gzopen_report(opts->r2,"r");
	if(!fp2)	goto cleanup;

	/* NOTE(review): file_name() writes into outfile[128] without a size argument — confirm it bounds its output. */
	file_name(outfile,opts->r1);
	if(flt_build_path(fn,sizeof(fn),opts->output,outfile,".flt") < 0)	goto cleanup;
	fo1=fopen_report(fn,"w+");
	if(!fo1)	goto cleanup;

	file_name(outfile,opts->r2);
	if(flt_build_path(fn,sizeof(fn),opts->output,outfile,".flt") < 0)	goto cleanup;
	fo2=fopen_report(fn,"w+");
	if(!fo2)	goto cleanup;

	/* The file for unpaired survivors is named after the second input. */
	if(flt_build_path(fn,sizeof(fn),opts->output,outfile,".flt.s") < 0)	goto cleanup;
	fos=fopen_report(fn,"w+");
	if(!fos)	goto cleanup;

	while(read_fastq(fp1,&item1,index) >= 0 && read_fastq(fp2,&item2,index) >= 0)
	{
		left1=filter_all(&item1, opts);
		left2=filter_all(&item2, opts);

		if(left1 == 1 && left2 == 1){
			output_fastq(fo1, &item1);
			output_fastq(fo2, &item2);
			stat_paired++;
		}else{
			if(left1 == 1){
				output_fastq(fos, &item1);
				stat_single1++;
			}
			if(left2 == 1){
				output_fastq(fos, &item2);
				stat_single2++;
			}
		}
		index++;
	}

	/* index started at 1, so index-1 is the number of pairs processed. */
	total=index-1;
	/* Avoid 0/0 when no pair was read: every counter is 0 then, so the percentages print as 0.00. */
	denom=(total > 0) ? (float) total : 1.0f;

	printf("Totally %d reads were processed\n",total*2);
	printf("  file [ %s ]: %d reads were left (%.2f%%)\n",opts->r1,stat_paired+stat_single1,(float) (stat_paired+stat_single1)*100/denom);
	printf("  file [ %s ]: %d reads were left (%.2f%%)\n",opts->r2,stat_paired+stat_single2,(float) (stat_paired+stat_single2)*100/denom);
	printf("After filtering %d reads are paired in each file (%.2f%%)\n",stat_paired,(float) stat_paired*100/denom);
	printf("  file [ %s ]: %d reads were left as single end\n",opts->r1,stat_single1);
	printf("  file [ %s ]: %d reads were left as single end\n",opts->r2,stat_single2);
	status=0;

cleanup:
	free_read(&item1);
	free_read(&item2);
	if(fp1)	gzclose(fp1);
	if(fp2)	gzclose(fp2);
	if(fo1)	fclose(fo1);
	if(fo2)	fclose(fo2);
	if(fos)	fclose(fos);

	return status;
}
Esempio n. 2
0
static int
xgzclose(void *cookie)
{
    return gzclose(cookie);
}
Esempio n. 3
0
/*
 * Pumps the parser's already-opened input (parser->f_in when NO_GZIP is
 * defined, parser->gz_in otherwise) through gf_xml_sax_parse().
 *
 * The input is read in chunks of at most XML_INPUT_SIZE bytes until the parser
 * is suspended, a read yields nothing, or parsing reports an error. The
 * progress callback, if any, is invoked after every successfully parsed chunk.
 * When the input is at end of file afterwards, it is closed and the parser's
 * position bookkeeping is reset.
 *
 * Returns GF_BAD_PARAM when no input is open, otherwise the last parse status
 * (GF_EOS when the end of the file was reached without a parse error).
 */
static GF_Err xml_sax_read_file(GF_SAXParser *parser)
{
	unsigned char chunk[XML_INPUT_SIZE+2];
	GF_Err status = GF_EOS;
	int reached_end;

#ifdef NO_GZIP
	if (!parser->f_in) return GF_BAD_PARAM;
#else
	if (!parser->gz_in) return GF_BAD_PARAM;
#endif

	for (;;) {
		s32 nb_read;

		if (parser->suspended) break;

#ifdef NO_GZIP
		nb_read = fread(chunk, 1, XML_INPUT_SIZE, parser->f_in);
#else
		nb_read = gzread(parser->gz_in, chunk, XML_INPUT_SIZE);
#endif
		if (nb_read <= 0) break;

		/*two terminating zero bytes are appended; the buffer has 2 spare bytes for them*/
		chunk[nb_read] = 0;
		chunk[nb_read+1] = 0;

		status = gf_xml_sax_parse(parser, chunk);
		if (status) break;

		/*keep the reported size ahead of the current position*/
		if (parser->file_pos > parser->file_size) {
			parser->file_size = parser->file_pos + 1;
		}
		if (parser->on_progress) {
			parser->on_progress(parser->sax_cbck, parser->file_pos, parser->file_size);
		}
	}

#ifdef NO_GZIP
	reached_end = feof(parser->f_in);
#else
	reached_end = gzeof(parser->gz_in);
#endif
	/*not at end of file (suspended, parse error, ...): leave the input open*/
	if (!reached_end) return status;

	if (!status) status = GF_EOS;
	if (parser->on_progress) {
		parser->on_progress(parser->sax_cbck, parser->file_size, parser->file_size);
	}

#ifdef NO_GZIP
	fclose(parser->f_in);
	parser->f_in = NULL;
#else
	gzclose(parser->gz_in);
	parser->gz_in = 0;
#endif

	/*reset all position bookkeeping now that the input is closed*/
	parser->elt_end_pos = 0;
	parser->elt_start_pos = 0;
	parser->elt_name_end = 0;
	parser->elt_name_start = 0;
	parser->att_name_start = 0;
	parser->current_pos = 0;
	parser->line_size = 0;
	parser->att_sep = 0;
	parser->file_pos = 0;
	parser->file_size = 0;

	return status;
}

/*
 * Opens fileName, initializes the SAX parser from the first 4 bytes of the
 * file and then parses it through xml_sax_read_file().
 *
 * parser     - parser state; its file_size, on_progress, input handle and
 *              position fields are overwritten.
 * fileName   - path of the file to parse.
 * OnProgress - progress callback stored in the parser (may be NULL, it is
 *              tested before every call in xml_sax_read_file()).
 *
 * Returns GF_URL_ERROR when the file cannot be opened for the size probe,
 * GF_IO_ERR when it cannot be opened for reading, the error reported by
 * gf_xml_sax_init() if initialization fails, otherwise the result of
 * xml_sax_read_file().
 */
GF_EXPORT
GF_Err gf_xml_sax_parse_file(GF_SAXParser *parser, const char *fileName, gf_xml_sax_progress OnProgress)
{
	FILE *test;
	GF_Err e;
#ifndef NO_GZIP
	gzFile gzInput;
#endif
	/*zero-filled so that a file shorter than 4 bytes still gives a fully initialized probe buffer*/
	unsigned char szLine[6] = {0};

	/*check file exists and gets its size (zlib doesn't support SEEK_END)*/
	test = gf_f64_open(fileName, "rb");
	if (!test) return GF_URL_ERROR;
	gf_f64_seek(test, 0, SEEK_END);
	/*unsigned literal: shifting a signed 1 by 31 bits overflows a 32-bit int*/
	assert(gf_f64_tell(test) < (1u<<31));
	parser->file_size = (u32) gf_f64_tell(test);
	fclose(test);

	parser->on_progress = OnProgress;

#ifdef NO_GZIP
	parser->f_in = gf_f64_open(fileName, "rt");
	/*same failure code as the gzip branch; avoids calling fread() on a NULL stream*/
	if (!parser->f_in) return GF_IO_ERR;
	fread(szLine, 1, 4, parser->f_in);
#else
	gzInput = gzopen(fileName, "rb");
	if (!gzInput) return GF_IO_ERR;
	parser->gz_in = gzInput;
	/*init SAX parser (unicode setup)*/
	gzread(gzInput, szLine, 4);
#endif
	szLine[4] = szLine[5] = 0;
	e = gf_xml_sax_init(parser, szLine);
	if (e) return e;
	/*the 4 probe bytes have already been consumed from the input*/
	parser->file_pos = 4;
	/* souchay : not sure for next 2 lines, but it works better it seems */
	parser->elt_start_pos = 0;
	parser->current_pos = 0;
	return xml_sax_read_file(parser);
}
/**
 * \brief    Main function
 * \details  --
 * \param    int argc
 * \param    char const** argv
 * \return   \e int
 */
int main( int argc, char const** argv )
{
  /*--------------------------------*/
  /* 1) Read command line arguments */
  /*--------------------------------*/
  size_t      rep                      = 0;
  std::string optional_filename        = "";
  std::string optional_population_path = "";
  readArgs(argc, argv, rep, optional_filename, optional_population_path);
  
  /*--------------------------------*/
  /* 2) Load parameters from file   */
  /*--------------------------------*/
  printHeader();
  Parameters* parameters = new Parameters();
  bool load_successful   = false;
  if (strcmp(optional_filename.c_str(), "") != 0)
  {
    load_successful = parameters->load_parameters_from_file(optional_filename);
  }
  else
  {
    load_successful = parameters->load_parameters_from_file(DEFAULT_FILENAME);
  }
  if (!load_successful)
  {
    std::cout << "Error during parameters loading.\n";
    exit(EXIT_FAILURE);
  }
  
  /*--------------------------------*/
  /* 3) Load the evolved population */
  /*--------------------------------*/
  Population* evolved_population = NULL;
  if (strcmp(optional_population_path.c_str(), "") != 0)
  {
    gzFile pop_file    = gzopen(optional_population_path.c_str(), "r");
    evolved_population = new Population(parameters, pop_file);
    gzclose(pop_file);
  }
  else
  {
    gzFile pop_file    = gzopen(DEFAULT_POPULATION_PATH.c_str(), "r");
    evolved_population = new Population(parameters, pop_file);
    gzclose(pop_file);
  }
  
  /*--------------------------------*/
  /* 4) Run the post-treatment      */
  /*--------------------------------*/
  measure_frequency_dependent_fitness(parameters, evolved_population, rep);
  
  /*--------------------------------*/
  /* 5) Free the memory             */
  /*--------------------------------*/
  delete evolved_population;
  evolved_population = NULL;
  delete parameters;
  parameters = NULL;
  
  return EXIT_SUCCESS;
}
Esempio n. 5
0
int BfastBAFConvert(int argc, char *argv[])
{
	FILE *fpIn=NULL, *fpOut=NULL;
	gzFile fpInGZ=NULL, fpOutGZ=NULL;
	long long int counter;
	char inputFileName[MAX_FILENAME_LENGTH]="\0";
	char outputFileName[MAX_FILENAME_LENGTH]="\0";
	char fastaFileName[MAX_FILENAME_LENGTH]="\0";
	char outputID[MAX_FILENAME_LENGTH]="\0";
	char *readGroupFileName=NULL, *readGroup=NULL, *readGroupString=NULL;
	char *last;
	int outputType=1; // BAF2TEXT
	int outputSubType=TextOutput;
	int inputType=BinaryInput;
	int c, argnum;
	AlignedRead a;
	RGBinary rg;
	char fileExtension[256]="\0";

	// Get parameters
	while((c = getopt(argc, argv, "f:o:r:O:h")) >= 0) {
		switch(c) {
			case 'O': outputType = atoi(optarg); break;
			case 'f': strcpy(fastaFileName, optarg); break;
			case 'o': strcpy(outputID, optarg); break;
			case 'r': readGroupFileName=strdup(optarg); break;
			case 'h':
					  BfastBAFConvertUsage(); return 1;
			default: fprintf(stderr, "Unrecognized option: -%c\n", c); return 1;
		}
	}

	if(argc == optind) {
		BfastBAFConvertUsage(); return 1;
	}

	/* Only read in the brg if necessary */
	switch(outputType) {
		case 2:
		case 3:
		case 4:
			if(0 == strlen(fastaFileName)) {
				PrintError(Name, "fastaFileName", "Required command line argument", Exit, InputArguments);
			}
			RGBinaryReadBinary(&rg,
					NTSpace,
					fastaFileName);
			break;
		default:
			break;
	}

	/* Set types and file extension */
	switch(outputType) {
		case 0:
			outputType=BAF;
			inputType=TextInput;
			outputSubType=BinaryOutput;
			strcat(fileExtension, BFAST_ALIGNED_FILE_EXTENSION);
			break;
		case 1:
			outputType=BAF;
			inputType=BinaryInput;
			outputSubType=TextOutput;
			strcat(fileExtension, "txt");
			break;
		case 2:
			outputType=SAM;
			inputType=BinaryInput;
			outputSubType=TextOutput;
			strcat(fileExtension, BFAST_SAM_FILE_EXTENSION);
			if(NULL != readGroupFileName) {
				readGroup=ReadInReadGroup(readGroupFileName);
				readGroupString=ParseReadGroup(readGroup);
			}
			break;
		default:
			PrintError(Name, NULL, "Could not understand output type", Exit, OutOfRange);
	}

	for(argnum=optind;argnum<argc;argnum++) {
		strcpy(inputFileName, argv[argnum]);

		/* Create output file name */
		last = StrStrGetLast(inputFileName,
				BFAST_ALIGNED_FILE_EXTENSION);
		if(NULL == last) {
			last = StrStrGetLast(inputFileName, "txt");
			if(NULL == last) {
				PrintError(Name, inputFileName, "Could not recognize file extension", Exit, OutOfRange);
			}
		}

		outputFileName[0]='\0';
		strncpy(outputFileName, inputFileName, (last - inputFileName));
		outputFileName[(last-inputFileName)]='\0';
		strcat(outputFileName, fileExtension);

		/* Open the input file */
		if(BinaryInput == inputType) {
			if(!(fpInGZ=gzopen(inputFileName, "rb"))) {
				PrintError(Name, inputFileName, "Could not open file for reading", Exit, OpenFileError);
			}
		}
		else {
			if(!(fpIn=fopen(inputFileName, "rb"))) {
				PrintError(Name, inputFileName, "Could not open file for reading", Exit, OpenFileError);
			}
		}
		/* Open the output file */
		if(BinaryOutput == outputSubType) {
			if(!(fpOutGZ=gzopen(outputFileName, "wb"))) {
				PrintError(Name, outputFileName, "Could not open file for writing", Exit, OpenFileError);
			}
		}
		else {
			if(!(fpOut=fopen(outputFileName, "wb"))) {
				PrintError(Name, outputFileName, "Could not open file for writing", Exit, OpenFileError);
			}
		}

		fprintf(stderr, "Input:%s\nOutput:%s\n", inputFileName, outputFileName);

		/* Print Header */
		AlignedReadConvertPrintHeader(fpOut, &rg, outputType, readGroup);
		/* Initialize */
		AlignedReadInitialize(&a);
		counter = 0;
		fprintf(stderr, "Currently on:\n0");
		/* Read in each match */
		while((TextInput == inputType && EOF != AlignedReadReadText(&a, fpIn)) ||
				(BinaryInput == inputType && EOF != AlignedReadRead(&a, fpInGZ))) {
			if(counter%BAFCONVERT_ROTATE_NUM==0) {
				fprintf(stderr, "\r%lld",
						counter);
			}
			counter++;
			/* Print each match */
			AlignedReadConvertPrintOutputFormat(&a,
					&rg,
					fpOut,
					fpOutGZ,
					outputID,
					readGroupString,
					-1,
					NULL,
					outputType,
					1,
                                        0,
					outputSubType);
			AlignedReadFree(&a);
		}
		fprintf(stderr, "\r%lld\n",
				counter);
		/* Close the input file */
		if(TextInput == inputType) {
			fclose(fpIn);
		}
		else {
			gzclose(fpInGZ);
		}
		/* Close the output file */
		if(TextOutput == outputSubType) {
			fclose(fpOut);
		}
		else {
			gzclose(fpOutGZ);
		}
	}
	if(SAM == outputType) {
		RGBinaryDelete(&rg);
	}
	free(readGroupFileName);
	free(readGroup);
	free(readGroupString);

	fprintf(stderr, "Terminating successfully!\n");
	return 0;
}
Esempio n. 6
0
/* Close callback for a gzip-backed handle: closes the wrapped stream with
 * gzclose() (its status is not inspected), releases the wrapper itself with
 * xfree(), and always reports success by returning 0. */
static int z_fclose(void *_fh)
{
      struct z_file *wrapper = (struct z_file *)_fh;

      gzclose(wrapper->file);
      xfree(wrapper);

      return 0;
}
Esempio n. 7
0
/*
 * Validates a downloaded file, decompresses it and moves the result over the
 * current database file.
 *
 * gu                - updater state; verbose and database_dir are read.
 * gzipfile          - path of the downloaded file; it is unlinked on every
 *                     return path.
 * geoip_filename    - destination path; the data is first written to
 *                     "<geoip_filename>.test" and then renamed into place.
 * expected_file_md5 - 32-character hex MD5 the decompressed data must match.
 *
 * Returns OK when the database was replaced or when the download only says
 * "No new updates available"; returns ERROR when the first line cannot be read
 * or the file does not start with the gzip magic bytes. All other failures
 * terminate the process through exit_if()/exit_unless().
 */
static int gunzip_and_replace(geoipupdate_s * gu, const char *gzipfile,
                              const char *geoip_filename,
                              const char *expected_file_md5)
{
    gzFile gz_fh;
    FILE *fh = fopen(gzipfile, "rb");
    exit_if(NULL == fh, "Can't open %s\n", gzipfile);
    size_t bsize = 8096;
    char *buffer = (char *)xmalloc(bsize);
    // Only the first line is needed to tell a plain-text reply from gzip data.
    ssize_t read_bytes = my_getline(&buffer, &bsize, fh);
    exit_if(-1 == fclose(fh), "Error closing stream: %s", strerror(errno));
    if (read_bytes < 0) {
        fprintf(stderr, "Read error %s\n", gzipfile);
        unlink(gzipfile);
        free(buffer);
        return ERROR;
    }
    const char *no_new_upd = "No new updates available";
    if (!strncmp(no_new_upd, buffer, strlen(no_new_upd))) {
        say_if(gu->verbose, "%s\n", no_new_upd);
        unlink(gzipfile);
        free(buffer);
        return OK;
    }
    if (strncmp(buffer, "\x1f\x8b", 2)) {
        // error not a zip file
        unlink(gzipfile);
        printf("%s is not a valid gzip file\n", gzipfile);
        // Release the line buffer here too, like the other early returns do.
        free(buffer);
        return ERROR;
    }

    // We do this here as we have to check that there is an update before
    // we check for the header.
    exit_unless( 32 == strnlen(expected_file_md5, 33),
                 "Did not receive a valid expected database MD5 from server\n");

    char *file_path_test;
    xasprintf(&file_path_test, "%s.test", geoip_filename);
    say_if(gu->verbose, "Uncompress file %s to %s\n", gzipfile, file_path_test);
    gz_fh = gzopen(gzipfile, "rb");
    exit_if(gz_fh == NULL, "Can't open %s\n", gzipfile);
    FILE *fhw = fopen(file_path_test, "wb");
    exit_if(fhw == NULL, "Can't open %s\n", file_path_test);

    // Stream the decompressed data into the temporary file, reusing buffer.
    for (;; ) {
        int amt = gzread(gz_fh, buffer, bsize);
        if (amt == 0) {
            break;              // EOF
        }
        exit_if(amt == -1, "Gzip read error while reading from %s\n", gzipfile);
        exit_unless(fwrite(buffer, 1, amt, fhw) == (size_t)amt,
                    "Gzip write error\n");
    }
    exit_if(-1 == fclose(fhw), "Error closing stream: %s", strerror(errno));
    exit_if(gzclose(gz_fh) != Z_OK, "Gzip read error while closing from %s\n",
            gzipfile);
    free(buffer);

    // Verify the decompressed data before it replaces the live database.
    char actual_md5[33];
    md5hex(file_path_test, actual_md5);
    exit_if(strncasecmp(actual_md5, expected_file_md5, 32),
            "MD5 of new database (%s) does not match expected MD5 (%s)",
            actual_md5, expected_file_md5);

    say_if(gu->verbose, "Rename %s to %s\n", file_path_test, geoip_filename);
    int err = rename(file_path_test, geoip_filename);
    exit_if(err, "Rename %s to %s failed\n", file_path_test, geoip_filename);

    // fsync directory to ensure the rename is durable
    int dirfd = open(gu->database_dir, O_DIRECTORY);
    exit_if(-1 == dirfd, "Error opening database directory: %s",
            strerror(errno));
    exit_if(-1 == fsync(dirfd), "Error syncing database directory: %s",
            strerror(errno));
    exit_if(-1 == close(dirfd), "Error closing database directory: %s",
            strerror(errno));
    exit_if(-1 == unlink(gzipfile), "Error unlinking %s: %s", gzipfile,
            strerror(errno));

    free(file_path_test);
    return OK;
}
Esempio n. 8
0
// Feature extraction driver.
//
// Arguments: argv[1] is a template file read line by line, argv[2] is the
// input opened through zlib, argv[3] is the column size given to the Sequence
// and argv[4] is the bound passed to recalldump(). Every non-comment template
// line becomes a tmpl; each time an end-of-sequence line is met in the input,
// every template is expanded at every row of the buffered sequence and the
// resulting feature is inserted into the dictionary. Finally the dictionary
// table is walked and dumped with recalldump().
//
// Exits with status 1 on usage or I/O errors, returns 0 otherwise.
int main (int argc, char **argv)
{
   if (argc < 5)
   {
      fprintf (stderr, "template inputfile.gz colsize bound\n");
      exit (1);
   }
   FILE *fp = NULL;
   if ((fp = fopen(*(argv+1),"r")) == NULL)
   {
      fprintf (stderr, "Couldn't open %s\n", *(argv+1));
      exit (1);
   }
   PoolAlloc ac(256, 1000000);
   std::list<tmpl*> tmpls;
   char buf[BUFSIZE];
   while (fgets(buf, BUFSIZE, fp) != NULL)
   {
      MyUtil::chomp(buf);
      if (MyUtil::IsCommentOut(buf))
      {
         continue;
      }
      tmpl *t = new tmpl(buf, &ac);
      tmpls.push_back(t);
   }
   // All templates are loaded; release the template file handle.
   fclose(fp);
   fp = NULL;

   int fd = open(*(argv+2), O_RDONLY);
   if (fd < 0)
   {
      fprintf (stderr, "Couldn't open %s\n",*(argv+2));
      exit (1);
   }
   gzFile input = gzdopen(fd, GZ_MODE);
   if (input == NULL)
   {
      fprintf (stderr, "failed to gzdopen\n");
      exit (1);
   }

   int colsize = 0;
   sscanf (*(argv+3),"%d",&colsize);
   int bound = 0;
   sscanf (*(argv+4),"%d",&bound);

   Sequence sq;
   sq.setColSize(colsize);
   sq.init();

   Dic features(&ac, CountUp);

   while (gzgets(input, buf, BUFSIZE) != NULL)
   {
      MyUtil::chomp(buf);
      if (MyUtil::IsEOS(buf))
      {
         // End of a sequence: expand every template at every row, then reset.
         int size = (int)sq.getRowSize();
         std::list<tmpl*>::iterator it = tmpls.begin();
         for (; it != tmpls.end(); ++it)
         {
            for (int i = 0; i < size; i++)
            {
               char *feature = (*it)->expand(&sq,i);
               //fprintf(stdout, "%s\n",feature);
               features.insert(feature);
               ac.release(feature);
            }
         }
         sq.clear();
         continue;
      }
      sq.push(buf);
   }

   // Dump both subtrees of every non-empty table slot.
   nodeptr nil = features.getnil();
   for (int i = HASHSIZE-1; i >= 0; i--)
   {
      nodeptr *p = features.table+i;
      if (*p != nil)
      {
         recalldump(nil, (*p)->left, bound);
         recalldump(nil, (*p)->right, bound);
      }
   }

   if (gzclose(input) != Z_OK)
   {
      fprintf (stderr,"gzclose failed\n");
      exit (1);
   }
   return 0;
}