コード例 #1
0
ファイル: ceeq_subsample.c プロジェクト: linsalrob/ceeqlib
/*
 * subsample: read a whole fastq file and choose nseqs of its sequence ids.
 *
 * nseqs      - number of sequences requested; lowered to the number
 *              actually read when the file holds fewer
 * fastqfile  - path of the fastq file to read
 * seqs       - hash of fastq records that the per-id lookups are run against
 * compressed - non-zero reads with read_fastq_gz, zero with read_fastq
 *
 * Returns the (possibly lowered) number of sequences.
 */
int subsample(int nseqs, char *fastqfile, struct fastq *seqs[], int compressed) {

	// read the fastq file into a temporary hash
	struct fastq *allseqs[HASHSIZE] = {NULL};

	int read_seqs = 0;
	if (compressed)
		read_seqs = read_fastq_gz(fastqfile, allseqs);
	else
		read_seqs = read_fastq(fastqfile, allseqs);

	if (read_seqs < nseqs) {
		fprintf(stderr, "You requested %d sequences but there are only %d in the file!\n", nseqs, read_seqs);
		nseqs = read_seqs;
	}

	// Nothing was read: the id array below would be a variable length
	// array of size zero (or negative), which is undefined behaviour.
	if (read_seqs <= 0)
		return nseqs;

	// get all the ids from the sequences
	char *ids[read_seqs];
	get_ids(ids, allseqs);

	// subsample those IDs
	char **chosen = subsample_n(read_seqs, ids, nseqs);

	// NOTE(review): the lookups below query the caller's `seqs`, not the
	// `allseqs` hash that was just read, and their results are not stored
	// anywhere -- confirm whether `seqs` was meant to be populated here.
	for (int i=0; i<nseqs; i++) {
		char *info = get_seq_information(chosen[i], seqs);
		char *seq = get_sequence(chosen[i], seqs);
		char *qua = get_quality(chosen[i], seqs);

		(void) info;
		(void) seq;
		(void) qua;
	}
	return nseqs;
}
コード例 #2
0
ファイル: filter.c プロジェクト: Pency/BSPT
/*
 * filter_se_fastq_bz2: filter the single-end reads of opts->r1.
 *
 * Every read accepted by read_fastq is passed through check_read and
 * filter_all; reads for which filter_all returns 1 are written with
 * output_fastq to "<opts->output>.flt". A summary is printed to stdout.
 *
 * NOTE(review): despite the "bz2" in the name the input is opened with
 * gzopen_report -- confirm which compression is actually expected.
 *
 * Returns 0 on success, -1 when the input or output cannot be opened.
 */
int filter_se_fastq_bz2(FLT_OPTS *opts){
	int index=1;        /* 1-based number handed to read_fastq */
	int processed = 0;  /* reads that were actually read and filtered */
	int stat_left = 0;
	int left = 0;

	SEQ_QUAL item=init_read();
	gzFile fp=gzopen_report(opts->r1,"r");
	if(!fp){
		free_read(&item);
		return -1;
	}

	/* NOTE(review): strcat appends ".flt" to opts->output in place, so the
	   caller's option string is modified and must have room for 4 more
	   characters -- confirm the buffer size at the definition of FLT_OPTS. */
	FILE *fo=fopen_report(strcat(opts->output,".flt"),"w+");
	if(!fo){
		gzclose(fp);
		free_read(&item);
		return -1;
	}

	/* Count inside the body: incrementing in the loop condition would also
	   count the final, failing read. */
	while(read_fastq(fp,&item,index) > 0){
		check_read(&item,2);
		left=filter_all(&item, opts);

		if(left == 1){
			output_fastq(fo, &item);
			stat_left++;
		}
		processed++;
		index++;
	}

	printf("Totally %d reads were processed\n",processed);
	/* "%%" prints a literal percent sign; avoid dividing by zero reads. */
	printf("  file [ %s ]: %d reads were left (%.2f%%)\n",opts->r1,stat_left,
	       processed > 0 ? (float) stat_left*100/processed : 0.0f);
	free_read(&item);
	gzclose(fp);        /* fp is a gzFile, so it must not go through fclose */
	fclose(fo);

	return 0;
}
コード例 #3
0
ファイル: utils.c プロジェクト: dbgoodman/SeqPrep
/* next_fastqs
   Clear every buffer of curr_sqp, then read one record from the
   forward stream (ffq) and one from the reverse stream (rfq) into it.
   When both read_fastq calls return 1 and f_r_id_check accepts the
   two IDs, the reverse sequence and qualities are additionally copied
   into rc_rseq / rc_rqual and passed through revcom_seq / rev_qual.
   Returns true only in that case, false otherwise.
 */
inline bool next_fastqs( gzFile ffq, gzFile rfq, SQP curr_sqp, bool p64 ) {
  size_t fwd_id_len = 0;
  size_t rev_id_len = 0;

  // start from a clean record: forward read buffers ...
  memset(curr_sqp->fid,   '\0', MAX_SEQ_LEN);
  memset(curr_sqp->fseq,  '\0', MAX_SEQ_LEN);
  memset(curr_sqp->fqual, '\0', MAX_SEQ_LEN);
  // ... reverse read buffers ...
  memset(curr_sqp->rid,   '\0', MAX_SEQ_LEN);
  memset(curr_sqp->rseq,  '\0', MAX_SEQ_LEN);
  memset(curr_sqp->rqual, '\0', MAX_SEQ_LEN);
  // ... derived buffers
  memset(curr_sqp->rc_rseq,  '\0', MAX_SEQ_LEN);
  memset(curr_sqp->rc_rqual, '\0', MAX_SEQ_LEN);
  memset(curr_sqp->merged_seq,  '\0', MAX_SEQ_LEN+MAX_SEQ_LEN);
  memset(curr_sqp->merged_qual, '\0', MAX_SEQ_LEN+MAX_SEQ_LEN);
  curr_sqp->rlen = 0;
  curr_sqp->flen = 0;

  // one record from each file of the pair
  const int fwd_status = read_fastq( ffq, curr_sqp->fid, curr_sqp->fseq,
      curr_sqp->fqual, &fwd_id_len, &(curr_sqp->flen), p64 );
  const int rev_status = read_fastq( rfq, curr_sqp->rid, curr_sqp->rseq,
      curr_sqp->rqual, &rev_id_len, &(curr_sqp->rlen), p64 );

  if ( fwd_status != 1 || rev_status != 1 ) {
    return false;
  }

  // the ID check only runs once both reads have succeeded
  if ( !f_r_id_check( curr_sqp->fid, fwd_id_len, curr_sqp->rid, rev_id_len ) ) {
    return false;
  }

  // rlen+1 so the copy includes the terminator that follows the data
  strncpy(curr_sqp->rc_rseq,  curr_sqp->rseq,  curr_sqp->rlen+1);
  strncpy(curr_sqp->rc_rqual, curr_sqp->rqual, curr_sqp->rlen+1);
  rev_qual(curr_sqp->rc_rqual, curr_sqp->rlen);
  revcom_seq(curr_sqp->rc_rseq, curr_sqp->rlen);
  return true;
}
コード例 #4
0
ファイル: file_utils.c プロジェクト: Pency/BSPT
/*
 * detect_datatype: inspect the start of `file` and return a bit mask
 * describing it.
 *
 *   - first byte is '>'            -> FILE_FASTA
 *   - read_fastq succeeds (>= 0)   -> FILE_FASTQ plus FILE_PHRED64 or
 *                                     FILE_PHRED33, chosen from the quality
 *                                     characters of up to 100 reads
 *   - otherwise                    -> FILE_UNKN
 *
 * The quality encoding is decided from whatever reads were sampled, so
 * files with fewer than 100 reads are classified as well.
 */
int detect_datatype(char *file){

	int dataType=0;
	int max = 0;
	int min = 999;
	int sample = 100;   /* maximum number of reads to inspect */
	int examined = 0;   /* reads inspected so far; also the read index */

	SEQ_QUAL item = init_read();
	gzFile zfp = gzopen_report(file,"r");
	if(!zfp){
		free_read(&item);
		return dataType | FILE_UNKN;
	}

	if(gzgetc(zfp) == '>'){
		dataType |= FILE_FASTA;
	}else{
		/* put back the byte consumed by the FASTA probe */
		gzseek(zfp, 0L, SEEK_SET);
		if(read_fastq(zfp,&item,examined) >= 0){
			dataType |= FILE_FASTQ;
			do{
				/* the last character of the quality string is skipped,
				   as in the original scan */
				size_t qlen = strlen(item.qual);
				for(size_t pos=0;pos+1<qlen;pos++){
					min = MIN(min, item.qual[pos]);
					max = MAX(max, item.qual[pos]);
				}
				examined++;
				/* same success test (>= 0) as the first read above */
			}while(examined < sample && read_fastq(zfp,&item,examined) >= 0);

			if(max >= 75)
				dataType|=FILE_PHRED64;
			else{
				dataType|=FILE_PHRED33;
				if(min > 58)
					warning_msg("Can not identified quality score type in 100 read samples, assume phred+33\n");
			}
		}else
			dataType |= FILE_UNKN;
	}

	gzclose(zfp);
	free_read(&item);
	return dataType;
}
コード例 #5
0
ファイル: CAReader.cpp プロジェクト: mckinsel/OverlapViewer
void CAReader_create_ovl_list(std::string fastq_filename, std::string gkp_store_name,
                              std::string ovl_store_name, std::vector<Overlap_T>* ovl_list)
{
  std::vector<std::string> fastq_names;
  std::vector<size_t> fastq_lengths;
  read_fastq(fastq_filename, &fastq_names, &fastq_lengths);

  gkStore* gkp_store = new gkStore(gkp_store_name.c_str(), false, false);
  OverlapStore *ovl_store = AS_OVS_openOverlapStore(ovl_store_name.c_str());

  uint32_t iidMin = 0;
  uint32_t iidMax = gkp_store->gkStore_getNumFragments();

  uint32_t ovl_len = 0;
  uint32_t ovl_max = 25000;

  OVSoverlap* overlaps = new OVSoverlap [ovl_max];

  for(uint32_t iid=iidMin; iid < iidMax; iid++) {
    loadOverlaps(iid, overlaps, ovl_len, ovl_max, ovl_store, NULL);

    for(uint32_t i=0; i<ovl_len; i++) {
      Overlap_T ovl = {"","",0,0,0,0,0,0,false};

      ovl.name_a = fastq_names[overlaps[i].a_iid - 1];
      ovl.name_b = fastq_names[overlaps[i].b_iid - 1];
      
      ovl.start_a = overlaps[i].dat.obt.a_beg;
      ovl.start_b = overlaps[i].dat.obt.b_beg;

      ovl.end_a = overlaps[i].dat.obt.a_end;
      ovl.end_b = overlaps[i].dat.obt.b_end_lo | (overlaps[i].dat.obt.b_end_hi << 9);
      
      if(ovl.end_b < ovl.start_b) {
        int tmp = ovl.start_b;
        ovl.start_b = ovl.end_b;
        ovl.end_b = tmp;
      }

      ovl.length_a = fastq_lengths[overlaps[i].a_iid - 1];
      ovl.length_b = fastq_lengths[overlaps[i].b_iid - 1];
      
      ovl.forward = overlaps[i].dat.obt.fwd; 

      ovl_list->push_back(ovl);
    }
  }

  delete[] overlaps;
  AS_OVS_closeOverlapStore(ovl_store);
  delete gkp_store;

}
コード例 #6
0
ファイル: whole_sequence_parser.hpp プロジェクト: dfajar2/KAT
  inline bool produce(uint32_t i, sequence_list& buff) {
    stream_status& st = streams_[i];

    switch(st.type) {
    case FASTA_TYPE:
      read_fasta(st, buff);
      break;
    case FASTQ_TYPE:
      read_fastq(st, buff);
      break;
    case DONE_TYPE:
      return true;
    }

    if(st.stream->good())
      return false;

    // Reach the end of file, close current and try to open the next one
    open_next_file(st);
    return false;
  }
コード例 #7
0
ファイル: fastq2fasta_sorted.c プロジェクト: johned0/ceeqlib
/*
 * Read the fastq file named by argv[1] into a hash and print each record
 * as fasta (">id", then the sequence) in the order given by
 * sorted_sequence_ids. Without a file argument, help() is shown and the
 * program exits with status 0.
 */
int main(int argc, char *argv[]) {
	if (argc < 2) {
		help();
		exit(0);
	}

	struct fastq *seqs[HASHSIZE] = {NULL};
	int nseqs = read_fastq(argv[1], seqs);

	char **ids = sorted_sequence_ids(seqs, nseqs);

	int i = 0;
	while (i < nseqs) {
		char *id = ids[i];
		int bucket = hash(id);

		/* walk the whole chain of this bucket; every record whose id
		   matches is printed */
		struct fastq *entry = seqs[bucket];
		while (entry != NULL) {
			if (strcmp(entry->seqid, id) == 0)
				printf(">%s\n%s\n", id, entry->seq);
			entry = entry->next;
		}

		i++;
	}

	return 0;
}
コード例 #8
0
// Builds the map from a file: constructs a FastqReader from `filename`
// and hands it to read_fastq.
ReferenceMap::ReferenceMap(const std::string& filename) {
  FastqReader fastq_input{filename};
  read_fastq(fastq_input);
}
コード例 #9
0
ファイル: filter.c プロジェクト: Pency/BSPT
/*
 * filter_pe_fastq: filter the paired-end reads of opts->r1 / opts->r2.
 *
 * Reads one record from each input per iteration and runs filter_all on
 * both. Pairs where both reads pass (filter_all == 1) go to
 * "<output>/<name1>.flt" and "<output>/<name2>.flt"; a read whose mate
 * failed goes to "<output>/<name2>.flt.s". <name1>/<name2> are whatever
 * file_name() derives from opts->r1 / opts->r2. A summary is printed to
 * stdout.
 *
 * Returns 0 on success, -1 when a file cannot be opened or an output path
 * does not fit the path buffer. Everything acquired so far is released on
 * every path.
 */
int filter_pe_fastq(FLT_OPTS *opts){
	int rc = -1;
	int left1=0,left2=0;
	int stat_single1 = 0;
	int stat_single2 = 0;
	int stat_paired = 0;
	int index=1;
	int pairs = 0;
	int n = 0;
	char fn[128];
	char outfile[128];
	gzFile fp1 = NULL;
	gzFile fp2 = NULL;
	FILE *fo1 = NULL;
	FILE *fo2 = NULL;
	FILE *fos = NULL;
	SEQ_QUAL item1=init_read();
	SEQ_QUAL item2=init_read();

	fp1=gzopen_report(opts->r1,"r");
	if(!fp1)	goto cleanup;
	fp2=gzopen_report(opts->r2,"r");
	if(!fp2)	goto cleanup;

	/* snprintf bounds every path to fn; a truncated path is an error
	   rather than a silently different file name */
	file_name(outfile,opts->r1);
	n = snprintf(fn,sizeof fn,"%s/%s.flt",opts->output,outfile);
	if(n < 0 || (size_t)n >= sizeof fn){
		fprintf(stderr,"Output path for [ %s ] is too long\n",opts->r1);
		goto cleanup;
	}
	fo1=fopen_report(fn,"w+");
	if(!fo1)	goto cleanup;

	file_name(outfile,opts->r2);
	n = snprintf(fn,sizeof fn,"%s/%s.flt",opts->output,outfile);
	if(n < 0 || (size_t)n >= sizeof fn){
		fprintf(stderr,"Output path for [ %s ] is too long\n",opts->r2);
		goto cleanup;
	}
	fo2=fopen_report(fn,"w+");
	if(!fo2)	goto cleanup;

	n = snprintf(fn,sizeof fn,"%s/%s.flt.s",opts->output,outfile);
	if(n < 0 || (size_t)n >= sizeof fn){
		fprintf(stderr,"Output path for [ %s ] is too long\n",opts->r2);
		goto cleanup;
	}
	fos=fopen_report(fn,"w+");
	if(!fos)	goto cleanup;

	while(read_fastq(fp1,&item1,index) >= 0 && read_fastq(fp2,&item2,index) >= 0)
	{
		left1=filter_all(&item1, opts);
		left2=filter_all(&item2, opts);

		if(left1 == 1 && left2 == 1){
			output_fastq(fo1, &item1);
			output_fastq(fo2, &item2);
			stat_paired++;
		}else{
			if(left1 == 1){
				output_fastq(fos, &item1);
				stat_single1++;
			}
			if(left2 == 1){
				output_fastq(fos, &item2);
				stat_single2++;
			}
		}
		index++;
	}

	/* index started at 1 and advanced once per pair that was read */
	pairs = index-1;

	/* "%%" prints a literal percent sign; percentages are 0 for empty input */
	printf("Totally %d reads were processed\n",pairs*2);
	printf("  file [ %s ]: %d reads were left (%.2f%%)\n",opts->r1,stat_paired+stat_single1,
	       pairs > 0 ? (float) (stat_paired+stat_single1)*100/pairs : 0.0f);
	printf("  file [ %s ]: %d reads were left (%.2f%%)\n",opts->r2,stat_paired+stat_single2,
	       pairs > 0 ? (float) (stat_paired+stat_single2)*100/pairs : 0.0f);
	printf("After filtering %d reads are paired in each file (%.2f%%)\n",stat_paired,
	       pairs > 0 ? (float) stat_paired*100/pairs : 0.0f);
	printf("  file [ %s ]: %d reads were left as single end\n",opts->r1,stat_single1);
	printf("  file [ %s ]: %d reads were left as single end\n",opts->r2,stat_single2);
	rc = 0;

cleanup:
	free_read(&item1);
	free_read(&item2);
	if(fp1)	gzclose(fp1);
	if(fp2)	gzclose(fp2);
	if(fo1)	fclose(fo1);
	if(fo2)	fclose(fo2);
	if(fos)	fclose(fos);

	return rc;
}