Exemple #1
0
/*
 * Stream a FASTA/FASTQ file (opened through gzopen()) and pass on the records
 * selected by a regular expression.
 *
 * Each record's name and comment are tested with search_header(), whose
 * result is used as a truth value. Without `exclude`, records where either
 * one matches are handed to size_filter(); with `exclude`, the records where
 * neither matches are handed on instead. Every record not handed on is
 * counted as excluded.
 *
 *   input_file    path given to gzopen(); the process exits with
 *                 EXIT_FAILURE if it cannot be opened
 *   re, re_extra  forwarded unchanged to search_header()
 *   min, max, length, convert, just_count
 *                 forwarded unchanged to size_filter()
 *   exclude       nonzero inverts the selection
 *
 * Returns the sum of size_filter()'s return values. When just_count is
 * nonzero the output/excluded totals are also printed to stdout.
 */
int pull_by_re(char *input_file, pcre *re, pcre_extra *re_extra, int min, int max, int length, int exclude, int convert, int just_count) {
	gzFile fp;
	int count = 0;
	int excluded = 0;
	int is_fasta = 0; /* assume fastq */
	kseq_t *seq;

	/* open the sequence file */
	fp = gzopen(input_file,"r");
	if (!fp) {
		fprintf(stderr,"%s - Couldn't open fasta file %s\n",progname,input_file);
		exit(EXIT_FAILURE);
	}

	seq = kseq_init(fp);

	/* determine file type: no quality buffer after the first read is taken to mean FASTA */
	kseq_read(seq);
	is_fasta = seq->qual.s == NULL ? 1 : 0;
	gzrewind(fp);
	kseq_rewind(seq); /* rewind to beginning for main loop */

	if (verbose_flag) {
		if (is_fasta)
			fprintf(stderr, "Input is FASTA format\n");
		else
			fprintf(stderr, "Input is FASTQ format\n");
	}

	/* walk every record and test its header against the regex */
	while (kseq_read(seq) >= 0) {
		/*
		 * kseq_read() only resets comment.l for a record without a comment;
		 * comment.s is then either NULL or still holds the previous record's
		 * text. Search an empty string in that case instead of stale/NULL data.
		 */
		const char *comment = seq->comment.l > 0 ? seq->comment.s : "";
		int matched = search_header(re, re_extra, seq->name.s) || search_header(re, re_extra, comment);
		int keep = exclude ? !matched : matched;

		if (keep) {
			/* record selected, so check size/print */
			count += size_filter(seq, is_fasta, min, max, length, convert, just_count);
		} else {
			excluded++;
		}
	} /* end of seq traversal */
	kseq_destroy(seq);
	gzclose(fp); /* done reading file so close */

	if (just_count) {
		fprintf(stdout, "Total output: %i\n", count);
		fprintf(stdout, "Total excluded: %i\n", excluded);
	}
	return count;
}
Exemple #2
0
/**
   Read the number stored under key in header and verify that it was read.

   Delegates to search_header_num(). When is_nan() reports the result as NaN,
   the failure is reported through error(), quoting the key, the header and
   the raw text located by search_header(). The value is returned either way
   if error() returns.
*/
double search_header_num_valid(const char *header, const char *key) {
    double val=search_header_num(header, key);
    if(is_nan(val)) {
        /* search_header_num() yields NAN for a NULL header without calling
           search_header(), so do not call it with NULL here either; and never
           pass a NULL pointer to a %s conversion. */
        const char *raw=header?search_header(header, key):NULL;
        error("Unable to read %s from %s. val=%s\n", key,
              header?header:"(null)", raw?raw:"(null)");
    }
    return val;
}
Exemple #3
0
/**
   Look up key in header and parse the associated text as a number.

   Returns NAN when header is NULL or when search_header() finds nothing;
   otherwise returns what readstr_num() produces for the located text.
*/
double search_header_num(const char *header, const char *key) {
    /* A NULL header is never passed on to search_header(). */
    const char *text=header?search_header(header, key):NULL;
    return text?readstr_num(text, NULL):NAN;
}
Exemple #4
0
/* Feed one decoded message block to the search: a block carrying a header
   (block->hdr != NULL) goes to search_header(), any other block has its
   data/size passed to str_find_more(). Returns TRUE when the chosen
   matcher reports a hit, FALSE otherwise. */
static bool message_search_more_decoded2(struct message_search_context *ctx,
					 struct message_block *block)
{
	bool found;

	if (block->hdr == NULL)
		found = str_find_more(ctx->str_find_ctx, block->data, block->size) ? TRUE : FALSE;
	else
		found = search_header(ctx, block->hdr) ? TRUE : FALSE;
	return found;
}
/*
 * Program entry point.
 *
 * Drives three nested loops over the input held in the file-level state
 * (global_tmp / fileend / fin): the outer loop runs once per query, the
 * middle loop once per database entry (lines starting with '>'), and the
 * inner loop once per alignment (lines starting with " Score ="). Alignments
 * are emitted through output() subject to the Alignment_number and
 * Database_number limits, where a limit of 0 disables the limit. With
 * AllowSameScore other than 'F', entries whose score equals the previous one
 * are still let through once the limit has been reached.
 *
 * Returns 0 on normal completion, 1 if the protein name buffer cannot be
 * allocated.
 */
int main(int argc, char *argv[]){
  long Database_count, Alignment_count;
  int alignment_status;
  float preAlignmentScore=0, DatabaseScore=0, preDatabaseScore=0;
  unsigned int alignmentShow=1;

  interface(argc,argv); /* presumably parses the command line -- confirm */
  protein_name=(char *)malloc((size_t)protein_name_length);
  if(protein_name==NULL){
    /* protein_name[0] is written below; bail out instead of dereferencing NULL */
    fprintf(stderr,"Out of memory allocating the protein name buffer\n");
    return 1;
  }

  fileend=fgets_wrap(0, global_tmp,MAXLETTER-1,fin);

  Query_count=0;
  do{
  blast_start:
    /* Reset the per-query state. Jumps to blast_start restart here without
       re-evaluating the outer loop condition. */
    Alignment_count=0;
    Database_count=0;
    preAlignmentScore=0;
    preDatabaseScore=0;
    DatabaseScore=0;
    if(fileend==NULL) goto read_end;
    /* search query name */
    protein_name[0]='\0';
    search_qname_and_other();
    Database_count=0;
    do{
      if(fileend==NULL) goto read_end;
      /* search database name; a return of 1 restarts at the next query */
      if(search_dname_and_other()==1) goto blast_start;
      if(fileend==NULL) goto read_end;
      if(dname[0]=='\0' || qname[0]=='\0') {
        /* no database or query name available: restart at the next query */
        goto blast_start;
      }
      preDatabaseScore=DatabaseScore;
      DatabaseScore=0;
      Alignment_count=0;
      alignmentShow=1;

      do{
        if(fileend==NULL) goto read_end;
        /* read alignments */
        search_sp_name();
        search_score_e_value();
        search_identity_and_gaps();
        alignment_status=read_one_alignment();
        /* the first score seen for this database entry becomes its score */
        if(DatabaseScore<1){
          DatabaseScore=score;
        }
        if(Database_count==Database_number && Database_number>0){
          if(preDatabaseScore!=DatabaseScore || AllowSameScore=='F'){
            /* limit reached and this entry is not an allowed tie: push the
               count past the limit so the enclosing loop condition fails */
            Database_count++;
            break;
          }else{
            /* allowed tie with the previous entry: step back so the
               increment after this loop does not exceed the limit */
            Database_count--;
          }
        }
        /* once the alignment limit is reached, stop showing alignments
           unless this one ties the previous score and ties are allowed */
        if(Alignment_count>=Alignment_number &&
           (score!=preAlignmentScore || AllowSameScore=='F')){
          alignmentShow=0;
        }
        if(alignmentShow==1 || Alignment_number==0){
          /* only alignments for which output() returns nonzero are counted */
          if(output()!=0) Alignment_count++;
        }

        preAlignmentScore=score;
      }while(strncmp(global_tmp," Score =", 8)==0 &&
             global_tmp[0]!='>' && fileend!=NULL &&
             alignment_status!=1);
      if(Alignment_count!=0) {
        fprintf(fout,"\n");
        Database_count++;
      }
      protein_name[0]='\0';
    }while(global_tmp[0]=='>' &&
           fileend!=NULL &&
           (Database_number==0 || Database_count<=Database_number)&&
           alignment_status!=1);

    if(Database_count!=0) {
      fprintf(fout,"\n");
    }
    search_header();
  read_end:
    ;
  }while(fileend!=NULL && Query_count!=Query_number);


  /* optional run summary on the ferr stream */
  if (verbose>=1){
    fprintf(ferr,"filename %s output %ld sequences",filename,(Query_count-nohits));
    if (nohits!=0) fprintf(ferr,", no hits %d, total %ld",nohits, Query_count);
    fprintf(ferr,".\n");
  }
  return 0;
}