/**
 * Walk every record of a FASTA/FASTQ file (opened through zlib's gzopen) and
 * hand the records selected by the regex to size_filter().
 *
 * A record "matches" when search_header() reports a hit on its name or, failing
 * that, on its comment.  With `exclude` zero the matching records are passed
 * on; with `exclude` non-zero the non-matching records are passed on instead.
 * Everything else is tallied as excluded.
 *
 * @param input_file  path of the sequence file to read
 * @param re          compiled regex, forwarded to search_header()
 * @param re_extra    regex study data, forwarded to search_header()
 * @param min, max, length, convert, just_count
 *                    forwarded unchanged to size_filter()
 * @param exclude     non-zero inverts the selection (keep non-matching records)
 * @return sum of the values returned by size_filter() for the selected records
 *
 * Exits the process with EXIT_FAILURE if the file cannot be opened.  When
 * `just_count` is non-zero the two totals are printed to stdout.
 */
int pull_by_re(char *input_file, pcre *re, pcre_extra *re_extra, int min, int max, int length, int exclude, int convert, int just_count)
{
    gzFile handle;
    kseq_t *record;
    int kept = 0;
    int skipped = 0;
    int fasta_input;
    int status;

    handle = gzopen(input_file,"r");
    if (!handle) {
        fprintf(stderr,"%s - Couldn't open fasta file %s\n",progname,input_file);
        exit(EXIT_FAILURE);
    }
    record = kseq_init(handle);

    /*
     * Peek at the first record to decide the format: a record without a
     * quality string is treated as FASTA, anything else as FASTQ.  Then rewind
     * both the stream and the parser so the main loop starts from the top.
     */
    status = kseq_read(record);
    fasta_input = (record->qual.s == NULL);
    gzrewind(handle);
    kseq_rewind(record);

    if (verbose_flag)
        fputs(fasta_input ? "Input is FASTA format\n" : "Input is FASTQ format\n", stderr);

    while ((status = kseq_read(record)) >= 0) {
        /*
         * The name is tested first; the comment is only consulted when the
         * name did not match.
         * NOTE(review): comment.s is passed unconditionally -- confirm that
         * search_header() copes with a record that has no comment.
         */
        int header_hit = search_header(re, re_extra, record->name.s)
                      || search_header(re, re_extra, record->comment.s);

        /* A record is dropped when its match state equals the exclude flag. */
        if ((exclude != 0) == header_hit)
            skipped++;
        else
            kept += size_filter(record, fasta_input, min, max, length, convert, just_count);
    }

    kseq_destroy(record);
    gzclose(handle);

    if (just_count) {
        fprintf(stdout, "Total output: %i\n", kept);
        fprintf(stdout, "Total excluded: %i\n", skipped);
    }
    return kept;
}
/**
 * Read the number stored under `key` in `header` and verify that it was found.
 *
 * The lookup is delegated to search_header_num().  If that yields NaN, the
 * failure is reported through error() together with the raw text found for the
 * key.  Whether error() returns is not visible from this function; if it does,
 * the NaN is returned to the caller.
 *
 * @param header  header text to search (may be NULL; search_header_num() then
 *                yields NaN)
 * @param key     key whose numeric value is wanted
 * @return the parsed value, or NaN after the failure has been reported
 */
double search_header_num_valid(const char *header, const char *key) {
    double val=search_header_num(header, key);
    if(is_nan(val)) {
        /*
         * Both the header and the raw lookup result can be NULL on this path
         * (missing header, or key not present).  Handing NULL to a %s
         * conversion is undefined behaviour, so substitute printable
         * placeholders and do not call search_header() on a NULL header.
         */
        const char *raw = header ? search_header(header, key) : NULL;
        error("Unable to read %s from %s. val=%s\n", key,
              header ? header : "(null)",
              raw ? raw : "(not found)");
    }
    return val;
}
/**
 * Look up `key` in `header` and convert the associated text to a number.
 *
 * @param header  header text to search; NULL is accepted
 * @param key     key to look for, forwarded to search_header()
 * @return the value produced by readstr_num() for the text found under `key`,
 *         or NAN when `header` is NULL or search_header() finds nothing
 */
double search_header_num(const char *header, const char *key) {
    const char *text;

    if (header == NULL) {
        return NAN;
    }

    text = search_header(header, key);
    if (text == NULL) {
        return NAN; /* key not present */
    }

    return readstr_num(text, NULL);
}
/*
 * Feed one decoded message block to the search.
 *
 * A block carrying a header (block->hdr != NULL) is checked with
 * search_header(); any other block has its data/size handed to
 * str_find_more() on the context's string finder.
 *
 * Returns TRUE when the call that was made reports a hit, FALSE otherwise.
 */
static bool message_search_more_decoded2(struct message_search_context *ctx, struct message_block *block)
{
    bool found = FALSE;

    if (block->hdr == NULL) {
        /* No header on this block: search the raw data. */
        if (str_find_more(ctx->str_find_ctx, block->data, block->size))
            found = TRUE;
    } else if (search_header(ctx, block->hdr)) {
        found = TRUE;
    }

    return found;
}
int main(int argc, char *argv[]){ long Database_count, Alignment_count; int alignment_status; float preAlignmentScore=0, DatabaseScore=0, preDatabaseScore=0; unsigned int alignmentShow=1; interface(argc,argv); protein_name=(char *)malloc((size_t)protein_name_length); fileend=fgets_wrap(0, global_tmp,MAXLETTER-1,fin); Query_count=0; do{ blast_start: Alignment_count=0; Database_count=0; preAlignmentScore=0; preDatabaseScore=0; DatabaseScore=0; if(fileend==NULL) goto read_end; /* search query name */ protein_name[0]='\0'; search_qname_and_other(); Database_count=0; do{ if(fileend==NULL) goto read_end; /* search database name */ if(search_dname_and_other()==1) goto blast_start; if(fileend==NULL) goto read_end; if(dname[0]=='\0' || qname[0]=='\0') { goto blast_start; } preDatabaseScore=DatabaseScore; DatabaseScore=0; Alignment_count=0; alignmentShow=1; do{ if(fileend==NULL) goto read_end; /* read alignments */ search_sp_name(); search_score_e_value(); search_identity_and_gaps(); alignment_status=read_one_alignment(); if(DatabaseScore<1){ DatabaseScore=score; } /* printf("\n %ld: %f -> %f\n", Database_count, preDatabaseScore, DatabaseScore); */ if(Database_count==Database_number && Database_number>0){ if(preDatabaseScore!=DatabaseScore || AllowSameScore=='F'){ /* printf("break!!\n"); */ Database_count++; break; }else{ /* printf("###### SAME!!\n"); */ Database_count--; } } /* if(alignmentShow==1){ printf("\n %ld: %f -> %f\n", Alignment_count, preAlignmentScore, score); } */ if(Alignment_count>=Alignment_number && (score!=preAlignmentScore || AllowSameScore=='F')){ alignmentShow=0; } if(alignmentShow==1 || Alignment_number==0){ if(output()!=0) Alignment_count++; } preAlignmentScore=score; }while(strncmp(global_tmp," Score =", 8)==0 && global_tmp[0]!='>' && fileend!=NULL && alignment_status!=1); if(Alignment_count!=0) { fprintf(fout,"\n"); Database_count++; } protein_name[0]='\0'; }while(global_tmp[0]=='>' && fileend!=NULL && (Database_number==0 || 
Database_count<=Database_number)&& alignment_status!=1); if(Database_count!=0) { fprintf(fout,"\n"); } search_header(); read_end: ; }while(fileend!=NULL && Query_count!=Query_number); if (verbose>=1){ fprintf(ferr,"filename %s output %ld sequences",filename,(Query_count-nohits)); if (nohits!=0) fprintf(ferr,", no hits %d, total %ld",nohits, Query_count); fprintf(ferr,".\n"); } return 0; }