static bool run_greylisttest(const config_t *config, const char *basepath) { char buff_q1[BUFSIZ]; char buff_q2[BUFSIZ]; char buff_q3[BUFSIZ]; query_t q1; query_t q2; query_t q3; bool ok = true; filter_t *greylist1; // filter_t *greylist2; #define QUERY(Q) \ if (read_query(basepath, "greylist_" STR(Q), buff_##Q, NULL, &Q) == NULL) { \ return false; \ } QUERY(q1); QUERY(q2); QUERY(q3); #undef QUERY #define FILTER(F) \ do { \ int __p = filter_find_with_name(&config->filters, STR(F)); \ if (__p < 0) { \ return false; \ } \ F = array_ptr(config->filters, __p); \ } while (0) FILTER(greylist1); // FILTER(greylist2); #undef FILTER filter_context_t context; filter_context_prepare(&context, NULL); /* Test greylist */ TEST("greylisted", filter_test(greylist1, &q1, &context, HTK_GREYLIST)); TEST("too_fast", filter_test(greylist1, &q1, &context, HTK_GREYLIST)); sleep(5); TEST("too_slow", filter_test(greylist1, &q1, &context, HTK_GREYLIST)); sleep(2); TEST("whitelisted", filter_test(greylist1, &q1, &context, HTK_WHITELIST)); TEST("other_greylisted", filter_test(greylist1, &q2, &context, HTK_GREYLIST)); TEST("auto_whitelisted", filter_test(greylist1, &q1, &context, HTK_WHITELIST)); TEST("other_auto_whitelisted", filter_test(greylist1, &q2, &context, HTK_WHITELIST)); TEST("greylisted", filter_test(greylist1, &q3, &context, HTK_GREYLIST)); sleep(10); TEST("cleanup", filter_test(greylist1, &q1, &context, HTK_GREYLIST)); filter_context_wipe(&context); return ok; }
/*
 *  Write the current record of reader 0 to the per-sample output files.
 *
 *  When a filter is set, filter_test() yields a site-level verdict and may
 *  also hand back a per-sample array through `smpl_pass`.  If that array is
 *  present it alone decides which samples are written; otherwise the
 *  site-level verdict applies to all of them.  FLT_EXCLUDE inverts either
 *  verdict.  The output record is built on first use and shared by all
 *  samples written for this site.
 */
static void process(args_t *args)
{
    bcf1_t *rec = bcf_sr_get_line(args->sr,0);
    bcf_unpack(rec, BCF_UN_ALL);

    const int exclude = (args->filter_logic & FLT_EXCLUDE) ? 1 : 0;
    const uint8_t *smpl_pass = NULL;
    int site_pass = 1;
    if ( args->filter )
    {
        site_pass = filter_test(args->filter, rec, &smpl_pass);
        if ( exclude ) site_pass = !site_pass;
    }

    // No per-sample verdicts and the site as a whole failed: nothing to write
    if ( !smpl_pass && !site_pass ) return;

    bcf1_t *out = NULL;
    int ismpl;
    for (ismpl=0; ismpl<rec->n_sample; ismpl++)
    {
        if ( !args->fh[ismpl] ) continue;       // no output file for this sample

        if ( smpl_pass )
        {
            // a sample is dropped when "matched" equals "we are excluding matches"
            int matched = smpl_pass[ismpl] ? 1 : 0;
            if ( matched == exclude ) continue;
        }

        if ( !out ) out = rec_set_info(args, rec);
        rec_set_format(args, rec, ismpl, out);
        bcf_write(args->fh[ismpl], args->hdr_out, out);
    }

    if ( out ) bcf_destroy(out);
}
int main(int argc, char* argv[]) { //---------- query keywords ----------// std::vector<std::string> query_words; query_words.push_back("tattoo"); //query_words.push_back("libstick"); //query_words.push_back("sunglass"); //---------- create bloom filter manager ----------// bloom_filter_manager mgr_for_load; //---------- test for boost serialization ----------// std::cout << "\n------------------------------------------------------------------" << std::endl; std::cout << "| TEST FOR RANDOM ACCESS OF LARGE DATA FILE |" << std::endl; std::cout << "------------------------------------------------------------------\n" << std::endl; //---------- large file load ----------// std::string filename = "/media/mojool1984/research_storage/mirflickr1M_dataset/working/integrated/merged_tags_for_random_access.dat"; //std::string filename = "/home/mojool1984/Desktop/project/keyword-visual-image-search/hybrid_index/nonleaf_ok.dat"; assert( mgr_for_load.open_for_random_access( filename.c_str() ) ); filter_test( mgr_for_load, RANDOM_ACCESS, query_words, false ); //---------- close all file streams ----------// mgr_for_load.close_for_random_access(); return 0; }
/*
 *  Format every record of the first reader with the configured converter and
 *  write the text to args->out.  An optional header line is emitted first;
 *  records rejected by the filter (include or exclude logic) are skipped.
 */
static void query_vcf(args_t *args)
{
    kstring_t buf = {0,0,0};

    if ( args->print_header )
    {
        convert_header(args->convert,&buf);
        fwrite(buf.s, buf.l, 1, args->out);
    }

    const int exclude = (args->filter_logic & FLT_EXCLUDE) ? 1 : 0;

    while ( bcf_sr_next_line(args->files) )
    {
        if ( !bcf_sr_has_line(args->files,0) ) continue;

        bcf1_t *rec = args->files->readers[0].buffer[0];
        bcf_unpack(rec, args->files->max_unpack);

        if ( args->filter )
        {
            // skip when "expression matched" equals "we are excluding matches"
            int matched = filter_test(args->filter, rec, NULL) ? 1 : 0;
            if ( matched == exclude ) continue;
        }

        buf.l = 0;      // reuse the buffer for each record
        convert_line(args->convert, rec, &buf);
        if ( buf.l )
            fwrite(buf.s, buf.l, 1, args->out);
    }

    if ( buf.m )
        free(buf.s);
}
static bool run_testcase(const config_t *config, const char *basepath, const char *filename) { char buff[BUFSIZ]; char *end; query_t query; const char *eol = read_query(basepath, filename, buff, &end, &query); if (eol == NULL) { return false; } bool ok = true; filter_context_t context; filter_context_prepare(&context, NULL); while (eol < end) { char *neol = memchr(eol, '\n', end - eol); if (neol == NULL) { neol = end; } *neol = '\0'; char *sep = memchr(eol, '=', neol - eol); if (sep == NULL) { eol = neol + 1; err("missing separator"); continue; } *sep = '\0'; int pos = filter_find_with_name(&config->filters, eol); if (pos == -1) { err("Unknown filter %s", eol); eol = neol + 1; continue; } ++sep; filter_result_t result = hook_tokenize(sep, neol - sep); if (result == HTK_UNKNOWN) { err("Unknown filter result %.*s", (int) (neol - sep), sep); eol = neol + 1; continue; } filter_t *filter = array_ptr(config->filters, pos); #define TEST(Name, Run) \ do { \ bool __test = (Run); \ printf(" test %s: %s\n", Name, __test ? "SUCCESS" : "FAILED"); \ ok = ok && __test; \ } while (0) TEST(filter->name, filter_test(filter, &query, &context, result)); eol = neol + 1; } filter_context_wipe(&context); return ok; }
static bool run_ratetest(const config_t *config, const char *basepath) { char buff_q1[BUFSIZ]; query_t q1; bool ok = true; filter_t *rate1; #define QUERY(Q) \ if (read_query(basepath, "greylist_" STR(Q), buff_##Q, NULL, &Q) == NULL) { \ return false; \ } QUERY(q1); #undef QUERY #define FILTER(F) \ do { \ int __p = filter_find_with_name(&config->filters, STR(F)); \ if (__p < 0) { \ return false; \ } \ F = array_ptr(config->filters, __p); \ } while (0) FILTER(rate1); #undef FILTER filter_context_t context; filter_context_prepare(&context, NULL); /* Test greylist */ TEST("no", filter_test(rate1, &q1, &context, HTK_FAIL)); TEST("soft_start", filter_test(rate1, &q1, &context, HTK_SOFT_MATCH_START)); sleep(1); TEST("soft", filter_test(rate1, &q1, &context, HTK_SOFT_MATCH)); sleep(1); TEST("hard_start", filter_test(rate1, &q1, &context, HTK_HARD_MATCH_START)); TEST("hard", filter_test(rate1, &q1, &context, HTK_HARD_MATCH)); sleep(4); TEST("soft_down", filter_test(rate1, &q1, &context, HTK_SOFT_MATCH)); sleep(5); TEST("no_down", filter_test(rate1, &q1, &context, HTK_FAIL)); filter_context_wipe(&context); return ok; }
/*
 *  Walk all synced readers position by position and output the sites selected
 *  by args->isec_op:
 *      OP_COMPLEMENT  present in the first file only
 *      OP_EQUAL       present in exactly isec_n files
 *      OP_PLUS        present in at least isec_n files
 *      OP_MINUS       present in at most isec_n files
 *      OP_EXACT       presence pattern equal to isec_exact[]
 *  (OP_VENN applies no count test; it only changes how records are routed to
 *  the prefix outputs.)
 *
 *  Output goes to one of:
 *      - a single VCF/BCF stream (pysam_stdout or -o) when only one file is
 *        to be written and no prefix was given,
 *      - the text list of sites (args->fh_sites),
 *      - per-file outputs under the prefix (args->fh_out).
 *
 *  Records failing a per-file filter are treated as absent from that file.
 *  Any failed write terminates the program via error().
 */
void isec_vcf(args_t *args)
{
    bcf_srs_t *files = args->files;
    kstring_t str = {0,0,0};
    htsFile *out_fh = NULL;
    const char *out_name = args->output_fname ? args->output_fname : "standard output";

    // When only one VCF is output, print VCF to pysam_stdout or -o file
    int out_std = 0;
    if ( args->nwrite==1 && !args->prefix ) out_std = 1;
    if ( args->targets_list && files->nreaders==1 ) out_std = 1;
    if ( out_std )
    {
        out_fh = hts_open(args->output_fname? args->output_fname : "-",hts_bcf_wmode(args->output_type));
        if ( out_fh == NULL ) error("Can't write to %s: %s\n", out_name, strerror(errno));
        if ( args->n_threads ) hts_set_threads(out_fh, args->n_threads);
        if (args->record_cmd_line) bcf_hdr_append_version(files->readers[args->iwrite].header,args->argc,args->argv,"bcftools_isec");
        if ( bcf_hdr_write(out_fh, files->readers[args->iwrite].header)!=0 )
            error("[%s] Error: cannot write the header to %s\n", __func__, out_name);
    }
    if ( !args->nwrite && !out_std && !args->prefix )
        fprintf(pysam_stderr,"Note: -w option not given, printing list of sites...\n");

    // Highest bit of the presence mask; readers with an index at or beyond it
    // all map onto this bit so that the shift below is always well defined.
    const int top_bit = (int)(8*sizeof(unsigned int)) - 1;

    int n;
    while ( (n=bcf_sr_next_line(files)) )
    {
        bcf_sr_t *reader = NULL;
        bcf1_t *line = NULL;
        int i;
        unsigned int ret = 0;   // presence mask, consulted only for OP_VENN (ret==3)
        for (i=0; i<files->nreaders; i++)
        {
            if ( !bcf_sr_has_line(files,i) ) continue;

            if ( args->nflt && args->flt[i] )
            {
                bcf1_t *rec = bcf_sr_get_line(files, i);
                int pass = filter_test(args->flt[i], rec, NULL);
                if ( args->flt_logic[i] & FLT_EXCLUDE ) pass = pass ? 0 : 1;
                if ( !pass )
                {
                    // pretend the file has no record at this position
                    files->has_line[i] = 0;
                    n--;
                    continue;
                }
            }

            if ( !line )
            {
                // the first file with a record supplies the site description
                line = files->readers[i].buffer[0];
                reader = &files->readers[i];
            }
            ret |= 1u << (i < top_bit ? i : top_bit);
        }

        // Every record at this position was filtered out: there is nothing to
        // output and the site-list branch below must not dereference NULL.
        if ( !line ) continue;

        switch (args->isec_op)
        {
            case OP_COMPLEMENT:
                if ( n!=1 || !bcf_sr_has_line(files,0) ) continue;
                break;
            case OP_EQUAL:
                if ( n != args->isec_n ) continue;
                break;
            case OP_PLUS:
                if ( n < args->isec_n ) continue;
                break;
            case OP_MINUS:
                if ( n > args->isec_n ) continue;
                break;
            case OP_EXACT:
                for (i=0; i<files->nreaders; i++)
                    if ( files->has_line[i] != args->isec_exact[i] ) break;
                if ( i<files->nreaders ) continue;
                break;
        }

        if ( out_std )
        {
            if ( bcf_sr_has_line(files,args->iwrite) )
            {
                if ( bcf_write1(out_fh, files->readers[args->iwrite].header, files->readers[args->iwrite].buffer[0])!=0 )
                    error("[%s] Error: cannot write to %s\n", __func__, out_name);
            }
            continue;
        }
        else if ( args->fh_sites )
        {
            // CHROM, POS, REF, comma-separated ALTs and a 0/1 presence string
            str.l = 0;
            kputs(reader->header->id[BCF_DT_CTG][line->rid].key, &str); kputc('\t', &str);
            kputw(line->pos+1, &str); kputc('\t', &str);
            if (line->n_allele > 0) kputs(line->d.allele[0], &str);
            else kputc('.', &str);
            kputc('\t', &str);
            if (line->n_allele > 1) kputs(line->d.allele[1], &str);
            else kputc('.', &str);
            for (i=2; i<line->n_allele; i++)
            {
                kputc(',', &str);
                kputs(line->d.allele[i], &str);
            }
            kputc('\t', &str);
            for (i=0; i<files->nreaders; i++)
                kputc(bcf_sr_has_line(files,i)?'1':'0', &str);
            kputc('\n', &str);
            if ( fwrite(str.s,sizeof(char),str.l,args->fh_sites)!=str.l )
                error("[%s] Error: failed to write %d bytes to the list of sites\n", __func__, (int)str.l);
        }

        if ( args->prefix )
        {
            if ( args->isec_op==OP_VENN && ret==3 )
            {
                // shared by both files: written to the third and fourth output
                if ( !args->nwrite || args->write[0] )
                {
                    if ( bcf_write1(args->fh_out[2], bcf_sr_get_header(files,0), bcf_sr_get_line(files,0))!=0 )
                        error("[%s] Error: cannot write the record to the output file\n", __func__);
                }
                if ( !args->nwrite || args->write[1] )
                {
                    if ( bcf_write1(args->fh_out[3], bcf_sr_get_header(files,1), bcf_sr_get_line(files,1))!=0 )
                        error("[%s] Error: cannot write the record to the output file\n", __func__);
                }
            }
            else
            {
                for (i=0; i<files->nreaders; i++)
                {
                    if ( !bcf_sr_has_line(files,i) ) continue;
                    if ( args->write && !args->write[i] ) continue;
                    if ( bcf_write1(args->fh_out[i], files->readers[i].header, files->readers[i].buffer[0])!=0 )
                        error("[%s] Error: cannot write the record to the output file\n", __func__);
                }
            }
        }
    }
    free(str.s);
    if ( out_fh && hts_close(out_fh)!=0 )
        error("[%s] Error: close failed .. %s\n", __func__, out_name);
}
int main_plugin(int argc, char *argv[]) { int c; args_t *args = (args_t*) calloc(1,sizeof(args_t)); args->argc = argc; args->argv = argv; args->files = bcf_sr_init(); args->output_fname = "-"; args->output_type = FT_VCF; args->nplugin_paths = -1; int regions_is_file = 0, targets_is_file = 0, plist_only = 0; if ( argc==1 ) usage(args); char *plugin_name = NULL; if ( argv[1][0]!='-' ) { plugin_name = argv[1]; argc--; argv++; } static struct option loptions[] = { {"verbose",0,0,'v'}, {"help",0,0,'h'}, {"list-plugins",0,0,'l'}, {"output",1,0,'o'}, {"output-type",1,0,'O'}, {"include",1,0,'i'}, {"exclude",1,0,'e'}, {"regions",1,0,'r'}, {"regions-file",1,0,'R'}, {"targets",1,0,'t'}, {"targets-file",1,0,'T'}, {0,0,0,0} }; while ((c = getopt_long(argc, argv, "h?o:O:r:R:li:e:v",loptions,NULL)) >= 0) { switch (c) { case 'v': args->verbose = 1; break; case 'o': args->output_fname = optarg; break; case 'O': switch (optarg[0]) { case 'b': args->output_type = FT_BCF_GZ; break; case 'u': args->output_type = FT_BCF; break; case 'z': args->output_type = FT_VCF_GZ; break; case 'v': args->output_type = FT_VCF; break; default: error("The output type \"%s\" not recognised\n", optarg); }; break; case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break; case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break; case 'r': args->regions_list = optarg; break; case 'R': args->regions_list = optarg; regions_is_file = 1; break; case 't': args->targets_list = optarg; break; case 'T': args->targets_list = optarg; targets_is_file = 1; break; case 'l': plist_only = 1; break; case '?': case 'h': load_plugin(args, plugin_name, 1, &args->plugin); fprintf(stderr,"%s",args->plugin.usage()); return 0; break; default: error("Unknown argument: %s\n", optarg); } } if ( plist_only ) return list_plugins(args); char *fname = NULL; if ( optind>=argc || argv[optind][0]=='-' ) { if ( !isatty(fileno((FILE *)stdin)) ) fname = "-"; // reading from stdin else usage(args); 
args->plugin.argc = argc - optind + 1; args->plugin.argv = argv + optind - 1; } else { fname = argv[optind]; args->plugin.argc = argc - optind; args->plugin.argv = argv + optind; } optind = 0; args->plugin.argv[0] = plugin_name; load_plugin(args, plugin_name, 1, &args->plugin); if ( args->regions_list ) { if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 ) error("Failed to read the regions: %s\n", args->regions_list); } if ( args->targets_list ) { if ( bcf_sr_set_targets(args->files, args->targets_list, targets_is_file, 0)<0 ) error("Failed to read the targets: %s\n", args->targets_list); args->files->collapse |= COLLAPSE_SOME; } if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open or the file not indexed: %s\n", fname); init_data(args); while ( bcf_sr_next_line(args->files) ) { bcf1_t *line = bcf_sr_get_line(args->files,0); if ( args->filter ) { int pass = filter_test(args->filter, line, NULL); if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1; if ( !pass ) continue; } line = args->plugin.process(line); if ( line ) bcf_write1(args->out_fh, args->hdr_out, line); } destroy_data(args); bcf_sr_destroy(args->files); free(args); return 0; }
/*
 *  Decide whether a record is to be output and, on the way, apply the
 *  requested modifications to it (sample subsetting, AC/AN update, trimming
 *  of unused alleles, dropping of genotypes).
 *
 *  The checks are applied in this order: number of alleles, known/novel ID,
 *  variant type, filter expression, private sites (after subsetting),
 *  genotype composition, allele count, allele frequency, uncalled sites and
 *  phasing.
 *
 *  @param args  program settings and working buffers; args->ac and args->mac
 *               are (re)used as the allele-count scratch array
 *  @param line  the record to examine; may be modified in place
 *  @return 1 if the record should be written, 0 if it should be skipped
 */
int subset_vcf(args_t *args, bcf1_t *line)
{
    if ( args->min_alleles && line->n_allele < args->min_alleles ) return 0; // min alleles
    if ( args->max_alleles && line->n_allele > args->max_alleles ) return 0; // max alleles
    if (args->novel || args->known)
    {
        if ( args->novel && (line->d.id[0]!='.' || line->d.id[1]!=0) ) return 0; // skip sites which are known, ID != '.'
        if ( args->known && line->d.id[0]=='.' && line->d.id[1]==0 ) return 0;  // skip sites which are novel, ID == '.'
    }

    if (args->include || args->exclude)
    {
        int line_type = bcf_get_variant_types(line);
        if ( args->include && !(line_type&args->include) ) return 0; // include only given variant types
        if ( args->exclude &&   line_type&args->exclude  ) return 0; // exclude given variant types
    }

    if ( args->filter )
    {
        int ret = filter_test(args->filter, line, NULL);
        if ( args->filter_logic==FLT_INCLUDE ) { if ( !ret ) return 0; }
        else if ( ret ) return 0;
    }

    // make room for one count per allele
    hts_expand(int, line->n_allele, args->mac, args->ac);
    int i, an = 0, non_ref_ac = 0;
    if (args->calc_ac)
    {
        bcf_calc_ac(args->hdr, line, args->ac, BCF_UN_INFO|BCF_UN_FMT); // get original AC and AN values from INFO field if available, otherwise calculate
        for (i=1; i<line->n_allele; i++)
            non_ref_ac += args->ac[i];
        for (i=0; i<line->n_allele; i++)
            an += args->ac[i];
    }

    if (args->n_samples)
    {
        int non_ref_ac_sub = 0, *ac_sub = (int*) calloc(line->n_allele,sizeof(int));
        // calloc may legitimately return NULL for a zero-sized request
        if ( !ac_sub && line->n_allele > 0 ) error("Failed to allocate memory for allele counts\n");
        bcf_subset(args->hdr, line, args->n_samples, args->imap);
        if (args->calc_ac)
        {
            bcf_calc_ac(args->hsub, line, ac_sub, BCF_UN_FMT); // recalculate AC and AN
            an = 0;
            for (i=0; i<line->n_allele; i++)
            {
                args->ac[i] = ac_sub[i];
                an += ac_sub[i];
            }
            for (i=1; i<line->n_allele; i++)
                non_ref_ac_sub += ac_sub[i];
            if (args->private_vars)
            {
                // a site is private when all of its non-reference alleles are in the subset
                if (args->private_vars == FLT_INCLUDE && !(non_ref_ac_sub > 0 && non_ref_ac == non_ref_ac_sub)) { free(ac_sub); return 0; } // select private sites
                if (args->private_vars == FLT_EXCLUDE && non_ref_ac_sub > 0 && non_ref_ac == non_ref_ac_sub) { free(ac_sub); return 0; } // exclude private sites
            }
            non_ref_ac = non_ref_ac_sub;
        }
        free(ac_sub);
    }

    bcf_fmt_t *gt_fmt;
    if ( args->gt_type && (gt_fmt=bcf_get_fmt(args->hdr,line,"GT")) )
    {
        int nhet = 0, nhom = 0, nmiss = 0;
        for (i=0; i<bcf_hdr_nsamples(args->hdr); i++)
        {
            int type = bcf_gt_type(gt_fmt,i,NULL,NULL);
            if ( type==GT_HET_RA || type==GT_HET_AA )
            {
                if ( args->gt_type==GT_NO_HET ) return 0;
                nhet = 1;
            }
            else if ( type==GT_UNKN )
            {
                if ( args->gt_type==GT_NO_MISSING ) return 0;
                nmiss = 1;
            }
            else
            {
                if ( args->gt_type==GT_NO_HOM ) return 0;
                nhom = 1;
            }
        }
        if ( args->gt_type==GT_NEED_HOM && !nhom ) return 0;
        else if ( args->gt_type==GT_NEED_HET && !nhet ) return 0;
        else if ( args->gt_type==GT_NEED_MISSING && !nmiss ) return 0;
    }

    int minor_ac = 0;
    int major_ac = 0;
    if ( args->calc_ac )
    {
        minor_ac = args->ac[0];
        major_ac = args->ac[0];
        for (i=1; i<line->n_allele; i++)
        {
            if (args->ac[i] < minor_ac) { minor_ac = args->ac[i]; }
            if (args->ac[i] > major_ac) { major_ac = args->ac[i]; }
        }
    }

    // Count of the first ALT allele. args->ac[1] holds a value for this record
    // only if the counts were calculated and the record has an ALT allele;
    // otherwise the slot is stale or lies outside the array, so use 0.
    int alt1_ac = ( args->calc_ac && line->n_allele > 1 ) ? args->ac[1] : 0;

    if (args->min_ac)
    {
        if (args->min_ac_type == ALLELE_NONREF && args->min_ac>non_ref_ac) return 0; // min AC
        else if (args->min_ac_type == ALLELE_MINOR && args->min_ac>minor_ac) return 0; // min minor AC
        else if (args->min_ac_type == ALLELE_ALT1 && args->min_ac>alt1_ac) return 0; // min 1st alternate AC
        else if (args->min_ac_type == ALLELE_MAJOR && args->min_ac > major_ac) return 0; // min major AC
        else if (args->min_ac_type == ALLELE_NONMAJOR && args->min_ac > an-major_ac) return 0; // min non-major AC
    }
    if (args->max_ac)
    {
        if (args->max_ac_type == ALLELE_NONREF && args->max_ac<non_ref_ac) return 0; // max AC
        else if (args->max_ac_type == ALLELE_MINOR && args->max_ac<minor_ac) return 0; // max minor AC
        else if (args->max_ac_type == ALLELE_ALT1 && args->max_ac<alt1_ac) return 0; // max 1st alternate AC
        else if (args->max_ac_type == ALLELE_MAJOR && args->max_ac < major_ac) return 0; // max major AC
        else if (args->max_ac_type == ALLELE_NONMAJOR && args->max_ac < an-major_ac) return 0; // max non-major AC
    }
    if (args->min_af)
    {
        if (an == 0) return 0; // freq not defined, skip site
        if (args->min_af_type == ALLELE_NONREF && args->min_af>non_ref_ac/(double)an) return 0; // min AF
        else if (args->min_af_type == ALLELE_MINOR && args->min_af>minor_ac/(double)an) return 0; // min minor AF
        else if (args->min_af_type == ALLELE_ALT1 && args->min_af>alt1_ac/(double)an) return 0; // min 1st alternate AF
        else if (args->min_af_type == ALLELE_MAJOR && args->min_af > major_ac/(double)an) return 0; // min major AF
        else if (args->min_af_type == ALLELE_NONMAJOR && args->min_af > (an-major_ac)/(double)an) return 0; // min non-major AF
    }
    if (args->max_af)
    {
        if (an == 0) return 0; // freq not defined, skip site
        if (args->max_af_type == ALLELE_NONREF && args->max_af<non_ref_ac/(double)an) return 0; // max AF
        else if (args->max_af_type == ALLELE_MINOR && args->max_af<minor_ac/(double)an) return 0; // max minor AF
        else if (args->max_af_type == ALLELE_ALT1 && args->max_af<alt1_ac/(double)an) return 0; // max 1st alternate AF
        else if (args->max_af_type == ALLELE_MAJOR && args->max_af < major_ac/(double)an) return 0; // max major AF
        else if (args->max_af_type == ALLELE_NONMAJOR && args->max_af < (an-major_ac)/(double)an) return 0; // max non-major AF
    }
    if (args->uncalled)
    {
        if (args->uncalled == FLT_INCLUDE && an > 0) return 0; // select uncalled
        if (args->uncalled == FLT_EXCLUDE && an == 0) return 0; // skip if uncalled
    }
    if (args->calc_ac && args->update_info)
    {
        bcf_update_info_int32(args->hdr, line, "AC", &args->ac[1], line->n_allele-1);
        bcf_update_info_int32(args->hdr, line, "AN", &an, 1);
    }
    if (args->trim_alts)
    {
        int ret = bcf_trim_alleles(args->hsub ? args->hsub : args->hdr, line);
        if ( ret==-1 ) error("Error: some GT index is out of bounds at %s:%d\n", bcf_seqname(args->hsub ? args->hsub : args->hdr, line), line->pos+1);
    }
    if (args->phased)
    {
        int phased = bcf_all_phased(args->hdr, line);
        if (args->phased == FLT_INCLUDE && !phased) { return 0; } // skip unphased
        if (args->phased == FLT_EXCLUDE && phased) { return 0; } // skip phased
    }
    if (args->sites_only) bcf_subset(args->hsub ? args->hsub : args->hdr, line, 0, 0);
    return 1;
}