int cmd_ls_files(int argc, const char **argv, const char *prefix) { int require_work_tree = 0, show_tag = 0; struct dir_struct dir; struct option builtin_ls_files_options[] = { { OPTION_CALLBACK, 'z', NULL, NULL, NULL, "paths are separated with NUL character", PARSE_OPT_NOARG, option_parse_z }, OPT_BOOLEAN('t', NULL, &show_tag, "identify the file status with tags"), OPT_BOOLEAN('v', NULL, &show_valid_bit, "use lowercase letters for 'assume unchanged' files"), OPT_BOOLEAN('c', "cached", &show_cached, "show cached files in the output (default)"), OPT_BOOLEAN('d', "deleted", &show_deleted, "show deleted files in the output"), OPT_BOOLEAN('m', "modified", &show_modified, "show modified files in the output"), OPT_BOOLEAN('o', "others", &show_others, "show other files in the output"), OPT_BIT('i', "ignored", &dir.flags, "show ignored files in the output", DIR_SHOW_IGNORED), OPT_BOOLEAN('s', "stage", &show_stage, "show staged contents' object name in the output"), OPT_BOOLEAN('k', "killed", &show_killed, "show files on the filesystem that need to be removed"), OPT_BIT(0, "directory", &dir.flags, "show 'other' directories' name only", DIR_SHOW_OTHER_DIRECTORIES), OPT_NEGBIT(0, "empty-directory", &dir.flags, "don't show empty directories", DIR_HIDE_EMPTY_DIRECTORIES), OPT_BOOLEAN('u', "unmerged", &show_unmerged, "show unmerged files in the output"), OPT_BOOLEAN(0, "resolve-undo", &show_resolve_undo, "show resolve-undo information"), { OPTION_CALLBACK, 'x', "exclude", &dir.exclude_list[EXC_CMDL], "pattern", "skip files matching pattern", 0, option_parse_exclude }, { OPTION_CALLBACK, 'X', "exclude-from", &dir, "file", "exclude patterns are read from <file>", 0, option_parse_exclude_from }, OPT_STRING(0, "exclude-per-directory", &dir.exclude_per_dir, "file", "read additional per-directory exclude patterns in <file>"), { OPTION_CALLBACK, 0, "exclude-standard", &dir, NULL, "add the standard git exclusions", PARSE_OPT_NOARG, option_parse_exclude_standard }, { OPTION_SET_INT, 0, "full-name", &prefix_offset, NULL, "make the output relative to the project top directory", PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL }, OPT_BOOLEAN(0, "error-unmatch", &error_unmatch, "if any <file> is not in the index, treat this as an error"), OPT_STRING(0, "with-tree", &with_tree, "tree-ish", "pretend that paths removed since <tree-ish> are still present"), OPT__ABBREV(&abbrev), OPT_END() }; memset(&dir, 0, sizeof(dir)); if (prefix) prefix_offset = strlen(prefix); git_config(git_default_config, NULL); if (read_cache() < 0) die("index file corrupt"); argc = parse_options(argc, argv, prefix, builtin_ls_files_options, ls_files_usage, 0); if (show_tag || show_valid_bit) { tag_cached = "H "; tag_unmerged = "M "; tag_removed = "R "; tag_modified = "C "; tag_other = "? "; tag_killed = "K "; tag_skip_worktree = "S "; tag_resolve_undo = "U "; } if (show_modified || show_others || show_deleted || (dir.flags & DIR_SHOW_IGNORED) || show_killed) require_work_tree = 1; if (show_unmerged) /* * There's no point in showing unmerged unless * you also show the stage information. */ show_stage = 1; if (dir.exclude_per_dir) exc_given = 1; if (require_work_tree && !is_inside_work_tree()) setup_work_tree(); pathspec = get_pathspec(prefix, argv); /* be nice with submodule paths ending in a slash */ if (pathspec) strip_trailing_slash_from_submodules(); /* Verify that the pathspec matches the prefix */ if (pathspec) prefix = verify_pathspec(prefix); /* Treat unmatching pathspec elements as errors */ if (pathspec && error_unmatch) { int num; for (num = 0; pathspec[num]; num++) ; ps_matched = xcalloc(1, num); } if ((dir.flags & DIR_SHOW_IGNORED) && !exc_given) die("ls-files --ignored needs some exclude pattern"); /* With no flags, we default to showing the cached files */ if (!(show_stage | show_deleted | show_others | show_unmerged | show_killed | show_modified | show_resolve_undo)) show_cached = 1; if (prefix) prune_cache(prefix); if (with_tree) { /* * Basic sanity check; show-stages and show-unmerged * would not make any sense with this option. */ if (show_stage || show_unmerged) die("ls-files --with-tree is incompatible with -s or -u"); overlay_tree_on_cache(with_tree, prefix); } show_files(&dir, prefix); if (show_resolve_undo) show_ru_info(prefix); if (ps_matched) { int bad; bad = report_path_error(ps_matched, pathspec, prefix_offset); if (bad) fprintf(stderr, "Did you forget to 'git add'?\n"); return bad ? 1 : 0; } return 0; }
int cmd_ls_files(int argc, const char **argv, const char *prefix) { int i; int exc_given = 0, require_work_tree = 0; struct dir_struct dir; memset(&dir, 0, sizeof(dir)); if (prefix) prefix_offset = strlen(prefix); git_config(git_default_config, NULL); for (i = 1; i < argc; i++) { const char *arg = argv[i]; if (!strcmp(arg, "--")) { i++; break; } if (!strcmp(arg, "-z")) { line_terminator = 0; continue; } if (!strcmp(arg, "-t") || !strcmp(arg, "-v")) { tag_cached = "H "; tag_unmerged = "M "; tag_removed = "R "; tag_modified = "C "; tag_other = "? "; tag_killed = "K "; if (arg[1] == 'v') show_valid_bit = 1; continue; } if (!strcmp(arg, "-c") || !strcmp(arg, "--cached")) { show_cached = 1; continue; } if (!strcmp(arg, "-d") || !strcmp(arg, "--deleted")) { show_deleted = 1; continue; } if (!strcmp(arg, "-m") || !strcmp(arg, "--modified")) { show_modified = 1; require_work_tree = 1; continue; } if (!strcmp(arg, "-o") || !strcmp(arg, "--others")) { show_others = 1; require_work_tree = 1; continue; } if (!strcmp(arg, "-i") || !strcmp(arg, "--ignored")) { dir.show_ignored = 1; require_work_tree = 1; continue; } if (!strcmp(arg, "-s") || !strcmp(arg, "--stage")) { show_stage = 1; continue; } if (!strcmp(arg, "-k") || !strcmp(arg, "--killed")) { show_killed = 1; require_work_tree = 1; continue; } if (!strcmp(arg, "--directory")) { dir.show_other_directories = 1; continue; } if (!strcmp(arg, "--no-empty-directory")) { dir.hide_empty_directories = 1; continue; } if (!strcmp(arg, "-u") || !strcmp(arg, "--unmerged")) { /* There's no point in showing unmerged unless * you also show the stage information. */ show_stage = 1; show_unmerged = 1; continue; } if (!strcmp(arg, "-x") && i+1 < argc) { exc_given = 1; add_exclude(argv[++i], "", 0, &dir.exclude_list[EXC_CMDL]); continue; } if (!prefixcmp(arg, "--exclude=")) { exc_given = 1; add_exclude(arg+10, "", 0, &dir.exclude_list[EXC_CMDL]); continue; } if (!strcmp(arg, "-X") && i+1 < argc) { exc_given = 1; add_excludes_from_file(&dir, argv[++i]); continue; } if (!prefixcmp(arg, "--exclude-from=")) { exc_given = 1; add_excludes_from_file(&dir, arg+15); continue; } if (!prefixcmp(arg, "--exclude-per-directory=")) { exc_given = 1; dir.exclude_per_dir = arg + 24; continue; } if (!strcmp(arg, "--exclude-standard")) { exc_given = 1; setup_standard_excludes(&dir); continue; } if (!strcmp(arg, "--full-name")) { prefix_offset = 0; continue; } if (!strcmp(arg, "--error-unmatch")) { error_unmatch = 1; continue; } if (!prefixcmp(arg, "--with-tree=")) { with_tree = arg + 12; continue; } if (!prefixcmp(arg, "--abbrev=")) { abbrev = strtoul(arg+9, NULL, 10); if (abbrev && abbrev < MINIMUM_ABBREV) abbrev = MINIMUM_ABBREV; else if (abbrev > 40) abbrev = 40; continue; } if (!strcmp(arg, "--abbrev")) { abbrev = DEFAULT_ABBREV; continue; } if (*arg == '-') usage(ls_files_usage); break; } if (require_work_tree && !is_inside_work_tree()) setup_work_tree(); pathspec = get_pathspec(prefix, argv + i); /* Verify that the pathspec matches the prefix */ if (pathspec) prefix = verify_pathspec(prefix); /* Treat unmatching pathspec elements as errors */ if (pathspec && error_unmatch) { int num; for (num = 0; pathspec[num]; num++) ; ps_matched = xcalloc(1, num); } if (dir.show_ignored && !exc_given) { fprintf(stderr, "%s: --ignored needs some exclude pattern\n", argv[0]); exit(1); } /* With no flags, we default to showing the cached files */ if (!(show_stage | show_deleted | show_others | show_unmerged | show_killed | show_modified)) show_cached = 1; read_cache(); if (prefix) prune_cache(prefix); if (with_tree) { /* * Basic sanity check; show-stages and show-unmerged * would not make any sense with this option. */ if (show_stage || show_unmerged) die("ls-files --with-tree is incompatible with -s or -u"); overlay_tree_on_cache(with_tree, prefix); } show_files(&dir, prefix); if (ps_matched) { int bad; bad = report_path_error(ps_matched, pathspec, prefix_offset); if (bad) fprintf(stderr, "Did you forget to 'git add'?\n"); return bad ? 1 : 0; } return 0; }
int main(int argc, char * argv[]) { int c; const char* efile=0; const char* ffile=0; int pfe_index = 2; while ((c = getopt(argc, argv, "cpf:e:i:n:l:m:h")) != -1) { switch (c) { case 'e': efile = optarg; break; case 'f': ffile = optarg; break; case 'i': // index of pfe in phrase table pfe_index = atoi(optarg); break; case 'n': // keep only the top n entries in phrase table sorted by p(f|e) (0=all) pfe_filter_limit = atoi(optarg); std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl; break; case 'c': print_cooc_counts = true; break; case 'p': print_neglog_significance = true; break; case 'h': hierarchical = true; break; case 'm': max_cache = atoi(optarg); break; case 'l': std::cerr << "-l = " << optarg << "\n"; if (strcmp(optarg,"a+e") == 0) { sig_filter_limit = ALPHA_PLUS_EPS; } else if (strcmp(optarg,"a-e") == 0) { sig_filter_limit = ALPHA_MINUS_EPS; } else { char *x; sig_filter_limit = strtod(optarg, &x); if (sig_filter_limit < 0.0) { std::cerr << "Filter limit (-l) must be either 'a+e', 'a-e' or a real number >= 0.0\n"; usage(); } } break; default: usage(); } } if (sig_filter_limit == 0.0) pef_filter_only = true; //----------------------------------------------------------------------------- if (optind != argc || ((!efile || !ffile) && !pef_filter_only)) { usage(); } //load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false) if (!pef_filter_only) { e_sa.loadData_forSearch(efile, false, false); f_sa.loadData_forSearch(ffile, false, false); size_t elines = e_sa.returnTotalSentNumber(); size_t flines = f_sa.returnTotalSentNumber(); if (elines != flines) { std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n"; usage(); } else { std::cerr << "Training corpus: " << elines << " lines\n"; num_lines = elines; } p_111 = -log(fisher_exact(1,1,1)); std::cerr << "\\alpha = " << p_111 << "\n"; if (sig_filter_limit == ALPHA_MINUS_EPS) { sig_filter_limit = p_111 - 0.001; } else if (sig_filter_limit == ALPHA_PLUS_EPS) { sig_filter_limit = p_111 + 0.001; } std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n"; } else { std::cerr << "Filtering using P(e|f) only. n=" << pfe_filter_limit << std::endl; } char tmpString[10000]; std::string prev = ""; std::vector<PTEntry*> options; size_t pt_lines = 0; while(!cin.eof()) { cin.getline(tmpString,10000,'\n'); if(++pt_lines%10000==0) { std::cerr << "."; prune_cache(esets); prune_cache(fsets); if(pt_lines%500000==0) std::cerr << "[n:"<<pt_lines<<"]\n"; } if(strlen(tmpString)>0) { PTEntry* pp = new PTEntry(tmpString, pfe_index); if (prev != pp->f_phrase) { prev = pp->f_phrase; if (!options.empty()) { // always true after first line compute_cooc_stats_and_filter(options); } for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) { std::cout << **i << std::endl; delete *i; } options.clear(); options.push_back(pp); } else { options.push_back(pp); } // for(int i=0;i<locations.size(); i++){ // cout<<"SentId="<<locations[i].sentIdInCorpus<<" Pos="<<(int)locations[i].posInSentInCorpus<<endl; // } } } compute_cooc_stats_and_filter(options); for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) { std::cout << **i << std::endl; delete *i; } float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines; float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines; std::cerr << "\n\n------------------------------------------------------\n" << " unfiltered phrases pairs: " << pt_lines << "\n" << "\n" << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n" << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n" << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n" << "\n" << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n" << "------------------------------------------------------\n"; return 0; }