/** * Main function for cwb-huffcode. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int R_cwb_huffcode (char *corpus_name, char *registry_dir) { char *registry_directory = registry_dir; char *output_fn = NULL; char *attr_name = DEFAULT_ATT_NAME; Attribute *attr; HCD hc; Rprintf("Corpus: %s\n", corpus_name); int i_want_to_believe = 0; /* skip error checks? */ int all_attributes = 0; protocol = NULL; /* 'delayed' init (see top of file) */ /* ------------------------------------------------- PARSE ARGUMENTS */ /* parse arguments */ all_attributes++; corpus_id_cwb_huffcode = corpus_name; Rprintf("Corpus 1: %s\n", corpus_name); if ((corpus = cl_new_corpus(registry_directory, corpus_id_cwb_huffcode)) == NULL) { Rprintf( "Corpus %s not found in registry %s . Aborted.\n", corpus_id_cwb_huffcode, (registry_directory ? registry_directory : central_corpus_directory())); rcqp_receive_error(1); } Rprintf("Corpus 2: %s\n", corpus_name); if (all_attributes) { for (attr = corpus->attributes; attr; attr = attr->any.next) if (attr->any.type == ATT_POS) { compute_code_lengths(attr, &hc, output_fn); if (! i_want_to_believe) decode_check_huff(attr, output_fn); } } else { if ((attr = cl_new_attribute(corpus, attr_name, ATT_POS)) == NULL) { Rprintf( "Attribute %s.%s doesn't exist. Aborted.\n", corpus_id_cwb_huffcode, attr_name); rcqp_receive_error(1); } compute_code_lengths(attr, &hc, output_fn); if (! i_want_to_believe) decode_check_huff(attr, output_fn); } Rprintf("Corpus 3: %s\n", corpus_name); cl_delete_corpus(corpus); return(0); }
/** * Main function for cwb-compress-rdx. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int R_cwb_compress_rdx(char *corpus_name, char * registry_dir) { char *registry_directory = registry_dir; char *attr_name = DEFAULT_ATT_NAME; Attribute *attr; char *output_fn = NULL; char *debug_fn = NULL; int i_want_to_believe = 0; /* skip error checks? */ int all_attributes = 0; // debug_output = stderr; /* 'delayed' init (see top of file) */ /* ------------------------------------------------- PARSE ARGUMENTS */ /* parse arguments */ all_attributes++; corpus_id_cwb_compress_rdx = corpus_name; if (debug_fn) { if (strcmp(debug_fn, "-") == 0) debug_output = NULL; else if ((debug_output = fopen(debug_fn, "w")) == NULL) { Rprintf( "Can't write debug_cwb_compress_rdx output to file %s. Aborted.", debug_fn); perror(debug_fn); compressrdx_cleanup(1); } } if ((corpus = cl_new_corpus(registry_directory, corpus_id_cwb_compress_rdx)) == NULL) { Rprintf( "Corpus %s not found in registry %s . Aborted.\n", corpus_id_cwb_compress_rdx, (registry_directory ? registry_directory : cl_standard_registry())); compressrdx_cleanup(1); } if (all_attributes) { for (attr = corpus->attributes; attr; attr = attr->any.next) if (attr->any.type == ATT_POS) { compress_reversed_index(attr, output_fn); if (! i_want_to_believe) decompress_check_reversed_index(attr, output_fn); } } else { if ((attr = cl_new_attribute_oldstyle(corpus, attr_name, ATT_POS, NULL)) == NULL) { Rprintf( "Attribute %s.%s doesn't exist. Aborted.\n", corpus_id_cwb_compress_rdx, attr_name); compressrdx_cleanup(1); } compress_reversed_index(attr, output_fn); if (! i_want_to_believe) decompress_check_reversed_index(attr, output_fn); } compressrdx_cleanup(0); return(0); /* to keep gcc from complaining */ }
/** * Main function for cwb-makeall. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int main(int argc, char **argv) { char *attr_name = NULL; Attribute *attribute; char *registry_directory = NULL; char *corpus_id = NULL; extern int optind; extern char *optarg; int c; int validate = 0; char *component = NULL; ComponentID cid; int i = 0; /* ------------------------------------------------- PARSE ARGUMENTS */ progname = argv[0]; /* parse arguments */ while ((c = getopt(argc, argv, "+r:c:P:hDM:V")) != EOF) { switch (c) { /* r: registry directory */ case 'r': if (registry_directory == NULL) registry_directory = optarg; else { fprintf(stderr, "%s: -r option used twice\n", progname); exit(2); } break; case 'P': if (attr_name == NULL) attr_name = optarg; else { fprintf(stderr, "%s: -P option used twice\n", progname); exit(2); } break; case 'c': if (component == NULL) component = optarg; else { fprintf(stderr, "%s: -c option used twice\n", progname); exit(2); } break; case 'D': cl_set_debug_level(1); break; case 'M': i = atoi(optarg); cl_set_memory_limit(i); break; case 'V': validate++; break; case 'h': default: makeall_usage(); } } if (optind >= argc) { fprintf(stderr, "Missing argument, try \"%s -h\" for more information.\n", progname); exit(1); } /* first argument: corpus id */ corpus_id = argv[optind++]; if (component != NULL) { cid = component_id(component); if (cid == CompLast) { fprintf(stderr, "Illegal component name: ``%s''\n", component); exit(1); } } else { cid = CompLast; } if ((corpus = cl_new_corpus(registry_directory, corpus_id)) == NULL) { fprintf(stderr, "Corpus %s not found in registry %s . Aborted.\n", corpus_id, (registry_directory ? 
registry_directory : central_corpus_directory())); exit(1); } printf("=== Makeall: processing corpus %s ===\n", corpus_id); printf("Registry directory: %s\n", corpus->registry_dir); if (optind < argc) { for (i = optind; i < argc; i++) { if ((attribute = cl_new_attribute(corpus, argv[i], ATT_POS)) != NULL) { makeall_do_attribute(attribute, cid, validate); } else { fprintf(stderr, "p-attribute %s.%s not defined. Aborted.\n", corpus_id, attr_name); exit(1); } } } else if (attr_name != NULL) { if ((attribute = cl_new_attribute(corpus, attr_name, ATT_POS)) != NULL) { makeall_do_attribute(attribute, cid, validate); } else { fprintf(stderr, "p-attribute %s.%s not defined. Aborted.\n", corpus_id, attr_name); exit(1); } } else { /* process each p-attribute of the corpus in turn */ for (attribute = corpus->attributes; attribute; attribute = attribute->any.next) if (attribute->type == ATT_POS) { ComponentID my_cid; makeall_do_attribute(attribute, cid, validate); /* now destoy all components; this makes the attribute unusable, but it is currently the only way to free allocated and memory-mapped data */ for (my_cid = CompDirectory; my_cid < CompLast; my_cid++) { /* ordering gleaned from attributes.h */ drop_component(attribute, my_cid); } } } printf("========================================\n"); exit(0); }
/** * Main function for cwb-decode. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int main(int argc, char **argv) { Attribute *attr; Attribute *context = NULL; int sp; /* start position of a match */ int ep; /* end position of a match */ int w, cnt, read_pos_frm_stdin; char s[CL_MAX_LINE_LENGTH]; /* buffer for strings read from file */ char *token; char *input_filename = NULL; FILE *input_file = stdin; /* ------------------------------------------------- PARSE ARGUMENTS */ int c; extern char *optarg; extern int optind; progname = argv[0]; first_token = -1; last = -1; maxlast = -1; read_pos_frm_stdin = 0; /* use getopt() to parse command-line options */ while((c = getopt(argc, argv, "+s:e:r:nLHCxXf:ph")) != EOF) switch(c) { /* s: start corpus position */ case 's': first_token = atoi(optarg); break; /* e: end corpus position */ case 'e': last = atoi(optarg); break; /* r: registry directory */ case 'r': if (registry_directory == NULL) registry_directory = optarg; else { fprintf(stderr, "%s: -r option used twice\n", progname); exit(2); } break; /* n: show cpos in -H mode */ case 'n': printnum++; break; /* x: XML-compatible output in -C mode (-Cx) */ case 'x': xml_compatible++; break; /* L,H,C,X: Lisp, Horizontal, Compact, and XML modes */ case 'L': mode = LispMode; break; case 'H': mode = ConclineMode; break; case 'C': mode = EncodeMode; break; case 'X': mode = XMLMode; break; /* f: matchlist mode / read corpus positions from file */ case 'f': input_filename = optarg; break; /* p: matchlist mode / read corpus positions from stdin */ case 'p': read_pos_frm_stdin++; break; /* h: help page */ case 'h': decode_usage(2); break; default: fprintf(stderr, "Illegal option. 
Try \"%s -h\" for more information.\n", progname); fprintf(stderr, "[remember that options go before the corpus name, and attribute declarations after it!]\n"); decode_cleanup(2); } /* required argument: corpus id */ if (optind < argc) { corpus_id = argv[optind++]; if ((corpus = cl_new_corpus(registry_directory, corpus_id)) == NULL) { fprintf(stderr, "Corpus %s not found in registry %s . Aborted.\n", corpus_id, (registry_directory ? registry_directory : cl_standard_registry() ) ); decode_cleanup(1); } } else { fprintf(stderr, "Missing argument. Try \"%s -h\" for more information.\n", progname); decode_cleanup(2); } /* now parse output flags (-P, -S, ...) [cnt is our own argument counter] */ for (cnt = optind; cnt < argc; cnt++) { if (strcmp(argv[cnt], "-c") == 0) { /* -c: context */ if ((context = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) { fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } } else if (strcmp(argv[cnt], "-P") == 0) { /* -P: positional attribute */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_POS)) == NULL) { fprintf(stderr, "Can't open p-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else { if (cl_max_cpos(attr) > 0) { decode_add_attribute(attr); if (maxlast < 0) maxlast = cl_max_cpos(attr); /* determines corpus size */ } else { fprintf(stderr, "Attribute %s.%s is declared, but not accessible (missing data?). Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } } } else if (strcmp(argv[cnt], "-ALL") == 0) { /* -ALL: all p-attributes and s-attributes */ for (attr = corpus->attributes; attr; attr = attr->any.next) if (attr->any.type == ATT_POS) { decode_add_attribute(attr); if (maxlast < 0) maxlast = cl_max_cpos(attr); } else if (attr->any.type == ATT_STRUC) { decode_add_attribute(attr); } } else if (strcmp(argv[cnt], "-D") == 0) { /* -D: dynamic attribute (not implemented) */ fprintf(stderr, "Sorry, dynamic attributes are not implemented. 
Aborting.\n"); decode_cleanup(2); } else if (strcmp(argv[cnt], "-A") == 0) { /* -A: alignment attribute */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_ALIGN)) == NULL) { fprintf(stderr, "Can't open a-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else decode_add_attribute(attr); } else if (strcmp(argv[cnt], "-S") == 0) { /* -S: structural attribute (as tags) */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) { fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else decode_add_attribute(attr); } else if (strcmp(argv[cnt], "-V") == 0) { /* -V: show structural attribute values (with -p or -f) */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) { fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else if (!cl_struc_values(attr)) { fprintf(stderr, "S-attribute %s.%s does not have annotations. Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else if (printValuesIndex >= MAX_PRINT_VALUES) { fprintf(stderr, "Too many -V attributes, sorry. Aborted.\n"); decode_cleanup(1); } else printValues[printValuesIndex++] = attr; } else { fprintf(stderr, "Unknown flag: %s\n", argv[cnt]); decode_cleanup(2); } } /* ---- end of parse attribute declarations ---- */ if (input_filename != NULL) { if (strcmp(input_filename, "-") == 0) input_file = stdin; else if ((input_file = fopen(input_filename, "r")) == NULL) { perror(input_filename); exit(1); } read_pos_frm_stdin++; } decode_verify_print_value_list(); /* ------------------------------------------------------------ DECODE CORPUS */ if (read_pos_frm_stdin == 0) { /* * normal mode: decode entire corpus or specified range */ if (maxlast < 0) { fprintf(stderr, "Need at least one p-attribute (-P flag). 
Aborted.\n"); decode_cleanup(2); } if (first_token < 0 || first_token >= maxlast) first_token = 0; if (last < 0 || last >= maxlast) last = maxlast - 1; if (last < first_token) { fprintf(stderr, "Warning: output range #%d..#%d is empty. No output.\n", first_token, last); decode_cleanup(2); } if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { decode_print_xml_declaration(); printf("<corpus name=\"%s\" start=\"%d\" end=\"%d\">\n", corpus_id, first_token, last); } /* decode_print_surrounding_s_att_values(first_token); */ /* don't do that in "normal" mode, coz it doesn't make sense */ for (w = first_token; w <= last; w++) decode_print_token_sequence(w, -1, context); if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { printf("</corpus>\n"); } } else { /* * matchlist mode: read (pairs of) corpus positions from stdin or file */ if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { decode_print_xml_declaration(); printf("<matchlist corpus=\"%s\">\n", corpus_id); } cnt = 0; while (fgets(s, CL_MAX_LINE_LENGTH, input_file) != NULL) { token = strtok(s, " \t\n"); if ((token != NULL) && is_num(token)) { sp = atoi(token); ep = -1; if ((token = strtok(NULL, " \t\n")) != NULL) { if (!is_num(token)) { fprintf(stderr, "Invalid corpus position #%s . Aborted.\n", token); decode_cleanup(1); } else ep = atoi(token); } cnt++; /* count matches in matchlist */ if (mode == XMLMode) { printf("<match nr=\"%d\"", cnt); if (printnum) printf(" start=\"%d\" end=\"%d\"", sp, (ep >= 0) ? ep : sp); printf(">\n"); } else { /* nothing shown before range */ } decode_print_surrounding_s_att_values(sp); decode_print_token_sequence(sp, ep, context); if (mode == XMLMode) { printf("</match>\n"); } else if (mode != ConclineMode) { printf("\n"); /* blank line, unless in -H mode */ } } else { fprintf(stderr, "Invalid corpus position #%s . 
Aborted.\n", s); decode_cleanup(1); } } if (input_file != stdin) fclose(input_file); if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { printf("</matchlist>\n"); } } decode_cleanup(0); return 0; /* just to keep gcc from complaining */ }
/** * Parse options and set global variables */ void sencode_parse_options(int argc, char **argv) { int c; extern char *optarg; extern int optind; /* by default, output files are written to current directory */ char *directory = "."; /* may need to set registry if source corpus is specified */ char *registry = NULL; /* source corpus _may_ be set with the -C switch */ char *corpus_name = NULL; /* if text_fd is unspecified, stdin will be used */ text_fd = NULL; /* make sure either -S or -V is used: reset new_satt.name now & check after getopt */ new_satt.name = NULL; while((c = getopt(argc, argv, "+qBd:f:msDS:V:r:C:Mah")) != EOF) switch(c) { /* q: be silent (quiet) */ case 'q': silent++; break; /* B: strip blanks */ case 'B': strip_blanks_in_values++; break; /* d: directory for generated data files */ case 'd': directory = optarg; break; /* f: read input from file */ case 'f': if (text_fd) { Rprintf( "Error: -f option used twice\n\n"); rcqp_receive_error(1); } if ((text_fd = fopen(optarg, "r")) == NULL) { perror("Can't open input file"); rcqp_receive_error(1); } break; /* M: compile list in memory, then write to disk */ case 'M': in_memory++; break; /* a: add to existing attribute (implies -M) */ case 'a': add_to_existing++; in_memory++; break; /* r: registry directory */ case 'r': registry = optarg; break; /* C: source corpus */ case 'C': corpus_name = optarg; break; /* m: set ('multi-value') attribute */ case 'm': set_att = set_any; /* don't know yet whether it's '|'-delimited or "split on whitespace" */ break; /* s: strict syntax checks on set attribute */ case 's': set_syntax_strict++; break; /* D: debug mode */ case 'D': debug++; break; /* S: s-attribute without annotations */ case 'S': sencode_declare_new_satt(optarg, directory, 0); if (optind < argc) { Rprintf( "Error: -S <att> must be last flag on command line.\n\n"); rcqp_receive_error(1); } break; /* V: s-attribute with annotations */ case 'V': sencode_declare_new_satt(optarg, directory, 1); if (optind < 
argc) { Rprintf( "Error: -V <att> must be last flag on command line.\n\n"); rcqp_receive_error(1); } break; /* default or -h: error */ case 'h': default: sencode_usage(); break; } /* now, check the default and obligatory values */ if (!text_fd) text_fd = stdin; if (new_satt.name == NULL) { Rprintf( "Error: either -S or -V flag must be specified.\n\n"); rcqp_receive_error(1); } if (optind < argc) { Rprintf( "Error: extra arguments.\n\n"); rcqp_receive_error(1); } /* if -C <corpus> was specified, open source corpus */ if (corpus_name != NULL) { corpus = cl_new_corpus(registry, corpus_name); if (corpus == NULL) { Rprintf( "Error: Can't find corpus <%s>!\n", corpus_name); rcqp_receive_error(1); } } }
/** * Main function for cwb-align-encode. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int main(int argc, char *argv[]) { int argindex; /* index of first argument in argv[] */ char *align_name = NULL; /* name of the .align file */ FILE *af = NULL; /* alignment file handle */ int af_is_pipe; /* need to know whether to call fclose() or pclose() */ char alx_name[CL_MAX_LINE_LENGTH]; /* full pathname of .alx file */ char alg_name[CL_MAX_LINE_LENGTH]; /* full pathname of optional .alg file */ FILE *alx=NULL, *alg=NULL; /* file handles for .alx and optional .alg file */ char line[CL_MAX_LINE_LENGTH]; /* one line of input from <infile> */ char corpus1_name[CL_MAX_FILENAME_LENGTH]; char corpus2_name[CL_MAX_FILENAME_LENGTH]; char s1_name[CL_MAX_FILENAME_LENGTH]; char s2_name[CL_MAX_FILENAME_LENGTH]; Corpus *corpus1, *corpus2; /* corpus handles */ Attribute *w1, *w2; /* attribute handles for 'word' attributes; used to determine corpus size */ int size1, size2; /* size of source & target corpus */ Corpus *source_corpus; /* encode alignment in this corpus (depends on -R flag, important for -D option) */ char *source_corpus_name; /* just for error messages */ char *attribute_name; /* name of alignment attribute (depends on -R flag, must be lowercase) */ int f1,l1,f2,l2; /* alignment regions */ int current1, current2; int mark, n_0_1, n_1_0; int l; progname = argv[0]; /* parse command line and read arguments */ argindex = alignencode_parse_args(argc, argv, 1); align_name = argv[argindex]; /* open alignment file and parse header; .gz files are automatically decompressed */ af_is_pipe = 0; l = strlen(align_name); if ((l > 3) && (strncasecmp(align_name + l - 3, ".gz", 3) == 0)) { char *pipe_cmd = (char *) cl_malloc(l+10); sprintf(pipe_cmd, "gzip -cd %s", align_name); /* write .gz file through gzip pipe */ af = popen(pipe_cmd, "r"); if (af == NULL) { perror(pipe_cmd); Rprintf( "%s: can't read compressed file %s\n", progname, align_name); 
rcqp_receive_error(1); } af_is_pipe = 1; cl_free(pipe_cmd); } else { af = fopen(align_name, "r"); if (af == NULL) { perror(align_name); Rprintf( "%s: can't read file %s\n", progname, align_name); rcqp_receive_error(1); } } /* read header = first line */ fgets(line, CL_MAX_LINE_LENGTH, af); if (4 != sscanf(line, "%s %s %s %s", corpus1_name, s1_name, corpus2_name, s2_name)) { Rprintf( "%s: %s not in .align format\n", progname, align_name); Rprintf( "wrong header: %s", line); rcqp_receive_error(1); } if (verbose) { if (reverse) Rprintf("Encoding alignment for [%s, %s] from file %s\n", corpus2_name, corpus1_name, align_name); else Rprintf("Encoding alignment for [%s, %s] from file %s\n", corpus1_name, corpus2_name, align_name); } /* open corpora and determine their sizes (for validity checks and compatibility mode) */ if (NULL == (corpus1 = cl_new_corpus(registry_dir, corpus1_name))) { Rprintf( "%s: can't open corpus %s\n", progname, corpus1_name); rcqp_receive_error(1); } if (NULL == (corpus2 = cl_new_corpus(registry_dir, corpus2_name))) { Rprintf( "%s: can't open corpus %s\n", progname, corpus2_name); rcqp_receive_error(1); } if (NULL == (w1 = cl_new_attribute(corpus1, "word", ATT_POS))) { Rprintf( "%s: can't open p-attribute %s.word\n", progname, corpus1_name); rcqp_receive_error(1); } if (NULL == (w2 = cl_new_attribute(corpus2, "word", ATT_POS))) { Rprintf( "%s: can't open p-attribute %s.word\n", progname, corpus2_name); rcqp_receive_error(1); } size1 = cl_max_cpos(w1); if (size1 <= 0) { Rprintf( "%s: data access error (%s.word)\n", progname, corpus1_name); rcqp_receive_error(1); } size2 = cl_max_cpos(w2); if (size2 <= 0) { Rprintf( "%s: data access error (%s.word)\n", progname, corpus2_name); rcqp_receive_error(1); } /* now work out the actual source corpus and the alignment attribute name (depending on -R flag) */ source_corpus = (reverse) ? corpus2 : corpus1; source_corpus_name = (reverse) ? corpus2_name : corpus1_name; attribute_name = cl_strdup((reverse) ? 
corpus1_name : corpus2_name); cl_id_tolower(attribute_name); /* fold attribute name to lowercase */ /* with -D option, determine data file name(s) from actual source corpus; otherwise use directory specified with -d and the usual naming conventions */ if (data_dir_from_corpus) { Attribute *alignment = cl_new_attribute(source_corpus, attribute_name, ATT_ALIGN); char *comp_pathname; if (alignment == NULL) { Rprintf( "%s: alignment attribute %s.%s not declared in registry file\n", progname, source_corpus_name, attribute_name); rcqp_receive_error(1); } comp_pathname = component_full_name(alignment, CompXAlignData, NULL); if (comp_pathname == NULL) { Rprintf( "%s: can't determine pathname for .alx file (internal error)\n", progname); rcqp_receive_error(1); } strcpy(alx_name, comp_pathname); /* need to strcpy because component_full_name() returns pointer to internal buffer */ if (compatibility) { comp_pathname = component_full_name(alignment, CompAlignData, NULL); if (comp_pathname == NULL) { Rprintf( "%s: can't determine pathname for .alg file (internal error)\n", progname); rcqp_receive_error(1); } strcpy(alg_name, comp_pathname); } } else { sprintf(alx_name, "%s" SUBDIR_SEP_STRING "%s.alx", data_dir, attribute_name); if (compatibility) sprintf(alg_name, "%s" SUBDIR_SEP_STRING "%s.alg", data_dir, attribute_name); } /* now open output file(s) */ alx = fopen(alx_name, "wb"); if (alx == NULL) { perror(alx_name); Rprintf( "%s: can't write file %s\n", progname, alx_name); rcqp_receive_error(1); } if (verbose) Rprintf("Writing file %s ...\n", alx_name); if (compatibility) { alg = fopen(alg_name, "wb"); if (alg == NULL) { perror(alg_name); Rprintf( "%s: can't write file %s\n", progname, alg_name); rcqp_receive_error(1); } if (verbose) Rprintf("Writing file %s ...\n", alg_name); } /* main encoding loop */ f1 = f2 = l1 = l2 = 0; mark = -1; /* check that regions occur in ascending order */ current1 = current2 = -1; /* for compatibility mode */ n_0_1 = n_1_0 = 0; /* number of 0:1 
and 1:0 alignments, which are skipped */ while (! feof(af)) { if (NULL == fgets(line, CL_MAX_LINE_LENGTH, af)) break; /* end of file (or read error, which we choose to ignore) */ if (4 != sscanf(line, "%d %d %d %d", &f1, &l1, &f2, &l2)) { Rprintf( "%s: input format error: %s", progname, line); rcqp_receive_error(1); } /* skip 0:1 and 1:0 alignments */ if (l1 < f1) { n_0_1++; continue; } if (l2 < f2) { n_1_0++; continue; } /* check that source regions are non-overlapping and in ascending order */ if (((reverse) ? f2 : f1) <= mark) { Rprintf( "%s: source regions of alignment must be in ascending order\n", progname); Rprintf( "Last region was [*, %d]; current is [%d, %d].\n", mark, f1, l1); Rprintf( "Aborted.\n"); rcqp_receive_error(1); } mark = (reverse) ? l2 : l1; /* write alignment region to .alx file */ if (reverse) { NwriteInt(f2, alx); NwriteInt(l2, alx); NwriteInt(f1, alx); NwriteInt(l1, alx); } else { NwriteInt(f1, alx); NwriteInt(l1, alx); NwriteInt(f2, alx); NwriteInt(l2, alx); } if (compatibility) { /* source and target regions of .alg file must be contiguous; store start points only; */ /* hence we must collapse crossing alignments into one larger region (I know that's bullshit) */ if ((f1 > current1) && (f2 > current2)) { if (reverse) { NwriteInt(f2, alg); NwriteInt(f1, alg); } else { NwriteInt(f1, alg); NwriteInt(f2, alg); } current1 = f1; current2 = f2; } } } if (compatibility) { if (reverse) { NwriteInt(size2, alg); NwriteInt(size1, alg); /* end of corpus alignment point*/ } else { NwriteInt(size1, alg); NwriteInt(size2, alg); /* end of corpus alignment point*/ } } if (verbose) { Rprintf("I skipped %d 0:1 alignments and %d 1:0 alignments.\n", n_0_1, n_1_0); } /* that's it; close file handles */ fclose(alx); if (compatibility) fclose(alg); if (af_is_pipe) pclose(af); else fclose(af); return 0; }
/**
 * Main function for cwb-describe-corpus.
 *
 * For each corpus ID given after the options, opens the corpus, prints a
 * banner and its basic information via Rprintf(), optionally followed by
 * statistics (-s) and details (-d), and then deletes the corpus handle.
 *
 * @param argc   Number of command-line arguments.
 * @param argv   Command-line arguments.
 * @return       0 on completion.
 */
int
main(int argc, char **argv)
{
  extern char *optarg;
  extern int optind;

  char *registry_path = NULL;
  int want_stats = 0;
  int want_details = 0;
  int opt;
  int arg_ix;
  Corpus *corp;

  progname = argv[0];

  while ((opt = getopt(argc, argv, "+r:sdh")) != EOF) {
    if (opt == 'r') {
      /* -r <dir>: change registry directory (allowed once only) */
      if (registry_path != NULL) {
        Rprintf( "%s: -r option used twice\n", progname);
        rcqp_receive_error(2);
      }
      else
        registry_path = optarg;
    }
    else if (opt == 's') {
      /* -s: show statistics */
      want_stats++;
    }
    else if (opt == 'd') {
      /* -d: show details */
      want_details++;
    }
    else {
      /* -h or anything unrecognised: help page */
      describecorpus_usage();
    }
  }

  if (optind >= argc) {
    Rprintf( "Missing argument, try \"%s -h\" for more information.\n", progname);
    rcqp_receive_error(1);
  }

  for (arg_ix = optind; arg_ix < argc; arg_ix++) {
    char *id = argv[arg_ix];

    corp = cl_new_corpus(registry_path, id);
    if (corp == NULL) {
      Rprintf( "ERROR. Can't access corpus %s !\n", id);
      rcqp_receive_error(1);
    }

    Rprintf("\n============================================================\n");
    Rprintf("Corpus: %s\n", id);
    Rprintf("============================================================\n\n");

    /* attribute names are shown only if no other options are selected */
    describecorpus_show_basic_info(corp, (!want_stats && !want_details));

    if (want_stats)
      describecorpus_show_statistics(corp);

    if (want_details)
      describe_corpus(corp);

    cl_delete_corpus(corp);
  }

  return 0;
}
/** * Main function for cwb-huffcode. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int main(int argc, char **argv) { char *registry_directory = NULL; char *output_fn = NULL; char *attr_name = DEFAULT_ATT_NAME; Attribute *attr; HCD hc; extern int optind; extern char *optarg; int c; int i_want_to_believe = 0; /* skip error checks? */ int all_attributes = 0; protocol = stdout; /* 'delayed' init (see top of file) */ /* ------------------------------------------------- PARSE ARGUMENTS */ progname = argv[0]; /* parse arguments */ while ((c = getopt(argc, argv, "+TvP:r:f:dAh")) != EOF) { switch (c) { /* T: skip decompression / error checking pass ("I trust you") */ case 'T': i_want_to_believe++; break; /* v: verbose -> displays protocol of compression process on stdout */ case 'v': do_protocol++; break; /* P: attribute to compress */ case 'P': attr_name = optarg; break; /* r: registry directory */ case 'r': if (registry_directory == NULL) registry_directory = optarg; else { fprintf(stderr, "%s: -r option used twice\n", progname); exit(2); } break; /* f: filename prefix for compressed data files */ case 'f': output_fn = optarg; break; /* d: debug mode --- unused */ case 'd': debug++; break; /* A: compress all attributes */ case 'A': all_attributes++; break; /* h: help page */ case 'h': huffcode_usage(NULL, 2); break; default: huffcode_usage("illegal option.", 2); break; } } /* single argument: corpus id */ if (optind < argc) { corpus_id = argv[optind++]; } else { huffcode_usage("corpus not specified (missing argument)", 1); } if (optind < argc) { huffcode_usage("Too many arguments", 1); } if ((corpus = cl_new_corpus(registry_directory, corpus_id)) == NULL) { fprintf(stderr, "Corpus %s not found in registry %s . Aborted.\n", corpus_id, (registry_directory ? 
registry_directory : central_corpus_directory())); exit(1); } if (all_attributes) { for (attr = corpus->attributes; attr; attr = attr->any.next) if (attr->any.type == ATT_POS) { compute_code_lengths(attr, &hc, output_fn); if (! i_want_to_believe) decode_check_huff(attr, output_fn); } } else { if ((attr = cl_new_attribute(corpus, attr_name, ATT_POS)) == NULL) { fprintf(stderr, "Attribute %s.%s doesn't exist. Aborted.\n", corpus_id, attr_name); exit(1); } compute_code_lengths(attr, &hc, output_fn); if (! i_want_to_believe) decode_check_huff(attr, output_fn); } cl_delete_corpus(corpus); exit(0); }