/**
 * Attribute (selected/unselected) print helper routine.
 *
 * Writes one line per entry of the attribute list. The first line starts
 * with the header text; every following line starts with as many blanks as
 * the header is long, so the entries line up. Entries whose status flag is
 * set are marked with an asterisk.
 *
 * If the list is NULL or empty, a single "<none>" line is written instead.
 *
 * @param fd             Stream to write to.
 * @param header         Text printed in front of the first entry.
 * @param al             The attribute list to print (may be NULL).
 * @param show_if_annot  If true, entries for which cl_struc_values() reports
 *                       values are flagged with "[A]". The caller must only
 *                       pass true for a list of structural attributes,
 *                       because cl_struc_values() raises a cl_error for any
 *                       other attribute type.
 */
void
PrintAttributes(FILE *fd, char *header, AttributeList *al, int show_if_annot)
{
  AttributeInfo *entry;
  int is_first = 1;
  int pad;

  if (!al || !al->list) {
    fprintf(fd, "%s <none>\n", header);
    return;
  }

  for (entry = al->list; entry; entry = entry->next) {
    if (is_first) {
      fprintf(fd, "%s", header);
      is_first = 0;
    }
    else {
      /* indent continuation lines by the width of the header */
      pad = strlen(header);
      while (pad) {
        fprintf(fd, " ");
        pad--;
      }
    }

    /* selection marker */
    fprintf(fd, "%s", entry->status ? " * " : " ");

    /* cl_struc_values() is only consulted when show_if_annot is set (see above) */
    if (show_if_annot && cl_struc_values(entry->attribute))
      fprintf(fd, "%-20s [A]\n", entry->attribute->any.name);
    else
      fprintf(fd, "%s\n", entry->attribute->any.name);
  }
}
/*
 * ------------------------------------------------------------------------
 *
 * "rcqpCmd_structural_attribute_has_values(SEXP inAttribute)" --
 *
 * Tells whether a structural attribute carries annotation values.
 *
 * inAttribute must be a character vector of length 1; it is passed to
 * cqi_lookup_attribute() to look up an attribute of type ATT_STRUC.
 *
 * Returns a logical vector of length 1 (TRUE iff cl_struc_values() is
 * non-zero for the attribute). If the lookup fails, rcqp_error_code() is
 * called with cqi_errno; should that call return, R_NilValue is returned.
 *
 * The number of PROTECT calls is counted so that UNPROTECT always matches,
 * including on the lookup-failure path where no result vector is allocated.
 *
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_structural_attribute_has_values(SEXP inAttribute)
{
	SEXP result = R_NilValue;
	char *a;
	Attribute *attribute;
	int n_protected = 0;

	/* rcqp_initialize(); */

	if (!isString(inAttribute) || length(inAttribute) != 1)
		error("argument 'attribute' must be a string");

	PROTECT(inAttribute);
	n_protected++;

	a = (char *)CHAR(STRING_ELT(inAttribute, 0));
	attribute = cqi_lookup_attribute(a, ATT_STRUC);

	if (attribute != NULL) {
		result = PROTECT(allocVector(LGLSXP, 1));
		n_protected++;
		LOGICAL(result)[0] = (cl_struc_values(attribute) != 0);
	}
	else {
		rcqp_error_code(cqi_errno);
	}

	/* pop exactly what was pushed (1 on lookup failure, 2 on success) */
	UNPROTECT(n_protected);

	return result;
}
/**
 * Attribute print helper routine (non pretty-printing mode).
 * ( TODO desperately needs a better name )
 *
 * Writes one TAB-separated line per list entry: the type label, the
 * attribute name and, if requested, a third column that is "-V" when
 * cl_struc_values() reports values for the attribute and empty otherwise.
 * Nothing is written for a NULL or empty list.
 *
 * @param fd             Stream to write to.
 * @param type           Label printed in the first column of every line.
 * @param al             The attribute list to print (may be NULL).
 * @param show_if_annot  If true, add the third ("-V" or empty) column;
 *                       cl_struc_values() is then called on every entry.
 */
void
PrintAttributesSimple(FILE *fd, char *type, AttributeList *al, int show_if_annot)
{
  AttributeInfo *entry;

  if (!al)
    return;

  for (entry = al->list; entry; entry = entry->next) {
    fprintf(fd, "%s\t%s", type, entry->attribute->any.name);

    if (show_if_annot) {
      const char *value_flag = "";

      if (cl_struc_values(entry->attribute))
        value_flag = "-V";
      fprintf(fd, "\t%s", value_flag);
    }

    fprintf(fd, "\n");
  }
}
/**
 * Prints statistical information about a corpus (via Rprintf).
 *
 * One line is printed per attribute of the corpus:
 *  - p-attribute: number of tokens and types;
 *  - s-attribute: number of regions, plus a note if it has annotations;
 *  - a-attribute: number of alignment blocks, plus a note if extended;
 *  - anything else: flagged as unknown attribute type.
 * "NO DATA" is shown when the size queries do not return usable counts.
 * A blank line terminates the listing.
 *
 * @param corpus  The corpus to analyse.
 */
void
describecorpus_show_statistics(Corpus *corpus)
{
  Attribute *att;

  for (att = corpus->attributes; att; att = att->any.next) {
    int att_type = att->any.type;

    if (att_type == ATT_POS) {
      int n_tokens, n_types;

      Rprintf("p-ATT %-16s ", att->any.name);
      n_tokens = cl_max_cpos(att);
      n_types = cl_max_id(att);
      if (n_tokens <= 0 || n_types <= 0)
        Rprintf(" NO DATA");
      else
        Rprintf("%10d tokens, %8d types", n_tokens, n_types);
    }
    else if (att_type == ATT_STRUC) {
      int n_regions;

      Rprintf("s-ATT %-16s ", att->any.name);
      n_regions = cl_max_struc(att);
      if (n_regions < 0)
        Rprintf(" NO DATA");
      else {
        Rprintf("%10d regions", n_regions);
        if (cl_struc_values(att))
          Rprintf(" (with annotations)");
      }
    }
    else if (att_type == ATT_ALIGN) {
      int n_blocks;

      Rprintf("a-ATT %-16s ", att->any.name);
      n_blocks = cl_max_alg(att);
      if (n_blocks < 0)
        Rprintf(" NO DATA");
      else {
        Rprintf("%10d alignment blocks", n_blocks);
        if (cl_has_extended_alignment(att))
          Rprintf(" (extended)");
      }
    }
    else {
      Rprintf("??? %-16s (unknown attribute type)", att->any.name);
    }

    Rprintf("\n");
  }

  Rprintf("\n");
}
/**
 * Handler for the CQi command CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES.
 *
 * Reads the attribute name with cqi_read_string(), looks it up as a
 * structural attribute and replies with the boolean result of
 * cl_struc_values(). If the lookup fails, cqi_errno is sent with
 * cqi_command() instead. The name string is freed before returning.
 */
void
do_cqi_corpus_structural_attribute_has_values(void)
{
  char *att_name = cqi_read_string();
  Attribute *s_att;

  if (server_debug)
    Rprintf( "CQi: CQI_CORPUS_STRUCTURAL_ATTRIBUTE_HAS_VALUES('%s')\n", att_name);

  /* only s-attributes are considered here */
  s_att = cqi_lookup_attribute(att_name, ATT_STRUC);
  if (s_att == NULL)
    cqi_command(cqi_errno);
  else
    cqi_data_bool(cl_struc_values(s_att));

  free(att_name);
}
/** * Main function for cwb-decode. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int main(int argc, char **argv) { Attribute *attr; Attribute *context = NULL; int sp; /* start position of a match */ int ep; /* end position of a match */ int w, cnt, read_pos_frm_stdin; char s[CL_MAX_LINE_LENGTH]; /* buffer for strings read from file */ char *token; char *input_filename = NULL; FILE *input_file = stdin; /* ------------------------------------------------- PARSE ARGUMENTS */ int c; extern char *optarg; extern int optind; progname = argv[0]; first_token = -1; last = -1; maxlast = -1; read_pos_frm_stdin = 0; /* use getopt() to parse command-line options */ while((c = getopt(argc, argv, "+s:e:r:nLHCxXf:ph")) != EOF) switch(c) { /* s: start corpus position */ case 's': first_token = atoi(optarg); break; /* e: end corpus position */ case 'e': last = atoi(optarg); break; /* r: registry directory */ case 'r': if (registry_directory == NULL) registry_directory = optarg; else { fprintf(stderr, "%s: -r option used twice\n", progname); exit(2); } break; /* n: show cpos in -H mode */ case 'n': printnum++; break; /* x: XML-compatible output in -C mode (-Cx) */ case 'x': xml_compatible++; break; /* L,H,C,X: Lisp, Horizontal, Compact, and XML modes */ case 'L': mode = LispMode; break; case 'H': mode = ConclineMode; break; case 'C': mode = EncodeMode; break; case 'X': mode = XMLMode; break; /* f: matchlist mode / read corpus positions from file */ case 'f': input_filename = optarg; break; /* p: matchlist mode / read corpus positions from stdin */ case 'p': read_pos_frm_stdin++; break; /* h: help page */ case 'h': decode_usage(2); break; default: fprintf(stderr, "Illegal option. 
Try \"%s -h\" for more information.\n", progname); fprintf(stderr, "[remember that options go before the corpus name, and attribute declarations after it!]\n"); decode_cleanup(2); } /* required argument: corpus id */ if (optind < argc) { corpus_id = argv[optind++]; if ((corpus = cl_new_corpus(registry_directory, corpus_id)) == NULL) { fprintf(stderr, "Corpus %s not found in registry %s . Aborted.\n", corpus_id, (registry_directory ? registry_directory : cl_standard_registry() ) ); decode_cleanup(1); } } else { fprintf(stderr, "Missing argument. Try \"%s -h\" for more information.\n", progname); decode_cleanup(2); } /* now parse output flags (-P, -S, ...) [cnt is our own argument counter] */ for (cnt = optind; cnt < argc; cnt++) { if (strcmp(argv[cnt], "-c") == 0) { /* -c: context */ if ((context = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) { fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } } else if (strcmp(argv[cnt], "-P") == 0) { /* -P: positional attribute */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_POS)) == NULL) { fprintf(stderr, "Can't open p-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else { if (cl_max_cpos(attr) > 0) { decode_add_attribute(attr); if (maxlast < 0) maxlast = cl_max_cpos(attr); /* determines corpus size */ } else { fprintf(stderr, "Attribute %s.%s is declared, but not accessible (missing data?). Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } } } else if (strcmp(argv[cnt], "-ALL") == 0) { /* -ALL: all p-attributes and s-attributes */ for (attr = corpus->attributes; attr; attr = attr->any.next) if (attr->any.type == ATT_POS) { decode_add_attribute(attr); if (maxlast < 0) maxlast = cl_max_cpos(attr); } else if (attr->any.type == ATT_STRUC) { decode_add_attribute(attr); } } else if (strcmp(argv[cnt], "-D") == 0) { /* -D: dynamic attribute (not implemented) */ fprintf(stderr, "Sorry, dynamic attributes are not implemented. 
Aborting.\n"); decode_cleanup(2); } else if (strcmp(argv[cnt], "-A") == 0) { /* -A: alignment attribute */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_ALIGN)) == NULL) { fprintf(stderr, "Can't open a-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else decode_add_attribute(attr); } else if (strcmp(argv[cnt], "-S") == 0) { /* -S: structural attribute (as tags) */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) { fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else decode_add_attribute(attr); } else if (strcmp(argv[cnt], "-V") == 0) { /* -V: show structural attribute values (with -p or -f) */ if ((attr = cl_new_attribute(corpus, argv[++cnt], ATT_STRUC)) == NULL) { fprintf(stderr, "Can't open s-attribute %s.%s . Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else if (!cl_struc_values(attr)) { fprintf(stderr, "S-attribute %s.%s does not have annotations. Aborted.\n", corpus_id, argv[cnt]); decode_cleanup(1); } else if (printValuesIndex >= MAX_PRINT_VALUES) { fprintf(stderr, "Too many -V attributes, sorry. Aborted.\n"); decode_cleanup(1); } else printValues[printValuesIndex++] = attr; } else { fprintf(stderr, "Unknown flag: %s\n", argv[cnt]); decode_cleanup(2); } } /* ---- end of parse attribute declarations ---- */ if (input_filename != NULL) { if (strcmp(input_filename, "-") == 0) input_file = stdin; else if ((input_file = fopen(input_filename, "r")) == NULL) { perror(input_filename); exit(1); } read_pos_frm_stdin++; } decode_verify_print_value_list(); /* ------------------------------------------------------------ DECODE CORPUS */ if (read_pos_frm_stdin == 0) { /* * normal mode: decode entire corpus or specified range */ if (maxlast < 0) { fprintf(stderr, "Need at least one p-attribute (-P flag). 
Aborted.\n"); decode_cleanup(2); } if (first_token < 0 || first_token >= maxlast) first_token = 0; if (last < 0 || last >= maxlast) last = maxlast - 1; if (last < first_token) { fprintf(stderr, "Warning: output range #%d..#%d is empty. No output.\n", first_token, last); decode_cleanup(2); } if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { decode_print_xml_declaration(); printf("<corpus name=\"%s\" start=\"%d\" end=\"%d\">\n", corpus_id, first_token, last); } /* decode_print_surrounding_s_att_values(first_token); */ /* don't do that in "normal" mode, coz it doesn't make sense */ for (w = first_token; w <= last; w++) decode_print_token_sequence(w, -1, context); if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { printf("</corpus>\n"); } } else { /* * matchlist mode: read (pairs of) corpus positions from stdin or file */ if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { decode_print_xml_declaration(); printf("<matchlist corpus=\"%s\">\n", corpus_id); } cnt = 0; while (fgets(s, CL_MAX_LINE_LENGTH, input_file) != NULL) { token = strtok(s, " \t\n"); if ((token != NULL) && is_num(token)) { sp = atoi(token); ep = -1; if ((token = strtok(NULL, " \t\n")) != NULL) { if (!is_num(token)) { fprintf(stderr, "Invalid corpus position #%s . Aborted.\n", token); decode_cleanup(1); } else ep = atoi(token); } cnt++; /* count matches in matchlist */ if (mode == XMLMode) { printf("<match nr=\"%d\"", cnt); if (printnum) printf(" start=\"%d\" end=\"%d\"", sp, (ep >= 0) ? ep : sp); printf(">\n"); } else { /* nothing shown before range */ } decode_print_surrounding_s_att_values(sp); decode_print_token_sequence(sp, ep, context); if (mode == XMLMode) { printf("</match>\n"); } else if (mode != ConclineMode) { printf("\n"); /* blank line, unless in -H mode */ } } else { fprintf(stderr, "Invalid corpus position #%s . 
Aborted.\n", s); decode_cleanup(1); } } if (input_file != stdin) fclose(input_file); if ( (mode == XMLMode) || ((mode == EncodeMode) && xml_compatible) ) { printf("</matchlist>\n"); } } decode_cleanup(0); return 0; /* just to keep gcc from complaining */ }
/**
 * Prints out the requested attributes for a sequence of tokens
 * (or a single token if end_position == -1).
 *
 * If the -c flag was used (and, thus, the context parameter is not NULL),
 * then the sequence is extended to the entire s-attribute region (in matchlist mode).
 *
 * The output format depends on the global `mode`; the attributes printed are
 * those in the global print_list[] (print_list_index entries). In -C, -H and
 * -X modes, s-attribute and a-attribute boundaries are printed as start tags
 * before, and end tags after, the token they coincide with; in the other
 * modes they are printed inline with the token's p-attribute values.
 *
 * @param start_position  Corpus position of the first token to print.
 * @param end_position    Corpus position of the last token to print,
 *                        or -1 (any negative value) to print just one token.
 * @param context         S-attribute whose regions are used to expand the
 *                        printed range; NULL for no expansion.
 */
void decode_print_token_sequence(int start_position, int end_position, Attribute *context) {
  int alg, aligned_start, aligned_end, aligned_start2, aligned_end2, rng_start, rng_end, snum;
  int start_context, end_context, dummy;
  int lastposa, i, w;
  /* pointer used for values of p-attributes */
  char *wrd;

  start_context = start_position;
  end_context = (end_position >= 0) ? end_position : start_position;
  /* above ensures that in non-matchlist mode (where ep == -1), we only print one token */

  if (context != NULL) {
    /* expand the start_context end_context numbers to the start
     * and end points of the containing region of the context s-attribute */
    if (!cl_cpos2struc2cpos(context, start_position, &start_context, &end_context)) {
      /* start position is not inside a context region: fall back to the unexpanded range */
      start_context = start_position;
      end_context = (end_position >= 0) ? end_position : start_position;
    }
    else if (end_position >= 0) {
      /* extend the right edge to the end of the region containing end_position, if there is one */
      if (!cl_cpos2struc2cpos(context, end_position, &dummy, &end_context)) {
        end_context = (end_position >= 0) ? end_position : start_position;
      }
    }

    /* indicate that we're showing context */
    switch (mode) {
    case LispMode:
      /* the parenthesis opened here is closed at the very end of this function */
      printf("(TARGET %d\n", start_position);
      if (end_position >= 0)
        printf("(INTERVAL %d %d)\n", start_position, end_position);
      break;
    case EncodeMode:
    case ConclineMode:
      /* nothing here */
      break;
    case XMLMode:
      printf("<context start=\"%d\" end=\"%d\"/>\n", start_context, end_context);
      break;
    case StandardMode:
    default:
      if (end_position >= 0) {
        printf("INTERVAL %d %d\n", start_position, end_position);
      }
      else {
        printf("TARGET %d\n", start_position);
      }
      break;
    }
  } /* endif context != NULL */

  /* some extra information in -L and -H modes */
  if (mode == LispMode && end_position != -1)
    printf("(CONTEXT %d %d)\n", start_context, end_context);
  else if (mode == ConclineMode) {
    if (printnum)
      printf("%8d: ", start_position);
  }

  /* now print the token sequence (including context) with all requested attributes */
  for (w = start_context; w <= end_context; w++) {
    int beg_of_line;

    /* extract s-attribute regions for start and end tags into s_att_regions[] */
    N_sar = 0; /* counter and index */
    for (i = 0; i < print_list_index; i++) {
      if (print_list[i]->any.type == ATT_STRUC) {
        /* only regions that start or end exactly at w are recorded */
        if ( ((snum = cl_cpos2struc(print_list[i], w)) >= 0) && (cl_struc2cpos(print_list[i], snum, &rng_start, &rng_end)) && ((w == rng_start) || (w == rng_end)) ) {
          s_att_regions[N_sar].name = print_list[i]->any.name;
          s_att_regions[N_sar].start = rng_start;
          s_att_regions[N_sar].end = rng_end;
          if (cl_struc_values(print_list[i]))
            s_att_regions[N_sar].annot = cl_struc2str(print_list[i], snum);
          else
            s_att_regions[N_sar].annot = NULL;
          N_sar++;
        }
      }
    }
    decode_sort_s_att_regions(); /* sort regions to ensure proper nesting of start and end tags */

    /* show corpus positions with -n option */
    if (printnum)
      switch (mode) {
      case LispMode:
        printf("(%d ", w);
        break;
      case EncodeMode:
        printf("%8d\t", w);
        break;
      case ConclineMode:
        /* nothing here (shown at start of line in -H mode) */
        break;
      case XMLMode:
        /* nothing here */
        break;
      case StandardMode:
      default:
        printf("%8d: ", w);
        break;
      }
    else {
      if (mode == LispMode)
        printf("("); /* entire match is parenthesised list in -L mode */
    }

    /* index of the last p-attribute printed for this token; assigned below but not read within this function */
    lastposa = -1;

    /* print start tags (s- and a-attributes) with -C,-H,-X */
    if ((mode == EncodeMode) || (mode == ConclineMode) || (mode == XMLMode)) {

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          /* start tag only at the first token of an alignment block */
          if ( ((alg = cl_cpos2alg(print_list[i], w)) >= 0) && (cl_alg2cpos(print_list[i], alg, &aligned_start, &aligned_end, &aligned_start2, &aligned_end2)) && (w == aligned_start) ) {
            if (mode == XMLMode) {
              printf("<align type=\"start\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("<%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;
        default:
          /* ignore all other attribute types */
          break;
        }
      }

      /* print s-attributes from s_att_regions[] (using sar_sort_index[]) */
      for (i = 0; i < N_sar; i++) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->start == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"start\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            /* the annotation is passed through decode_string_escape() in XML mode only */
            if (region->annot)
              printf(" value=\"%s\"", decode_string_escape(region->annot));
            printf("/>\n");
          }
          else {
            printf("<%s%s%s>%c", region->name, region->annot ? " " : "", region->annot ? region->annot : "", (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }
    }

    /* now print token with its attribute values (p-attributes only for -C,-H,-X) */
    if (mode == XMLMode) {
      printf("<token");
      if (printnum)
        printf(" cpos=\"%d\"", w);
      printf(">");
    }

    /* beg_of_line: no separator is printed before the first p-attribute value in -C and -H modes */
    beg_of_line = 1;
    /* Loop printing each attribute for this cpos (w) */
    for (i = 0; i < print_list_index; i++) {
      switch (print_list[i]->any.type) {
      case ATT_POS:
        lastposa = i;
        if ((wrd = decode_string_escape(cl_cpos2str(print_list[i], w))) != NULL) {
          switch (mode) {
          case LispMode:
            printf("(%s \"%s\")", print_list[i]->any.name, wrd);
            break;
          case EncodeMode:
            /* values of one token are TAB-separated */
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("\t%s", wrd);
            break;
          case ConclineMode:
            /* values of one token are slash-separated */
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("/%s", wrd);
            break;
          case XMLMode:
            printf(" <attr name=\"%s\">%s</attr>", print_list[i]->any.name, wrd);
            break;
          case StandardMode:
          default:
            printf("%s=%s\t", print_list[i]->any.name, wrd);
            break;
          }
        }
        else {
          cl_error("(aborting) cl_cpos2str() failed");
          decode_cleanup(1);
        }
        break;

      case ATT_ALIGN:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if ( ((alg = cl_cpos2alg(print_list[i], w)) >= 0) && (cl_alg2cpos(print_list[i], alg, &aligned_start, &aligned_end, &aligned_start2, &aligned_end2)) ) {
            if (mode == LispMode) {
              printf("(ALG %d %d %d %d)", aligned_start, aligned_end, aligned_start2, aligned_end2);
            }
            else {
              printf("%d-%d==>%s:%d-%d\t", aligned_start, aligned_end, print_list[i]->any.name, aligned_start2, aligned_end2);
            }
          }
          else if (cl_errno != CDA_OK) {
            cl_error("(aborting) alignment error");
            decode_cleanup(1);
          }
        }
        break;

      case ATT_STRUC:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if (cl_cpos2struc2cpos(print_list[i], w, &rng_start, &rng_end)) {
            /* standard and -L mode don't show tag annotations */
            printf(mode == LispMode ? "(STRUC %s %d %d)" : "<%s>:%d-%d\t", print_list[i]->any.name, rng_start, rng_end);
          }
          /* NOTE(review): unlike the alignment case above, this branch reports the error but does not call decode_cleanup() */
          else if (cl_errno != CDA_OK)
            cl_error("(aborting) cl_cpos2struc2cpos() failed");
        }
        break;

      case ATT_DYN:
        /* dynamic attributes aren't implemented */
      default:
        break;
      }
    }

    /* print token separator (or end of token in XML mode) */
    switch (mode) {
    case LispMode:
      printf(")\n");
      break;
    case ConclineMode:
      printf(" ");
      break;
    case XMLMode:
      printf(" </token>\n");
      break;
    case EncodeMode:
    case StandardMode:
    default:
      printf("\n");
      break;
    }

    /* now, after printing all the positional attributes, print end tags with -H,-C,-X */
    if (mode == EncodeMode || mode == ConclineMode || mode == XMLMode) {

      /* print s-attributes from s_att_regions[] (using sar_sort_index[] in reverse order) */
      for (i = N_sar - 1; i >= 0; i--) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->end == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"end\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            printf("/>\n");
          }
          else {
            printf("</%s>%c", region->name, (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          /* end tag only at the last token of an alignment block */
          if ( ((alg = cl_cpos2alg(print_list[i], w)) >= 0) && (cl_alg2cpos(print_list[i], alg, &aligned_start, &aligned_end, &aligned_start2, &aligned_end2)) && (w == aligned_end) ) {
            if (mode == XMLMode) {
              printf("<align type=\"end\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("</%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;
        default:
          /* ignore all other attribute types */
          break;
        }
      }

    } /* end of print end tags */

  } /* end of match range loop: for w from start_context to end_context */

  /* end of match (for matchlist mode in particular) */
  if ((context != NULL) && (mode == LispMode))
    printf(")\n");
  else if (mode == ConclineMode)
    printf("\n");

  return;
}
/** * Main function for cwb-s-encode. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int main(int argc, char **argv) { int input_line; int start, end; char *annot; char buf[CL_MAX_LINE_LENGTH]; Attribute *att; int V_switch, values, S_annotations_dropped; int i, N; progname = argv[0]; sencode_parse_options(argc, argv); /* -a mode: read existing regions into memory */ if (add_to_existing) { if (corpus == NULL) { Rprintf( "Error: You have to specify source corpus (-C <corpus>) for -a switch.\n"); rcqp_receive_error(1); } att = cl_new_attribute(corpus, new_satt.name, ATT_STRUC); if ((att != NULL) && (cl_max_struc(att) > 0)) { V_switch = new_satt.store_values; values = cl_struc_values(att); if (V_switch && (!values)) { Rprintf( "Error: Existing regions of -V attribute have no annotations.\n"); rcqp_receive_error(1); } else if ((!V_switch) && values) { Rprintf( "Error: Existing regions of -S attributes have annotations.\n"); rcqp_receive_error(1); } if (!silent) Rprintf("[Loading previous <%s> regions]\n", new_satt.name); N = cl_max_struc(att); for (i = 0; i < N; i++) { cl_struc2cpos(att, i, &start, &end); annot = cl_struc2str(att, i); SL_insert(start, end, annot); } } else { if (!silent) Rprintf("[No <%s> regions defined (skipped)]\n", new_satt.name); } } /* loop reading input (stdin or -f <file>) */ if (in_memory && (!silent)) Rprintf("[Reading input data]\n"); input_line = 0; S_annotations_dropped = 0; while (fgets(buf, CL_MAX_LINE_LENGTH, text_fd)) { input_line++; /* check for buffer overflow */ if (strlen(buf) >= (CL_MAX_LINE_LENGTH - 1)) { Rprintf( "BUFFER OVERFLOW, input line #%d is too long:\n>> %s", input_line, buf); rcqp_receive_error(1); } if (! 
sencode_parse_line(buf, &start, &end, &annot)) { Rprintf( "FORMAT ERROR on line #%d:\n>> %s", input_line, buf); rcqp_receive_error(1); } if (new_satt.store_values && (annot == NULL)) { Rprintf( "MISSING ANNOTATION on line #%d:\n>> %s", input_line, buf); rcqp_receive_error(1); } if ((!new_satt.store_values) && (annot != NULL)) { if (! S_annotations_dropped) Rprintf( "WARNING: Annotation for -S attribute ignored on line #%d (warning issued only once):\n>> %s", input_line, buf); S_annotations_dropped++; } if ((start <= new_satt.last_cpos) || (end < start)) { Rprintf( "RANGE INCONSISTENCY on line #%d:\n>> %s(end of previous region was %d)\n", input_line, buf, new_satt.last_cpos); rcqp_receive_error(1); } if (annot != NULL && set_att != set_none) { /* convert set annotation into standard syntax */ annot = sencode_check_set(annot); if (annot == NULL) { Rprintf( "SET ANNOTATION SYNTAX ERROR on line #%d:\n>> %s", input_line, buf); rcqp_receive_error(1); } } /* debugging output */ if (debug) { Rprintf( "[%d, %d]", start, end); if (annot != NULL) Rprintf( " <%s>", annot); Rprintf( "\n"); } /* in -M mode, store this region in memory; otherwise write it to the disk files */ if (in_memory) SL_insert(start, end, annot); else sencode_write_region(start, end, annot); cl_free(annot); } /* in -M mode, write data to disk now that we have finished looping across input data */ if (in_memory) { SL item; if (!silent) Rprintf("[Creating encoded disk file(s)]\n"); SL_rewind(); while ((item = SL_next()) != NULL) sencode_write_region(item->start, item->end, item->annot); } /* close files */ sencode_close_files(); if (S_annotations_dropped > 0) Rprintf( "Warning: %d annotation values dropped for -S attribute '%s'.\n", S_annotations_dropped, new_satt.name); rcqp_receive_error(0); }
/**
 * Sets up a Group object for the "group" command and computes the grouping.
 *
 * The source side is optional: it is skipped when source_attr_name is NULL
 * and source_field is NoField. For each side that is used, the attribute is
 * looked up first as a p-attribute and then as an s-attribute; an
 * s-attribute must have annotated values. The anchor field of each side
 * must be available in the query result (keyword / target anchors defined).
 *
 * For s-attributes, the string returned by cl_struc2str() for region 0 is
 * stored as the "base" pointer of that side.
 *
 * @param cl                The query result (named query) to group.
 * @param source_field      Anchor of the source side (NoField if unused).
 * @param source_offset     Offset applied to the source anchor.
 * @param source_attr_name  Name of the source attribute (NULL if unused).
 * @param target_field      Anchor of the target side.
 * @param target_offset     Offset applied to the target anchor.
 * @param target_attr_name  Name of the target attribute.
 * @param cutoff_freq       Stored as the cutoff frequency of the group.
 * @return                  Result of ComputeGroupExternally() or
 *                          ComputeGroupInternally(); NULL if any of the
 *                          checks fails (a message has been issued then).
 */
Group *
compute_grouping(CorpusList *cl, FieldType source_field, int source_offset, char *source_attr_name, FieldType target_field, int target_offset, char *target_attr_name, int cutoff_freq)
{
  Group *group;
  Attribute *source_attr = NULL;
  Attribute *target_attr;
  int source_is_struc = 0;
  int target_is_struc = 0;
  char *source_base = NULL;
  char *target_base = NULL;

  if (!cl || !cl->corpus) {
    cqpmessage(Warning, "Grouping:\nCan't access corpus.");
    return NULL;
  }

  if (cl->size == 0 || !cl->range) {
    cqpmessage(Warning, "Corpus %s is empty, no grouping possible", cl->name);
    return NULL;
  }

  /* ---- source side (optional) ---- */
  if (source_attr_name != NULL || source_field != NoField) {
    source_attr = find_attribute(cl->corpus, source_attr_name, ATT_POS, NULL);
    if (!source_attr) {
      /* not a p-attribute: try s-attributes */
      source_attr = find_attribute(cl->corpus, source_attr_name, ATT_STRUC, NULL);
      source_is_struc = 1;
    }
    if (!source_attr) {
      cqpmessage(Error, "Can't find attribute ``%s'' for named query %s", source_attr_name, cl->name);
      return NULL;
    }

    if (source_is_struc) {
      if (!cl_struc_values(source_attr)) {
        cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s", source_attr_name, cl->name);
        return NULL;
      }
      /* should be beginning of the attribute's lexicon */
      source_base = cl_struc2str(source_attr, 0);
      assert(source_base && "Internal error. Please don't use s-attributes in group command.");
    }

    if (source_field == KeywordField) {
      if (!cl->keywords) {
        cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
        return NULL;
      }
    }
    else if (source_field == TargetField) {
      if (!cl->targets) {
        cqpmessage(Error, "No target anchors defined for %s", cl->name);
        return NULL;
      }
    }
    else if (source_field == MatchField || source_field == MatchEndField) {
      assert(cl->range && cl->size > 0);
    }
    else {
      /* NoField or anything unexpected */
      cqpmessage(Error, "Illegal second anchor in group command");
      return NULL;
    }
  }

  /* ---- target side (mandatory) ---- */
  target_attr = find_attribute(cl->corpus, target_attr_name, ATT_POS, NULL);
  if (!target_attr) {
    /* not a p-attribute: try s-attributes */
    target_attr = find_attribute(cl->corpus, target_attr_name, ATT_STRUC, NULL);
    target_is_struc = 1;
  }
  if (!target_attr) {
    cqpmessage(Error, "Can't find attribute ``%s'' for named query %s", target_attr_name, cl->name);
    return NULL;
  }

  if (target_is_struc) {
    if (!cl_struc_values(target_attr)) {
      cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s", target_attr_name, cl->name);
      return NULL;
    }
    /* should be beginning of the attribute's lexicon */
    target_base = cl_struc2str(target_attr, 0);
    assert(target_base && "Internal error. Please don't use s-attributes in group command.");
  }

  if (target_field == KeywordField) {
    if (!cl->keywords) {
      cqpmessage(Error, "No keyword anchors defined for %s", cl->name);
      return NULL;
    }
  }
  else if (target_field == TargetField) {
    if (!cl->targets) {
      cqpmessage(Error, "No target anchors defined for %s", cl->name);
      return NULL;
    }
  }
  else if (target_field == MatchField || target_field == MatchEndField) {
    assert(cl->range && cl->size > 0);
  }
  else {
    /* NoField or anything unexpected */
    cqpmessage(Error, "Illegal anchor in group command");
    return NULL;
  }

  /* ---- set up Group object ---- */
  group = (Group *) cl_malloc(sizeof(Group));
  group->my_corpus = cl;
  group->cutoff_frequency = cutoff_freq;
  group->nr_cells = 0;
  group->count_cells = NULL;

  group->source_attribute = source_attr;
  group->source_field = source_field;
  group->source_offset = source_offset;
  group->source_is_struc = source_is_struc;
  group->source_base = source_base;

  group->target_attribute = target_attr;
  group->target_field = target_field;
  group->target_offset = target_offset;
  group->target_is_struc = target_is_struc;
  group->target_base = target_base;

  /* external grouping is only used when neither side is an s-attribute */
  if (UseExternalGrouping && !insecure && !source_is_struc && !target_is_struc)
    return ComputeGroupExternally(group); /* modifies Group object in place and returns pointer or NULL */

  return ComputeGroupInternally(group);
}
/**
 * Tabulates the specified query result, using the settings from the global
 * list of tabulation items (TabulationList).
 *
 * First, every tabulation item is resolved: its attribute name is looked up
 * as a p-attribute, then as an s-attribute (which must have annotated
 * values); items without an attribute name print the corpus position. The
 * anchors of each item are validated unless the query result is empty.
 *
 * Then one line is written per match in first..last: items are separated by
 * TABs, and the values for the corpus positions within one item's range are
 * separated by blanks. If one of an item's anchors is undefined, a single
 * undefined value is printed for the entire range (-1 for corpus positions,
 * an empty string for attribute values).
 *
 * @param cl     The query result to tabulate.
 * @param first  Number of the first match to tabulate (clamped to 0).
 * @param last   Number of the last match to tabulate (clamped to the
 *               size of the query result - 1).
 * @param rd     Output redirection; opened here and closed when done.
 * @return       1 if tabulation was successful, 0 otherwise (an error
 *               message is generated in most failure cases).
 */
int
print_tabulation(CorpusList *cl, int first, int last, struct Redir *rd)
{
  TabulationItem spec;
  int match;

  if (!cl)
    return 0;

  /* make sure that first and last match to tabulate are in range */
  if (first <= 0)
    first = 0;
  if (last >= cl->size)
    last = cl->size - 1;

  /* obtain attribute handles for tabulation items */
  for (spec = TabulationList; spec; spec = spec->next) {
    if (!spec->attribute_name) {
      /* no attribute -> print corpus position */
      spec->attribute_type = ATT_NONE;
    }
    else if ((spec->attribute = cl_new_attribute(cl->corpus, spec->attribute_name, ATT_POS)) != NULL) {
      spec->attribute_type = ATT_POS;
    }
    else if ((spec->attribute = cl_new_attribute(cl->corpus, spec->attribute_name, ATT_STRUC)) != NULL) {
      spec->attribute_type = ATT_STRUC;
      if (!cl_struc_values(spec->attribute)) {
        cqpmessage(Error, "No annotated values for s-attribute ``%s'' in named query %s", spec->attribute_name, cl->name);
        return 0;
      }
    }
    else {
      cqpmessage(Error, "Can't find attribute ``%s'' for named query %s", spec->attribute_name, cl->name);
      return 0;
    }

    /* work around bug: anchor validation will fail for empty query result
       (but then the output loop below is void anyway) */
    if (cl->size > 0) {
      if (!pt_validate_anchor(cl, spec->anchor1) || !pt_validate_anchor(cl, spec->anchor2))
        return 0;
    }
  }

  if (!open_stream(rd, cl->corpus->charset)) {
    cqpmessage(Error, "Can't redirect output to file or pipe\n");
    return 0;
  }

  /* tabulate selected attribute values for matches <first> .. <last> */
  for (match = first; match <= last; match++) {
    for (spec = TabulationList; spec; spec = spec->next) {
      int range_start = pt_get_anchor_cpos(cl, match, spec->anchor1, spec->offset1);
      int range_end = pt_get_anchor_cpos(cl, match, spec->anchor2, spec->offset2);
      int cpos;

      /* one of the anchors is undefined -> print single undefined value for entire range */
      if (range_start < 0 || range_end < 0)
        range_start = range_end = -1;

      for (cpos = range_start; cpos <= range_end; cpos++) {
        if (spec->attribute_type == ATT_NONE) {
          fprintf(rd->stream, "%d", cpos);
        }
        else if (cpos >= 0) {
          /* undefined anchors print empty string */
          char *value;

          if (spec->attribute_type == ATT_POS)
            value = cl_cpos2str(spec->attribute, cpos);
          else
            value = cl_cpos2struc2str(spec->attribute, cpos);

          if (value) {
            if (!spec->flags) {
              fprintf(rd->stream, "%s", value);
            }
            else {
              /* apply the item's flags to a private copy of the string */
              char *normalised = cl_strdup(value);

              cl_string_canonical(normalised, cl->corpus->charset, spec->flags);
              fprintf(rd->stream, "%s", normalised);
              cl_free(normalised);
            }
          }
        }

        /* multiple values for tabulation item are separated by blanks */
        if (cpos < range_end)
          fprintf(rd->stream, " ");
      }

      /* multiple tabulation items are separated by TABs */
      if (spec->next)
        fprintf(rd->stream, "\t");
    }
    fprintf(rd->stream, "\n");
  }

  close_stream(rd);
  free_tabulation_list();

  return 1;
}