/*
 * ------------------------------------------------------------------------
 *
 * "rcqpCmd_attribute_size(SEXP inAttribute)" --
 *
 * Looks up the attribute named by inAttribute, trying in turn the
 * positional (ATT_POS), structural (ATT_STRUC) and alignment (ATT_ALIGN)
 * attribute types, and returns its size as computed by cl_max_cpos(),
 * cl_max_struc() or cl_max_alg() respectively.
 *
 * inAttribute must be a character vector of length 1; otherwise an R
 * error is raised.
 *
 * Returns a length-1 integer vector holding the size. A structural
 * attribute for which cl_max_struc() reports an error is treated as having
 * 0 regions (same policy as the CQi handler do_cqi_cl_attribute_size()).
 * On the other failure paths the error is reported through
 * rcqp_send_error() / rcqp_error_code(); if those helpers return,
 * R_NilValue is returned.
 *
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_attribute_size(SEXP inAttribute)
{
	SEXP result = R_NilValue;
	char *a;
	Attribute *attribute;
	int size = 0;
	int found = 0;
	int nprotect = 0;	/* entries this function currently holds on R's protection stack */

	if (!isString(inAttribute) || length(inAttribute) != 1)
		error("argument 'attribute' must be a string");

	PROTECT(inAttribute);
	nprotect++;

	a = (char*)CHAR(STRING_ELT(inAttribute,0));

	/* Need to try all possible attribute types */
	attribute = cqi_lookup_attribute(a, ATT_POS);
	if (attribute != NULL) {
		size = cl_max_cpos(attribute);
		if (size < 0) {
			/* Release our protections before reporting; the counter is
			 * reset so that nothing is popped twice should the error
			 * helper return. */
			UNPROTECT(nprotect);
			nprotect = 0;
			rcqp_send_error();
		} else {
			found = 1;
		}
	} else {
		attribute = cqi_lookup_attribute(a, ATT_STRUC);
		if (attribute != NULL) {
			size = cl_max_struc(attribute);
			if (size < 0) {
				/* An S-attribute without regions is reported as an error
				 * by the CL, but it is a legitimate result here: 0. */
				size = 0;
			}
			found = 1;
		} else {
			attribute = cqi_lookup_attribute(a, ATT_ALIGN);
			if (attribute != NULL) {
				size = cl_max_alg(attribute);
				if (size < 0) {
					UNPROTECT(nprotect);
					nprotect = 0;
					rcqp_send_error();
				} else {
					found = 1;
				}
			} else {
				/* No attribute of any type: report errno from the last lookup */
				UNPROTECT(nprotect);
				nprotect = 0;
				rcqp_error_code(cqi_errno);
			}
		}
	}

	if (found) {
		result = PROTECT(allocVector(INTSXP, 1));
		nprotect++;
		INTEGER(result)[0] = size;
	}

	/* Pop exactly what was pushed, whichever path was taken */
	if (nprotect > 0)
		UNPROTECT(nprotect);

	return result;
}
void do_cqi_cl_attribute_size(void) { char *a; Attribute *attribute; int size; a = cqi_read_string(); /* need to try all possible attribute types */ if (server_debug) Rprintf( "CQi: CQI_CL_ATTRIBUTE_SIZE('%s')\n", a); attribute = cqi_lookup_attribute(a, ATT_POS); if (attribute != NULL) { size = cl_max_cpos(attribute); if (size < 0) { send_cl_error(); } else { cqi_data_int(size); } } else { attribute = cqi_lookup_attribute(a, ATT_STRUC); if (attribute != NULL) { size = cl_max_struc(attribute); if (size < 0) { /* send_cl_error(); */ /* current version of CL considers 0 regions a data access error condition, but we want to allow that */ cqi_data_int(0); } else { cqi_data_int(size); } } else { attribute = cqi_lookup_attribute(a, ATT_ALIGN); if (attribute != NULL) { size = cl_max_alg(attribute); if (size < 0) { send_cl_error(); } else { cqi_data_int(size); } } else { cqi_command(cqi_errno); /* return errno from the last lookup */ } } } free(a); }
/**
 * Prints statistical information about a corpus (output goes through
 * Rprintf).
 *
 * Walks the corpus' attribute list and writes one line per attribute:
 * tokens and types for a P-attribute, the number of regions for an
 * S-attribute (flagged when it carries annotations), the number of
 * alignment blocks for an A-attribute (flagged when extended). Attributes
 * whose data cannot be read are marked "NO DATA". A blank line ends the
 * listing.
 *
 * @param corpus  The corpus to analyse.
 */
void
describecorpus_show_statistics (Corpus *corpus)
{
  Attribute *att = corpus->attributes;
  int n_tokens, n_types, n_regions, n_blocks;

  while (att) {
    if (att->any.type == ATT_POS) {
      Rprintf("p-ATT %-16s ", att->any.name);
      n_tokens = cl_max_cpos(att);
      n_types = cl_max_id(att);
      if ((n_tokens <= 0) || (n_types <= 0))
        Rprintf(" NO DATA");
      else
        Rprintf("%10d tokens, %8d types", n_tokens, n_types);
    }
    else if (att->any.type == ATT_STRUC) {
      Rprintf("s-ATT %-16s ", att->any.name);
      n_regions = cl_max_struc(att);
      if (n_regions < 0)
        Rprintf(" NO DATA");
      else {
        Rprintf("%10d regions", n_regions);
        if (cl_struc_values(att))
          Rprintf(" (with annotations)");
      }
    }
    else if (att->any.type == ATT_ALIGN) {
      Rprintf("a-ATT %-16s ", att->any.name);
      n_blocks = cl_max_alg(att);
      if (n_blocks < 0)
        Rprintf(" NO DATA");
      else {
        Rprintf("%10d alignment blocks", n_blocks);
        if (cl_has_extended_alignment(att))
          Rprintf(" (extended)");
      }
    }
    else {
      Rprintf("??? %-16s (unknown attribute type)", att->any.name);
    }

    Rprintf("\n");
    att = att->any.next;
  }

  Rprintf("\n");
}
/** * Main function for cwb-s-encode. * * @param argc Number of command-line arguments. * @param argv Command-line arguments. */ int main(int argc, char **argv) { int input_line; int start, end; char *annot; char buf[CL_MAX_LINE_LENGTH]; Attribute *att; int V_switch, values, S_annotations_dropped; int i, N; progname = argv[0]; sencode_parse_options(argc, argv); /* -a mode: read existing regions into memory */ if (add_to_existing) { if (corpus == NULL) { Rprintf( "Error: You have to specify source corpus (-C <corpus>) for -a switch.\n"); rcqp_receive_error(1); } att = cl_new_attribute(corpus, new_satt.name, ATT_STRUC); if ((att != NULL) && (cl_max_struc(att) > 0)) { V_switch = new_satt.store_values; values = cl_struc_values(att); if (V_switch && (!values)) { Rprintf( "Error: Existing regions of -V attribute have no annotations.\n"); rcqp_receive_error(1); } else if ((!V_switch) && values) { Rprintf( "Error: Existing regions of -S attributes have annotations.\n"); rcqp_receive_error(1); } if (!silent) Rprintf("[Loading previous <%s> regions]\n", new_satt.name); N = cl_max_struc(att); for (i = 0; i < N; i++) { cl_struc2cpos(att, i, &start, &end); annot = cl_struc2str(att, i); SL_insert(start, end, annot); } } else { if (!silent) Rprintf("[No <%s> regions defined (skipped)]\n", new_satt.name); } } /* loop reading input (stdin or -f <file>) */ if (in_memory && (!silent)) Rprintf("[Reading input data]\n"); input_line = 0; S_annotations_dropped = 0; while (fgets(buf, CL_MAX_LINE_LENGTH, text_fd)) { input_line++; /* check for buffer overflow */ if (strlen(buf) >= (CL_MAX_LINE_LENGTH - 1)) { Rprintf( "BUFFER OVERFLOW, input line #%d is too long:\n>> %s", input_line, buf); rcqp_receive_error(1); } if (! 
sencode_parse_line(buf, &start, &end, &annot)) { Rprintf( "FORMAT ERROR on line #%d:\n>> %s", input_line, buf); rcqp_receive_error(1); } if (new_satt.store_values && (annot == NULL)) { Rprintf( "MISSING ANNOTATION on line #%d:\n>> %s", input_line, buf); rcqp_receive_error(1); } if ((!new_satt.store_values) && (annot != NULL)) { if (! S_annotations_dropped) Rprintf( "WARNING: Annotation for -S attribute ignored on line #%d (warning issued only once):\n>> %s", input_line, buf); S_annotations_dropped++; } if ((start <= new_satt.last_cpos) || (end < start)) { Rprintf( "RANGE INCONSISTENCY on line #%d:\n>> %s(end of previous region was %d)\n", input_line, buf, new_satt.last_cpos); rcqp_receive_error(1); } if (annot != NULL && set_att != set_none) { /* convert set annotation into standard syntax */ annot = sencode_check_set(annot); if (annot == NULL) { Rprintf( "SET ANNOTATION SYNTAX ERROR on line #%d:\n>> %s", input_line, buf); rcqp_receive_error(1); } } /* debugging output */ if (debug) { Rprintf( "[%d, %d]", start, end); if (annot != NULL) Rprintf( " <%s>", annot); Rprintf( "\n"); } /* in -M mode, store this region in memory; otherwise write it to the disk files */ if (in_memory) SL_insert(start, end, annot); else sencode_write_region(start, end, annot); cl_free(annot); } /* in -M mode, write data to disk now that we have finished looping across input data */ if (in_memory) { SL item; if (!silent) Rprintf("[Creating encoded disk file(s)]\n"); SL_rewind(); while ((item = SL_next()) != NULL) sencode_write_region(item->start, item->end, item->annot); } /* close files */ sencode_close_files(); if (S_annotations_dropped > 0) Rprintf( "Warning: %d annotation values dropped for -S attribute '%s'.\n", S_annotations_dropped, new_satt.name); rcqp_receive_error(0); }