Beispiel #1
0
/* 
 * ------------------------------------------------------------------------
 * 
 * "rcqpCmd_alg2cpos(SEXP inAttribute, SEXP inAlg)" --
 * 
 * Looks up the alignment attribute named by inAttribute (via
 * cqi_lookup_attribute() with ATT_ALIGN) and passes the integer inAlg to
 * cl_alg2cpos().
 * 
 * inAttribute must be a character vector of length 1, inAlg a vector of
 * length 1 that asInteger() can convert to a non-NA integer; otherwise an
 * R error is raised.
 * 
 * On success, returns an integer vector of length 4 holding the four
 * values filled in by cl_alg2cpos(), in argument order. If the lookup or
 * the conversion fails, rcqp_error_code() / rcqp_send_error() is called;
 * should that call return, R_NilValue is returned.
 * 
 * ------------------------------------------------------------------------
 */
SEXP rcqpCmd_alg2cpos(SEXP inAttribute, SEXP inAlg)
{
	SEXP			result = R_NilValue;
	int				alg, s1, s2, t1, t2;
	int				nprotected = 0;	/* number of objects currently on the protect stack */
	char *			a;
	Attribute *		attribute;
	
	if (!isString(inAttribute) || length(inAttribute) != 1) error("argument 'attribute' must be a string");
	if (!isVector(inAlg) || length(inAlg) != 1) error("argument 'alg' must be an integer");
	PROTECT(inAttribute);
	nprotected++;
	PROTECT(inAlg);
	nprotected++;
	
	a = (char*)CHAR(STRING_ELT(inAttribute,0));

	alg = asInteger(inAlg);
	if (alg == NA_INTEGER) {
		UNPROTECT(nprotected);
		error("invalid 'alg' value (too large or NA)");
	}

	attribute = cqi_lookup_attribute(a, ATT_ALIGN);
	if (attribute == NULL) {
		/* Release before reporting, as before; reset the counter so the
		 * final UNPROTECT stays balanced if the handler returns. */
		UNPROTECT(nprotected);
		nprotected = 0;
		rcqp_error_code(cqi_errno);
	} else if (cl_alg2cpos(attribute, alg, &s1, &s2, &t1, &t2)) {
		result = PROTECT(allocVector(INTSXP, 4));
		nprotected++;
		INTEGER(result)[0] = s1;
		INTEGER(result)[1] = s2;
		INTEGER(result)[2] = t1;
		INTEGER(result)[3] = t2;
	} else {
		/* result was never allocated here, so only the two arguments
		 * are protected at this point. */
		rcqp_send_error();
	}
	
	/* Pop exactly what this call pushed; a fixed UNPROTECT(3) was only
	 * correct on the success path. */
	UNPROTECT(nprotected);

	return result;
}
Beispiel #2
0
/*
 * Handler for the CQI_CL_ALG2CPOS request: reads an attribute name and an
 * integer from the client, looks the name up as an alignment attribute and
 * replies either with the four integers produced by cl_alg2cpos() or with
 * an error.
 */
void
do_cqi_cl_alg2cpos(void)
{
  /* request arguments, read in wire order: string first, then int */
  char *att_name = cqi_read_string();
  int alg_id = cqi_read_int();
  Attribute *align_att;
  int start1, end1, start2, end2;

  if (server_debug) {
    Rprintf( "CQi: CQI_CL_ALG2CPOS('%s', %d)\n", att_name, alg_id);
  }

  align_att = cqi_lookup_attribute(att_name, ATT_ALIGN);
  if (align_att != NULL) {
    if (!cl_alg2cpos(align_att, alg_id, &start1, &end1, &start2, &end2)) {
      send_cl_error();
    }
    else {
      cqi_data_int_int_int_int(start1, end1, start2, end2);
    }
  }
  else {
    /* lookup failed: reply with the error code left in cqi_errno */
    cqi_command(cqi_errno);
  }

  /* the string from cqi_read_string() is released here on every path */
  free(att_name);
}
Beispiel #3
0
/**
 * Prints out the requested attributes for a sequence of tokens
 * (or a single token if end_position == -1).
 *
 * If the -c flag was used (and, thus, the context parameter is not NULL),
 * then the sequence is extended to the entire s-attribute region (in matchlist mode).
 *
 * All output is written to stdout with printf(). Its layout is selected by
 * the global output mode (mode) and the global printnum flag. The attributes
 * to print are taken from the globals print_list[] / print_list_index; the
 * globals N_sar and s_att_regions[] are rebuilt for every token printed.
 *
 * @param start_position  corpus position of the first (or only) token.
 * @param end_position    corpus position of the last token; a negative value
 *                        means only start_position is used to determine the
 *                        range.
 * @param context         s-attribute whose enclosing region widens the printed
 *                        range, or NULL to print exactly the requested range.
 */
void
decode_print_token_sequence(int start_position, int end_position, Attribute *context)
{
  int alg, aligned_start, aligned_end, aligned_start2, aligned_end2,
    rng_start, rng_end, snum;
  int start_context, end_context, dummy;
  int lastposa, i, w;

  /* pointer used for values of p-attributes */
  char *wrd;


  start_context = start_position;
  end_context = (end_position >= 0) ? end_position : start_position;
  /* above ensures that in non-matchlist mode (where ep == -1), we only print one token */

  if (context != NULL) {

    /* expand the start_context end_context numbers to the start
     * and end points of the containing region of the context s-attribute */
    if (!cl_cpos2struc2cpos(context, start_position,
                            &start_context, &end_context)) {
      /* start_position is not inside a context region: fall back to the plain range */
      start_context = start_position;
      end_context = (end_position >= 0) ? end_position : start_position;
    }
    else if (end_position >= 0) {
      /* a range was given: take the right edge from the region containing
       * end_position (the region start returned here is discarded in dummy) */
      if (!cl_cpos2struc2cpos(context, end_position,
                               &dummy, &end_context)) {
        end_context = (end_position >= 0) ? end_position : start_position;
      }
    }

    /* indicate that we're showing context */
    switch (mode) {
    case LispMode:
      /* the "(TARGET" list opened here is closed at the very end of this function */
      printf("(TARGET %d\n", start_position);
      if (end_position >= 0)
        printf("(INTERVAL %d %d)\n", start_position, end_position);
      break;
    case EncodeMode:
    case ConclineMode:
      /* nothing here */
      break;
    case XMLMode:
      printf("<context start=\"%d\" end=\"%d\"/>\n", start_context, end_context);
      break;
    case StandardMode:
    default:
      if (end_position >= 0) {
        printf("INTERVAL %d %d\n", start_position, end_position);
      }
      else {
        printf("TARGET %d\n", start_position);
      }
      break;
    }

  } /* endif context != NULL */

  /* some extra information in -L and -H modes */
  if (mode == LispMode && end_position != -1)
    printf("(CONTEXT %d %d)\n", start_context, end_context);
  else if (mode == ConclineMode) {
    if (printnum)
      printf("%8d: ", start_position);
  }

  /* now print the token sequence (including context) with all requested attributes */
  for (w = start_context; w <= end_context; w++) {
    /* set to 1 before the attribute loop and cleared once the first p-attribute
     * value has been printed in -C / -H mode, so later values get a separator */
    int beg_of_line;

    /* extract s-attribute regions for start and end tags into s_att_regions[] */
    N_sar = 0;                  /* counter and index */
    for (i = 0; i < print_list_index; i++) {
      if (print_list[i]->any.type == ATT_STRUC) {
        /* only regions that begin or end exactly at w are recorded */
        if ( ((snum = cl_cpos2struc(print_list[i], w)) >= 0) &&
             (cl_struc2cpos(print_list[i], snum, &rng_start, &rng_end)) &&
             ((w == rng_start) || (w == rng_end)) ) {
          s_att_regions[N_sar].name = print_list[i]->any.name;
          s_att_regions[N_sar].start = rng_start;
          s_att_regions[N_sar].end = rng_end;
          if (cl_struc_values(print_list[i]))
            s_att_regions[N_sar].annot = cl_struc2str(print_list[i], snum);
          else
            s_att_regions[N_sar].annot = NULL;
          N_sar++;
        }
      }
    }
    decode_sort_s_att_regions();       /* sort regions to ensure proper nesting of start and end tags */

    /* show corpus positions with -n option */
    if (printnum)
      switch (mode) {
      case LispMode:
        printf("(%d ", w);
        break;
      case EncodeMode:
        printf("%8d\t", w);
        break;
      case ConclineMode:
        /* nothing here (shown at start of line in -H mode) */
        break;
      case XMLMode:
        /* nothing here */
        break;
      case StandardMode:
      default:
        printf("%8d: ", w);
        break;
      }
    else {
      if (mode == LispMode)
        printf("(");            /* entire match is parenthesised list in -L mode */
    }

    /* index of the most recent p-attribute in print_list[]; it is assigned
     * below but not read anywhere else in this function */
    lastposa = -1;

    /* print start tags (s- and a-attributes) with -C,-H,-X */
    if ((mode == EncodeMode) || (mode == ConclineMode) || (mode == XMLMode)) {

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          /* a start tag is emitted only when w is the first position
           * (aligned_start) of the alignment found at w */
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              && (w == aligned_start)
              ) {
            if (mode == XMLMode) {
              printf("<align type=\"start\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("<%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;

        default:                /* ignore all other attribute types */
          break;
        }
      }

      /* print s-attributes from s_att_regions[] (using sar_sort_index[]) */
      for (i = 0; i < N_sar; i++) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->start == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"start\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            if (region->annot)
              printf(" value=\"%s\"", decode_string_escape(region->annot));
            printf("/>\n");
          }
          else {
            /* the annotation is printed as-is here (no decode_string_escape call) */
            printf("<%s%s%s>%c",
                   region->name,
                   region->annot ? " " : "",
                   region->annot ? region->annot : "",
                   (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }

    }

    /* now print token with its attribute values (p-attributes only for -C,-H,-X) */
    if (mode == XMLMode) {
      printf("<token");
      if (printnum)
        printf(" cpos=\"%d\"", w);
      printf(">");
    }

    beg_of_line = 1;
    /* Loop printing each attribute for this cpos (w) */
    for (i = 0; i < print_list_index; i++) {

      switch (print_list[i]->any.type) {
      case ATT_POS:
        lastposa = i;
        if ((wrd = decode_string_escape(cl_cpos2str(print_list[i], w))) != NULL) {
          switch (mode) {
          case LispMode:
            printf("(%s \"%s\")", print_list[i]->any.name, wrd);
            break;

          case EncodeMode:
            /* values are tab-separated */
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("\t%s", wrd);
            break;

          case ConclineMode:
            /* values are slash-separated */
            if (beg_of_line) {
              printf("%s", wrd);
              beg_of_line = 0;
            }
            else
              printf("/%s", wrd);
            break;

          case XMLMode:
            printf(" <attr name=\"%s\">%s</attr>", print_list[i]->any.name, wrd);
            break;

          case StandardMode:
          default:
            printf("%s=%s\t", print_list[i]->any.name, wrd);
            break;
          }
        }
        else {
          /* a NULL value is reported and followed by decode_cleanup(1) */
          cl_error("(aborting) cl_cpos2str() failed");
          decode_cleanup(1);
        }
        break;

      case ATT_ALIGN:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              ) {
            if (mode == LispMode) {
              printf("(ALG %d %d %d %d)",
                     aligned_start, aligned_end, aligned_start2, aligned_end2);
            }
            else {
              printf("%d-%d==>%s:%d-%d\t",
                     aligned_start, aligned_end, print_list[i]->any.name, aligned_start2, aligned_end2);
            }
          }
          /* a failed lookup is only treated as an error when cl_errno is set */
          else if (cl_errno != CDA_OK) {
            cl_error("(aborting) alignment error");
            decode_cleanup(1);
          }
        }
        break;

      case ATT_STRUC:
        /* do not print in encode, concline or xml modes because already done (above) */
        if ((mode != EncodeMode) && (mode != ConclineMode) && (mode != XMLMode)) {
          if (cl_cpos2struc2cpos(print_list[i], w, &rng_start, &rng_end)) {
            /* standard and -L mode don't show tag annotations */
            printf(mode == LispMode ? "(STRUC %s %d %d)" : "<%s>:%d-%d\t",
                   print_list[i]->any.name,
                   rng_start, rng_end);
          }
          /* NOTE(review): the message says "(aborting)" but, unlike the two
           * error branches above, decode_cleanup(1) is not called here --
           * confirm whether that is intentional */
          else if (cl_errno != CDA_OK)
            cl_error("(aborting) cl_cpos2struc2cpos() failed");
        }
        break;

      case ATT_DYN:
        /* dynamic attributes aren't implemented */
      default:
        break;
      }
    }

    /* print token separator (or end of token in XML mode) */
    switch (mode) {
    case LispMode:
      /* closes the per-token list opened in the -n / "(" step above */
      printf(")\n");
      break;
    case ConclineMode:
      printf(" ");
      break;
    case XMLMode:
      printf(" </token>\n");
      break;
    case EncodeMode:
    case StandardMode:
    default:
      printf("\n");
      break;
    }

    /* now, after printing all the positional attributes, print end tags with -H,-C,-X */
    if (mode == EncodeMode  || mode == ConclineMode || mode == XMLMode) {

      /* print s-attributes from s_att_regions[] (using sar_sort_index[] in reverse order) */
      for (i = N_sar - 1; i >= 0; i--) {
        SAttRegion *region = &(s_att_regions[sar_sort_index[i]]);

        if (region->end == w) {
          if (mode == XMLMode) {
            printf("<tag type=\"end\" name=\"%s\"", region->name);
            if (printnum)
              printf(" cpos=\"%d\"", w);
            printf("/>\n");
          }
          else {
            printf("</%s>%c", region->name, (mode == ConclineMode ? ' ' : '\n'));
          }
        }
      }

      /* print a-attributes from print_list[] */
      for (i = 0; i < print_list_index; i++) {
        switch (print_list[i]->any.type) {
        case ATT_ALIGN:
          /* mirror of the start-tag case: emitted only when w is the last
           * position (aligned_end) of the alignment found at w */
          if (
              ((alg = cl_cpos2alg(print_list[i], w)) >= 0)
              && (cl_alg2cpos(print_list[i], alg,
                              &aligned_start, &aligned_end,
                              &aligned_start2, &aligned_end2))
              && (w == aligned_end)
              ) {
            if (mode == XMLMode) {
              printf("<align type=\"end\" target=\"%s\"", print_list[i]->any.name);
              if (printnum)
                printf(" start=\"%d\" end=\"%d\"", aligned_start2, aligned_end2);
              printf("/>\n");
            }
            else {
              printf("</%s", print_list[i]->any.name);
              if (printnum)
                printf(" %d %d", aligned_start2, aligned_end2);
              printf(">%c", (mode == EncodeMode) ? '\n' : ' ');
            }
          }
          break;

        default:
          /* ignore all other attribute types */
          break;
        }
      }

    } /* end of print end tags */

  }  /* end of match range loop: for w from start_context to end_context */

  /* end of match (for matchlist mode in particular) */
  /* in -L mode this closes the "(TARGET" list opened when context != NULL */
  if ((context != NULL) && (mode == LispMode))
    printf(")\n");
  else if (mode == ConclineMode)
    printf("\n");

  return;
}