Ejemplo n.º 1
0
std::string ptb_normalizer::next()
{
    // Tokens buffered by an earlier call are handed out first, one per
    // call, before anything new is read from the source.
    if (!tokens_.empty())
    {
        auto buffered = tokens_.front();
        tokens_.pop_front();
        return buffered;
    }

    if (!*source_)
        throw token_stream_exception{"next() called with empty source"};

    auto first = source_->next();

    // Common case: anything that is not a double quote is parsed directly.
    if (first != "\"")
    {
        parse_token(first);
        return current_token();
    }

    // Opening double quote: emit `` and buffer everything up to the
    // matching closing quote (emitted as ''). In malformed input we stop
    // at the end of sentence marker or at the end of the source instead.
    tokens_.push_back("``");

    while (*source_)
    {
        auto inner = source_->next();
        if (inner == "\"")
        {
            tokens_.push_back("''");
            break;
        }

        parse_token(inner);

        if (inner == "</s>")
            break;
    }

    // Whether we found the closing quote, hit </s>, or ran out of input,
    // the result is taken from the buffered tokens.
    return current_token();
}
// Looks backwards from 'offset' in 'str' for the token preceding the one at 'offset'.
// Characters whose entry in 'table_token_symbols' is TRUE are skipped first, then characters
// whose entry is FALSE; the position reached is handed to current_token(), which fills
// 'token_start' and 'token_end'. Returns FALSE when the beginning of the string is reached
// before such a position is found.
static gboolean previous_token(const gchar* str, gint offset, gint *token_start, gint *token_end,
    const gboolean table_token_symbols[TABLE_SIZE])
{
    const gchar *cursor = str + offset;

    // Step back over the run of table-marked characters containing 'offset', if any.
    while((cursor > str) && table_token_symbols[(guchar)*cursor])
    {
        --cursor;
    }
    if(cursor == str)
    {
        return FALSE;
    }

    // Step back over the unmarked characters separating it from the previous run.
    while((cursor > str) && !table_token_symbols[(guchar)*cursor])
    {
        --cursor;
    }

    if(table_token_symbols[(guchar)*cursor])
    {
        return current_token(str, cursor - str, token_start, token_end, table_token_symbols);
    }
    return FALSE;
}
// Looks forwards from 'offset' in 'str' for the token following the one at 'offset'.
// Characters whose entry in 'table_token_symbols' is TRUE are skipped first, then characters
// whose entry is FALSE; the position reached is handed to current_token(), which fills
// 'token_start' and 'token_end'. Returns FALSE when the terminating zero byte is reached
// before such a position is found.
static gboolean next_token(const gchar* str, gint offset, gint *token_start, gint *token_end,
    const gboolean table_token_symbols[TABLE_SIZE])
{
    const gchar *cursor = str + offset;

    // Step forward over the run of table-marked characters containing 'offset', if any.
    while((*cursor != 0) && table_token_symbols[(guchar)*cursor])
    {
        ++cursor;
    }
    if(*cursor == 0)
    {
        return FALSE;
    }

    // 'cursor' is on an unmarked, non-terminator character here: move past it and past
    // any further unmarked characters.
    ++cursor;
    while((*cursor != 0) && !table_token_symbols[(guchar)*cursor])
    {
        ++cursor;
    }

    if(table_token_symbols[(guchar)*cursor])
    {
        return current_token(str, cursor - str, token_start, token_end, table_token_symbols);
    }
    return FALSE;
}
Ejemplo n.º 4
0
std::string sentence_boundary::next()
{
    // With nothing buffered, one token must be pulled from the source;
    // an exhausted source at this point is a caller error.
    if (tokens_.empty())
    {
        if (!*source_)
            throw token_stream_exception{"next() called with empty source"};
        tokens_.emplace_back(source_->next());
    }

    // A boundary is only considered when the front token may be sentence
    // punctuation and the previous token (if any) may end a sentence.
    const bool boundary_candidate
        = possible_punc(tokens_.front()) && (!prev_ || possible_end(*prev_));
    if (!boundary_candidate)
        return current_token();

    // One token of lookahead is needed. If the document ends here, the
    // sentence is forced to end as well.
    if (!*source_)
    {
        tokens_.emplace_back("</s>");
        return current_token();
    }

    auto whitespace = source_->next();

    // Sentences are only broken after whitespace; anything else is simply
    // buffered.
    if (whitespace != " ")
    {
        tokens_.emplace_back(std::move(whitespace));
        return current_token();
    }

    // A second token of lookahead is needed; again, end of document
    // forces an end of sentence.
    if (!*source_)
    {
        tokens_.emplace_back("</s>");
        return current_token();
    }

    auto following = source_->next();
    const bool is_boundary = possible_start(following);

    // Both lookahead tokens are always buffered; on a real boundary they
    // are additionally wrapped as: </s>, whitespace, <s>, following.
    if (is_boundary)
        tokens_.emplace_back("</s>");
    tokens_.emplace_back(std::move(whitespace));
    if (is_boundary)
        tokens_.emplace_back("<s>");
    tokens_.emplace_back(std::move(following));

    return current_token();
}
//***********************************my_atk_text_get_text_before_offset******************
// Computes the text unit that precedes 'offset' for the given boundary type, stores its limits
// in '*start_offset' and '*end_offset', and returns my_atk_text_get_text() for that range.
// Returns NULL when 'offset' is outside [0, length), when no preceding unit can be found,
// or when 'boundary_type' is not one of the handled values. On a NULL return the output
// offsets may already have been partially written.
static gchar* my_atk_text_get_text_before_offset(AtkText *text, gint offset,
    AtkTextBoundary boundary_type, gint *start_offset, gint *end_offset)
{
    gchar *str = ((MyAtkText*)text)->str;
    gint len = my_strlen(str);
    if((offset < 0) || (offset >= len))
        return NULL;

    // Scratch outputs for the halves of a token range that are not needed.
    gint scratch_start;
    gint scratch_end;

    switch(boundary_type)
    {
    case ATK_TEXT_BOUNDARY_CHAR:
        // There is no character before the very first one.
        if(offset == 0)
            return NULL;
        *start_offset = offset - 1;
        *end_offset = offset;
        break;
    case ATK_TEXT_BOUNDARY_WORD_START:
    case ATK_TEXT_BOUNDARY_SENTENCE_START:
    case ATK_TEXT_BOUNDARY_LINE_START:
        // The range ends where the token at 'offset' starts; if 'offset' is not inside
        // a token, fall back to the start of the token before it.
        if(!current_token(str, offset, end_offset, &scratch_end, tables[boundary_type])
            && !previous_token(str, offset, end_offset, &scratch_end, tables[boundary_type]))
            return NULL;
        // The range starts where the token before that one starts.
        if(!previous_token(str, *end_offset, start_offset, &scratch_end, tables[boundary_type]))
            return NULL;
        //debug
//        ++(*start_offset);
        break;
    case ATK_TEXT_BOUNDARY_WORD_END:
    case ATK_TEXT_BOUNDARY_SENTENCE_END:
    case ATK_TEXT_BOUNDARY_LINE_END:
        // The range ends at the end of the token preceding 'offset'.
        if(!previous_token(str, offset, &scratch_start, end_offset, tables[boundary_type]))
            return NULL;
        // It starts at the end of the token before that one, or at 0 if there is none.
        if(!previous_token(str, scratch_start, &scratch_start, start_offset, tables[boundary_type]))
            *start_offset = 0;
        break;
    default:
        return NULL;
    }

    return my_atk_text_get_text(text, *start_offset, *end_offset);
}
Ejemplo n.º 6
0
/** Applies the options found in \a args to this database.
  *
  * The first element of \a args is skipped (the loop starts at index 1;
  * presumably it is the program name). Each remaining token is handled as:
  *  - "--name": a long option. If it is not (yet) recognized, it is stored
  *    for later, either as a flag or with the following argument as its
  *    string value. If it is recognized, a flag is set to true and a
  *    non-flag consumes the following argument as its value.
  *  - "-abc": a group of single-character options, each looked up through
  *    Option::short_names. Only the last character of a group may be a
  *    non-flag option, which consumes the following argument as its value.
  *  - anything else is ignored here.
  *
  * \throws std::runtime_error for unknown short options, options whose
  *         value member is undefined, and non-flag options that are not
  *         followed by a usable parameter value. */
void OptionsDB::SetFromCommandLine(const std::vector<std::string>& args) {
    //bool option_changed = false;

    for (unsigned int i = 1; i < args.size(); ++i) {
        std::string current_token(args[i]);
        if (current_token.find("--") == 0) {
            std::string option_name = current_token.substr(2);

            std::map<std::string, Option>::iterator it = m_options.find(option_name);

            if (it == m_options.end() || !it->second.recognized) { // unrecognized option: may be registered later on so we'll store it for now
                // Check for more parameters (if this is the last one, assume that it is a flag).
                std::string value_str("-");
                if (i + 1 < static_cast<unsigned int>(args.size())) {
                    value_str = args[i + 1]; // copy assignment
                    StripQuotation(value_str);
                }

                // An empty next parameter is not another option, so it is stored as an (empty) value.
                // Testing for emptiness first avoids the uncaught std::out_of_range that at(0) would
                // throw on an empty string.
                if (!value_str.empty() && value_str[0] == '-') { // this is either the last parameter or the next parameter is another option, assume this one is a flag
                    m_options[option_name] = Option(static_cast<char>(0), option_name, true, boost::lexical_cast<std::string>(false),
                                                    "", 0, false, true, false);
                } else { // the next parameter is the value, store it as a string to be parsed later
                    m_options[option_name] = Option(static_cast<char>(0), option_name, value_str, value_str,
                                                    "", new Validator<std::string>(), false, false, false); // don't attempt to store options that have only been specified on the command line
                }

                WarnLogger() << "Option \"" << option_name << "\", was specified on the command line but was not recognized.  It may not be registered yet or could be a typo.";
            } else {
                Option& option = it->second;
                if (option.value.empty())
                    throw std::runtime_error("The value member of option \"--" + option.name + "\" is undefined.");

                if (!option.flag) { // non-flag
                    try {
                        // ensure a parameter exists...
                        if (i + 1 >= static_cast<unsigned int>(args.size()))
                            throw std::runtime_error("the option \"" + option.name +
                                                     "\" was specified, at the end of the list, with no parameter value.");
                        // get parameter value
                        std::string value_str(args[++i]);
                        StripQuotation(value_str);
                        // ensure parameter is actually a parameter, and not the next option name (which would indicate
                        // that the option was specified without a parameter value, as if it was a flag)
                        // (an empty parameter makes at(0) throw; that is reported through the catch below)
                        if (value_str.at(0) == '-')
                            throw std::runtime_error("the option \"" + option.name +
                                                     "\" was followed by the parameter \"" + value_str +
                                                     "\", which appears to be an option flag, not a parameter value, because it begins with a \"-\" character.");
                        m_dirty |= option.SetFromString(value_str);
                    } catch (const std::exception& e) {
                        throw std::runtime_error("OptionsDB::SetFromCommandLine() : the following exception was caught when attempting to set option \"" + option.name + "\": " + e.what() + "\n\n");
                    }
                } else { // flag
                    option.value = true;
                }
            }

            //option_changed = true;
        } else if (current_token.find('-') == 0
#ifdef FREEORION_MACOSX
                && current_token.find("-psn") != 0 // Mac OS X passes a process serial number to all applications using Carbon or Cocoa, it should be ignored here
#endif
            ) {
            std::string single_char_options = current_token.substr(1);

            if (single_char_options.size() == 0) {
                throw std::runtime_error("A \'-\' was given with no options.");
            } else {
                for (unsigned int j = 0; j < single_char_options.size(); ++j) {
                    std::map<char, std::string>::iterator short_name_it = Option::short_names.find(single_char_options[j]);

                    if (short_name_it == Option::short_names.end())
                        throw std::runtime_error(std::string("Unknown option \"-") + single_char_options[j] + "\" was given.");

                    std::map<std::string, Option>::iterator name_it = m_options.find(short_name_it->second);

                    if (name_it == m_options.end())
                        throw std::runtime_error("Option \"--" + short_name_it->second + "\", abbreviated as \"-" + short_name_it->first + "\", could not be found.");

                    Option& option = name_it->second;
                    if (option.value.empty())
                        throw std::runtime_error("The value member of option \"--" + option.name + "\" is undefined.");

                    if (!option.flag) {
                        // a non-flag option must be the last character of its group...
                        if (j < single_char_options.size() - 1)
                            throw std::runtime_error(std::string("Option \"-") + single_char_options[j] + "\" was given with no parameter.");
                        // ...and must be followed by another argument; without this check args[++i]
                        // would index past the end of args when the option is the last argument
                        if (i + 1 >= static_cast<unsigned int>(args.size()))
                            throw std::runtime_error(std::string("Option \"-") + single_char_options[j] +
                                                     "\" was specified, at the end of the list, with no parameter value.");
                        m_dirty |= option.SetFromString(args[++i]);
                    } else {
                        option.value = true;
                    }
                }
            }
        }
    }
}
Ejemplo n.º 7
0
/*
 * Entry point: processes the command line, then reads and executes
 * commands from the input file, or from stdin (with a prompt) when no
 * input file was given. Afterwards it reports memory usage on stderr and,
 * if requested, dumps the global type and term tables.
 */
int main(int argc, char *argv[]) {
  bool interactive;
  FILE *dump;
  double memused;

  process_command_line(argc, argv);

  yices_init();
  init_tstack(&stack, NUM_BASE_OPCODES);

  // no input file means we read commands from stdin and show a prompt
  interactive = (input_filename == NULL);
  if (interactive) {
    init_yices_stdin_lexer(&lexer);
  } else if (init_yices_file_lexer(&lexer, input_filename) < 0) {
    perror(input_filename);
    exit(YICES_EXIT_FILE_NOT_FOUND);
  }

  init_parser(&parser, &lexer, &stack);
  while (current_token(&lexer) != TK_EOS) {
    if (interactive) {
      printf("yices> ");
      fflush(stdout);
    }
    // a negative code signals a failed command: flush the lexer before the next one
    if (parse_yices_command(&parser, stderr) < 0) {
      flush_lexer(&lexer);
    }
  }

  delete_parser(&parser);
  close_lexer(&lexer);
  delete_tstack(&stack);

  memused = mem_size() / (1024 * 1024);
  if (memused > 0) {
    fprintf(stderr, "Memory used: %.2f MB\n", memused);
  }

  if (dump_requested) {
    // the dump goes to the named file if there is one, to stdout otherwise
    if (dump_filename != NULL) {
      dump = fopen(dump_filename, "w");
      if (dump == NULL) {
        perror(dump_filename);
        exit(YICES_EXIT_FILE_NOT_FOUND);
      }
    } else {
      dump = stdout;
    }

    fprintf(dump, "\n==== ALL TYPES ====\n");
    print_type_table(dump, __yices_globals.types);
    fflush(dump);
    fprintf(dump, "\n==== ALL TERMS ====\n");
    print_term_table(dump, __yices_globals.terms);
    fflush(dump);

    // only a file we opened ourselves gets closed
    if (dump_filename != NULL && fclose(dump) != 0) {
      fprintf(stderr, "Error while closing dump file: ");
      perror(dump_filename);
    }
  }

  yices_exit();

  return 0;
}