std::string ptb_normalizer::next() { // if we have buffered tokens, keep returning them until we have // exhausted the buffer if (!tokens_.empty()) { auto token = tokens_.front(); tokens_.pop_front(); return token; } if (!*source_) throw token_stream_exception{"next() called with empty source"}; auto token = source_->next(); if (token == "\"") { tokens_.push_back("``"); // keep reading until we either hit the next pair of quotes or, in // error cases, the end of sentence marker. Buffer tokens along the // way. while (*source_) { auto nxt = source_->next(); if (nxt == "\"") { tokens_.push_back("''"); return current_token(); } parse_token(nxt); if (nxt == "</s>") return current_token(); } // only get here if we've parsed the whole source and never found a // matching end quote, so just return the buffered tokens at this // point return current_token(); } parse_token(token); return current_token(); }
/*
 * Scans backwards from str + offset and, if the scan succeeds, delegates to
 * current_token() at the position it stopped on.
 *
 * The scan first steps back over characters whose entry in
 * table_token_symbols is TRUE, then over characters whose entry is FALSE.
 * Neither step moves before the start of the string.
 *
 * Returns FALSE when the first step reaches the start of the string, or when
 * the second step ends on a character whose table entry is FALSE. Otherwise
 * returns the result of current_token(), which receives token_start and
 * token_end unchanged.
 */
static gboolean previous_token(const gchar* str, gint offset, gint *token_start, gint *token_end,
    const gboolean table_token_symbols[TABLE_SIZE])
{
    const gchar *cursor = str + offset;

    /* step back over the run of flagged characters the offset sits in */
    while((cursor > str) && table_token_symbols[(guchar)*cursor])
    {
        --cursor;
    }
    if(cursor == str)
    {
        return FALSE;
    }

    /* step back over the unflagged characters that precede that run */
    while((cursor > str) && !table_token_symbols[(guchar)*cursor])
    {
        --cursor;
    }

    if(table_token_symbols[(guchar)*cursor])
    {
        return current_token(str, cursor - str, token_start, token_end, table_token_symbols);
    }
    /* reached the start of the string without finding a flagged character */
    return FALSE;
}
/*
 * Scans forwards from str + offset and, if the scan succeeds, delegates to
 * current_token() at the position it stopped on.
 *
 * The scan first advances over characters whose entry in
 * table_token_symbols is TRUE, then advances one more character
 * unconditionally, then advances over characters whose entry is FALSE. The
 * terminating NUL stops either loop.
 *
 * Returns FALSE when the first loop reaches the terminating NUL, or when the
 * scan ends on a character whose table entry is FALSE. Otherwise returns the
 * result of current_token(), which receives token_start and token_end
 * unchanged.
 */
static gboolean next_token(const gchar* str, gint offset, gint *token_start, gint *token_end,
    const gboolean table_token_symbols[TABLE_SIZE])
{
    const gchar *cursor = str + offset;

    /* advance over the run of flagged characters the offset sits in */
    while((*cursor != 0) && table_token_symbols[(guchar)*cursor])
    {
        ++cursor;
    }
    if(*cursor == 0)
    {
        return FALSE;
    }

    /* the character that stopped the loop above is unflagged: step past it,
       then advance over any further unflagged characters */
    ++cursor;
    while((*cursor != 0) && !table_token_symbols[(guchar)*cursor])
    {
        ++cursor;
    }

    if(table_token_symbols[(guchar)*cursor])
    {
        return current_token(str, cursor - str, token_start, token_end, table_token_symbols);
    }
    return FALSE;
}
std::string sentence_boundary::next() { if (tokens_.empty()) { // the buffer is exhausted, so we require there to be tokens available // in source if (!*source_) throw token_stream_exception{"next() called with empty source"}; tokens_.emplace_back(source_->next()); } if (!possible_punc(tokens_.front()) || (prev_ && !possible_end(*prev_))) return current_token(); // we need to look ahead one token: if there is none, then this is // forced to be the end of a sentence at the end of a document if (!*source_) { tokens_.emplace_back("</s>"); return current_token(); } auto token = source_->next(); // we only break sentences after whitespace if (token != " ") { tokens_.emplace_back(std::move(token)); return current_token(); } // we again need to look ahead a single token: if there are none, this // is forced to be the end of the sentence at the end of the document. if (!*source_) { tokens_.emplace_back("</s>"); return current_token(); } auto start_token = source_->next(); if (!possible_start(start_token)) { tokens_.emplace_back(std::move(token)); tokens_.emplace_back(std::move(start_token)); return current_token(); } // end of sentence! add the end and start tags and the lookahead token // to the buffer. tokens_.emplace_back("</s>"); tokens_.emplace_back(std::move(token)); tokens_.emplace_back("<s>"); tokens_.emplace_back(std::move(start_token)); return current_token(); }
//***********************************my_atk_text_get_text_before_offset****************** static gchar* my_atk_text_get_text_before_offset(AtkText *text, gint offset, AtkTextBoundary boundary_type, gint *start_offset, gint *end_offset) { gchar *str = ((MyAtkText*)text)->str; gint len = my_strlen(str); if((offset < 0) || (offset >= len)) { return NULL; } // This variable is set in switch statement. If after that statement variable is TRUE, // then return text from 'strat_offset' to 'end_offset'. Otherwise NULL will be returned. gboolean is_successed = TRUE; gint start_tmp; gint end_tmp; switch(boundary_type) { case ATK_TEXT_BOUNDARY_CHAR: if(offset == 0) { is_successed = FALSE; break; } *start_offset = offset - 1; *end_offset = offset; is_successed = TRUE; break; case ATK_TEXT_BOUNDARY_WORD_START: case ATK_TEXT_BOUNDARY_SENTENCE_START: case ATK_TEXT_BOUNDARY_LINE_START: if(!current_token(str, offset, end_offset, &end_tmp, tables[boundary_type])) { if(!previous_token(str, offset, end_offset, &end_tmp, tables[boundary_type])) { is_successed = FALSE; break; } } if(!previous_token(str, *end_offset, start_offset, &end_tmp, tables[boundary_type])) { is_successed = FALSE; break; } is_successed = TRUE; //debug // ++(*start_offset); break; case ATK_TEXT_BOUNDARY_WORD_END: case ATK_TEXT_BOUNDARY_SENTENCE_END: case ATK_TEXT_BOUNDARY_LINE_END: if(!previous_token(str, offset, &start_tmp, end_offset, tables[boundary_type])) { is_successed = FALSE; break; } if(!previous_token(str, start_tmp, &start_tmp, start_offset, tables[boundary_type])) { *start_offset = 0; } is_successed = TRUE; break; default: is_successed = FALSE; } if(is_successed) { return my_atk_text_get_text(text, *start_offset, *end_offset); } else { return NULL; } }
void OptionsDB::SetFromCommandLine(const std::vector<std::string>& args) { //bool option_changed = false; for (unsigned int i = 1; i < args.size(); ++i) { std::string current_token(args[i]); if (current_token.find("--") == 0) { std::string option_name = current_token.substr(2); std::map<std::string, Option>::iterator it = m_options.find(option_name); if (it == m_options.end() || !it->second.recognized) { // unrecognized option: may be registered later on so we'll store it for now // Check for more parameters (if this is the last one, assume that it is a flag). std::string value_str("-"); if (i + 1 < static_cast<unsigned int>(args.size())) { value_str = args[i + 1]; // copy assignment StripQuotation(value_str); } if (value_str.at(0) == '-') { // this is either the last parameter or the next parameter is another option, assume this one is a flag m_options[option_name] = Option(static_cast<char>(0), option_name, true, boost::lexical_cast<std::string>(false), "", 0, false, true, false); } else { // the next parameter is the value, store it as a string to be parsed later m_options[option_name] = Option(static_cast<char>(0), option_name, value_str, value_str, "", new Validator<std::string>(), false, false, false); // don't attempt to store options that have only been specified on the command line } WarnLogger() << "Option \"" << option_name << "\", was specified on the command line but was not recognized. It may not be registered yet or could be a typo."; } else { Option& option = it->second; if (option.value.empty()) throw std::runtime_error("The value member of option \"--" + option.name + "\" is undefined."); if (!option.flag) { // non-flag try { // ensure a parameter exists... 
if (i + 1 >= static_cast<unsigned int>(args.size())) throw std::runtime_error("the option \"" + option.name + "\" was specified, at the end of the list, with no parameter value."); // get parameter value std::string value_str(args[++i]); StripQuotation(value_str); // ensure parameter is actually a parameter, and not the next option name (which would indicate // that the option was specified without a parameter value, as if it was a flag) if (value_str.at(0) == '-') throw std::runtime_error("the option \"" + option.name + "\" was followed by the parameter \"" + value_str + "\", which appears to be an option flag, not a parameter value, because it begins with a \"-\" character."); m_dirty |= option.SetFromString(value_str); } catch (const std::exception& e) { throw std::runtime_error("OptionsDB::SetFromCommandLine() : the following exception was caught when attempting to set option \"" + option.name + "\": " + e.what() + "\n\n"); } } else { // flag option.value = true; } } //option_changed = true; } else if (current_token.find('-') == 0 #ifdef FREEORION_MACOSX && current_token.find("-psn") != 0 // Mac OS X passes a process serial number to all applications using Carbon or Cocoa, it should be ignored here #endif ) { std::string single_char_options = current_token.substr(1); if (single_char_options.size() == 0) { throw std::runtime_error("A \'-\' was given with no options."); } else { for (unsigned int j = 0; j < single_char_options.size(); ++j) { std::map<char, std::string>::iterator short_name_it = Option::short_names.find(single_char_options[j]); if (short_name_it == Option::short_names.end()) throw std::runtime_error(std::string("Unknown option \"-") + single_char_options[j] + "\" was given."); std::map<std::string, Option>::iterator name_it = m_options.find(short_name_it->second); if (name_it == m_options.end()) throw std::runtime_error("Option \"--" + short_name_it->second + "\", abbreviated as \"-" + short_name_it->first + "\", could not be found."); Option& 
option = name_it->second; if (option.value.empty()) throw std::runtime_error("The value member of option \"--" + option.name + "\" is undefined."); if (!option.flag) { if (j < single_char_options.size() - 1) throw std::runtime_error(std::string("Option \"-") + single_char_options[j] + "\" was given with no parameter."); else m_dirty |= option.SetFromString(args[++i]); } else { option.value = true; } } } } } }
int main(int argc, char *argv[]) { bool interactive; int32_t code; FILE *dump; double memused; process_command_line(argc, argv); yices_init(); init_tstack(&stack, NUM_BASE_OPCODES); interactive = false; if (input_filename == NULL) { init_yices_stdin_lexer(&lexer); interactive = true; } else { if (init_yices_file_lexer(&lexer, input_filename) < 0) { perror(input_filename); exit(YICES_EXIT_FILE_NOT_FOUND); } } init_parser(&parser, &lexer, &stack); while (current_token(&lexer) != TK_EOS) { if (interactive) { printf("yices> "); fflush(stdout); } code = parse_yices_command(&parser, stderr); if (code < 0) { flush_lexer(&lexer); } } delete_parser(&parser); close_lexer(&lexer); delete_tstack(&stack); memused = mem_size() / (1024 * 1024); if (memused > 0) { fprintf(stderr, "Memory used: %.2f MB\n", memused); } if (dump_requested) { if (dump_filename == NULL) { dump = stdout; } else { dump = fopen(dump_filename, "w"); if (dump == NULL) { perror(dump_filename); exit(YICES_EXIT_FILE_NOT_FOUND); } } fprintf(dump, "\n==== ALL TYPES ====\n"); print_type_table(dump, __yices_globals.types); fflush(dump); fprintf(dump, "\n==== ALL TERMS ====\n"); print_term_table(dump, __yices_globals.terms); fflush(dump); if (dump_filename != NULL) { if (fclose(dump) != 0) { fprintf(stderr, "Error while closing dump file: "); perror(dump_filename); } } } yices_exit(); return 0; }