/**
 * Write `length` bytes of `buffer` to stdout, one character per byte,
 * after squeezing each byte into the range 33..126.
 *
 * buffer            bytes to print; must hold at least `length` bytes.
 * allow_punctuation when zero, each mapped byte is additionally passed
 *                   through remove_punctuation() before printing.
 * addbang           when non-zero, position 0 prints "!" instead of its byte.
 * length            number of bytes to process; nothing is printed if <= 0.
 * addfive           when non-zero, position 5 prints "5" instead of its byte.
 *
 * If both overrides would apply to the same position, the "!" override wins.
 */
void print_it(unsigned char* buffer, int allow_punctuation, int addbang, int length, int addfive) {
    int pos = 0;

    while (pos < length) {
        unsigned char ch = buffer[pos];
        int pass;

        /* Fold large values down: 126 is subtracted at most twice, which
         * is enough to bring any 8-bit value to 126 or below. */
        for (pass = 0; pass < 2; ++pass) {
            if (ch > 126) {
                ch -= 126;
            }
        }

        /* Lift small values up so the result is never below 33. */
        if (ch < 33) {
            ch += 33;
        }

        /* The helper runs for every position, including the ones whose
         * output is overridden below. */
        if (allow_punctuation == 0) {
            ch = remove_punctuation(ch);
        }

        if (pos == 0 && addbang) {
            printf("!");
        } else if (pos == 5 && addfive) {
            printf("5");
        } else {
            printf("%c", ch);
        }

        ++pos;
    }
}
/**
 * Derive a shortened form of `word` and leave it in `buf`.
 *
 * Steps, in order:
 *   1. `word` is copied into `buf` (the caller must supply a buffer large
 *      enough for the whole string, terminator included).
 *   2. remove_punctuation() is applied to `buf`.
 *   3. replace_with() is called with a local scratch buffer, `buf`, and the
 *      pair "ight" -> "ite".
 *   4. If `buf` is longer than 5 characters, its first character is reset to
 *      the first character of `word` and remove_vowels() is applied.
 *
 * NOTE(review): the scratch buffer passed to replace_with() is never read
 * afterwards. Unless replace_with() also edits `buf` in place, step 3 has no
 * visible effect on the result - confirm against the helper's contract.
 */
void LTA(char *buf, char *word) {
    char scratch[128];

    strcpy(buf, word);
    remove_punctuation(buf);
    replace_with(scratch, buf, "ight", "ite");

    /* Short words are left as they stand at this point.
     * NOTE(review): an earlier comment here described the cutoff as
     * "4 letters or under", but the comparison skips every length up to and
     * including 5 - confirm which threshold is intended. */
    if (strlen(buf) > 5) {
        /* Restore the original leading character before stripping vowels. */
        buf[0] = word[0];
        remove_vowels(buf);
    }
}
void VoiceCommands::parse_text(const std::string &text) throw() { std::stringstream text_stream(text); // Skip to the appropriate position std::string word; for (int i = 0; i < m_num_parsed_words; ++i) { text_stream >> word; if (!text_stream.good()) return; } int word_location = m_num_parsed_words - 1; while (text_stream.good()) { text_stream >> word; word_location++; word = to_lower(remove_punctuation(word)); bool found_trigger = false; for (std::string trigger : m_trigger_list[m_trigger_index]) { if (trigger == word) { found_trigger = true; break; } } // If we haven't yet found the trigger word, panic and move to the next state. if (word_location >= m_trigger_locations[m_trigger_index] + MAX_WORD_OFFSET) found_trigger = true; if (found_trigger) { m_num_parsed_words = std::min(m_trigger_locations[m_trigger_index] + MAX_WORD_OFFSET, word_location) + 1; m_active_state = m_state_list[m_trigger_index]; m_trigger_index = std::min(static_cast<int>(m_state_list.size() - 1), 1 + m_trigger_index); break; } } }
void Tokeniser::tokenise(const std::string& sentence, std::vector< Word >& result, std::vector< bracketing >& brackets) { switch(m_mode) { case Tok : tokenise_toktag<read_helper_token>(sentence,result,brackets); break; case Tag : tokenise_toktag<read_helper_tokentag>(sentence,result,brackets); break; case Raw : tokenise_raw(sentence, result); break; default : break; } if(m_remove_punct) remove_punctuation(result); }
/**
 * Append a heap-allocated copy of `word` to the growing array `*list`.
 *
 * The array is enlarged by one slot and `*len` is incremented on success.
 * On failure a message is written to stderr, `*len` is left unchanged and
 * `*list` still points at a valid array holding `*len` entries.
 *
 * Returns 0 on success, -1 on allocation failure.
 */
static int split_append_word(char ***list, size_t *len, const char *word)
{
    /* The cast is redundant in C but keeps the code valid as C++ too. */
    char **grown = (char **)realloc(*list, (sizeof **list) * (*len + 1));

    if ( grown == NULL ) {
        fprintf(stderr, "Failed to allocate memory for more words.");
        return -1;
    }
    *list = grown;

    grown[*len] = strdup(word);
    if ( grown[*len] == NULL ) {
        fprintf(stderr, "No fue posible duplicar la cadena.");
        return -1;
    }

    (*len)++;
    return 0;
}

/**
 * Split a single string into its words, returned as an array of strings.
 *
 * `name` is tokenised on spaces, tabs and newlines. Each token is passed
 * through remove_punctuation(); tokens matching the rule 9 or rule 11 word
 * lists (case-insensitively) are dropped; the rest go through
 * moral_regla3(), moral_regla12() and moral_regla10(). When rule 10 yields
 * one or more numerals, those are stored in place of the token.
 *
 * name     String to split. If NULL, the result is empty.
 * words    Out: newly allocated array of newly allocated strings. On
 *          success it is terminated by a NULL sentinel. The caller owns the
 *          array and every string in it.
 * len      Out: number of strings stored in `*words` (sentinel excluded).
 * verbose  Non-zero to print a note on stdout for each rule applied here;
 *          also forwarded to the rule helpers.
 *
 * On any allocation failure a message is written to stderr and the function
 * returns early: `*words` then holds whatever was collected so far (`*len`
 * entries, or NULL if none) WITHOUT a NULL sentinel.
 */
void split(const char *name, char ***words, size_t *len, const int verbose)
{
    char *p = NULL;
    char *copy = NULL;
    char **list = NULL;
    char **tmp = NULL;
    char *last = NULL;
    char *numeral[4];   /* room for the numerals produced by rule 10 (at most 3, per the original note) */
    int n = 0;          /* number of numerals returned by rule 10 */
    int i = 0;

    /* Initialize the results */
    *words = NULL;
    *len = 0;

    /* Nothing to split; leave the empty result in place. */
    if ( name == NULL ) {
        return;
    }

    /* strtok_r() writes into its input, so work on a private copy. */
    copy = strdup(name);
    if ( copy == NULL ) {
        fprintf(stderr, "split: Not enough memory to copy name");
        return;
    }

    for ( (p = strtok_r(copy, " \t\n", &last)); p; (p = strtok_r(NULL, " \t\n", &last)) ) {
        /* Remove any punctuation sign */
        /* This call may also be used to apply Rule 4 */
        p = remove_punctuation(p, verbose);

        /* Apply rule 9 */
        if ( !strcasecmp(p, "EL") || !strcasecmp(p, "LA") || !strcasecmp(p, "DE")
                || !strcasecmp(p, "LOS") || !strcasecmp(p, "LAS") || !strcasecmp(p, "DEL")
                || !strcasecmp(p, "EN" ) || !strcasecmp(p, "CON" ) || !strcasecmp(p, "POR" )
                || !strcasecmp(p, "PARA" ) || !strcasecmp(p, "Y") ) {
            if ( verbose ) {
                printf("split: Aplicando Regla 9. Eliminando la palabra '%s'.\n", p);
            }
            continue;
        }

        /* Apply rule 11 */
        if ( !strcasecmp(p, "Compania") || !strcasecmp(p, "Cia")
                || !strcasecmp(p, "Sociedad") || !strcasecmp(p, "Soc") ) {
            if ( verbose ) {
                printf("split: Aplicando Regla 11. Eliminando la palabra '%s'.\n", p);
            }
            continue;
        }

        /* Apply rule 3
         * Do not take into account composite characters */
        p = moral_regla3(p, verbose);

        /* Apply rule 12 */
        p = moral_regla12(p, verbose);

        /* NOTE(review): if any of the helpers above returns freshly
         * allocated memory rather than a pointer into `copy`, it is never
         * released here - confirm the helpers' ownership rules. */

        /* Apply rule 10
         * Replaces Arabic numerals. In this case the result of applying the
         * rule may be one or more words, each of them longer than 'p'. */
        n = moral_regla10(p, numeral, verbose);

        if ( n > 0 ) {
            /* Add the numerals to the word list used to build the key.
             * NOTE(review): ownership of the numeral[] strings is not
             * visible here; they are copied, never freed - confirm. */
            for ( i = 0; i < n; i++ ) {
                if ( split_append_word(&list, len, numeral[i]) != 0 ) {
                    goto done;
                }
            }
        } else if ( split_append_word(&list, len, p) != 0 ) {
            goto done;
        }
    }

    /* Add the sentinel. On failure realloc() leaves `list` intact, so the
     * words collected so far are still handed back (unterminated). */
    tmp = (char **)realloc(list, (sizeof *list) * (*len + 1));
    if ( tmp == NULL ) {
        fprintf(stderr, "Failed to allocate memory for more words.");
        goto done;
    }
    list = tmp;
    list[*len] = NULL;

done:
    /* Single exit so the working copy is released on every path. `p` is not
     * freed: it is either NULL or not known to be owned by this function. */
    free(copy);
    *words = list;
}