Example #1
0
/*
 * Print `length` bytes from `buffer` to stdout, one character each.
 *
 * Every byte is first folded into the range 33..126: values above 126 are
 * reduced by 126 (up to two times, which is enough for any unsigned char),
 * and values below 33 are then raised by 33.  When `allow_punctuation` is
 * zero the folded value is additionally passed through remove_punctuation().
 *
 * If `addbang` is non-zero, "!" is printed in place of the character at
 * index 0; otherwise, if `addfive` is non-zero, "5" is printed in place of
 * the character at index 5.
 */
void
print_it(unsigned char* buffer, int allow_punctuation, int addbang, int length,
         int addfive) {
  int idx;
  int pass;
  for (idx = 0; idx < length; ++idx) {
    unsigned char ch = buffer[idx];

    /* Two folding passes: 253..255 are still above 126 after the first. */
    for (pass = 0; pass < 2; ++pass) {
      if (ch > 126) {
        ch -= 126;
      }
    }
    if (ch < 33) {
      ch += 33;
    }

    /* The filter runs even when the character ends up being replaced. */
    if (!allow_punctuation) {
      ch = remove_punctuation(ch);
    }

    if (addbang && idx == 0) {
      printf("!");
      continue;
    }
    if (addfive && idx == 5) {
      printf("5");
      continue;
    }
    printf("%c", ch);
  }
}
Example #2
0
File: lt.c Project: kdomen/test
/*
 * Copy `word` into `buf` and shorten it in place.
 *
 * Steps, in order:
 *   1. `word` is copied into `buf` (the caller must provide enough room;
 *      no bounds are checked here).
 *   2. remove_punctuation() is applied to `buf`.
 *   3. replace_with() is called with a local scratch buffer, `buf`, and the
 *      strings "ight" / "ite".
 *   4. If `buf` is longer than five characters, its first character is
 *      restored from `word` and remove_vowels() is applied to `buf`.
 *
 * NOTE(review): the scratch buffer is never read in this function after
 * replace_with() returns; whether replace_with() also updates `buf` is not
 * visible here -- confirm the replacement actually takes effect.
 * NOTE(review): the length threshold was originally described as
 * "4 letters or under", which does not match the `> 5` test below --
 * confirm the intended cut-off.
 */
void LTA(char *buf, char *word) {
    char scratch[128];

    strcpy(buf, word);
    remove_punctuation(buf);

    replace_with(scratch, buf, "ight", "ite");

    /* Short words are left as they are after the steps above. */
    if (strlen(buf) > 5) {
        buf[0] = word[0];
        remove_vowels(buf);
    }
}
Example #3
0
/**
 * Scans `text` for the next trigger word and, when one is found (or the
 * search has gone on for too long), switches to the corresponding state.
 *
 * The first `m_num_parsed_words` whitespace-separated words of `text` are
 * skipped. Each following word is normalised with remove_punctuation() and
 * to_lower() and compared against the triggers in
 * `m_trigger_list[m_trigger_index]`. The state change also happens, without
 * a match, once the word position reaches
 * `m_trigger_locations[m_trigger_index] + MAX_WORD_OFFSET`.
 *
 * On a state change this updates `m_num_parsed_words`, `m_active_state` and
 * `m_trigger_index` (clamped to the last entry of `m_state_list`) and stops
 * scanning; otherwise the members are left untouched.
 *
 * The `throw()` specification is kept as-is so the definition keeps matching
 * its declaration.
 *
 * @param text  Full text to scan.
 */
void VoiceCommands::parse_text(const std::string &text) throw()
{
    std::stringstream text_stream(text);

    // Skip to the appropriate position
    std::string word;
    for (int i = 0; i < m_num_parsed_words; ++i)
    {
        text_stream >> word;
        if (!text_stream.good())
            return;
    }

    int word_location = m_num_parsed_words - 1;

    // Test the extraction itself: checking good() before reading would let a
    // failed read (trailing whitespace, empty text) re-process the previous
    // word and advance word_location for a word that does not exist.
    while (text_stream >> word)
    {
        word_location++;

        word = to_lower(remove_punctuation(word));
        bool found_trigger = false;
        // Bind by const reference to avoid copying every trigger string.
        for (const std::string &trigger : m_trigger_list[m_trigger_index])
        {
            if (trigger == word)
            {
                found_trigger = true;
                break;
            }
        }

        // If we haven't yet found the trigger word, panic and move to the next state.
        if (word_location >= m_trigger_locations[m_trigger_index] + MAX_WORD_OFFSET)
            found_trigger = true;

        if (found_trigger)
        {
            m_num_parsed_words = std::min(m_trigger_locations[m_trigger_index] + MAX_WORD_OFFSET,
                                          word_location) + 1;

            m_active_state  = m_state_list[m_trigger_index];
            m_trigger_index = std::min(static_cast<int>(m_state_list.size() - 1),
                                       1 + m_trigger_index);
            break;
        }
    }
}
Example #4
0
/**
 * Tokenises `sentence` according to the tokeniser's current mode.
 *
 * - `Tok`: delegates to tokenise_toktag with the read_helper_token helper.
 * - `Tag`: delegates to tokenise_toktag with the read_helper_tokentag helper.
 * - `Raw`: delegates to tokenise_raw, which is not given `brackets`.
 * - any other mode: no tokenisation is performed.
 *
 * Afterwards, if `m_remove_punct` is set, remove_punctuation() is applied to
 * `result` regardless of the mode.
 *
 * @param sentence  Input text.
 * @param result    Receives the words produced by the selected tokeniser.
 * @param brackets  Passed through to tokenise_toktag in the Tok/Tag modes.
 */
void Tokeniser::tokenise(const std::string& sentence, std::vector< Word >& result, std::vector< bracketing >& brackets)
{
  if(m_mode == Tok) {
    tokenise_toktag<read_helper_token>(sentence,result,brackets);
  }
  else if(m_mode == Tag) {
    tokenise_toktag<read_helper_tokentag>(sentence,result,brackets);
  }
  else if(m_mode == Raw) {
    tokenise_raw(sentence, result);
  }
  // Unrecognised modes fall through without tokenising.

  if(m_remove_punct) {
    remove_punctuation(result);
  }
}
Example #5
0
/**
 * Append a duplicate of `word` to the NULL-terminated array `*list`.
 *
 * The array is grown so that it always has room for the new entry plus a
 * trailing NULL sentinel; it therefore stays NULL-terminated even when a
 * later call fails.  On success `*len` is incremented.
 *
 * Returns 0 on success, -1 if memory could not be obtained (a message is
 * written to stderr and `*list` / `*len` still describe a valid list).
 */
static int
split_append_word(char ***list, size_t *len, const char *word)
{
  /* The explicit cast keeps this valid when built as C++ as well. */
  char **grown = (char **) realloc(*list, (sizeof **list) * (*len + 2));
  if ( grown == NULL ) {
    fprintf(stderr, "Failed to allocate memory for more words.");
    return -1;
  }
  *list = grown;

  grown[*len] = strndup(word, strlen(word) + 1);
  if ( grown[*len] == NULL ) {
    /* grown[*len] is NULL here, so it doubles as the sentinel. */
    fprintf(stderr, "No fue posible duplicar la cadena.");
    return -1;
  }

  (*len)++;
  grown[*len] = NULL;
  return 0;
}

/**
 * Return non-zero when `word` equals, ignoring case, one of the entries of
 * the NULL-terminated string table `table`.
 */
static int
split_word_in(const char *word, const char *const *table)
{
  for ( ; *table != NULL; table++ ) {
    if ( !strcasecmp(word, *table) ) {
      return 1;
    }
  }
  return 0;
}

/**
 * Function to convert a single string into its words as an array of
 * strings.
 *
 * `name` is split on spaces, tabs and newlines.  Each token is passed
 * through remove_punctuation(); tokens matching the rule 9 or rule 11 word
 * lists (case-insensitively) are dropped; the remaining tokens go through
 * moral_regla3(), moral_regla12() and moral_regla10().  When
 * moral_regla10() reports one or more numerals, those are stored instead of
 * the token itself.
 *
 * @param name     String to split.  If NULL, the result is an empty list
 *                 (`*words == NULL`, `*len == 0`).
 * @param words    Receives a newly allocated, NULL-terminated array of
 *                 newly allocated strings, or NULL if nothing could be
 *                 allocated.  Releasing the array and its strings is left
 *                 to the caller.
 * @param len      Receives the number of strings stored in `*words`
 *                 (the sentinel is not counted).
 * @param verbose  Non-zero to print a message for every dropped word; also
 *                 forwarded to the rule helpers.
 *
 * On an allocation failure a message is written to stderr and the words
 * collected so far are returned.
 */
void
split(const char *name, char ***words, size_t *len, const int verbose)
{
  /* Words removed by rule 9. */
  static const char *const rule9_words[] = {
    "EL", "LA", "DE", "LOS", "LAS", "DEL", "EN", "CON", "POR", "PARA", "Y",
    NULL
  };
  /* Words removed by rule 11. */
  static const char *const rule11_words[] = {
    "Compania", "Cia", "Sociedad", "Soc", NULL
  };

  char *p = NULL;
  char *copy = NULL;
  char **list = NULL;
  char *last = NULL;
  /* NOTE(review): the original note says "at most 3 numerals", yet the
   * array has 4 slots -- confirm what moral_regla10() may write. */
  char *numeral[4];
  int n = 0; /* number of numerals */
  int i = 0;

  /* Initialize the results */
  *words = NULL;
  *len = 0;

  if ( name == NULL ) {
    return;
  }

  /* strtok_r() modifies its input, so work on a private copy. */
  copy = strndup(name, strlen(name));
  if ( copy == NULL ) {
    fprintf(stderr, "split: Not enough memory to copy name");
    return;
  }

  for ( (p = strtok_r(copy, " \t\n", &last));
        p;
        (p = strtok_r(NULL, " \t\n", &last)) ) {

    /* Remove any punctuation sign */
    /* This call may also be used to apply Rule 4 */
    p = remove_punctuation(p, verbose);

    /* Apply rule 9 */
    if ( split_word_in(p, rule9_words) ) {
      if ( verbose ) {
        printf("split: Aplicando Regla 9. Eliminando la palabra '%s'.\n", p);
      }
      continue;
    }

    /* Apply rule 11 */
    if ( split_word_in(p, rule11_words) ) {
      if ( verbose ) {
        printf("split: Aplicando Regla 11. Eliminando la palabra '%s'.\n", p);
      }
      continue;
    }

    /* Apply rule 3
     * Do not take into account composite characters
     */
    p = moral_regla3(p, verbose);

    /* Apply rule 12
     */
    p = moral_regla12(p, verbose);

    /* Apply rule 10
     * Replaces Arabic numerals.  In this case both 'p' and the result of
     * applying the rule may turn out to be one or more words, each of them
     * longer than 'p'.
     * NOTE(review): ownership of the strings left in `numeral` is not
     * visible here; they are only duplicated, never freed.
     */
    n = moral_regla10(p, numeral, verbose);
    if ( n > 0 ) {
      /* Append the numerals to the word list used to build the key */
      for ( i = 0; i < n; i++ ) {
        if ( split_append_word(&list, len, numeral[i]) != 0 ) {
          goto done;
        }
      }
    } else if ( split_append_word(&list, len, p) != 0 ) {
      goto done;
    }
  }

  /* No word was stored: still hand back an array holding just the sentinel. */
  if ( list == NULL ) {
    list = (char **) malloc(sizeof *list);
    if ( list == NULL ) {
      fprintf(stderr, "Failed to allocate memory for more words.");
    } else {
      list[0] = NULL;
    }
  }

done:
  /* Single exit point so the working copy is released on every path. */
  free(copy);

  *words = list;
}