Exemple #1
0
// Splits `src` into the non-empty pieces that lie between separator
// characters. Every character contained in `sep` acts as a separator, and
// runs of consecutive separators never yield empty tokens.
Tokens StrSplit(const std::string &src, const std::string &sep)
{
    Tokens pieces;

    // Jump straight to the first character that is not a separator.
    std::string::size_type start = src.find_first_not_of(sep);
    while (start != std::string::npos)
    {
        // The token runs up to the next separator, or to the end of src.
        const std::string::size_type stop = src.find_first_of(sep, start);
        if (stop == std::string::npos)
        {
            pieces.push_back(src.substr(start));
            break;
        }
        pieces.push_back(src.substr(start, stop - start));

        // Skip the separator run that follows the token just stored.
        start = src.find_first_not_of(sep, stop);
    }
    return pieces;
}
 // Groups the characters of `s` into tokens. A new token starts whenever the
 // current character differs from the previous one, unless the previous
 // character was a space: a space (or run of spaces) is therefore glued to
 // whatever follows it. The scan starts as if a space preceded the input.
 Tokens get_tokens( const String& s ) {
   Tokens groups;
   String run;
   char last = ' ';
   for ( auto ch : s ) {
     const bool boundary = ( last != ' ' ) && ( last != ch );
     if ( boundary ) {
       // Close the token collected so far before starting the next one.
       groups.push_back(run);
       run = "";
     }
     run += ch;
     last = ch;
   }
   // Flush the token that was still being built when the input ended.
   if ( run != "" )
     groups.push_back(run);
   return groups;
 }
Exemple #3
0
// Breaks `src` apart at every character that occurs in `sep` and returns the
// non-empty fragments in order of appearance.
Tokens StrSplit(const std::string& src, const std::string& sep)
{
    Tokens fragments;
    std::string current;

    for (const char ch : src)
    {
        const bool isSeparator = sep.find(ch) != std::string::npos;
        if (!isSeparator)
        {
            current += ch;
            continue;
        }

        // A separator ends the fragment in progress; empty ones are dropped.
        if (!current.empty())
        {
            fragments.push_back(current);
        }
        current.clear();
    }

    // The input may end without a trailing separator.
    if (!current.empty())
    {
        fragments.push_back(current);
    }
    return fragments;
}
/**
 * Splits `str` into tokens separated by any character of `delimiters`.
 *
 * A token that starts with a double quote extends to the next double quote
 * (or to the end of the string when there is none). The quotes themselves are
 * not part of the token, and delimiters inside the quotes are preserved.
 * Empty tokens are never returned.
 *
 * @param str         text to split
 * @param delimiters  set of characters that separate tokens
 * @return the tokens in order of appearance
 */
StringUtils::Tokens StringUtils::tokenize(const std::string& str, const std::string& delimiters)
{
	Tokens tokens;
	if (str.empty()) return tokens;

	std::string::size_type pos = 0;
	while (true)
	{
		std::string::size_type delimPos = str.find_first_of(delimiters, pos);
		const std::string::size_type tokenPos = str.find_first_not_of(delimiters, pos);

		if (tokenPos != std::string::npos && str[tokenPos] == '\"')
		{
			// Quoted token: it ends at the closing quote, not at a delimiter.
			delimPos = str.find('\"', tokenPos + 1);
			// Start right after the opening quote. The previous `pos++` was
			// only correct when pos already pointed at the quote; with extra
			// delimiters in front of it the token kept them and the quote.
			pos = tokenPos + 1;
		}

		if (std::string::npos != delimPos)
		{
			// tokenPos > delimPos means pos sits on a delimiter: nothing to
			// extract, just step over it.
			if (std::string::npos != tokenPos && tokenPos < delimPos)
			{
				const std::string token = str.substr(pos, delimPos - pos);
				if (!token.empty()) tokens.push_back(token);
			}
			pos = delimPos + 1;
		}
		else
		{
			// No terminator left: the rest of the string is the last token.
			if (std::string::npos != tokenPos)
			{
				const std::string token = str.substr(pos);
				if (!token.empty()) tokens.push_back(token);
			}
			break;
		}
	}
	return tokens;
}
Exemple #5
0
// Turns `source` into a token list: the text is split on '\n', each line is
// split on ' ', and a "\n" entry is added after the pieces of every line.
// Each resulting string is then converted with makeToken().
// NOTE(review): split() appears to append to its third argument (tokens_str
// is reused across lines without being reset) - confirm against split().
Tokens Bytecode::lexer(const std::string& source) const {
    Strings lines;
    split(source, '\n', lines);

    Strings pieces;
    for (const auto& line : lines) {
        split(line, ' ', pieces);
        pieces.push_back("\n");
    }

    Tokens result;
    for (const auto& piece : pieces) {
        result.push_back(makeToken(piece));
    }
    return result;
}
Exemple #6
0
// Hand-builds the token sequence for a line comment followed by a name on
// the next line, then feeds it to Comment::construct().
int main()
{
    Code code("//bla bli blo\nflap");
    CodeRange remaining(code);
    Tokens tokens;

    // One small appender per token kind; each takes the width handed to
    // reduce(). NOTE(review): reduce() presumably consumes that many
    // characters from the front of the range - confirm.
    const auto addSymbol = [&](int width) {
        tokens.push_back(Token::Ptr(new Symbol(reduce(remaining, width))));
    };
    const auto addName = [&](int width) {
        tokens.push_back(Token::Ptr(new Name(reduce(remaining, width))));
    };
    const auto addWhitespace = [&](int width) {
        tokens.push_back(Token::Ptr(new Whitespace(reduce(remaining, width))));
    };
    const auto addNewline = [&](int width) {
        tokens.push_back(Token::Ptr(new Newline(reduce(remaining, width))));
    };

    // Widths follow the layout of the literal: "/" "/" "bla" " " "bli" " "
    // "blo" "\n" "flap".
    addSymbol(1);
    addSymbol(1);
    addName(3);
    addWhitespace(1);
    addName(3);
    addWhitespace(1);
    addName(3);
    addNewline(1);
    addName(4);

    TokenRange tr(tokens);
    auto comment = Comment::construct(tr);
    return 0;
}
/**
 * Parses one stage of a pipeline expression, starting at tokens[index].
 *
 * A stage is either a single expression token, or an identifier optionally
 * followed by a parenthesised parameter list and/or a range.
 *
 * @param tokens  token sequence being parsed
 * @param index   in/out cursor; advanced past the tokens that were consumed
 * @param length  number of tokens available in `tokens`
 * @param name    out: the identifier text, or "" for an expression stage
 * @param tks     out: the expression token, or the tokens found between the
 *                parentheses, are appended here
 * @param range   out: set to a newly allocated Range when a range is present,
 *                left untouched otherwise.
 *                NOTE(review): the caller presumably takes ownership - confirm.
 * @return true when a pipe token followed the stage and was consumed (another
 *         stage is expected), false otherwise
 *
 * Malformed input is reported through THROW.
 */
//static
bool PipelineBuilder::extract(Tokens& tokens, int& index, int length,
                              string& name, Tokens& tks, Range*& range)
{
    // expect:
    //   1. name of a function
    //   2. name of a pre-defined pipeline
    //   3. expression in { ... }

    if (index >= length) THROW("Invalid pipeline expression: unfinished");

    if (tokens[index].m_angen == ANGEN_EXPRESSION) {
        name = "";
        tks.push_back(tokens[index]);
        if (index >= length-1) return false;   // expression was the last token
        index++;
        if (tokens[index].m_angen == ANGEN_PIPE) { index++; return true; }
        return false;
    }

    if (tokens[index].m_angen != ANGEN_IDENT) THROW("Invalid pipeline expression: expected name");
    name = tokens[index].m_val.m_str;

    if (index >= length-1) return false;       // name was the last token
    index++;

    // expect:
    //   1. (  function parameters
    //   2. |  next processor in pipeline
    //   3. ^  range for a pre-defined pipeline

    if (tokens[index].m_angen == ANGEN_PIPE) { index++; return true; }

    if (tokens[index].m_angen == ANGEN_PAREN_OPEN) {
        if (index >= length-1) THROW("Invalid pipeline expression: missing parenthesis");
        index++;

        // Collect everything up to the matching close parenthesis.
        // NOTE(review): nested parenthesis tokens are counted but not copied
        // into tks - confirm that dropping them is intended.
        int inside = 1;
        for ( ; (index < length) && (inside > 0); index++) {
            if (tokens[index].m_angen == ANGEN_PAREN_CLOSE) inside--;
            else if (tokens[index].m_angen == ANGEN_PAREN_OPEN) inside++;
            else tks.push_back(tokens[index]);
        }
        if (inside != 0) THROW("Expected closing parenthesis");
    }

    // index already points at the next unread token here, so the end-of-input
    // test is `>= length`. The former `>= length-1` skipped a final `|` or `^`
    // token and made the "unfinished range" check below unreachable.
    if (index >= length) return false;
    if (tokens[index].m_angen == ANGEN_PIPE) { index++; return true; }

    if (tokens[index].m_angen == ANGEN_RANGE) {
        index++;
        if (index >= length) THROW("Invalid pipeline expression: unfinished range");
        int period = 0;
        PeriodType periodType = SECOND;
        if (tokens[index].m_angen == ANGEN_LONG) {
            if (tokens[index].m_val.m_long <= 0) THROW("Invalid period value");
            period = tokens[index].m_val.m_long;
        }
        else if (tokens[index].m_angen == ANGEN_TITLE) {
            parsePeriod(tokens[index].m_val.m_str.c_str(), period, periodType);
        }
        else {
            THROW("Expected range value");
        }
        range = new Range(period, periodType);
        index++;
    }

    // Same reasoning as above: index is the next unread token.
    if (index >= length) return false;
    if (tokens[index].m_angen == ANGEN_PIPE) { index++; return true; }

    return false;
}
Exemple #8
0
int main(int argc, char const *argv[]) {
  // If there are not enough args, return -1
  if (argc < 5) {
    std::cerr << "Usage: P7 <corpus> <sentence> <dictionary> <n> <threshold> <delta> <model>" << '\n';
    return -1;
  }

  // Otherwise, collect the function parameters
  string corpusFileName = argv[1];
  string sentenceFileName = argv[2];
  string dictionaryFileName = argv[3];
  unsigned int n = stoi(argv[4]);
  unsigned int threshold = stoi(argv[5]);
  double delta = stod(argv[6]);
  bool model = stoi(argv[7]);



  // Capture all tokens
  Tokens corpusTokens;
  Tokens sentenceTokens;
  Tokens dictionaryTokens;
  read_tokens(corpusFileName, corpusTokens, false);
  read_tokens(sentenceFileName, sentenceTokens, true);
  read_tokens(dictionaryFileName, dictionaryTokens, false);


  if (corpusTokens.size() < n) {
    std::cerr << "\nInput file '" << corpusFileName << "' is too small to create any nGrams of size " << n;
    return -1;
  }

  if (sentenceTokens.size() < n) {
    std::cerr << "\nInput file '" << sentenceFileName << "' is too small to create any nGrams of size " << n;
    return -1;
  }


  unordered_map <string, int> vocabulary;
  unordered_map <string, int> dictionary;
  vector<Corpus> corpus = getCorpusList(corpusTokens, n);

  for (auto &word : corpusTokens) {
    if (vocabulary.count(word) == 0)
      vocabulary[word] = 1;
  }

  for (auto &word : dictionaryTokens) {
    if (dictionary.count(word) == 0)
      dictionary[word] = 1;
  }

  vector<double> probs;

  int V = vocabulary.size() + 1;
  double N = corpusTokens.size();

  // Collect sentences
  vector<Tokens> sentences;
  Tokens sentence;
  for (auto &word : sentenceTokens) {
    if (word == EOS) {
      sentences.push_back(sentence);
      sentence.clear();
    } else {
      sentence.push_back(word);
    }
  }

  // Proof sentences
  for (auto &sentence : sentences) {
    std::cout << "Sentence:\t";
    for (auto &word : sentence)
      std::cout << word << ' ';
    std::cout << '\n';
    // Check against all words within reasonable distance
    vector<Tokens> candidateWords;
    for (auto &word : sentence) {
      Tokens candidates;
      for (auto &candidate : dictionary)
        if (uiLevenshteinDistance(word, candidate.first) <= 1)
          candidates.push_back(candidate.first);

      candidateWords.push_back(candidates);
    }

    // Check that the produced sentences from the candidate words makes semantic sense
    vector<Tokens> candidateSentences;

    // for (auto &words : candidateWords) {
    //   for (auto &word : words) {
    //     Tokens temp = sentence;
    //     temp
    //     candidateSentences.push_back(temp)
    //   }
    // }

    for (int i = 0; i < candidateWords.size(); i++) {
      for (auto &word : candidateWords[i]) {
        Tokens temp = sentence;
        temp[i] = word;
        candidateSentences.push_back(temp);
      }
    }

    double bestProb = -DBL_MAX;
    Tokens bestSentence;

    for (auto &sentence : candidateSentences) {
      double prob = getProb(corpus, sentence, n, delta, N, V, threshold, model);
      if (prob > bestProb) {
        bestProb = prob;
        bestSentence = sentence;
      }
    }
    std::cout << "Suggestion:\t";
    for (auto &word : bestSentence)
      std::cout << word << " ";
    std::cout << "\n";
  }
  return 0;
}