Beispiel #1
0
int main(int argc, char *argv[]) {
  const char input1[] = "I eat an apple.";
  const char input2[] = "You drive a car.";
	
  char buffer[BUFFER_SIZE];	
  rh_stack_instance disableBackReferenceStack;
  rh_stackstack_instance sandbox;
  rh_hashmap_instance backreference_lookup;
  rh_list_instance warnings;
  rh_stack_instance errors;    
  npeg_context context = {
    &disableBackReferenceStack, 
    &sandbox,
    &backreference_lookup,
    &warnings,
    &errors
  };    
  npeg_inputiterator iterator;
  npeg_astnode *ast, *subjnode, *verbnode, *objnode;
  int (*parsetree)(npeg_inputiterator*, npeg_context*) = &sentence_root;
  int IsMatch = 0;

  // load npeg managed memory
  npeg_inputiterator_constructor(&iterator, input1, strlen(input1));
  npeg_constructor(&context, NULL);	
    
  printf("\tReached: parsetree(&iterator, &context) for input1.\n");
  IsMatch = parsetree(&iterator, &context);
  assert(IsMatch);
  printf("\tVerified: The expected input was matched by parser.\n");
    
  ast = npeg_get_ast(&context);
  assert(0 == strcmp(ast->token->name, "Sentence"));
  printf("\tVerified: The expected token name: %s.\n", ast->token->name);

  assert(3 == ast->nof_children);  
  subjnode = ast->children[0];
  verbnode = ast->children[1];
  objnode = ast->children[2];
  printf("\tVerified: #children of AST root.\n", ast->token->name);

  assert(0 == strcmp(subjnode->token->name, "subject"));
  printf("\tVerified: The expected token name: %s.\n", subjnode->token->name);

  assert(0 == strcmp(verbnode->token->name, "verb"));
  printf("\tVerified: The expected token name: %s.\n", verbnode->token->name);

  assert(0 == strcmp(objnode->token->name, "object"));
  printf("\tVerified: The expected token name: %s.\n", objnode->token->name);
    
  npeg_inputiterator_get_text(buffer, &iterator, ast->token->start, ast->token->end);
  assert(0 == strcmp(buffer, input1));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);
			    
  npeg_destructor(&context); 
  npeg_inputiterator_destructor(&iterator);
  npeg_astnode_delete_tree(ast, npeg_astnode_tokendeletion_callback);
  // unload npeg managed memory

  // load npeg managed memory
  npeg_inputiterator_constructor(&iterator, input2, strlen(input2));
  npeg_constructor(&context, NULL);	
    
  printf("\tReached: parsetree(&iterator, &context) for input2.\n");
  IsMatch = parsetree(&iterator, &context);
  assert(IsMatch);
  printf("\tVerified: The expected input was matched by parser.\n");
    
  ast = npeg_get_ast(&context);
  assert(0 == strcmp(ast->token->name, "Sentence"));
  printf("\tVerified: The expected token name: %s.\n", ast->token->name);
    
  npeg_inputiterator_get_text(buffer, &iterator, ast->token->start, ast->token->end);
  assert(0 == strcmp(buffer, input2));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  assert(3 == ast->nof_children);  
  subjnode = ast->children[0];
  verbnode = ast->children[1];
  objnode = ast->children[2];
  printf("\tVerified: #children of AST root.\n", ast->token->name);

  assert(0 == strcmp(subjnode->token->name, "subject"));
  printf("\tVerified: The expected token name: %s.\n", subjnode->token->name);

  assert(0 == strcmp(verbnode->token->name, "verb"));
  printf("\tVerified: The expected token name: %s.\n", verbnode->token->name);

  assert(0 == strcmp(objnode->token->name, "object"));
  printf("\tVerified: The expected token name: %s.\n", objnode->token->name);
			    
  npeg_astnode_delete_tree(ast, npeg_astnode_tokendeletion_callback);
  npeg_destructor(&context); 
  npeg_inputiterator_destructor(&iterator);
  // unload npeg managed memory

  return 0;
}
Beispiel #2
0
// Program entry point: builds a parse tree from the data file, derives the
// set of states from it, computes the machine's information values and prints
// the states and the machine.
//
// Command-line arguments:
//   argv[1]  alphabet file
//   argv[2]  data file
//   argv[3]  maximum length of string
//   argv[4]  (optional) flag for changing the significance level
//   argv[5]  multi-line version, if used
//   argv[6]  (optional) use of chi-squared test
//
// Returns 1 once the run has completed.
// NOTE(review): a non-zero exit status on the normal path is unusual; confirm
// whether callers rely on it before changing it.
int main(int argc, char *argv[])
{
  bool isMulti = false;
  bool isSigLevel = false;
  bool isChi = false;
  double sigLevel = SIGLEVEL;

  //read in info from command line; exactly four arguments means
  //"no options", five to seven go to the matching option parser
  //(presumably these update the option variables through reference
  //parameters -- confirm against their declarations)
  switch (argc)
    {
    case 4:
      break;

    case 5:
      FiveArgs(argv, isMulti, isSigLevel, sigLevel, isChi);
      break;

    case 6:
      SixArgs(argv, isMulti, isSigLevel, sigLevel, isChi);
      break;

    case 7:
      SevenArgs(argv, isMulti, isSigLevel, sigLevel, isChi);
      break;

    default:
      //NOTE(review): argv[1..3] are read unconditionally below, so
      //PrintError() is presumably expected not to return -- confirm
      PrintError();
      break;
    }

  PrintCopyrightInfo();

  //set arguments
  char* alpha_file = argv[1];
  char* data_file = argv[2];
  int max_length = atoi(argv[3]);

  //report a user-supplied significance level; otherwise fall back
  //to the default (should be set already, just to be careful)
  if (isSigLevel)
    {
      cout << "Significance level set to " << sigLevel <<".\n";
    }
  else
    {
      sigLevel = SIGLEVEL;
    }

  //create parse tree to store all strings in data
  ParseTree parsetree(max_length);

  if (!isMulti)
    {
      //single-sequence input: read in data and alphabet from files,
      //then enter the data in the tree
      parsetree.ReadInput(alpha_file, data_file);
      parsetree.FillTree();
    }
  else
    {
      //multi-line input: read in data and enter
      //tree one line at a time
      parsetree.ReadProcessMultiLine(alpha_file, data_file);
      cout << "Multi-line option is set.\n"
           << "Max line length is " << MAX_LINE_SIZE
           << "\n";
    }

  //make hash table of alpha symbols and indices
  HashTable2 *alphaHash = parsetree.MakeAlphaHash();

  //create array of states
  AllStates allstates(parsetree.getAlphaSize(), sigLevel, isChi);

  //calculate frequency of occurrence of symbols
  allstates.InitialFrequencies(parsetree);

  //check all possible strings up to max
  //length and compare distributions
  for (int length = 1; length <= max_length; ++length)
    {
      allstates.CalcNewDist(length, parsetree);
    }

  //remove shorter strings (the returned flag is stored but never read)
  bool stateRemoved = allstates.DestroyShortHists(max_length, parsetree);

  //remove all non-recurring states
  allstates.CheckConnComponents(parsetree);

  //check futures longer than 1,
  //by using determinism of states
  allstates.Determinize(parsetree);

  //remove all non-recurring states (again, since there may be new ones)
  allstates.CheckConnComponents(parsetree);

  //store transitions from state to state
  allstates.StoreTransitions(parsetree.getMaxLength(), parsetree.getAlpha());

  //calculate distribution/frequency of states
  allstates.GetStateDistsMulti(parsetree, data_file, alphaHash, isMulti);

  //calculate information values
  Machine* machine = new Machine(&allstates);
  machine->CalcRelEnt(parsetree, alphaHash, isMulti);
  machine->CalcRelEntRate(parsetree, alphaHash, isMulti);
  machine->CalcCmu();
  machine->CalcEntRate();
  machine->CalcVariation(parsetree, alphaHash, isMulti);

  //print out states
  allstates.PrintOut(data_file, parsetree.getAlpha());

  //print out machine and calculations
  machine->PrintOut(data_file, alpha_file, data_file, max_length, sigLevel,
                    isMulti, isChi, parsetree.getAlphaSize());
  machine->PrintDot(data_file, parsetree.getAlpha());

  delete machine;
  return 1;
}
Beispiel #3
0
int main(int argc, char *argv[])
{
  const char text1[] = "(1*3+4)/5*93";
  const char text2[] = "9+(9-8)*10";  

  char buffer[BUFFER_SIZE];
  rh_stack_instance disableBackReferenceStack;
  rh_stackstack_instance sandbox;
  rh_hashmap_instance backreference_lookup;
  rh_list_instance warnings;
  rh_stack_instance errors;
  npeg_context context;
  npeg_inputiterator iterator;
  npeg_astnode* ast, *p_child;

  int (*parsetree)(npeg_inputiterator*, npeg_context*) = &MathematicalFormula_impl_0;
  int IsMatch = 0;

  // load npeg managed memory
  context.disableBackReferenceStack = &disableBackReferenceStack;
  context.sandbox = &sandbox;
  context.backReferenceLookup = &backreference_lookup;
  context.warnings = &warnings; 
  context.errors = &errors;

  npeg_inputiterator_constructor(&iterator, text1, strlen(text1));
  npeg_constructor(&context, NULL);

  IsMatch = parsetree(&iterator, &context);
  assert(IsMatch);
  printf("\tVerified: The expected input was matched by parser.\n");

  ast = npeg_get_ast(&context);
  npeg_printVisitor(ast, NULL);
  assert(0 == strcmp(ast->token->name, "EXPRESSION"));
  printf("\tVerified: The expected token name: '%s'.\n", ast->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, ast->token->start, ast->token->end);
  assert(0 == strcmp(buffer, text1));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  assert(ast->nof_children == 5);
  puts("\tVerified: Expected number of children.");
  p_child = ast->children[0];
  assert(0 == strcmp(p_child->token->name, "EXPRESSION"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "1*3+4"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[1];
  assert(0 == strcmp(p_child->token->name, "SYMBOL"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "/"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);
  
  p_child = ast->children[2];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "5"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[3];
  assert(0 == strcmp(p_child->token->name, "SYMBOL"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "*"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[4];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "93"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  assert(ast->children[0]->nof_children == 5);
  puts("\tVerified: Expected number of children.");

  p_child = ast->children[0]->children[0];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "1"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[0]->children[1];
  assert(0 == strcmp(p_child->token->name, "SYMBOL"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "*"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[0]->children[2];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "3"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[0]->children[3];
  assert(0 == strcmp(p_child->token->name, "SYMBOL"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "+"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[0]->children[4];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "4"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  // unload npeg managed memory
  npeg_astnode_delete_tree(ast, npeg_astnode_tokendeletion_callback);
  npeg_destructor(&context);
  npeg_inputiterator_destructor(&iterator);

  npeg_inputiterator_constructor(&iterator, text2, strlen(text2));
  npeg_constructor(&context, NULL);

  IsMatch = parsetree(&iterator, &context);
  assert(IsMatch);
  printf("\tVerified: The expected input was matched by parser.\n");

  ast = npeg_get_ast(&context);
  assert(0 == strcmp(ast->token->name, "EXPRESSION"));
  printf("\tVerified: The expected token name: '%s'.\n", ast->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, ast->token->start, ast->token->end);
  assert(0 == strcmp(buffer, text2));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  assert(ast->nof_children == 5);
  puts("\tVerified: Expected number of children.");

  p_child = ast->children[0];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "9"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);
  
  p_child = ast->children[1];
  assert(0 == strcmp(p_child->token->name, "SYMBOL"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "+"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[2];
  assert(0 == strcmp(p_child->token->name, "EXPRESSION"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "9-8"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[3];
  assert(0 == strcmp(p_child->token->name, "SYMBOL"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "*"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[4];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "10"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);
  
  assert(ast->children[2]->nof_children == 3);
  puts("\tVerified: Expected number of children.");

  p_child = ast->children[2]->children[0];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "9"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[2]->children[1];
  assert(0 == strcmp(p_child->token->name, "SYMBOL"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "-"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  p_child = ast->children[2]->children[2];
  assert(0 == strcmp(p_child->token->name, "VALUE"));
  printf("\tVerified: The expected token name: '%s'.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "8"));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  // unload npeg managed memory
  npeg_astnode_delete_tree(ast, npeg_astnode_tokendeletion_callback);
  npeg_destructor(&context);
  npeg_inputiterator_destructor(&iterator);

  return 0;
}
Beispiel #4
0
/*
 * Test driver for the `PhoneNumber_impl_0` parser.
 *
 * Parses the fixed input "123-456-7890" and asserts that the parser reports a
 * match, that the root token is named "PhoneNumber" and spans the whole
 * input, and that the root has three children named "ThreeDigitCode",
 * "ThreeDigitCode" and "FourDigitCode" whose matched text is "123", "456" and
 * "7890". Confirmation lines go to stdout; a failed check trips assert().
 *
 * argc and argv are unused. Returns 0 when all checks pass.
 */
int main(int argc, char *argv[])
{
  const char input1[] = "123-456-7890";

  char buffer[BUFFER_SIZE];

  rh_stack_instance disableBackReferenceStack;
  rh_stackstack_instance sandbox;
  rh_hashmap_instance backreference_lookup;
  rh_list_instance warnings;
  rh_stack_instance errors;
  npeg_astnode *ast, *p_child;
  npeg_context context;
  npeg_inputiterator iterator;
  int (*parsetree)(npeg_inputiterator*, npeg_context*) = &PhoneNumber_impl_0;
  int IsMatch = 0;

  // load npeg managed memory
  context.disableBackReferenceStack = &disableBackReferenceStack;
  context.sandbox = &sandbox;
  context.backReferenceLookup = &backreference_lookup;
  context.warnings = &warnings;
  context.errors = &errors;
  /* Pass the real input length; a larger hard-coded length would describe
     bytes beyond the end of the string. */
  npeg_inputiterator_constructor(&iterator, input1, strlen(input1));
  npeg_constructor(&context, NULL);

  printf("\tReached: parsetree(&iterator, &context) for input1.\n");
  IsMatch = parsetree(&iterator, &context);
  assert(IsMatch);
  printf("\tVerified: The expected input was matched by parser.\n");

  ast = npeg_get_ast(&context);
  npeg_printVisitor(ast, NULL);
  assert(0 == strcmp(ast->token->name, "PhoneNumber"));
  printf("\tVerified: The expected token name: %s.\n", ast->token->name);

  npeg_inputiterator_get_text(buffer, &iterator, ast->token->start, ast->token->end);
  assert(0 == strcmp(buffer, input1));
  printf("\tVerified: The expected matched string: '%s'.\n", buffer);

  assert(ast->nof_children == 3);
  puts("\tVerified: Expected number of children.");

  /* Each child check reports the child's own token name, i.e. the name that
     was just asserted, not the root's. */
  p_child = ast->children[0];
  assert(0 == strcmp(p_child->token->name, "ThreeDigitCode"));
  printf("\tVerified: The expected token name: %s.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "123"));
  printf("\tVerified: The expected matched string of 1st child: '%s'.\n", buffer);

  p_child = ast->children[1];
  assert(0 == strcmp(p_child->token->name, "ThreeDigitCode"));
  printf("\tVerified: The expected token name: %s.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "456"));
  printf("\tVerified: The expected matched string of 2nd child: '%s'.\n", buffer);

  p_child = ast->children[2];
  assert(0 == strcmp(p_child->token->name, "FourDigitCode"));
  printf("\tVerified: The expected token name: %s.\n", p_child->token->name);
  npeg_inputiterator_get_text(buffer, &iterator, p_child->token->start, p_child->token->end);
  assert(0 == strcmp(buffer, "7890"));
  printf("\tVerified: The expected matched string of 3rd child: '%s'.\n", buffer);

  // unload npeg managed memory
  npeg_astnode_delete_tree(ast, npeg_astnode_tokendeletion_callback);
  npeg_destructor(&context);
  npeg_inputiterator_destructor(&iterator);

  return 0;
}