Exemplo n.º 1
0
/*
 * Entry point of the interactive query program.
 *
 * argv[1] - index file produced by the indexer
 * argv[2] - directory holding the crawler output files
 *
 * Validates both arguments, loads the index with readFile(), then reads
 * one query per line from stdin and hands each line to queryEngine()
 * until end of input (or a read error) is reached.
 *
 * Returns 0 on normal termination and -1 on a usage or setup error.
 */
int main(int argc, char *argv[]){
	//check usage
	if (argc != 3) {
		fprintf(stderr, "Usage: ./query [INDEXER OUTPUT FILE] [DIRECTORY WITH CRAWLER OUTPUT FILES]\n");
		return -1;
	}

	//Check that the index file exists
	FILE *indexfile = fopen(argv[1], "rb");
	if (!indexfile) {
		fprintf(stderr, "Error: Given index file cannot be opened\n");
		return -1;
	}
	fclose(indexfile);

	//Check that the Crawler directory exists and is readable
	DIR *directory = opendir(argv[2]);
	if (directory == NULL) {
		//save errno right away: later library calls may overwrite it
		int open_error = errno;

		if (open_error == EACCES)
			fprintf(stderr, "Error: Crawler Directory has incompatible permissions for this program\n");
		else if (open_error == ENOENT)
			fprintf(stderr, "Error: Crawler Directory does not exist\n");
		else if (open_error == ENOTDIR)
			fprintf(stderr, "Error: Given Crawler directory is not a directory\n");
		else
			fprintf(stderr, "Error: Incompatible directory.\n");

		//nothing to close here: opendir() failed, so there is no handle
		return -1;
	}
	closedir(directory);

	HashTable *index = readFile(argv[1]);
	if (index == NULL) {
		fprintf(stderr, "Error: Unable to load the index from %s\n", argv[1]);
		return -1;
	}

	//loop for query input; ends when stdin is exhausted
	while (1) {
		char input[MAX_USER_INPUT];
		//clear the input buffer
		for (int i = 0; i < MAX_USER_INPUT; i++) {
			input[i] = '\0';
		}

		printf("QUERY:>");
		//the prompt has no newline, so flush it before blocking on input
		fflush(stdout);

		//fgets returns NULL at end of file or on a read error: stop then
		if (fgets(input, MAX_USER_INPUT, stdin) == NULL)
			break;

		//otherwise pass the line to the query engine
		queryEngine(input, index, argv[2], argv[1]);
	}

	//the loop is left once no more input can be read
	cleanupHashTable(index);

	return 0;
}
Exemplo n.º 2
0
/*
 * Test driver for the query engine.
 *
 * argv[1] - index file produced by the indexer
 * argv[2] - directory holding the crawler output files
 *
 * Validates both arguments, loads the index with readFile() and then
 * feeds a fixed series of queries to queryEngine(), printing a label
 * (and, where known, the expected outcome) before each one.
 *
 * Returns 0 after all cases ran and -1 on a usage or setup error.
 */
int main(int argc, char *argv[]){
	//check usage
	if (argc != 3) {
		fprintf(stderr, "Usage: ./query [INDEXER OUTPUT FILE] [DIRECTORY WITH CRAWLER OUTPUT FILES]\n");
		return -1;
	}

	//Check that the index file exists
	FILE *indexfile = fopen(argv[1], "rb");
	if (!indexfile) {
		fprintf(stderr, "Error: Given index file cannot be opened\n");
		return -1;
	}
	fclose(indexfile);

	//Check that the Crawler directory exists and is readable
	DIR *directory = opendir(argv[2]);
	if (directory == NULL) {
		//save errno right away: later library calls may overwrite it
		int open_error = errno;

		if (open_error == EACCES)
			fprintf(stderr, "Error: Crawler Directory has incompatible permissions for this program\n");
		else if (open_error == ENOENT)
			fprintf(stderr, "Error: Crawler Directory does not exist\n");
		else if (open_error == ENOTDIR)
			fprintf(stderr, "Error: Given Crawler directory is not a directory\n");
		else
			fprintf(stderr, "Error: Incompatible directory.\n");

		//nothing to close here: opendir() failed, so there is no handle
		return -1;
	}
	closedir(directory);

	HashTable *index = readFile(argv[1]);
	if (index == NULL) {
		fprintf(stderr, "Error: Unable to load the index from %s\n", argv[1]);
		return -1;
	}

	printf("Processing Case Test 1: hash AND list\n");
	queryEngine("hash AND list\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 1.5: list AND hash\n");
	printf("Should have the same results as above\n");
	queryEngine("list AND hash\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 2: HASH AND LIST\n");
	printf("Should have the same results as above\n");
	queryEngine("HASH AND LIST\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 3: hash list\n");
	printf("Should have the same results as above\n");
	queryEngine("hash list\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 3.5: hash  list (testing handling of extra space)\n");
	printf("Should have the same results as above\n");
	queryEngine("hash  list\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 4: hash OR list\n");
	queryEngine("hash OR list\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 5:\n");
	printf("Should find 0 files\n");
	queryEngine("\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 6: jkfnbvhfbvhjf\n");
	printf("Should find 0 files\n");
	queryEngine("jkfnbvhfbvhjf\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 7: hash AND list OR head AND tail \n");
	queryEngine("hash AND list OR head AND tail\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 8: (string is greater than 1000 characters)\n");
	printf("Should print an error message\n");

	//Build a query one character longer than MAX_USER_INPUT.
	//Each character is stored at its own offset; the previous
	//sprintf(fartoomany, "a") in a loop rewrote offset 0 every time and
	//produced the one-character string "a".
	//NOTE(review): the label above says 1000 characters; this assumes
	//MAX_USER_INPUT is the limit queryEngine enforces - confirm.
	size_t too_long = (size_t)MAX_USER_INPUT + 1;
	char *fartoomany = calloc(too_long + 1, sizeof(char));
	if (fartoomany == NULL) {
		fprintf(stderr, "Error: Out of memory while building the oversized query\n");
		cleanupHashTable(index);
		return -1;
	}
	for (size_t i = 0; i < too_long; i++)
		fartoomany[i] = 'a';
	//calloc zero-filled the buffer, so the string is already terminated

	queryEngine(fartoomany, index, argv[2], argv[1]);

	free(fartoomany);

	printf("\nProcessing Case Test 9: hash\n");
	queryEngine("hash\n", index, argv[2], argv[1]);

	printf("\nProcessing Case Test 10: bhjcdbcjd  OR ncvdkjnbvjdk \n");
	printf("Should find 0 files\n");
	queryEngine("bhjcdbcjd  OR ncvdkjnbvjdk\n", index, argv[2], argv[1]);

	cleanupHashTable(index);
	return 0;

}
Exemplo n.º 3
0
/*
 * computeBIGRAPID: two-stage tree search on tr.
 *
 * Stage 1 (global Thorough = 0, "fast"): repeatedly calls
 * treeOptimizeRapid() with the rearrangement range 1..bestTrav and keeps
 * iterating while at least one candidate tree in bt beats the current
 * likelihood by more than epsilon.
 *
 * Stage 2 (global Thorough = 1, "thorough"): same candidate loop, but the
 * range [rearrangementsMin, rearrangementsMax] is reset to
 * 1..adef->stepwidth after an improvement and shifted up by
 * adef->stepwidth otherwise; the stage ends once rearrangementsMax
 * exceeds adef->max_rearrange.
 *
 * When tr->searchConvergenceCriterion is set, either stage also ends as
 * soon as convergenceCriterion() returns a value <= 0.01.
 *
 * tr            - tree that is searched; the best tree seen so far is kept
 *                 in the one-slot list bestT and recalled into tr
 * adef          - analysis settings; read here: useBinaryModelFile,
 *                 initialSet, initial, stepwidth, max_rearrange;
 *                 adef->bestTrav is written
 * estimateModel - if set, modOpt() (or readBinaryModel() when
 *                 adef->useBinaryModelFile is set) is used between
 *                 phases; otherwise only treeEvaluate() is called
 *
 * With _TERRACES defined, candidate trees of the thorough stage are also
 * collected and those whose likelihood is within 0.000001 of the best
 * one are written to a per-search file.
 */
void computeBIGRAPID (tree *tr, analdef *adef, boolean estimateModel) 
{ 
  unsigned int
    vLength = 0;
  int
    i,
    impr, 
    bestTrav,
    rearrangementsMax = 0, 
    rearrangementsMin = 0,    
    thoroughIterations = 0,
    fastIterations = 0;
   
  double lh, previousLh, difference, epsilon;              
  bestlist *bestT, *bt;  
    
#ifdef _TERRACES
  /* store the 20 best trees found in a dedicated list */

  bestlist
    *terrace;
  
  /* output file names */

  char 
    terraceFileName[1024],
    buf[64];
#endif

  hashtable *h = (hashtable*)NULL;
  unsigned int **bitVectors = (unsigned int**)NULL;
  
 
  /* bit vectors and hash table are only needed for the convergence criterion */
  if(tr->searchConvergenceCriterion)
    {          
      bitVectors = initBitVector(tr, &vLength);
      h = initHashTable(tr->mxtips * 4);   
    }

  /* bestT: list with a single slot holding the best tree found so far */
  bestT = (bestlist *) rax_malloc(sizeof(bestlist));
  bestT->ninit = 0;
  initBestTree(bestT, 1, tr->mxtips);
      
  /* bt: list with 20 slots handed to treeOptimizeRapid() for candidate trees */
  bt = (bestlist *) rax_malloc(sizeof(bestlist));      
  bt->ninit = 0;
  initBestTree(bt, 20, tr->mxtips); 

#ifdef _TERRACES 
  /* initialize the tree list and the output file name for the current tree search/replicate */


  terrace = (bestlist *) rax_malloc(sizeof(bestlist));      
  terrace->ninit = 0;
  initBestTree(terrace, 20, tr->mxtips); 
  
  sprintf(buf, "%d", bCount);
  
  /* NOTE(review): the name is assembled with unbounded strcpy/strcat into a
     1024-byte buffer; workdir and run_id lengths are not checked here */
  strcpy(terraceFileName,         workdir);
  strcat(terraceFileName,         "RAxML_terrace.");
  strcat(terraceFileName,         run_id);
  strcat(terraceFileName,         ".BS.");
  strcat(terraceFileName,         buf);
  
  printf("%s\n", terraceFileName);
#endif

  initInfoList(50);
 
  difference = 10.0;
  /* minimum likelihood change for a candidate tree to count as an improvement */
  epsilon = 0.01;    
    
  /* global flag: 0 during the fast stage, set to 1 before the thorough stage */
  Thorough = 0;     
  
  /* initial evaluation of the starting tree, with or without model estimation */
  if(estimateModel)
    {
      if(adef->useBinaryModelFile)
	{
	  readBinaryModel(tr);
	  evaluateGenericInitrav(tr, tr->start);
	  treeEvaluate(tr, 2);
	}
      else
	{
	  evaluateGenericInitrav(tr, tr->start);
	  modOpt(tr, adef, FALSE, 10.0);
	}
    }
  else
    treeEvaluate(tr, 2);  


  printLog(tr, adef, FALSE); 

  saveBestTree(bestT, tr);
  
  /* rearrangement setting: determined automatically unless the caller supplied one */
  if(!adef->initialSet)   
    bestTrav = adef->bestTrav = determineRearrangementSetting(tr, adef, bestT, bt);                   
  else
    bestTrav = adef->bestTrav = adef->initial;

  if(estimateModel)
    {
      if(adef->useBinaryModelFile)	
	treeEvaluate(tr, 2);
      else
	{
	  evaluateGenericInitrav(tr, tr->start);
	  modOpt(tr, adef, FALSE, 5.0);
	}
    }
  else
    treeEvaluate(tr, 1);
  
  saveBestTree(bestT, tr); 
  impr = 1;
  if(tr->doCutoff)
    tr->itCount = 0;

 

  /* ---- fast stage: iterate while the previous cycle found an improvement ---- */
  while(impr)
    {              
      recallBestTree(bestT, 1, tr); 

      if(tr->searchConvergenceCriterion)
	{
	  int bCounter = 0;	      
	  
	  /* the hash table keeps two slots selected by cycle parity; from the
	     third cycle on, the slot about to be reused is cleaned first */
	  if(fastIterations > 1)
	    cleanupHashTable(h, (fastIterations % 2));		
	  
	  bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, fastIterations % 2, BIPARTITIONS_RF, (branchInfo *)NULL,
				  &bCounter, 1, FALSE, FALSE);	    
	  
	  assert(bCounter == tr->mxtips - 3);	    	   
	  
	  /* a comparison needs two cycles, so it is skipped in the first one */
	  if(fastIterations > 0)
	    {
	      double rrf = convergenceCriterion(h, tr->mxtips);
	      
	      if(rrf <= 0.01) /* 1% cutoff */
		{
		  printBothOpen("ML fast search converged at fast SPR cycle %d with stopping criterion\n", fastIterations);
		  printBothOpen("Relative Robinson-Foulds (RF) distance between respective best trees after one succseful SPR cycle: %f%s\n", rrf, "%");
		  /* clean both slots here because the jump skips the cleanup after the loop */
		  cleanupHashTable(h, 0);
		  cleanupHashTable(h, 1);
		  goto cleanup_fast;
		}
	      else		    
		printBothOpen("ML search convergence criterion fast cycle %d->%d Relative Robinson-Foulds %f\n", fastIterations - 1, fastIterations, rrf);
	    }
	}

	 
      fastIterations++;	


      treeEvaluate(tr, 1.0);
      
      
      saveBestTree(bestT, tr);           
      printLog(tr, adef, FALSE);         
      printResult(tr, adef, FALSE);    
      lh = previousLh = tr->likelihood;
   
     
      treeOptimizeRapid(tr, 1, bestTrav, adef, bt);   
      
      impr = 0;
	  
      /* re-evaluate every candidate in bt; keep the best one that beats lh
         and differs from the pre-search likelihood by more than epsilon */
      for(i = 1; i <= bt->nvalid; i++)
	{	    		  	   
	  recallBestTree(bt, i, tr);
	  
	  treeEvaluate(tr, 0.25);	    	 		      	 

	  difference = ((tr->likelihood > previousLh)? 
			tr->likelihood - previousLh: 
			previousLh - tr->likelihood); 	    
	  if(tr->likelihood > lh && difference > epsilon)
	    {
	      impr = 1;	       
	      lh = tr->likelihood;	       	     
	      saveBestTree(bestT, tr);
	    }	   	   
	}	
    }

 

  /* fast stage ended without convergence: reset both hash table slots */
  if(tr->searchConvergenceCriterion)
    {
      cleanupHashTable(h, 0);
      cleanupHashTable(h, 1);
    }

 cleanup_fast:

  /* ---- thorough stage ---- */
  Thorough = 1;
  impr = 1;
  
  recallBestTree(bestT, 1, tr); 
  if(estimateModel)
    {
      if(adef->useBinaryModelFile)	
	treeEvaluate(tr, 2);
      else
	{
	  evaluateGenericInitrav(tr, tr->start);
	  modOpt(tr, adef, FALSE, 1.0);
	}
    }
  else
    treeEvaluate(tr, 1.0);

  /* left only through "goto cleanup" (convergence or rearrangement limit) */
  while(1)
    {	
      recallBestTree(bestT, 1, tr);    
      if(impr)
	{	    
	  /* improvement in the previous cycle: restart with the smallest range */
	  printResult(tr, adef, FALSE);
	  rearrangementsMin = 1;
	  rearrangementsMax = adef->stepwidth;	

	 

	  if(tr->searchConvergenceCriterion)
	    {
	      int bCounter = 0;	      

	      if(thoroughIterations > 1)
		cleanupHashTable(h, (thoroughIterations % 2));		
		
	      bitVectorInitravSpecial(bitVectors, tr->nodep[1]->back, tr->mxtips, vLength, h, thoroughIterations % 2, BIPARTITIONS_RF, (branchInfo *)NULL,
				      &bCounter, 1, FALSE, FALSE);	    
	      
	      assert(bCounter == tr->mxtips - 3);	    	   
	      
	      if(thoroughIterations > 0)
		{
		  double rrf = convergenceCriterion(h, tr->mxtips);
		  
		  if(rrf <= 0.01) /* 1% cutoff */
		    {
		      printBothOpen("ML search converged at thorough SPR cycle %d with stopping criterion\n", thoroughIterations);
		      printBothOpen("Relative Robinson-Foulds (RF) distance between respective best trees after one succseful SPR cycle: %f%s\n", rrf, "%");
		      goto cleanup;
		    }
		  else		    
		    printBothOpen("ML search convergence criterion thorough cycle %d->%d Relative Robinson-Foulds %f\n", thoroughIterations - 1, thoroughIterations, rrf);
		}
	    }

	 
	   	  
	  thoroughIterations++;	  
	}			  			
      else
	{		       	   
	  /* no improvement: shift the range up by one step, stop past the maximum */
	  rearrangementsMax += adef->stepwidth;
	  rearrangementsMin += adef->stepwidth; 	        	      
	  if(rearrangementsMax > adef->max_rearrange)	     	     	 
	    goto cleanup; 	   
	}
      treeEvaluate(tr, 1.0);
     
      previousLh = lh = tr->likelihood;	      
      saveBestTree(bestT, tr); 
      
      printLog(tr, adef, FALSE);
      treeOptimizeRapid(tr, rearrangementsMin, rearrangementsMax, adef, bt);
	
      impr = 0;			      		            

      /* same candidate check as in the fast stage */
      for(i = 1; i <= bt->nvalid; i++)
	{		 
	  recallBestTree(bt, i, tr);	 	    	    	
	  
	  treeEvaluate(tr, 0.25);	    	 
	
#ifdef _TERRACES
	  /* save all 20 best trees in the terrace tree list */
	  saveBestTree(terrace, tr);
#endif

	  difference = ((tr->likelihood > previousLh)? 
			tr->likelihood - previousLh: 
			previousLh - tr->likelihood); 	    
	  if(tr->likelihood > lh && difference > epsilon)
	    {
	      impr = 1;	       
	      lh = tr->likelihood;	  	     
	      saveBestTree(bestT, tr);
	    }	   	   
	}  

                      
    }

 cleanup: 

#ifdef _TERRACES
  {
    double
      bestLH = tr->likelihood;
    FILE 
      *f = myfopen(terraceFileName, "w");
    
    /* print out likelihood of best tree found */

    printf("best tree: %f\n", tr->likelihood);

    /* print out likelihoods of 20 best trees found during the tree search */

    /* NOTE(review): each recallBestTree() below overwrites tr, so the final
       printLog/printResult calls see the last terrace tree recalled here -
       confirm this is intended */
    for(i = 1; i <= terrace->nvalid; i++)
      {
	recallBestTree(terrace, i, tr);
	
	/* if the likelihood scores are smaller than some epsilon 0.000001
	   print the tree to file */
	   
	if(ABS(bestLH - tr->likelihood) < 0.000001)
	  {
	    printf("%d %f\n", i, tr->likelihood);
	    Tree2String(tr->tree_string, tr, tr->start->back, FALSE, TRUE, FALSE, FALSE, FALSE, adef, NO_BRANCHES, FALSE, FALSE, FALSE, FALSE);
	    
	    fprintf(f, "%s\n", tr->tree_string); 
	  }
      }

    fclose(f);
    /* increment tree search counter */
    bCount++;
  }
#endif
 
  
  /* release everything allocated at the top of the function */
  if(tr->searchConvergenceCriterion)
    {
      freeBitVectors(bitVectors, 2 * tr->mxtips);
      rax_free(bitVectors);
      freeHashTable(h);
      rax_free(h);
    }
  
  freeBestTree(bestT);
  rax_free(bestT);
  freeBestTree(bt);
  rax_free(bt);

#ifdef _TERRACES
  /* free terrace tree list */

  freeBestTree(terrace);
  rax_free(terrace);
#endif

  freeInfoList();  
  printLog(tr, adef, FALSE);
  printResult(tr, adef, FALSE);
}
Exemplo n.º 4
0
/*
 * Entry point of the indexer.
 *
 * Usage:
 *   ./indexer [DIRECTORY] [FILENAME]                  build and save the index
 *   ./indexer [DIRECTORY] [FILENAME 1] [FILENAME 2]   additionally reload the
 *       saved index from FILENAME 1 and write it again to FILENAME 2
 *
 * Progress messages go to the file "IndexerLogFile" through the
 * file-level logfile handle; errors also go to stderr.
 *
 * Returns 0 on success and -1 on any error.
 */
int main(int argc, char *argv[]) {

	//Check validity of parameters
	if (argc != 3 && argc != 4) {
		fprintf(stderr, "Usage Error; This program has two methods of use as follows:\n");
		fprintf(stderr, "./indexer [DIRECTORY] [FILENAME]\n");
		fprintf(stderr, "./indexer [DIRECTORY] [FILENAME 1] [FILENAME 2]\n");
		return -1;
	}

	//Check that the directory exists and is readable
	DIR *directory = opendir(argv[1]);
	if (directory == NULL) {
		//save errno right away: later library calls may overwrite it
		int open_error = errno;

		if (open_error == EACCES)
			fprintf(stderr, "Error: Directory has incompatible permissions for this program\n");
		else if (open_error == ENOENT)
			fprintf(stderr, "Error: Directory does not exist\n");
		else if (open_error == ENOTDIR)
			fprintf(stderr, "Error: Given directory is not a directory\n");
		else
			fprintf(stderr, "Error: Incompatible directory.\n");

		//nothing to close here: opendir() failed, so there is no handle
		return -1;
	}
	closedir(directory);

	//every later step writes to the log, so it must be open before going on
	logfile = fopen("IndexerLogFile", "w");
	if (logfile == NULL) {
		fprintf(stderr, "Error: Unable to open the log file IndexerLogFile\n");
		return -1;
	}

	HashTable *inverted_index = initializeHashTable();
	if (inverted_index == NULL) {
		fprintf(logfile, "Error: Unable to allocate the index\n");
		fprintf(stderr, "Error: Unable to allocate the index\n");
		fclose(logfile);
		return -1;
	}
	fprintf(logfile, "Building the Index\n");

	//private copy of the directory name handed to the index builder
	char *dir_name = malloc(strlen(argv[1]) + 1);
	if (dir_name == NULL) {
		fprintf(logfile, "Error: Out of memory\n");
		fprintf(stderr, "Error: Out of memory\n");
		cleanupHashTable(inverted_index);
		fclose(logfile);
		return -1;
	}
	strcpy(dir_name, argv[1]);

	if (buildIndexFromDirectory(dir_name, inverted_index) != 1) {
		fprintf(logfile, "Error: Unable to build index\n");
		fprintf(stderr, "Error: Unable to build index\n");
		//NOTE(review): inverted_index is not released on this path, as
		//in the original; confirm whether buildIndexFromDirectory
		//frees it on failure before adding a cleanup call here
		free(dir_name);
		fclose(logfile);
		return -1;
	}
	fprintf(logfile, "Index Built!\n");

	if (saveFile(argv[2], inverted_index) != 1){
		fprintf(stderr, "Unable to save the index into %s\n", argv[2]);
		cleanupHashTable(inverted_index);
		free(dir_name);
		fclose(logfile);
		return -1;
	}

	fprintf(logfile, "Index written to %s\n", argv[2]);

	cleanupHashTable(inverted_index);
	free(dir_name);

	//if in testing mode
	if (argc == 4) {
		fprintf(logfile, "Testing the index\n");

		HashTable *reindex = readFile(argv[2]);
		if (reindex == NULL) {
			fprintf(logfile, "Error: Unable to reload the index from %s\n", argv[2]);
			fprintf(stderr, "Error: Unable to reload the index from %s\n", argv[2]);
			fclose(logfile);
			return -1;
		}

		//same success convention as the first saveFile call above
		if (saveFile(argv[3], reindex) != 1) {
			fprintf(stderr, "Unable to save the index into %s\n", argv[3]);
			cleanupHashTable(reindex);
			fclose(logfile);
			return -1;
		}

		fprintf(logfile, "Finished Testing\n");
		cleanupHashTable(reindex);
	}
	fclose(logfile);
	return 0;
}