Ejemplo n.º 1
0
/*
 * Lua C function: releases the automaton stored in a userdata.
 *
 * Argument 1 must be a userdata tagged with AHO_METATABLE_KEY
 * (luaL_checkudata raises a Lua error otherwise). The userdata is read as a
 * slot holding an AC_AUTOMATA_t pointer, and that automaton is handed to
 * ac_automata_release(). Pushes no results onto the Lua stack.
 */
static int lahocorasick_release ( lua_State* L ) {
	AC_AUTOMATA_t** slot = (AC_AUTOMATA_t**)luaL_checkudata ( L , 1 , AHO_METATABLE_KEY );
	AC_AUTOMATA_t* automata = *slot;

	ac_automata_release ( automata );
	return 0;
}
/*
 * Entry point: builds an automaton from sample_patterns, then reads the input
 * file (argv[1]) line by line and searches every line for the patterns.
 *
 * Each line is searched as "_<line>_": a leading '_' is prepended,
 * replace_space_with_underscore() is applied, and the trailing newline is
 * turned into '_' (or a '_' is appended when the line has no newline).
 * After every line a ";~~~~~~~~~~" separator is written both to stdout and to
 * the output file (argv[2]), which is opened into the file-level stream fp1.
 * match_handler is the callback handed to ac_automata_search(); my_param is
 * forwarded to it.
 *
 * Uses the file-level buffers input_line and buffer; both are assumed to be
 * real arrays (sizeof is used to obtain their capacity).
 *
 * Returns 0 on success, 1 on a usage error or when a file cannot be
 * opened/closed.
 */
int main (int argc, char ** argv)
{
    unsigned int i;
    int exit_code = 0;
    struct parameter my_param;
    // we use this struct to send/receive input/output parameters to/from automata
    my_param.position = 250;    // input: end position; change it to 1000 and see what happens
    my_param.match_count = 0;   // output:

    if (argc < 3)
    {
        fprintf (stderr, "Usage: %s <input-file> <output-file>\n",
                 (argc > 0) ? argv[0] : "program");
        return 1;
    }

    // open the input first so a bad input path does not truncate the output file
    FILE *fp = fopen (argv[1], "r");
    if (fp == NULL)
    {
        printf ("File Couldn't Open\n");
        return 1;
    }
    fp1 = fopen (argv[2], "w");
    if (fp1 == NULL)
    {
        printf ("File Couldn't Open\n");
        fclose (fp);
        return 1;
    }

    AC_TEXT_t input_text;
    AC_AUTOMATA_t * atm = ac_automata_init ();

    for (i=0; i<PATTERN_COUNT; i++)
    {
        AC_STATUS_t status;
        sample_patterns[i].length = strlen (sample_patterns[i].astring);
        status = ac_automata_add (atm, &sample_patterns[i]);
        switch (status)
        {
            case ACERR_LONG_PATTERN:
                printf ("Add pattern failed: ACERR_LONG_PATTERN: %s\n", sample_patterns[i].astring);
                break;
            case ACERR_ZERO_PATTERN:
                printf ("Add pattern failed: ACERR_ZERO_PATTERN: %s\n", sample_patterns[i].astring);
                break;
            case ACERR_AUTOMATA_CLOSED:
                printf ("Add pattern failed: ACERR_AUTOMATA_CLOSED: %s\n", sample_patterns[i].astring);
                break;
            case ACERR_DUPLICATE_PATTERN:   // duplicates are skipped silently
            case ACERR_SUCCESS:
            default:
                break;
        }
    }

    // after finalizing no more patterns can be added
    ac_automata_finalize (atm);

    // getline() requires *lineptr to be NULL (or a malloc'ed buffer) on entry
    char *line = NULL;
    size_t len = 0;

    // capacity left for the line itself: leading '_', trailing '_' and NUL are reserved
    const size_t max_text = sizeof(input_line) - 3;

    input_line[0] = '_';
    while (getline (&line, &len, fp) != -1)
    {
        // bounded copy of the line behind the leading '_'
        size_t line_len = strlen (line);
        if (line_len > max_text)
        {
            fprintf (stderr, "Warning: input line truncated to %lu bytes\n",
                     (unsigned long) max_text);
            line_len = max_text;
        }
        memcpy (input_line + 1, line, line_len);
        input_line[1 + line_len] = '\0';

        replace_space_with_underscore (input_line);

        // close the line with '_'. `len` is getline's buffer capacity, not the
        // line length, so the position is derived from the string itself.
        size_t used = strlen (input_line);
        if (used > 0 && input_line[used - 1] == '\n')
        {
            input_line[used - 1] = '_';
        }
        else if (used + 1 < sizeof(input_line))
        {
            input_line[used] = '_';
            input_line[used + 1] = '\0';
        }

        // search only the current line, chunk by chunk; going up to
        // sizeof(input_line) would also scan leftovers of earlier, longer lines
        char * chunk_start = input_line;
        char * end_of_line = input_line + strlen (input_line);
        input_text.astring = buffer;

        while (chunk_start < end_of_line)
        {
            size_t remaining = (size_t) (end_of_line - chunk_start);
            size_t chunk_len = (remaining < sizeof(buffer)) ? remaining : sizeof(buffer);

            memcpy (buffer, chunk_start, chunk_len);
            input_text.length = chunk_len;

            // NOTE(review): keep is 0 for every chunk, so a pattern that straddles
            // a chunk boundary is not found. Passing 1 for follow-up chunks would
            // change the positions match_handler sees - confirm before changing.
            if (ac_automata_search (atm, &input_text, 0, match_handler, (void *)(&my_param)))
                // if the search stopped in the middle (returned 1) we should break the loop
                break;

            chunk_start += chunk_len;
        }

        printf (";~~~~~~~~~~\n");
        fprintf (fp1, ";~~~~~~~~~~\n");
    }

    // TODO: do the same search with settext/findnext interface

    ac_automata_release (atm);

    // NOTE(review): `line` is owned by getline() and should be free()d here;
    // that needs <stdlib.h>, whose inclusion cannot be confirmed from this block.

    fclose (fp);
    if (fclose (fp1) != 0)
    {
        fprintf (stderr, "Failed to close output file\n");
        exit_code = 1;
    }
    fp1 = NULL;

    return exit_code;
}
Ejemplo n.º 3
0
/*
 * Entry point of the basic example: builds an automaton from sample_patterns,
 * finalizes it, runs three searches (input_text1, input_text2, input_text3)
 * with match_handler as the callback, and releases the automaton.
 * Always returns 0.
 */
int main (int argc, char ** argv)
{
    /* AC working objects: the automaton plus one reusable pattern and text. */
    AC_AUTOMATA_t   *automaton = ac_automata_init ();
    AC_PATTERN_t    pattern;
    AC_TEXT_t       text;
    unsigned int    idx = 0;

    /* Feed every sample pattern to the automaton. */
    while (idx < PATTERN_NUMBER)
    {
        pattern.astring = sample_patterns[idx];
        pattern.length = strlen (pattern.astring);
        pattern.rep.number = idx + 1;   /* optional representative: 1-based index */
        ac_automata_add (automaton, &pattern);
        idx++;
    }

    /*
     * Finalizing is mandatory once all patterns are in; afterwards no more
     * patterns can be added.
     * (ac_automata_display (automaton, 'n') could be called here to dump the
     * automaton; 'n' prints the representative as a number, 's' as a string.)
     */
    ac_automata_finalize (automaton);

    /*
     * First search. The last argument of ac_automata_search is a (void *)
     * that is forwarded to the callback; this example passes nothing.
     * A typical practice is to pass a struct holding the callback's input
     * and output variables.
     */
    printf ("Searching: \"%s\"\n", input_text1);
    text.astring = input_text1;
    text.length = strlen (text.astring);
    ac_automata_search (automaton, &text, 0, match_handler, 0);

    /* Second, independent search. */
    printf ("Searching: \"%s\"\n", input_text2);
    text.astring = input_text2;
    text.length = strlen (text.astring);
    ac_automata_search (automaton, &text, 0, match_handler, 0);

    /*
     * Third search with the keep option (3rd argument) set: the automaton
     * treats this text as the next chunk of the previous one. Try 0 and 1
     * and compare the results to see the difference.
     */
    printf ("Searching: \"%s\" with \'keep\' enabled\n", input_text3);
    text.astring = input_text3;
    text.length = strlen (text.astring);
    ac_automata_search (automaton, &text, 1, match_handler, 0);

    /* The automaton must be released once it is no longer needed. */
    ac_automata_release (automaton);

    return 0;
}
/*
 * Copies `line` into `dest` behind a leading '_', never writing more than
 * `capacity` bytes and always leaving one spare byte for a trailing '_'.
 * Returns 1 when the line had to be truncated, 0 otherwise.
 */
static int copy_line_with_prefix (char *dest, size_t capacity, const char *line)
{
	size_t line_len = strlen (line);
	int truncated = 0;

	if (capacity < 3)
	{
		/* no room for '_' + text + '_' + NUL */
		if (capacity > 0)
			dest[0] = '\0';
		return 1;
	}
	if (line_len > capacity - 3)
	{
		line_len = capacity - 3;
		truncated = 1;
	}
	dest[0] = '_';
	memcpy (dest + 1, line, line_len);
	dest[1 + line_len] = '\0';
	return truncated;
}

/*
 * Ends `text` with a '_' delimiter: a trailing newline is replaced by '_',
 * otherwise a '_' is appended when `capacity` allows it.
 */
static void end_line_with_underscore (char *text, size_t capacity)
{
	size_t used = strlen (text);

	if (used > 0 && text[used - 1] == '\n')
	{
		text[used - 1] = '_';
	}
	else if (used + 1 < capacity)
	{
		text[used] = '_';
		text[used + 1] = '\0';
	}
}

/*
 * Searches the NUL-terminated string `start` in pieces of at most
 * sizeof(buffer) bytes, staging each piece in the file-level `buffer`.
 * Returns 1 as soon as ac_automata_search() returns non-zero (search
 * stopped), 0 when the whole string was consumed.
 */
static int search_in_chunks (AC_AUTOMATA_t *automata, AC_TEXT_t *text,
                             const char *start, void *param)
{
	const char *end = start + strlen (start);

	text->astring = buffer;
	while (start < end)
	{
		size_t remaining = (size_t) (end - start);
		size_t chunk_len = (remaining < sizeof(buffer)) ? remaining : sizeof(buffer);

		//*** 6. Set input text
		memcpy (buffer, start, chunk_len);
		text->length = chunk_len;

		//*** 7. Do search
		if (ac_automata_search (automata, text, param))
			return 1;

		start += chunk_len;
	}
	return 0;
}

/*
 * Entry point: builds an automaton from allpattern, then reads the file named
 * by argv[1] line by line. Every line is searched twice - once as read and
 * once lowercased - each time as "_<line>_" with
 * replace_space_with_underscore() applied. The automaton is reset after each
 * search and a ";~~~~~~~~~~" separator is printed after every line.
 *
 * Uses the file-level buffers input_line and buffer; both are assumed to be
 * real arrays (sizeof is used to obtain their capacity).
 *
 * Returns 0 on success, 1 on a usage error or when the file cannot be opened.
 */
int main (int argc, char ** argv)
{
	//*** 2. Define AC variables
	AC_AUTOMATA_t * acap;

	AC_TEXT_t input_text = {0, 0};

	#define PATTERN_NUMBER (sizeof(allpattern)/sizeof(AC_PATTERN_t))

	unsigned int i;

	/* Sending parameter to call-back function */
	struct sample_param my_param;
	my_param.anum = 1;
	my_param.achar = 'a'; /* 'a': find all, 'f': find first */

	if (argc < 2)
	{
		fprintf (stderr, "Usage: %s <input-file>\n", (argc > 0) ? argv[0] : "program");
		return 1;
	}

	FILE *fp = fopen (argv[1], "r");
	if (fp == NULL)
	{
		printf ("File Couldn't Open\n");
		return 1;
	}

	//*** 3. Get a new automata
	acap = ac_automata_init (match_handler);

	//*** 4. add patterns to automata
	for (i=0; i<PATTERN_NUMBER; i++)
	{
		allpattern[i].length = strlen (allpattern[i].astring);
		ac_automata_add (acap, &allpattern[i]);
	}

	//*** 5. Finalize automata.
	ac_automata_finalize (acap);

	//*** 5.1 Display automata
	//ac_automata_display (acap, 's');

	/* getline() requires *lineptr to be NULL (or a malloc'ed buffer) on entry */
	char *line = NULL;
	size_t len = 0;
	size_t k;
	char tolower_input_line[10000];

	/* the lowercased text is copied into input_line later, so it must fit both */
	const size_t lower_cap = (sizeof(tolower_input_line) < sizeof(input_line))
	                         ? sizeof(tolower_input_line) : sizeof(input_line);

	while (getline (&line, &len, fp) != -1)
	{
		/* original-case text: "_<line>_" */
		if (copy_line_with_prefix (input_line, sizeof(input_line), line))
			fprintf (stderr, "Warning: input line truncated\n");
		replace_space_with_underscore (input_line);
		end_line_with_underscore (input_line, sizeof(input_line));

		/* lowercased text; tolower() needs an unsigned char value to stay defined */
		copy_line_with_prefix (tolower_input_line, lower_cap, line);
		for (k = 0; tolower_input_line[k] != '\0'; k++)
			tolower_input_line[k] = (char) tolower ((unsigned char) tolower_input_line[k]);
		replace_space_with_underscore (tolower_input_line);
		end_line_with_underscore (tolower_input_line, lower_cap);

		/* first pass: the line as read; a stopped search just ends this pass */
		(void) search_in_chunks (acap, &input_text, input_line, (void *)(&my_param));

		//*** 8. Reset
		ac_automata_reset (acap);

		/* second pass: the lowercased line, staged in input_line as before.
		 * The copy fits because the text was built within lower_cap bytes. */
		strcpy (input_line, tolower_input_line);
		(void) search_in_chunks (acap, &input_text, input_line, (void *)(&my_param));

		ac_automata_reset (acap);
		printf (";~~~~~~~~~~\n");
	}

	//*** 9. Release automata
	ac_automata_release (acap);

	/* NOTE(review): `line` is owned by getline() and should be free()d here;
	 * that needs <stdlib.h>, whose inclusion cannot be confirmed from this block. */
	fclose (fp);
	return 0;
}