예제 #1
0
/*
 * Lua C function: run an Aho-Corasick search over a text.
 *
 * Lua stack arguments:
 *   1: automata userdata (checked against AHO_METATABLE_KEY)
 *   2: text to search (string; a Lua error is raised otherwise)
 *   3: optional callback function; when absent or nil, lahocorasick_cb_stop
 *      is used as the match callback instead of lahocorasick_cb
 *   4: optional "keep" flag, forwarded as the third argument of
 *      ac_automata_search()
 *
 * Pushes one boolean: false when the text was searched to the end, true when
 * the callback stopped the search early. Raises a Lua error when the automata
 * is not finalized or the search returns an unexpected code.
 */
static int lahocorasick_search ( lua_State* L ) {
	AC_AUTOMATA_t* m = *(AC_AUTOMATA_t**)luaL_checkudata ( L , 1 , AHO_METATABLE_KEY );
	AC_TEXT_t txt;
	size_t len;
	int keep;
	AC_MATCH_CALBACK_f callback;
	/* lua_tolstring() returns NULL for a missing or non-string argument, which
	 * would hand a NULL text pointer to the automata; luaL_checklstring()
	 * raises a proper argument error instead. */
	txt.astring = luaL_checklstring(L, 2, &len);
	txt.length = len;
	if (lua_isnoneornil(L, 3)) {
		callback = &lahocorasick_cb_stop;
	} else {
		luaL_checktype(L, 3, LUA_TFUNCTION);
		callback = &lahocorasick_cb;
	}
	/* Read the flag before the stack is trimmed below. */
	keep = lua_toboolean(L, 4);
	/* Trim/pad the stack to exactly three slots so index 3 holds the callback
	 * (or nil); presumably the C match callback relies on that layout, since
	 * L itself is passed as the callback parameter. */
	lua_settop ( L , 3 );
	switch (ac_automata_search(m, &txt, keep, callback, (void*)L)) {
		case -1: /* failed; automata is not finalized */
			return luaL_error(L, "automata is not finalized");
		case 0: /* success; input text was searched to the end */
			lua_pushboolean(L, 0);
			return 1;
		case 1: /* success; input text was searched partially. (callback broke the loop) */
			lua_pushboolean(L, 1);
			return 1;
		default:
			return luaL_error(L, "unknown error");
	}
}
예제 #2
0
/*
 * Lua C function: run an Aho-Corasick search over a text (3-argument
 * ac_automata_search() variant).
 *
 * Lua stack arguments:
 *   1: automata userdata (checked against AHO_METATABLE_KEY)
 *   2: text to search
 *   3: left on the stack for the match callback (the stack is trimmed/padded
 *      to three slots before searching)
 *
 * Pushes one number: the return value of ac_automata_search().
 */
static int lahocorasick_search ( lua_State* L ) {
	AC_AUTOMATA_t* m = *(AC_AUTOMATA_t**)luaL_checkudata ( L , 1 , AHO_METATABLE_KEY );
	AC_TEXT_t txt;
	size_t len = 0;
	/* lua_tolstring() writes a size_t through its last argument. Go through a
	 * size_t temporary rather than &txt.length, so a narrower length field
	 * cannot be overrun. */
	txt.astring = lua_tolstring ( L , 2 , &len );
	txt.length = len;
	lua_settop ( L , 3 );
	lua_pushnumber ( L , ac_automata_search ( m , &txt , (void*)L ) );
	return 1;
}
예제 #3
0
/*
 * Match a host-name-like string against the known host patterns.
 *
 * Side effect: the tail of the string (everything after the second dot
 * counted from the end, or the whole string when it has fewer than two dots
 * past index 0) is stored, NUL-terminated, in flow->l4.tcp.host_server_name.
 *
 * With AHOCORASICK defined the lookup goes through the ac_automa automaton,
 * which reports its result in matching_protocol_id; otherwise host_match[] is
 * scanned linearly with ndpi_strnstr().
 *
 * @param ndpi_struct          unused here
 * @param flow                 flow whose packet/host name fields are updated
 * @param string_to_match      text to classify (need not be NUL-terminated)
 * @param string_to_match_len  number of valid bytes in string_to_match
 * @return the matched protocol id (also stored in
 *         packet->detected_protocol_stack[0]), or -1 when nothing matched
 */
static int matchStringProtocol(struct ndpi_detection_module_struct *ndpi_struct, 
			struct ndpi_flow_struct *flow, 
			char *string_to_match, 
			u_int string_to_match_len) {
  /* Start at the last valid index; an empty input starts (and stays) at 0
     instead of at -1, which would read before the buffer. */
  int end = (string_to_match_len > 0) ? (int)(string_to_match_len - 1) : 0;
  int num_found = 0;
  size_t copy_len;
  struct ndpi_packet_struct *packet = &flow->packet;
#ifdef AHOCORASICK
  AC_TEXT_t ac_input_text;
#else
  int i;
#endif

  /* Walk backwards to the second dot from the end. */
  while(end > 0) {
    if(string_to_match[end] == '.') {
      num_found++;
      if(num_found == 2) {
	end++; /* skip the dot itself */
	break;
      }
    }
    end--;
  }

  copy_len = ndpi_min(sizeof(flow->l4.tcp.host_server_name)-1, string_to_match_len-end);
  strncpy(flow->l4.tcp.host_server_name, 
	  &string_to_match[end], 
	  copy_len);
  /* strncpy() does not terminate the destination when it copies exactly
     copy_len bytes. */
  flow->l4.tcp.host_server_name[copy_len] = '\0';

#ifdef AHOCORASICK
  matching_protocol_id = -1;

  ac_input_text.astring = string_to_match;
  ac_input_text.length = string_to_match_len;
  ac_automata_search (ac_automa, &ac_input_text, 0);
  
  ac_automata_reset(ac_automa);

  if (matching_protocol_id != -1) {
    packet->detected_protocol_stack[0] = matching_protocol_id;
    return(packet->detected_protocol_stack[0]);
  }
#else
  /* The for statement already advances i; the former extra "else i++"
     skipped every other table entry. */
  for (i = 0; i < host_match_num_items; i++) {
    if(ndpi_strnstr(string_to_match, 
		    host_match[i].string_to_match, 
		    string_to_match_len) != NULL) {
      packet->detected_protocol_stack[0] = host_match[i].protocol_id;
      return(packet->detected_protocol_stack[0]);
    }
  }
#endif

#ifdef DEBUG
  /* Print with an explicit length instead of writing a terminator one byte
     past the caller's data. */
  printf("[NTOP] Unable to find a match for '%.*s'\n",
	 (int)string_to_match_len, string_to_match);
#endif

  return(-1);
}
예제 #4
0
/*
 * Search one input file with the given automata, reading it chunk by chunk.
 *
 * The file is read in STREAM_BUFFER_SIZE pieces; each piece is lower-cased
 * first when configuration.insensitive is set and then passed to
 * ac_automata_search() with match_handler as the callback. The first chunk is
 * searched with keep = 0 and every following chunk with keep = 1, so the
 * automata treats the chunks as one continuous text.
 *
 * @param filename  path of the file to search; also stored in the match
 *                  parameters unless input comes from descriptor 0
 * @param paca      automata to search with
 * @return 0 on success (including when the callback stopped the search),
 *         -1 when the file cannot be opened or a read fails
 */
int search_file (const char * filename, AC_AUTOMATA_t * paca)
{
    #define STREAM_BUFFER_SIZE 4096
    int fd_input; // Input file descriptor
    static AC_TEXT_t intext; // input text
    static AC_ALPHABET_t in_stream_buffer[STREAM_BUFFER_SIZE];
    static struct match_param mparm; // Match parameters
    long num_read; // Number of bytes read from input file
    int keep = 0;   // 0 for the first chunk, 1 for continuation chunks
    int status = 0;

    intext.astring = in_stream_buffer;

    // Open input file
    // NOTE(review): the stdin test looks at configuration.input_files[0], not
    // at 'filename' -- confirm this is intended when several files are given.
    if (!strcmp(configuration.input_files[0], "-"))
    {
        fd_input = 0; // read from stdin
    }
    else if ((fd_input = open(filename, O_RDONLY|O_NONBLOCK))==-1)
    {
        fprintf(stderr, "Cannot read from input file '%s'\n", filename);
        return -1;
    }

    // Reset the parameter
    mparm.item = 0;
    mparm.total_match = 0;
    mparm.fname = fd_input?(char *)filename:NULL;

    // loop to load and search the input file repeatedly, chunk by chunk
    for (;;)
    {
        // Read a chunk from input file
        num_read = read (fd_input, (void *)in_stream_buffer, STREAM_BUFFER_SIZE);

        // A failed read must not be used as a text length
        if (num_read < 0)
        {
            fprintf(stderr, "Cannot read from input file '%s'\n", filename);
            status = -1;
            break;
        }

        // Only a zero-length read means end of input; pipes and terminals
        // may legitimately return fewer bytes than requested before that
        if (num_read == 0)
            break;

        intext.length = num_read;

        // Handle case sensitivity
        if (configuration.insensitive)
            lower_case(in_stream_buffer, intext.length);

        // Break loop if call-back function has done its work
        if (ac_automata_search (paca, &intext, keep, match_handler, &mparm))
            break;
        keep = 1;
    }

    // Leave standard input open; only close what was opened here
    if (fd_input != 0)
        close (fd_input);

    return status;
}
예제 #5
0
/*
 * Minimal walk-through of the automata API: build an automata from
 * sample_patterns, finalize it, run three searches and release it.
 */
int main (int argc, char ** argv)
{
    /* Working objects: the automata plus one reusable pattern and text. */
    AC_AUTOMATA_t   *automata = ac_automata_init ();
    AC_PATTERN_t    pattern;
    AC_TEXT_t       text;
    unsigned int    idx = 0;

    /* Register every sample pattern. The numeric representative is
     * optional; here it is simply the 1-based pattern index. */
    while (idx < PATTERN_NUMBER)
    {
        pattern.astring = sample_patterns[idx];
        pattern.length = strlen (pattern.astring);
        pattern.rep.number = idx + 1;
        ac_automata_add (automata, &pattern);
        idx++;
    }

    /* Finalizing is mandatory once all patterns are added; no pattern
     * can be added afterwards. */
    ac_automata_finalize (automata);

    /* Optional: ac_automata_display (automata, 'n');
     * The second argument selects how the pattern representative is
     * shown: 'n' as number, 's' as string. 'n' fits here because the
     * integer member of the union (rep.number) was filled in above. */

    /* First search. The last argument of ac_automata_search() is a
     * (void *) handed unchanged to the callback; nothing is passed in
     * this example. A common practice is to pass a struct bundling the
     * callback's input and output variables. */
    printf ("Searching: \"%s\"\n", input_text1);
    text.astring = input_text1;
    text.length = strlen (text.astring);
    ac_automata_search (automata, &text, 0, match_handler, 0);

    /* Second, independent search. */
    printf ("Searching: \"%s\"\n", input_text2);
    text.astring = input_text2;
    text.length = strlen (text.astring);
    ac_automata_search (automata, &text, 0, match_handler, 0);

    /* Third search with the keep option (3rd argument) set: the automata
     * then treats this text as the next chunk of the previous one. Try
     * both 0 and 1 and compare the results to see the difference. */
    printf ("Searching: \"%s\" with \'keep\' enabled\n", input_text3);
    text.astring = input_text3;
    text.length = strlen (text.astring);
    ac_automata_search (automata, &text, 1, match_handler, 0);

    /* Always release the automata when done with it. */
    ac_automata_release (automata);

    return 0;
}
/*
 * Builds an automata from sample_patterns, then reads the text file argv[1]
 * line by line. Each line is wrapped in '_' delimiters, passed through
 * replace_space_with_underscore() and searched in buffer-sized chunks with
 * match_handler as the callback. A ";~~~~~~~~~~" separator is written to
 * stdout and to the output file argv[2] after every line.
 *
 * Returns 0 on success, 1 when arguments are missing or a file cannot be
 * opened.
 */
int main (int argc, char ** argv)
{
    unsigned int i;
    struct parameter my_param;
    // we use this struct to send/receive input/output parameters to/from automata
    my_param.position = 250;    // input: end position; change it to 1000 and see what happens
    my_param.match_count = 0;   // output:

    if (argc < 3)
    {
        fprintf (stderr, "Usage: <program> <input-file> <output-file>\n");
        return 1;
    }

    AC_TEXT_t input_text;
    AC_AUTOMATA_t * atm = ac_automata_init ();

    for (i=0; i<PATTERN_COUNT; i++)
    {
        AC_STATUS_t status;
        sample_patterns[i].length = strlen (sample_patterns[i].astring);
        status = ac_automata_add (atm, &sample_patterns[i]);
        switch (status)
        {
            case ACERR_DUPLICATE_PATTERN:
                // duplicates are silently ignored
                break;
            case ACERR_LONG_PATTERN:
                printf ("Add pattern failed: ACERR_LONG_PATTERN: %s\n", sample_patterns[i].astring);
                break;
            case ACERR_ZERO_PATTERN:
                printf ("Add pattern failed: ACERR_ZERO_PATTERN: %s\n", sample_patterns[i].astring);
                break;
            case ACERR_AUTOMATA_CLOSED:
                printf ("Add pattern failed: ACERR_AUTOMATA_CLOSED: %s\n", sample_patterns[i].astring);
                break;
            case ACERR_SUCCESS:
                break;
        }
    }

    ac_automata_finalize (atm);

    // here we illustrate how to search a big text chunk by chunk.
    // the chunk buffer is smaller than a typical input line, so the
    // search must be done inside a loop that runs until the whole
    // line has been consumed.

    FILE *fp;
    char *line = NULL;  // getline() requires NULL (or a malloc'ed buffer) on first use
    size_t len = 0;     // capacity of 'line' as maintained by getline(), NOT the line length

    fp = fopen(argv[1], "r");
    fp1 = fopen(argv[2], "w");
    if (fp == NULL || fp1 == NULL)
    {
        printf("File Couldn't Open\n");
        if (fp != NULL) fclose(fp);
        if (fp1 != NULL) fclose(fp1);
        ac_automata_release (atm);
        return 1;
    }

    input_line[0]='_';
    while (getline(&line, &len, fp) != -1)
    {
        size_t text_len;
        size_t offset;

        // Bounded copy behind the leading '_'; two bytes stay reserved for the
        // trailing '_' and the terminator (assumes input_line is a char array,
        // as the sizeof-based arithmetic has always done).
        snprintf(input_line+1, sizeof(input_line)-2, "%s", line);

        //calling function replace_space_with_underscore()
        replace_space_with_underscore(input_line);

        // Close the line with a '_' delimiter at its real end. The previous
        // code indexed with getline's buffer capacity, which is unrelated to
        // where the text ends.
        text_len = strlen(input_line);
        if (text_len > 0 && input_line[text_len-1] == '\n')
        {
            input_line[text_len-1] = '_';
        }
        else if ((text_len == 0 || input_line[text_len-1] != '_')
                 && text_len + 1 < sizeof(input_line))
        {
            input_line[text_len] = '_';
            input_line[text_len+1] = '\0';
            text_len++;
        }

        input_text.astring = buffer;

        // Search only the bytes of the current line: scanning all of
        // sizeof(input_line) would also search leftovers of earlier,
        // longer lines.
        // NOTE(review): every chunk is searched with keep = 0, as before, so a
        // pattern straddling a chunk boundary is not found. Switching to
        // keep = 1 for continuation chunks also changes the positions seen by
        // match_handler (cf. my_param.position) -- confirm before changing.
        for (offset = 0; offset < text_len; offset += input_text.length)
        {
            size_t chunk_len = text_len - offset;
            if (chunk_len > sizeof(buffer))
                chunk_len = sizeof(buffer);

            memcpy (buffer, input_line + offset, chunk_len);
            input_text.length = chunk_len;

            if (ac_automata_search (atm, &input_text, 0, match_handler, (void *)(&my_param)))
                // if the search stopped in the middle (returned 1) we should break the loop
                break;
        }

        printf(";~~~~~~~~~~\n");
        fprintf(fp1, ";~~~~~~~~~~\n");
    }

    // TODO: do the same search with settext/findnext interface

    // The buffer getline() allocated for 'line' is left to process exit.
    fclose(fp);
    fclose(fp1);

    ac_automata_release (atm);

    return 0;
}
/*
 * Make sure 'text' ends with a '_' delimiter: a trailing newline is replaced
 * by '_', otherwise a '_' is appended when one is not already there and
 * 'capacity' (the size of the array holding 'text') leaves room for it.
 */
static void ensure_trailing_underscore (char *text, size_t capacity)
{
	size_t n = strlen(text);

	if (n > 0 && text[n-1] == '\n')
	{
		text[n-1] = '_';
	}
	else if ((n == 0 || text[n-1] != '_') && n + 1 < capacity)
	{
		text[n] = '_';
		text[n+1] = '\0';
	}
}

/*
 * Feed 'text' to the automata through the shared 'buffer', one buffer-sized
 * chunk at a time, stopping early when ac_automata_search() returns non-zero.
 * The automata is reset afterwards so the next text starts from a clean state.
 */
static void search_text_in_chunks (AC_AUTOMATA_t *acap, AC_TEXT_t *input_text,
				   const char *text, void *param)
{
	size_t remaining = strlen(text);

	input_text->astring = buffer;

	while (remaining > 0)
	{
		//*** 6. Set input text
		size_t chunk_len = (remaining < sizeof(buffer)) ? remaining : sizeof(buffer);
		memcpy(buffer, text, chunk_len);
		input_text->length = chunk_len;

		//*** 7. Do search
		/* according to the return value of ac_automata_search() we decide to
		 * continue or break loop. */
		if (ac_automata_search (acap, input_text, param))
			break;

		text += chunk_len;
		remaining -= chunk_len;
	}

	//*** 8. Reset
	ac_automata_reset(acap);
}

/*
 * Builds an automata from allpattern, then reads the text file argv[1] line
 * by line. Every line is searched twice: once as read and once lower-cased.
 * Both variants are wrapped in '_' delimiters and passed through
 * replace_space_with_underscore() first. ";~~~~~~~~~~" is printed after each
 * line.
 *
 * Returns 0 on success, 1 when the argument is missing or the file cannot be
 * opened.
 */
int main (int argc, char ** argv)
{
	//*** 2. Define AC variables
	AC_AUTOMATA_t * acap;

	AC_TEXT_t input_text = {0, 0};

	#define PATTERN_NUMBER (sizeof(allpattern)/sizeof(AC_PATTERN_t))

	unsigned int i;

	/* Sending parameter to call-back function */
	struct sample_param my_param;
	my_param.anum = 1;
	my_param.achar = 'a'; /* 'a': find all, 'f': find first */

	if (argc < 2)
	{
		fprintf(stderr, "Usage: <program> <input-file>\n");
		return 1;
	}

	//*** 3. Get a new automata
	acap = ac_automata_init (match_handler);

	//*** 4. add patterns to automata
	for (i=0; i<PATTERN_NUMBER; i++)
	{
		allpattern[i].length = strlen(allpattern[i].astring);
		ac_automata_add (acap, &allpattern[i]);
	}

	//*** 5. Finalize automata.
	ac_automata_finalize (acap);

	//*** 5.1 Display automata
	//ac_automata_display (acap, 's');

	/* The text is searched chunk by chunk: the chunk buffer may be smaller
	 * than an input line, so searching is done inside a loop that continues
	 * until the whole line has been consumed.
	**/

	FILE *fp;
	char *line = NULL;	/* getline() requires NULL (or a malloc'ed buffer) on first use */
	size_t len=0;		/* capacity of 'line' as maintained by getline(), NOT the line length */
	int k=0;
	char tolower_input_line[10000];

	fp = fopen(argv[1], "r");
	if(fp == NULL)
	{
		printf("File Couldn't Open\n");
		ac_automata_release (acap);
		return 1;
	}

	input_line[0]='_'; tolower_input_line[0]='_';
	while(getline(&line, &len, fp)!=-1)
	{
		/* Bounded copies behind the leading '_'; two bytes stay reserved for
		 * the trailing '_' and the terminator (assumes input_line is a char
		 * array, as the former sizeof-based arithmetic did). */
		snprintf(input_line+1, sizeof(input_line)-2, "%s", line);

		//calling function replace_space_with_underscore()
		replace_space_with_underscore(input_line);
		/* The delimiter goes at the real end of the text; the previous code
		 * indexed with getline's buffer capacity instead. */
		ensure_trailing_underscore(input_line, sizeof(input_line));

		snprintf(tolower_input_line+1, sizeof(tolower_input_line)-2, "%s", line);

		//tolower
		for (k=0; tolower_input_line[k]; k++)
		{
			/* tolower() is only defined for unsigned char values and EOF */
			tolower_input_line[k] = (char)tolower((unsigned char)tolower_input_line[k]);
		}

		//calling function replace_space_with_underscore()
		replace_space_with_underscore(tolower_input_line);
		ensure_trailing_underscore(tolower_input_line, sizeof(tolower_input_line));

		/* First pass: the line as read. */
		search_text_in_chunks(acap, &input_text, input_line, (void *)(&my_param));

		//Checking multi words after tolowering the i/p.
		/* The lower-cased text still goes through input_line, as before, in
		 * case code outside this function reads that global. */
		snprintf(input_line, sizeof(input_line), "%s", tolower_input_line);
		search_text_in_chunks(acap, &input_text, input_line, (void *)(&my_param));

		printf(";~~~~~~~~~~\n");
	}

	/* The buffer getline() allocated for 'line' is left to process exit. */
	fclose(fp);

	//*** 9. Release automata
	ac_automata_release (acap);
	return 0;
}