Exemple #1
0
END_TEST

START_TEST(test_regexextraction)
{

    int i;
    /*
     * Please note that all strings are \ escaped
     */
    const char *tests[][15] = {
        { "123(\\w+\\s+)abc", "123sdf    abc", "sdf    ", NULL},
        { "123(\\w+\\s+)abc", "abc123sdf    abc", "sdf    ", NULL},
        { "123 (\\d+.\\d.\\d.\\d\\d*\\d*)", "123 45.6.5.567", "45.6.5.567", NULL},
        { "from (\\S*\\d+.\\d+.\\d+.\\d\\d*\\d*)", "sshd[21576]: Illegal user web14 from ::ffff:212.227.60.55", "::ffff:212.227.60.55", NULL},
        { "^sshd[\\d+]: Accepted \\S+ for (\\S+) from (\\S+) port ", "sshd[21405]: Accepted password for root from 192.1.1.1 port 6023", "root", "192.1.1.1", NULL},
        { ": \\((\\S+)@(\\S+)\\) [", "pure-ftpd: ([email protected]) [INFO] New connection from enigma.lab.ossec.net", "?", "enigma.lab.ossec.net", NULL},
        {NULL,NULL,NULL}
    };

    for(i=0; tests[i][0] != NULL; i++) {
        OSRegex reg;
        ck_assert_int_eq(OSRegex_Compile(tests[i][0], &reg, OS_RETURN_SUBSTRING), 1);
        ck_assert_ptr_ne(OSRegex_Execute(tests[i][1], &reg), NULL);



        char **result = reg.sub_strings;

        int j;
        int k;
        for(j = 2, k = 0; tests[i][j] != NULL; j++, k++)
        {
            ck_assert_ptr_ne(result[k], NULL);
            ck_assert_str_eq(result[k], tests[i][j]);
        }
        ck_assert_ptr_eq(result[k], NULL);

        OSRegex_FreePattern(&reg);
    }
}
Exemple #2
0
/** int OSRegex_Compile(char *pattern, OSRegex *reg, int flags) v0.1
 * Compile a regular expression to be used later.
 * Allowed flags are:
 *      - OS_CASE_SENSITIVE
 *      - OS_RETURN_SUBSTRING
 * Returns 1 on success or 0 on error.
 * The error code is set on reg->error.
 */
int OSRegex_Compile(char *pattern, OSRegex *reg, int flags)
{
    int i = 0;
    int count = 0;
    int end_of_string = 0;
    int parenthesis = 0;
    int prts_size = 0;
    int max_prts_size = 0;

    char *pt;
    char *new_str;
    char *new_str_free = NULL;


    /* Checking for references not initialized */
    if(reg == NULL)
    {
        return(0);
    }


    /* Initializing OSRegex structure */
    reg->error = 0;
    reg->patterns = NULL;
    reg->flags = NULL;
    reg->prts_closure = NULL;
    reg->prts_str = NULL;
    reg->sub_strings = NULL;



    /* The pattern can't be null */
    if(pattern == NULL)
    {
        reg->error = OS_REGEX_PATTERN_NULL;
        goto compile_error;
    }

    /* Maximum size of the pattern */
    if(strlen(pattern) > OS_PATTERN_MAXSIZE)
    {
        reg->error = OS_REGEX_MAXSIZE;
        goto compile_error;
    }


    /* Duping the pattern for our internal work */
    new_str = strdup(pattern);
    if(!new_str)
    {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }
    new_str_free = new_str;
    pt = new_str;


    /* Getting the number of sub patterns */
    do
    {
        if(*pt == BACKSLASH)
        {
            pt++;
            if(!((*pt == 'w') ||
                 (*pt == 'W') ||
                 (*pt == 's') ||
                 (*pt == 'S') ||
                 (*pt == 'd') ||
                 (*pt == 'D') ||
                 (*pt == '.') ||
                 (*pt == '(') ||
                 (*pt == ')') ||
                 (*pt == 'p') ||
                 (*pt == 't') ||
                 (*pt == '$') ||
                 (*pt == '|') ||
                 (*pt == '<') ||
                 (*pt == '\\')))
            {
                reg->error = OS_REGEX_BADREGEX;
                goto compile_error;
            }

            /* Giving the new values for each regex */
            switch(*pt)
            {
                case 'd': *pt = 1;break;
                case 'w': *pt = 2;break;
                case 's': *pt = 3;break;
                case 'p': *pt = 4;break;
                case '(': *pt = 5;break;
                case ')': *pt = 6;break;
                case '\\':*pt = 7;break;
                case 'D': *pt = 8;break;
                case 'W': *pt = 9;break;
                case 'S': *pt = 10;break;
                case '.': *pt = 11;break;
                case 't': *pt = 12;break;
                case '$': *pt = 13;break;
                case '|': *pt = 14;break;
                case '<': *pt = 15;break;
            }
            pt++;

            continue;
        }
        else if(*pt == '(')
        {
            parenthesis++;
        }
        else if(*pt == ')')
        {
            /* Internally, open and closed are the same */
            *pt = '(';
            parenthesis--;
            prts_size++;
        }

        /* We only allow one level of parenthesis */
        if(parenthesis != 0 && parenthesis != 1)
        {
            reg->error = OS_REGEX_BADPARENTHESIS;
            goto compile_error;
        }

        /* The pattern must be always lower case if
         * case sensitive is set
         */
        if(!(flags & OS_CASE_SENSITIVE))
        {
            *pt = charmap[(uchar)*pt];
        }

        if(*pt == OR)
        {
            /* Each sub pattern must be closed on parenthesis */
            if(parenthesis != 0)
            {
                reg->error = OS_REGEX_BADPARENTHESIS;
                goto compile_error;
            }
            count++;
        }
        pt++;
    }while(*pt != '\0');


    /* After the whole pattern is read, the parenthesis must all be closed */
    if(parenthesis != 0)
    {
        reg->error = OS_REGEX_BADPARENTHESIS;
        goto compile_error;
    }


    /* Allocating the memory for the sub patterns */
    count++;
    reg->patterns = calloc(count +1, sizeof(char *));
    reg->flags = calloc(count +1, sizeof(int));


    /* For the substrings */
    if((prts_size > 0) && (flags & OS_RETURN_SUBSTRING))
    {
        reg->prts_closure = calloc(count +1, sizeof(char **));
        reg->prts_str = calloc(count +1, sizeof(char **));
        if(!reg->prts_closure || !reg->prts_str)
        {
            reg->error = OS_REGEX_OUTOFMEMORY;
            goto compile_error;
        }
    }


    /* Memory allocation error check */
    if(!reg->patterns || !reg->flags)
    {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }

    /* Initializing each sub pattern */
    for(i = 0; i<=count; i++)
    {
        reg->patterns[i] = NULL;
        reg->flags[i] = 0;

        /* The parenthesis closure if set */
        if(reg->prts_closure)
        {
            reg->prts_closure[i] = NULL;
            reg->prts_str[i] = NULL;
        }
    }
    i = 0;


    /* Reassigning pt to the beginning of the string */
    pt = new_str;


    /* Getting the sub patterns */
    do
    {
        if((*pt == OR) || (*pt == '\0'))
        {
            if(*pt == '\0')
            {
                end_of_string = 1;
            }

            *pt = '\0';

            /* If string starts with ^, set the BEGIN SET flag */
            if(*new_str == BEGINREGEX)
            {
                new_str++;
                reg->flags[i]|=BEGIN_SET;
            }

            /* If string ends with $, set the END_SET flag */
            if(*(pt-1) == ENDREGEX)
            {
                *(pt-1) = '\0';
                reg->flags[i]|=END_SET;
            }

            reg->patterns[i] = strdup(new_str);

            if(!reg->patterns[i])
            {
                reg->error = OS_REGEX_OUTOFMEMORY;
                goto compile_error;

            }


            /* Setting the parenthesis closures */
            /* The parenthesis closure if set */
            if(reg->prts_closure)
            {
                int tmp_int = 0;
                char *tmp_str;


                /* search the whole pattern for parenthesis */
                prts_size = 0;

                /* First loop we get the number of parenthesis.
                 * We allocate the memory and loop again setting
                 * the parenthesis closures.
                 */
                tmp_str = reg->patterns[i];
                while(*tmp_str != '\0')
                {
                    if(prts(*tmp_str))
                    {
                        prts_size++;
                    }
                    tmp_str++;
                }

                /* Getting the maximum number of parenthesis for
                 * all sub strings. We need that to set up the maximum
                 * number of substrings to be returned.
                 */
                if(max_prts_size < prts_size)
                {
                    max_prts_size = prts_size;
                }

                /* Allocating the memory */
                reg->prts_closure[i] = calloc(prts_size + 1, sizeof(char *));
                reg->prts_str[i] = calloc(prts_size + 1, sizeof(char *));
                if((reg->prts_closure[i] == NULL)||(reg->prts_str[i] == NULL))
                {
                    reg->error = OS_REGEX_OUTOFMEMORY;
                    goto compile_error;
                }

                /* Next loop to set the closures */
                tmp_str = reg->patterns[i];
                while(*tmp_str != '\0')
                {
                    if(prts(*tmp_str))
                    {
                        if(tmp_int >= prts_size)
                        {
                            reg->error = OS_REGEX_BADPARENTHESIS;
                            goto compile_error;
                        }

                        /* Setting to the pointer to the string */
                        reg->prts_closure[i][tmp_int] = tmp_str;
                        reg->prts_str[i][tmp_int] = NULL;

                        tmp_int++;
                    }

                    tmp_str++;
                }
            }


            if(end_of_string)
            {
                break;
            }

            new_str = ++pt;
            i++;
            continue;
        }
        pt++;

    }while(!end_of_string);

    /* Allocating sub string for the maximum number of parenthesis */
    reg->sub_strings = calloc(max_prts_size + 1, sizeof(char *));
    if(reg->sub_strings == NULL)
    {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }

    /* Success return */
    free(new_str_free);
    return(1);


    /* Error handling */
    compile_error:

    if(new_str_free)
    {
        free(new_str_free);
    }

    OSRegex_FreePattern(reg);

    return(0);
}
/* Compile a regular expression to be used later
 * Allowed flags are:
 *      - OS_CASE_SENSITIVE
 *      - OS_RETURN_SUBSTRING
 * Returns 1 on success or 0 on error
 * The error code is set on reg->error
 */
int OSRegex_Compile(const char *pattern, OSRegex *reg, int flags)
{
    char *pattern_pcre2 = NULL;
    int flags_compile = 0;
    int error = 0;
    PCRE2_SIZE erroroffset = 0;
    size_t pattern_len = 0UL;
    char first_char, last_char;
    uint32_t count, i;

    /* Check for references not initialized */
    if (reg == NULL) {
        return (0);
    }

    /* Initialize OSRegex structure */
    reg->error = 0;
    reg->sub_strings = NULL;
    reg->regex = NULL;
    reg->match_data = NULL;
    reg->pattern_len = 0UL;
    reg->pattern = NULL;
    reg->exec_function = NULL;

    /* The pattern can't be null */
    if (pattern == NULL) {
        reg->error = OS_REGEX_PATTERN_NULL;
        goto compile_error;
    }

    /* Maximum size of the pattern */
    pattern_len = strlen(pattern);
    if (pattern_len > OS_PATTERN_MAXSIZE) {
        reg->error = OS_REGEX_MAXSIZE;
        goto compile_error;
    }

    if (OSRegex_CouldBeOptimized(pattern)) {
        first_char = pattern[0];
        last_char = pattern[pattern_len - 1];

        if (first_char == '^') {
            if (last_char == '$') {
                reg->pattern = strdup(&pattern[1]);
                reg->pattern_len = pattern_len - 2;
                reg->pattern[reg->pattern_len] = '\0';
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strcmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strcasecmp;
                }
                return (1);
            } else {
                reg->pattern = strdup(&pattern[1]);
                reg->pattern_len = pattern_len - 1;
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strncmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strncasecmp;
                }
                return (1);
            }
        } else {
            if (last_char == '$') {
                reg->pattern = strdup(pattern);
                reg->pattern_len = pattern_len - 1;
                reg->pattern[reg->pattern_len] = '\0';
                if (flags & OS_CASE_SENSITIVE) {
                    reg->exec_function = OSRegex_Execute_strrcmp;
                } else {
                    reg->exec_function = OSRegex_Execute_strrcasecmp;
                }
                return (1);
            }
        }
    }

    reg->exec_function = OSRegex_Execute_pcre2_match;

    /* Ossec pattern conversion */
    if (OSRegex_Convert(pattern, &pattern_pcre2, OS_CONVERT_REGEX) == 0) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }

    flags_compile |= PCRE2_UTF;
    flags_compile |= PCRE2_NO_UTF_CHECK;
    flags_compile |= (flags & OS_CASE_SENSITIVE) ? 0 : PCRE2_CASELESS;
    reg->regex = pcre2_compile((PCRE2_SPTR)pattern_pcre2, PCRE2_ZERO_TERMINATED, flags_compile,
                               &error, &erroroffset, NULL);
    if (reg->regex == NULL) {
        reg->error = OS_REGEX_BADREGEX;
        goto compile_error;
    }

    reg->match_data = pcre2_match_data_create_from_pattern(reg->regex, NULL);
    if (reg->match_data == NULL) {
        reg->error = OS_REGEX_OUTOFMEMORY;
        goto compile_error;
    }

#ifdef USE_PCRE2_JIT
    /* Just In Time compilation for faster execution */
    if (pcre2_jit_compile(reg->regex, PCRE2_JIT_COMPLETE) != 0) {
        reg->error = OS_REGEX_NO_JIT;
        goto compile_error;
    }
#endif

    if (flags & OS_RETURN_SUBSTRING) {
        pcre2_pattern_info(reg->regex, PCRE2_INFO_CAPTURECOUNT, (void *)&count);
        count++; // to store NULL pointer at the end
        reg->sub_strings = calloc(count, sizeof(char *));
        if (reg->sub_strings == NULL) {
            reg->error = OS_REGEX_OUTOFMEMORY;
            goto compile_error;
        }
        for (i = 0; i < count; i++) {
            reg->sub_strings[i] = NULL;
        }
    }

    free(pattern_pcre2);

    return (1);

compile_error:
    /* Error handling */

    if (pattern_pcre2) {
        free(pattern_pcre2);
    }

    OSRegex_FreePattern(reg);

    return (0);
}