X-Git-Url: http://ftp.carnet.hr/carnet-debian/scm?p=ossec-hids.git;a=blobdiff_plain;f=src%2Fos_regex%2Fos_regex_compile.c;fp=src%2Fos_regex%2Fos_regex_compile.c;h=868f6446875be3ebaed8c61485fe9b7577962071;hp=d961b2abb343f8b50dcf91bfc008f66673dc9e7a;hb=3f728675941dc69d4e544d3a880a56240a6e394a;hpb=927951d1c1ad45ba9e7325f07d996154a91c911b diff --git a/src/os_regex/os_regex_compile.c b/src/os_regex/os_regex_compile.c old mode 100755 new mode 100644 index d961b2a..868f644 --- a/src/os_regex/os_regex_compile.c +++ b/src/os_regex/os_regex_compile.c @@ -1,5 +1,3 @@ -/* $OSSEC, os_regex_compile.c, v0.1, 2006/01/02, Daniel B. Cid$ */ - /* Copyright (C) 2009 Trend Micro Inc. * All right reserved. * @@ -9,349 +7,166 @@ * Foundation */ - #include #include #include -#include #include "os_regex.h" -#include "os_regex_internal.h" +const char *OSRegex_Execute_pcre2_match(const char *str, OSRegex *reg); +const char *OSRegex_Execute_strncmp(const char *subject, OSRegex *reg); +const char *OSRegex_Execute_strrcmp(const char *subject, OSRegex *reg); +const char *OSRegex_Execute_strcasecmp(const char *subject, OSRegex *reg); +const char *OSRegex_Execute_strncasecmp(const char *subject, OSRegex *reg); +const char *OSRegex_Execute_strrcasecmp(const char *subject, OSRegex *reg); +const char *OSRegex_Execute_strcmp(const char *subject, OSRegex *reg); +int OSRegex_CouldBeOptimized(const char *pattern2check); -/** int OSRegex_Compile(char *pattern, OSRegex *reg, int flags) v0.1 - * Compile a regular expression to be used later. +/* Compile a regular expression to be used later * Allowed flags are: * - OS_CASE_SENSITIVE * - OS_RETURN_SUBSTRING - * Returns 1 on success or 0 on error. - * The error code is set on reg->error. + * Returns 1 on success or 0 on error + * The error code is set on reg->error */ int OSRegex_Compile(const char *pattern, OSRegex *reg, int flags) { - size_t i = 0; - size_t count = 0; - int end_of_string = 0; - int parenthesis = 0; - unsigned prts_size = 0; - unsigned max_prts_size = 0; - - char *pt; - char *new_str; - char *new_str_free = NULL; - - - /* Checking for references not initialized */ - if(reg == NULL) - { - return(0); + char *pattern_pcre2 = NULL; + int flags_compile = 0; + int error = 0; + PCRE2_SIZE erroroffset = 0; + size_t pattern_len = 0UL; + char first_char, last_char; + uint32_t count, i; + + /* Check for references not initialized */ + if (reg == NULL) { + return (0); } - - /* Initializing OSRegex structure */ + /* Initialize OSRegex structure */ reg->error = 0; - reg->patterns = NULL; - reg->flags = NULL; - reg->prts_closure = NULL; - reg->prts_str = NULL; reg->sub_strings = NULL; - - + reg->regex = NULL; + reg->match_data = NULL; + reg->pattern_len = 0UL; + reg->pattern = NULL; + reg->exec_function = NULL; /* The pattern can't be null */ - if(pattern == NULL) - { + if (pattern == NULL) { reg->error = OS_REGEX_PATTERN_NULL; goto compile_error; } /* Maximum size of the pattern */ - if(strlen(pattern) > OS_PATTERN_MAXSIZE) - { + pattern_len = strlen(pattern); + if (pattern_len > OS_PATTERN_MAXSIZE) { reg->error = OS_REGEX_MAXSIZE; goto compile_error; } - - /* Duping the pattern for our internal work */ - new_str = strdup(pattern); - if(!new_str) - { - reg->error = OS_REGEX_OUTOFMEMORY; - goto compile_error; - } - new_str_free = new_str; - pt = new_str; - - - /* Getting the number of sub patterns */ - do - { - if(*pt == BACKSLASH) - { - pt++; - /* Giving the new values for each regex */ - switch(*pt) - { - case 'd': *pt = 1;break; - case 'w': *pt = 2;break; - case 's': *pt = 3;break; - case 'p': *pt = 4;break; - case '(': *pt = 5;break; - case ')': *pt = 6;break; - case '\\':*pt = 7;break; - case 'D': *pt = 8;break; - case 'W': *pt = 9;break; - case 'S': *pt = 10;break; - case '.': *pt = 11;break; - case 't': *pt = 12;break; - case '$': *pt = 13;break; - case '|': *pt = 14;break; - case '<': *pt = 15;break; - default: - reg->error = OS_REGEX_BADREGEX; - goto compile_error; + if (OSRegex_CouldBeOptimized(pattern)) { + first_char = pattern[0]; + last_char = pattern[pattern_len - 1]; + + if (first_char == '^') { + if (last_char == '$') { + reg->pattern = strdup(&pattern[1]); + reg->pattern_len = pattern_len - 2; + reg->pattern[reg->pattern_len] = '\0'; + if (flags & OS_CASE_SENSITIVE) { + reg->exec_function = OSRegex_Execute_strcmp; + } else { + reg->exec_function = OSRegex_Execute_strcasecmp; + } + return (1); + } else { + reg->pattern = strdup(&pattern[1]); + reg->pattern_len = pattern_len - 1; + if (flags & OS_CASE_SENSITIVE) { + reg->exec_function = OSRegex_Execute_strncmp; + } else { + reg->exec_function = OSRegex_Execute_strncasecmp; + } + return (1); } - pt++; - - continue; - } - else if(*pt == '(') - { - parenthesis++; - } - else if(*pt == ')') - { - /* Internally, open and closed are the same */ - *pt = '('; - parenthesis--; - prts_size++; - } - - /* We only allow one level of parenthesis */ - if(parenthesis != 0 && parenthesis != 1) - { - reg->error = OS_REGEX_BADPARENTHESIS; - goto compile_error; - } - - /* The pattern must be always lower case if - * case sensitive is set - */ - if(!(flags & OS_CASE_SENSITIVE)) - { - *pt = (char) charmap[(uchar)*pt]; - } - - if(*pt == OR) - { - /* Each sub pattern must be closed on parenthesis */ - if(parenthesis != 0) - { - reg->error = OS_REGEX_BADPARENTHESIS; - goto compile_error; + } else { + if (last_char == '$') { + reg->pattern = strdup(pattern); + reg->pattern_len = pattern_len - 1; + reg->pattern[reg->pattern_len] = '\0'; + if (flags & OS_CASE_SENSITIVE) { + reg->exec_function = OSRegex_Execute_strrcmp; + } else { + reg->exec_function = OSRegex_Execute_strrcasecmp; + } + return (1); } - count++; } - pt++; - }while(*pt != '\0'); + } + reg->exec_function = OSRegex_Execute_pcre2_match; - /* After the whole pattern is read, the parenthesis must all be closed */ - if(parenthesis != 0) - { - reg->error = OS_REGEX_BADPARENTHESIS; + /* Ossec pattern conversion */ + if (OSRegex_Convert(pattern, &pattern_pcre2, OS_CONVERT_REGEX) == 0) { + reg->error = OS_REGEX_BADREGEX; goto compile_error; } + flags_compile |= PCRE2_UTF; + flags_compile |= PCRE2_NO_UTF_CHECK; + flags_compile |= (flags & OS_CASE_SENSITIVE) ? 0 : PCRE2_CASELESS; + reg->regex = pcre2_compile((PCRE2_SPTR)pattern_pcre2, PCRE2_ZERO_TERMINATED, flags_compile, + &error, &erroroffset, NULL); + if (reg->regex == NULL) { + reg->error = OS_REGEX_BADREGEX; + goto compile_error; + } - /* Allocating the memory for the sub patterns */ - count++; - reg->patterns = (char **) calloc(count +1, sizeof(char *)); - reg->flags = (int *) calloc(count +1, sizeof(int)); - - /* Memory allocation error check */ - if(!reg->patterns || !reg->flags) - { + reg->match_data = pcre2_match_data_create_from_pattern(reg->regex, NULL); + if (reg->match_data == NULL) { reg->error = OS_REGEX_OUTOFMEMORY; goto compile_error; } +#ifdef USE_PCRE2_JIT + /* Just In Time compilation for faster execution */ + if (pcre2_jit_compile(reg->regex, PCRE2_JIT_COMPLETE) != 0) { + reg->error = OS_REGEX_NO_JIT; + goto compile_error; + } +#endif - /* For the substrings */ - if((prts_size > 0) && (flags & OS_RETURN_SUBSTRING)) - { - reg->prts_closure = (const char ***) calloc(count +1, sizeof(const char **)); - reg->prts_str = (const char ***) calloc(count +1, sizeof(const char **)); - if(!reg->prts_closure || !reg->prts_str) - { + if (flags & OS_RETURN_SUBSTRING) { + pcre2_pattern_info(reg->regex, PCRE2_INFO_CAPTURECOUNT, (void *)&count); + count++; // to store NULL pointer at the end + reg->sub_strings = calloc(count, sizeof(char *)); + if (reg->sub_strings == NULL) { reg->error = OS_REGEX_OUTOFMEMORY; goto compile_error; } - } - - - /* Initializing each sub pattern */ - for(i = 0; i<=count; i++) - { - reg->patterns[i] = NULL; - reg->flags[i] = 0; - - /* The parenthesis closure if set */ - if(reg->prts_closure) - { - reg->prts_closure[i] = NULL; - reg->prts_str[i] = NULL; + for (i = 0; i < count; i++) { + reg->sub_strings[i] = NULL; } } - i = 0; - - /* Reassigning pt to the beginning of the string */ - pt = new_str; - - - /* Getting the sub patterns */ - do - { - if((*pt == OR) || (*pt == '\0')) - { - if(*pt == '\0') - { - end_of_string = 1; - } - - *pt = '\0'; - - /* If string starts with ^, set the BEGIN SET flag */ - if(*new_str == BEGINREGEX) - { - new_str++; - reg->flags[i]|=BEGIN_SET; - } - - /* If string ends with $, set the END_SET flag */ - if(*(pt-1) == ENDREGEX) - { - *(pt-1) = '\0'; - reg->flags[i]|=END_SET; - } - - reg->patterns[i] = strdup(new_str); - - if(!reg->patterns[i]) - { - reg->error = OS_REGEX_OUTOFMEMORY; - goto compile_error; - - } - - - /* Setting the parenthesis closures */ - /* The parenthesis closure if set */ - if(reg->prts_closure) - { - unsigned tmp_int = 0; - char *tmp_str; - - - /* search the whole pattern for parenthesis */ - prts_size = 0; - - /* First loop we get the number of parenthesis. - * We allocate the memory and loop again setting - * the parenthesis closures. - */ - tmp_str = reg->patterns[i]; - while(*tmp_str != '\0') - { - if(prts(*tmp_str)) - { - prts_size++; - } - tmp_str++; - } - - /* Getting the maximum number of parenthesis for - * all sub strings. We need that to set up the maximum - * number of substrings to be returned. - */ - if(max_prts_size < prts_size) - { - max_prts_size = prts_size; - } - - /* Allocating the memory */ - reg->prts_closure[i] = (const char **) calloc(prts_size + 1, sizeof(const char *)); - reg->prts_str[i] = (const char **) calloc(prts_size + 1, sizeof(const char *)); - if((reg->prts_closure[i] == NULL)||(reg->prts_str[i] == NULL)) - { - reg->error = OS_REGEX_OUTOFMEMORY; - goto compile_error; - } - - /* Next loop to set the closures */ - tmp_str = reg->patterns[i]; - while(*tmp_str != '\0') - { - if(prts(*tmp_str)) - { - if(tmp_int >= prts_size) - { - reg->error = OS_REGEX_BADPARENTHESIS; - goto compile_error; - } - - /* Setting to the pointer to the string */ - reg->prts_closure[i][tmp_int] = tmp_str; - reg->prts_str[i][tmp_int] = NULL; - - tmp_int++; - } - - tmp_str++; - } - } - - - if(end_of_string) - { - break; - } - - new_str = ++pt; - i++; - continue; - } - pt++; - - }while(!end_of_string); - - /* Allocating sub string for the maximum number of parenthesis */ - reg->sub_strings = (char **) calloc(max_prts_size + 1, sizeof(char *)); - if(reg->sub_strings == NULL) - { - reg->error = OS_REGEX_OUTOFMEMORY; - goto compile_error; - } - - /* Success return */ - free(new_str_free); - return(1); + free(pattern_pcre2); + return (1); +compile_error: /* Error handling */ - compile_error: - if(new_str_free) - { - free(new_str_free); + if (pattern_pcre2) { + free(pattern_pcre2); } OSRegex_FreePattern(reg); - return(0); + return (0); } - -/* EOF */ +int OSRegex_CouldBeOptimized(const char *pattern2check) { + return OS_Pcre2("^\\^?[A-Za-z0-9 !\"#%&',/:;<=>@_`~-]*\\$?$", pattern2check); +}