-/* $OSSEC, os_regex_compile.c, v0.1, 2006/01/02, Daniel B. Cid$ */
-
/* Copyright (C) 2009 Trend Micro Inc.
* All right reserved.
*
* Foundation
*/
-
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
-#include <ctype.h>
#include "os_regex.h"
-#include "os_regex_internal.h"
+/* Matcher back-ends. OSRegex_Compile() stores exactly one of them in
+ * reg->exec_function, chosen from the pattern's '^' / '$' anchors and the
+ * OS_CASE_SENSITIVE flag. Their definitions are outside this section.
+ */
+const char *OSRegex_Execute_pcre2_match(const char *str, OSRegex *reg);
+const char *OSRegex_Execute_strncmp(const char *subject, OSRegex *reg);
+const char *OSRegex_Execute_strrcmp(const char *subject, OSRegex *reg);
+const char *OSRegex_Execute_strcasecmp(const char *subject, OSRegex *reg);
+const char *OSRegex_Execute_strncasecmp(const char *subject, OSRegex *reg);
+const char *OSRegex_Execute_strrcasecmp(const char *subject, OSRegex *reg);
+const char *OSRegex_Execute_strcmp(const char *subject, OSRegex *reg);
+/* Used by OSRegex_Compile() to decide whether PCRE2 compilation can be skipped */
+int OSRegex_CouldBeOptimized(const char *pattern2check);
-/** int OSRegex_Compile(char *pattern, OSRegex *reg, int flags) v0.1
- * Compile a regular expression to be used later.
+/* Compile a regular expression to be used later
+ *
+ * A non-empty pattern accepted by OSRegex_CouldBeOptimized() that starts
+ * with '^' and/or ends with '$' is not handed to PCRE2: the anchors are
+ * stripped, the remaining text is stored in reg->pattern and a string
+ * comparison matcher is installed in reg->exec_function.
+ * Every other pattern is converted with OSRegex_Convert(), compiled with
+ * pcre2_compile() and matched through OSRegex_Execute_pcre2_match.
+ *
* Allowed flags are:
* - OS_CASE_SENSITIVE
* - OS_RETURN_SUBSTRING
- * Returns 1 on success or 0 on error.
- * The error code is set on reg->error.
+ *
+ * Returns 1 on success or 0 on error
+ * The error code is set on reg->error (nothing is written when reg is NULL)
*/
-int OSRegex_Compile(char *pattern, OSRegex *reg, int flags)
+int OSRegex_Compile(const char *pattern, OSRegex *reg, int flags)
{
- int i = 0;
- int count = 0;
- int end_of_string = 0;
- int parenthesis = 0;
- int prts_size = 0;
- int max_prts_size = 0;
-
- char *pt;
- char *new_str;
- char *new_str_free = NULL;
-
-
- /* Checking for references not initialized */
- if(reg == NULL)
- {
- return(0);
+ char *pattern_pcre2 = NULL;
+ int flags_compile = 0;
+ int error = 0;
+ PCRE2_SIZE erroroffset = 0;
+ size_t pattern_len = 0UL;
+ char first_char, last_char;
+ uint32_t count = 0;
+ uint32_t i;
+
+ /* Check for references not initialized */
+ if (reg == NULL) {
+ return (0);
}
-
- /* Initializing OSRegex structure */
+ /* Initialize OSRegex structure so the error path can free it safely */
reg->error = 0;
- reg->patterns = NULL;
- reg->flags = NULL;
- reg->prts_closure = NULL;
- reg->prts_str = NULL;
reg->sub_strings = NULL;
-
-
+ reg->regex = NULL;
+ reg->match_data = NULL;
+ reg->pattern_len = 0UL;
+ reg->pattern = NULL;
+ reg->exec_function = NULL;
/* The pattern can't be null */
- if(pattern == NULL)
- {
+ if (pattern == NULL) {
reg->error = OS_REGEX_PATTERN_NULL;
goto compile_error;
}
/* Maximum size of the pattern */
- if(strlen(pattern) > OS_PATTERN_MAXSIZE)
- {
+ pattern_len = strlen(pattern);
+ if (pattern_len > OS_PATTERN_MAXSIZE) {
reg->error = OS_REGEX_MAXSIZE;
goto compile_error;
}
-
- /* Duping the pattern for our internal work */
- new_str = strdup(pattern);
- if(!new_str)
- {
- reg->error = OS_REGEX_OUTOFMEMORY;
- goto compile_error;
- }
- new_str_free = new_str;
- pt = new_str;
-
-
- /* Getting the number of sub patterns */
- do
- {
- if(*pt == BACKSLASH)
- {
- pt++;
- if(!((*pt == 'w') ||
- (*pt == 'W') ||
- (*pt == 's') ||
- (*pt == 'S') ||
- (*pt == 'd') ||
- (*pt == 'D') ||
- (*pt == '.') ||
- (*pt == '(') ||
- (*pt == ')') ||
- (*pt == 'p') ||
- (*pt == 't') ||
- (*pt == '$') ||
- (*pt == '|') ||
- (*pt == '<') ||
- (*pt == '\\')))
- {
- reg->error = OS_REGEX_BADREGEX;
- goto compile_error;
- }
-
- /* Giving the new values for each regex */
- switch(*pt)
- {
- case 'd': *pt = 1;break;
- case 'w': *pt = 2;break;
- case 's': *pt = 3;break;
- case 'p': *pt = 4;break;
- case '(': *pt = 5;break;
- case ')': *pt = 6;break;
- case '\\':*pt = 7;break;
- case 'D': *pt = 8;break;
- case 'W': *pt = 9;break;
- case 'S': *pt = 10;break;
- case '.': *pt = 11;break;
- case 't': *pt = 12;break;
- case '$': *pt = 13;break;
- case '|': *pt = 14;break;
- case '<': *pt = 15;break;
+ /* Fast path for literal patterns. The pattern_len check keeps
+ * pattern[pattern_len - 1] inside the string: without it an empty
+ * pattern would index one byte before the buffer.
+ * NOTE(review): reg->sub_strings stays NULL on this path even when
+ * OS_RETURN_SUBSTRING is set - confirm the string matchers never read it.
+ */
+ if (pattern_len > 0 && OSRegex_CouldBeOptimized(pattern)) {
+ first_char = pattern[0];
+ last_char = pattern[pattern_len - 1];
+
+ if (first_char == '^') {
+ if (last_char == '$') {
+ /* "^text$": keep "text" (both anchors dropped) */
+ reg->pattern = strdup(&pattern[1]);
+ if (reg->pattern == NULL) {
+ reg->error = OS_REGEX_OUTOFMEMORY;
+ goto compile_error;
+ }
+ reg->pattern_len = pattern_len - 2;
+ reg->pattern[reg->pattern_len] = '\0';
+ if (flags & OS_CASE_SENSITIVE) {
+ reg->exec_function = OSRegex_Execute_strcmp;
+ } else {
+ reg->exec_function = OSRegex_Execute_strcasecmp;
+ }
+ return (1);
+ } else {
+ /* "^text": keep "text" (leading anchor dropped) */
+ reg->pattern = strdup(&pattern[1]);
+ if (reg->pattern == NULL) {
+ reg->error = OS_REGEX_OUTOFMEMORY;
+ goto compile_error;
+ }
+ reg->pattern_len = pattern_len - 1;
+ if (flags & OS_CASE_SENSITIVE) {
+ reg->exec_function = OSRegex_Execute_strncmp;
+ } else {
+ reg->exec_function = OSRegex_Execute_strncasecmp;
+ }
+ return (1);
}
- pt++;
-
- continue;
- }
- else if(*pt == '(')
- {
- parenthesis++;
- }
- else if(*pt == ')')
- {
- /* Internally, open and closed are the same */
- *pt = '(';
- parenthesis--;
- prts_size++;
- }
-
- /* We only allow one level of parenthesis */
- if(parenthesis != 0 && parenthesis != 1)
- {
- reg->error = OS_REGEX_BADPARENTHESIS;
- goto compile_error;
- }
-
- /* The pattern must be always lower case if
- * case sensitive is set
- */
- if(!(flags & OS_CASE_SENSITIVE))
- {
- *pt = charmap[(uchar)*pt];
- }
-
- if(*pt == OR)
- {
- /* Each sub pattern must be closed on parenthesis */
- if(parenthesis != 0)
- {
- reg->error = OS_REGEX_BADPARENTHESIS;
- goto compile_error;
+ } else {
+ if (last_char == '$') {
+ /* "text$": keep "text" (trailing anchor overwritten) */
+ reg->pattern = strdup(pattern);
+ if (reg->pattern == NULL) {
+ reg->error = OS_REGEX_OUTOFMEMORY;
+ goto compile_error;
+ }
+ reg->pattern_len = pattern_len - 1;
+ reg->pattern[reg->pattern_len] = '\0';
+ if (flags & OS_CASE_SENSITIVE) {
+ reg->exec_function = OSRegex_Execute_strrcmp;
+ } else {
+ reg->exec_function = OSRegex_Execute_strrcasecmp;
+ }
+ return (1);
}
- count++;
}
- pt++;
- }while(*pt != '\0');
+ }
+ /* No usable anchor or not a plain literal: fall back to PCRE2 */
+ reg->exec_function = OSRegex_Execute_pcre2_match;
- /* After the whole pattern is read, the parenthesis must all be closed */
- if(parenthesis != 0)
- {
- reg->error = OS_REGEX_BADPARENTHESIS;
+ /* Ossec pattern conversion */
+ if (OSRegex_Convert(pattern, &pattern_pcre2, OS_CONVERT_REGEX) == 0) {
+ reg->error = OS_REGEX_BADREGEX;
goto compile_error;
}
-
- /* Allocating the memory for the sub patterns */
- count++;
- reg->patterns = calloc(count +1, sizeof(char *));
- reg->flags = calloc(count +1, sizeof(int));
-
-
- /* For the substrings */
- if((prts_size > 0) && (flags & OS_RETURN_SUBSTRING))
- {
- reg->prts_closure = calloc(count +1, sizeof(char **));
- reg->prts_str = calloc(count +1, sizeof(char **));
- if(!reg->prts_closure || !reg->prts_str)
- {
- reg->error = OS_REGEX_OUTOFMEMORY;
- goto compile_error;
- }
+ flags_compile |= PCRE2_UTF;
+ flags_compile |= PCRE2_NO_UTF_CHECK;
+ flags_compile |= (flags & OS_CASE_SENSITIVE) ? 0 : PCRE2_CASELESS;
+ reg->regex = pcre2_compile((PCRE2_SPTR)pattern_pcre2, PCRE2_ZERO_TERMINATED, flags_compile,
+ &error, &erroroffset, NULL);
+ if (reg->regex == NULL) {
+ reg->error = OS_REGEX_BADREGEX;
+ goto compile_error;
}
-
- /* Memory allocation error check */
- if(!reg->patterns || !reg->flags)
- {
+ reg->match_data = pcre2_match_data_create_from_pattern(reg->regex, NULL);
+ if (reg->match_data == NULL) {
reg->error = OS_REGEX_OUTOFMEMORY;
goto compile_error;
}
- /* Initializing each sub pattern */
- for(i = 0; i<=count; i++)
- {
- reg->patterns[i] = NULL;
- reg->flags[i] = 0;
-
- /* The parenthesis closure if set */
- if(reg->prts_closure)
- {
- reg->prts_closure[i] = NULL;
- reg->prts_str[i] = NULL;
- }
+#ifdef USE_PCRE2_JIT
+ /* Just In Time compilation for faster execution */
+ if (pcre2_jit_compile(reg->regex, PCRE2_JIT_COMPLETE) != 0) {
+ reg->error = OS_REGEX_NO_JIT;
+ goto compile_error;
}
- i = 0;
-
+#endif
- /* Reassigning pt to the beginning of the string */
- pt = new_str;
-
-
- /* Getting the sub patterns */
- do
- {
- if((*pt == OR) || (*pt == '\0'))
- {
- if(*pt == '\0')
- {
- end_of_string = 1;
- }
-
- *pt = '\0';
-
- /* If string starts with ^, set the BEGIN SET flag */
- if(*new_str == BEGINREGEX)
- {
- new_str++;
- reg->flags[i]|=BEGIN_SET;
- }
-
- /* If string ends with $, set the END_SET flag */
- if(*(pt-1) == ENDREGEX)
- {
- *(pt-1) = '\0';
- reg->flags[i]|=END_SET;
- }
-
- reg->patterns[i] = strdup(new_str);
-
- if(!reg->patterns[i])
- {
- reg->error = OS_REGEX_OUTOFMEMORY;
- goto compile_error;
-
- }
-
-
- /* Setting the parenthesis closures */
- /* The parenthesis closure if set */
- if(reg->prts_closure)
- {
- int tmp_int = 0;
- char *tmp_str;
-
-
- /* search the whole pattern for parenthesis */
- prts_size = 0;
-
- /* First loop we get the number of parenthesis.
- * We allocate the memory and loop again setting
- * the parenthesis closures.
- */
- tmp_str = reg->patterns[i];
- while(*tmp_str != '\0')
- {
- if(prts(*tmp_str))
- {
- prts_size++;
- }
- tmp_str++;
- }
-
- /* Getting the maximum number of parenthesis for
- * all sub strings. We need that to set up the maximum
- * number of substrings to be returned.
- */
- if(max_prts_size < prts_size)
- {
- max_prts_size = prts_size;
- }
-
- /* Allocating the memory */
- reg->prts_closure[i] = calloc(prts_size + 1, sizeof(char *));
- reg->prts_str[i] = calloc(prts_size + 1, sizeof(char *));
- if((reg->prts_closure[i] == NULL)||(reg->prts_str[i] == NULL))
- {
- reg->error = OS_REGEX_OUTOFMEMORY;
- goto compile_error;
- }
-
- /* Next loop to set the closures */
- tmp_str = reg->patterns[i];
- while(*tmp_str != '\0')
- {
- if(prts(*tmp_str))
- {
- if(tmp_int >= prts_size)
- {
- reg->error = OS_REGEX_BADPARENTHESIS;
- goto compile_error;
- }
-
- /* Setting to the pointer to the string */
- reg->prts_closure[i][tmp_int] = tmp_str;
- reg->prts_str[i][tmp_int] = NULL;
-
- tmp_int++;
- }
-
- tmp_str++;
- }
- }
-
-
- if(end_of_string)
- {
- break;
- }
-
- new_str = ++pt;
- i++;
- continue;
+ if (flags & OS_RETURN_SUBSTRING) {
+ /* Size the result array from the number of capture groups; a failed
+ * query must not leave the array sized from a stale count.
+ */
+ if (pcre2_pattern_info(reg->regex, PCRE2_INFO_CAPTURECOUNT, &count) != 0) {
+ reg->error = OS_REGEX_BADREGEX;
+ goto compile_error;
+ }
+ count++; // to store NULL pointer at the end
+ reg->sub_strings = calloc(count, sizeof(char *));
+ if (reg->sub_strings == NULL) {
+ reg->error = OS_REGEX_OUTOFMEMORY;
+ goto compile_error;
+ }
+ for (i = 0; i < count; i++) {
+ reg->sub_strings[i] = NULL;
}
- pt++;
-
- }while(!end_of_string);
-
- /* Allocating sub string for the maximum number of parenthesis */
- reg->sub_strings = calloc(max_prts_size + 1, sizeof(char *));
- if(reg->sub_strings == NULL)
- {
- reg->error = OS_REGEX_OUTOFMEMORY;
- goto compile_error;
}
- /* Success return */
- free(new_str_free);
- return(1);
+ /* Success: the converted pattern text is no longer needed */
+ free(pattern_pcre2);
+ return (1);
+compile_error:
/* Error handling */
- compile_error:
- if(new_str_free)
- {
- free(new_str_free);
+ if (pattern_pcre2) {
+ free(pattern_pcre2);
}
OSRegex_FreePattern(reg);
- return(0);
+ return (0);
}
-
-/* EOF */
+/* Check whether a pattern is a plain literal with optional anchors
+ *
+ * The check expression accepts: an optional leading '^', then any number of
+ * ASCII letters, digits, spaces and the punctuation listed in the character
+ * class, then an optional trailing '$'. An empty string also fits that shape.
+ *
+ * Returns whatever OS_Pcre2() yields for that expression (presumably
+ * non-zero on a match - confirm against OS_Pcre2).
+ */
+int OSRegex_CouldBeOptimized(const char *pattern2check)
+{
+ const int literal_only = OS_Pcre2("^\\^?[A-Za-z0-9 !\"#%&',/:;<=>@_`~-]*\\$?$", pattern2check);
+
+ return literal_only;
+}