new upstream release (3.3.0); modify package compatibility for Stretch
[ossec-hids.git] / src / os_regex / os_match_compile.c
old mode 100755 (executable)
new mode 100644 (file)
index bf1c951..a651c87
@@ -1,5 +1,3 @@
-/*   $OSSEC, os_match_compile.c, v0.1, 2006/04/17, Daniel B. Cid$   */
-
 /* Copyright (C) 2009 Trend Micro Inc.
  * All right reserved.
  *
  * Foundation
  */
 
-
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
-#include <ctype.h>
 
 #include "os_regex.h"
-#include "os_regex_internal.h"
 
-/** int OSMatch_Compile(char *pattern, OSMatch *reg, int flags) v0.1
- * Compile a pattern to be used later.
+/* Forward declarations for the matcher callbacks that OSMatch_Compile() stores
+ * in reg->exec_function, and for the helper that decides whether a pattern can
+ * take one of the plain string-comparison paths instead of PCRE2.
+ * NOTE(review): the OSMatch_Execute_* definitions are not in this file; confirm
+ * these prototypes stay in sync with wherever they are defined.
+ */
+int OSMatch_Execute_pcre2_match(const char *subject, size_t len, OSMatch *match);
+int OSMatch_Execute_true(const char *subject, size_t len, OSMatch *match);
+int OSMatch_Execute_strncmp(const char *subject, size_t len, OSMatch *match);
+int OSMatch_Execute_strrcmp(const char *subject, size_t len, OSMatch *match);
+int OSMatch_Execute_strcmp(const char *subject, size_t len, OSMatch *match);
+int OSMatch_Execute_strncasecmp(const char *subject, size_t len, OSMatch *match);
+int OSMatch_Execute_strrcasecmp(const char *subject, size_t len, OSMatch *match);
+int OSMatch_Execute_strcasecmp(const char *subject, size_t len, OSMatch *match);
+int OSMatch_CouldBeOptimized(const char *pattern2check);
+
+/* Compile a pattern to be used later
  * Allowed flags are:
  *      - OS_CASE_SENSITIVE
- * Returns 1 on success or 0 on error.
- * The error code is set on reg->error.
+ * Returns 1 on success or 0 on error
+ * The error code is set on reg->error
  */
 int OSMatch_Compile(const char *pattern, OSMatch *reg, int flags)
 {
-    int usstrstr = 0;
-    size_t i = 0;
-    size_t count = 0;
-    int end_of_string = 0;
-
-    char *pt;
-    char *new_str;
-    char *new_str_free = NULL;
-
-
-    /* Checking for references not initialized */
-    if(reg == NULL)
-    {
-        return(0);
+    char *pattern_pcre2 = NULL;
+    int flags_compile = 0;
+    int error = 0;
+    PCRE2_SIZE erroroffset = 0;
+    size_t pattern_len = 0UL;
+    char first_char, last_char;
+
+    /* Check for references not initialized */
+    if (reg == NULL) {
+        return (0);
     }
 
-
-    /* Initializing OSRegex structure */
+    /* Initialize OSMatch structure */
     reg->error = 0;
-    reg->patterns = NULL;
-    reg->size = NULL;
-    reg->match_fp = NULL;
-
+    reg->regex = NULL;
+    reg->match_data = NULL;
+    reg->pattern_len = 0UL;
+    reg->pattern = NULL;
+    reg->exec_function = NULL;
 
     /* The pattern can't be null */
-    if(pattern == NULL)
-    {
+    if (pattern == NULL) {
         reg->error = OS_REGEX_PATTERN_NULL;
         goto compile_error;
     }
 
-
     /* Maximum size of the pattern */
-    if(strlen(pattern) > OS_PATTERN_MAXSIZE)
-    {
+    pattern_len = strlen(pattern);
+    if (pattern_len > OS_PATTERN_MAXSIZE) {
         reg->error = OS_REGEX_MAXSIZE;
         goto compile_error;
     }
 
-
-    /* Duping the pattern for our internal work */
-    new_str = strdup(pattern);
-    if(!new_str)
-    {
-        reg->error = OS_REGEX_OUTOFMEMORY;
-        goto compile_error;
-    }
-    new_str_free = new_str;
-    pt = new_str;
-
-
-
-    /* Getting the number of sub patterns */
-    while(*pt != '\0')
-    {
-        /* The pattern must be always lower case if
-         * case sensitive is set
-         */
-        if(!(flags & OS_CASE_SENSITIVE))
-        {
-            *pt = (char) charmap[(uchar)*pt];
-        }
-
-        /* Number of sub patterns */
-        if(*pt == OR)
-        {
-            count++;
-        }
-        else if(*pt == -29)
-        {
-            usstrstr = 1;
+    if (pattern_len == 0) {
+        reg->exec_function = OSMatch_Execute_true;
+        return (1);
+    } else if (OSMatch_CouldBeOptimized(pattern)) {
+        first_char = pattern[0];
+        last_char = pattern[pattern_len - 1];
+
+        if (first_char == '^') {
+            if (last_char == '$') {
+                reg->pattern = strdup(&pattern[1]);
+                reg->pattern_len = pattern_len - 2;
+                reg->pattern[reg->pattern_len] = '\0';
+                if (flags & OS_CASE_SENSITIVE) {
+                    reg->exec_function = OSMatch_Execute_strcmp;
+                } else {
+                    reg->exec_function = OSMatch_Execute_strcasecmp;
+                }
+                return (1);
+            } else {
+                reg->pattern = strdup(&pattern[1]);
+                reg->pattern_len = pattern_len - 1;
+                if (flags & OS_CASE_SENSITIVE) {
+                    reg->exec_function = OSMatch_Execute_strncmp;
+                } else {
+                    reg->exec_function = OSMatch_Execute_strncasecmp;
+                }
+                return (1);
+            }
+        } else {
+            if (last_char == '$') {
+                reg->pattern = strdup(pattern);
+                reg->pattern_len = pattern_len - 1;
+                reg->pattern[reg->pattern_len] = '\0';
+                if (flags & OS_CASE_SENSITIVE) {
+                    reg->exec_function = OSMatch_Execute_strrcmp;
+                } else {
+                    reg->exec_function = OSMatch_Execute_strrcasecmp;
+                }
+                return (1);
+            }
         }
-        pt++;
     }
 
+    reg->exec_function = OSMatch_Execute_pcre2_match;
 
-    /* For the last pattern */
-    count++;
-    reg->patterns = (char **) calloc(count +1, sizeof(char *));
-    reg->size = (size_t *) calloc(count +1, sizeof(size_t));
-    reg->match_fp = (int (**)(const char *, const char *, size_t, size_t)) calloc(count +1, sizeof(void *));
+    /* Ossec pattern conversion */
+    if (OSRegex_Convert(pattern, &pattern_pcre2, OS_CONVERT_MATCH) == 0) {
+        reg->error = OS_REGEX_BADREGEX;
+        goto compile_error;
+    }
 
+    flags_compile |= PCRE2_UTF;
+    flags_compile |= PCRE2_NO_UTF_CHECK;
+    flags_compile |= (flags & OS_CASE_SENSITIVE) ? 0 : PCRE2_CASELESS;
+    reg->regex = pcre2_compile((PCRE2_SPTR)pattern_pcre2, PCRE2_ZERO_TERMINATED, flags_compile,
+                               &error, &erroroffset, NULL);
+    if (reg->regex == NULL) {
+        reg->error = OS_REGEX_BADREGEX;
+        goto compile_error;
+    }
 
-    /* Memory allocation error check */
-    if(!reg->patterns || !reg->size || !reg->match_fp)
-    {
+    reg->match_data = pcre2_match_data_create_from_pattern(reg->regex, NULL);
+    if (reg->match_data == NULL) {
         reg->error = OS_REGEX_OUTOFMEMORY;
         goto compile_error;
     }
 
-
-    /* Initializing each sub pattern */
-    for(i = 0; i<=count; i++)
-    {
-        reg->patterns[i] = NULL;
-        reg->match_fp[i] = NULL;
-        reg->size[i] = 0;
+#ifdef USE_PCRE2_JIT
+    /* Just In Time compilation for faster execution */
+    if (pcre2_jit_compile(reg->regex, PCRE2_JIT_COMPLETE) != 0) {
+        reg->error = OS_REGEX_NO_JIT;
+        goto compile_error;
     }
-    i = 0;
-
+#endif
 
-    /* Reassigning pt to the beginning of the string */
-    pt = new_str;
-
-
-    /* Getting the sub patterns */
-    do
-    {
-        if((*pt == OR) || (*pt == '\0'))
-        {
-            if(*pt == '\0')
-            {
-                end_of_string = 1;
-            }
-
-            *pt = '\0';
-
-            /* Dupping the string */
-            if(*new_str == BEGINREGEX)
-                reg->patterns[i] = strdup(new_str +1);
-            else
-                reg->patterns[i] = strdup(new_str);
-
-            /* Memory error */
-            if(!reg->patterns[i])
-            {
-                reg->error = OS_REGEX_OUTOFMEMORY;
-                goto compile_error;
-            }
-
-
-            /* If the string has ^ and $ */
-            if((*new_str == BEGINREGEX) && (*(pt -1) == ENDREGEX))
-            {
-                reg->match_fp[i] = _os_strcmp;
-                reg->size[i] = strlen(reg->patterns[i]) -1;
-                reg->patterns[i][reg->size[i]] = '\0';
-            }
-            else if(strlen(new_str) == 0)
-            {
-                reg->match_fp[i] = _os_strmatch;
-                reg->size[i] = 0;
-            }
-
-            /* String only has $ */
-            else if(*(pt -1) == ENDREGEX)
-            {
-                reg->match_fp[i] = _os_strcmp_last;
-                reg->size[i] = strlen(reg->patterns[i]) -1;
-                reg->patterns[i][reg->size[i]] = '\0';
-            }
-
-            /* If string starts with ^, use strncmp */
-            else if(*new_str == BEGINREGEX)
-            {
-                reg->match_fp[i] = _os_strncmp;
-                reg->size[i] = strlen(reg->patterns[i]);
-            }
-
-            else if(usstrstr == 1)
-            {
-                reg->match_fp[i] = _os_strstr;
-                reg->size[i] = strlen(reg->patterns[i]);
-            }
-
-            else
-            {
-                reg->match_fp[i] = _OS_Match;
-                reg->size[i] = strlen(reg->patterns[i]);
-            }
-
-            if(end_of_string)
-            {
-                break;
-            }
-
-            new_str = ++pt;
-            i++;
-            continue;
-        }
-        pt++;
-
-    }while(!end_of_string);
-
-
-    /* Success return */
-    free(new_str_free);
-    return(1);
+    free(pattern_pcre2);
 
+    return (1);
 
+compile_error:
     /* Error handling */
-    compile_error:
 
-    if(new_str_free)
-    {
-        free(new_str_free);
+    if (pattern_pcre2) {
+        free(pattern_pcre2);
     }
 
     OSMatch_FreePattern(reg);
 
-    return(0);
+    return (0);
 }
 
+/* Test pattern2check against a fixed expression describing a "simple" pattern:
+ * an optional leading '^', then one or more characters other than '$', '|'
+ * and '^', then an optional trailing '$'.
+ * Returns the result of OS_Pcre2() for that test unchanged.
+ */
+int OSMatch_CouldBeOptimized(const char *pattern2check)
+{
+    int is_simple;
+
+    is_simple = OS_Pcre2("^\\^?[^$|^]+\\$?$", pattern2check);
+
+    return is_simple;
+}
 
-/* EOF */