1 /* $OSSEC, os_regex_compile.c, v0.1, 2006/01/02, Daniel B. Cid$ */
3 /* Copyright (C) 2009 Trend Micro Inc.
6 * This program is a free software; you can redistribute it
7 * and/or modify it under the terms of the GNU General Public
8 * License (version 2) as published by the FSF - Free Software
19 #include "os_regex_internal.h"
22 /** int OSRegex_Compile(const char *pattern, OSRegex *reg, int flags) v0.1
23 * Compile a regular expression to be used later.
26 * - OS_RETURN_SUBSTRING
27 * Returns 1 on success or 0 on error.
28 * The error code is set on reg->error.
30 int OSRegex_Compile(const char *pattern, OSRegex *reg, int flags)
34 int end_of_string = 0;
36 unsigned prts_size = 0;
37 unsigned max_prts_size = 0;
41 char *new_str_free = NULL;
44 /* Checking for references not initialized */
51 /* Initializing OSRegex structure */
55 reg->prts_closure = NULL;
57 reg->sub_strings = NULL;
61 /* The pattern can't be null */
64 reg->error = OS_REGEX_PATTERN_NULL;
68 /* Maximum size of the pattern */
69 if(strlen(pattern) > OS_PATTERN_MAXSIZE)
71 reg->error = OS_REGEX_MAXSIZE;
76 /* Duping the pattern for our internal work */
77 new_str = strdup(pattern);
80 reg->error = OS_REGEX_OUTOFMEMORY;
83 new_str_free = new_str;
87 /* Getting the number of sub patterns */
93 /* Giving the new values for each regex */
96 case 'd': *pt = 1;break;
97 case 'w': *pt = 2;break;
98 case 's': *pt = 3;break;
99 case 'p': *pt = 4;break;
100 case '(': *pt = 5;break;
101 case ')': *pt = 6;break;
102 case '\\':*pt = 7;break;
103 case 'D': *pt = 8;break;
104 case 'W': *pt = 9;break;
105 case 'S': *pt = 10;break;
106 case '.': *pt = 11;break;
107 case 't': *pt = 12;break;
108 case '$': *pt = 13;break;
109 case '|': *pt = 14;break;
110 case '<': *pt = 15;break;
112 reg->error = OS_REGEX_BADREGEX;
125 /* Internally, open and closed are the same */
131 /* We only allow one level of parenthesis */
132 if(parenthesis != 0 && parenthesis != 1)
134 reg->error = OS_REGEX_BADPARENTHESIS;
138 /* The pattern must always be lower case unless
139 * case sensitive is set
141 if(!(flags & OS_CASE_SENSITIVE))
143 *pt = (char) charmap[(uchar)*pt];
148 /* Each sub pattern must be closed on parenthesis */
151 reg->error = OS_REGEX_BADPARENTHESIS;
160 /* After the whole pattern is read, the parenthesis must all be closed */
163 reg->error = OS_REGEX_BADPARENTHESIS;
168 /* Allocating the memory for the sub patterns */
170 reg->patterns = (char **) calloc(count +1, sizeof(char *));
171 reg->flags = (int *) calloc(count +1, sizeof(int));
173 /* Memory allocation error check */
174 if(!reg->patterns || !reg->flags)
176 reg->error = OS_REGEX_OUTOFMEMORY;
181 /* For the substrings */
182 if((prts_size > 0) && (flags & OS_RETURN_SUBSTRING))
184 reg->prts_closure = (const char ***) calloc(count +1, sizeof(const char **));
185 reg->prts_str = (const char ***) calloc(count +1, sizeof(const char **));
186 if(!reg->prts_closure || !reg->prts_str)
188 reg->error = OS_REGEX_OUTOFMEMORY;
194 /* Initializing each sub pattern */
195 for(i = 0; i<=count; i++)
197 reg->patterns[i] = NULL;
200 /* The parenthesis closure if set */
201 if(reg->prts_closure)
203 reg->prts_closure[i] = NULL;
204 reg->prts_str[i] = NULL;
210 /* Reassigning pt to the beginning of the string */
214 /* Getting the sub patterns */
217 if((*pt == OR) || (*pt == '\0'))
226 /* If string starts with ^, set the BEGIN SET flag */
227 if(*new_str == BEGINREGEX)
230 reg->flags[i]|=BEGIN_SET;
233 /* If string ends with $, set the END_SET flag */
234 if(*(pt-1) == ENDREGEX)
237 reg->flags[i]|=END_SET;
240 reg->patterns[i] = strdup(new_str);
242 if(!reg->patterns[i])
244 reg->error = OS_REGEX_OUTOFMEMORY;
250 /* Setting the parenthesis closures */
251 /* The parenthesis closure if set */
252 if(reg->prts_closure)
254 unsigned tmp_int = 0;
258 /* search the whole pattern for parenthesis */
261 /* First loop we get the number of parenthesis.
262 * We allocate the memory and loop again setting
263 * the parenthesis closures.
265 tmp_str = reg->patterns[i];
266 while(*tmp_str != '\0')
275 /* Getting the maximum number of parenthesis for
276 * all sub strings. We need that to set up the maximum
277 * number of substrings to be returned.
279 if(max_prts_size < prts_size)
281 max_prts_size = prts_size;
284 /* Allocating the memory */
285 reg->prts_closure[i] = (const char **) calloc(prts_size + 1, sizeof(const char *));
286 reg->prts_str[i] = (const char **) calloc(prts_size + 1, sizeof(const char *));
287 if((reg->prts_closure[i] == NULL)||(reg->prts_str[i] == NULL))
289 reg->error = OS_REGEX_OUTOFMEMORY;
293 /* Next loop to set the closures */
294 tmp_str = reg->patterns[i];
295 while(*tmp_str != '\0')
299 if(tmp_int >= prts_size)
301 reg->error = OS_REGEX_BADPARENTHESIS;
305 /* Setting the closure pointer to the current position in the pattern string */
306 reg->prts_closure[i][tmp_int] = tmp_str;
307 reg->prts_str[i][tmp_int] = NULL;
328 }while(!end_of_string);
330 /* Allocating sub string for the maximum number of parenthesis */
331 reg->sub_strings = (char **) calloc(max_prts_size + 1, sizeof(char *));
332 if(reg->sub_strings == NULL)
334 reg->error = OS_REGEX_OUTOFMEMORY;
351 OSRegex_FreePattern(reg);