Imported Upstream version 2.7
[ossec-hids.git] / src / os_regex / os_regex_execute.c
1 /*   $OSSEC, os_regex.c, v0.3, 2005/04/05, Daniel B. Cid$   */
2
/* Copyright (C) 2009 Trend Micro Inc.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it
 * and/or modify it under the terms of the GNU General Public
 * License (version 2) as published by the FSF - Free Software
 * Foundation
 */
11
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <stdlib.h>
16
17 #include "os_regex.h"
18 #include "os_regex_internal.h"
19
20
21 /** Internal prototypes **/
22 char *_OS_Regex(char *pattern, char *str, char **prts_closure,
23               char **prts_str, int flags);
24
25
26
27 /** char *OSRegex_Execute(char *str, OSRegex *reg) v0.1
28  * Compare an already compiled regular expression with
29  * a not NULL string.
30  * Returns the end of the string on success or NULL on error.
31  * The error code is set on reg->error.
32  */
33 char *OSRegex_Execute(char *str, OSRegex *reg)
34 {
35     char *ret;
36     int i = 0;
37
38     /* The string can't be NULL */
39     if(str == NULL)
40     {
41         reg->error = OS_REGEX_STR_NULL;
42         return(0);
43     }
44
45
46     /* If we need the sub strings */
47     if(reg->prts_closure)
48     {
49         int j = 0, k = 0, str_char = 0;
50
51         /* Looping on all sub patterns */
52         while(reg->patterns[i])
53         {
54             /* Cleaning the prts_str */
55             j = 0;
56             while(reg->prts_closure[i][j])
57             {
58                 reg->prts_str[i][j] = NULL;
59                 j++;
60             }
61
62             if((ret = _OS_Regex(reg->patterns[i], str, reg->prts_closure[i],
63                         reg->prts_str[i], reg->flags[i])))
64             {
65                 j = 0;
66
67                 /* We must always have the open and the close */
68                 while(reg->prts_str[i][j] && reg->prts_str[i][j+1])
69                 {
70                     str_char = reg->prts_str[i][j+1][0];
71
72                     reg->prts_str[i][j+1][0] = '\0';
73
74                     reg->sub_strings[k] = strdup(reg->prts_str[i][j]);
75                     if(!reg->sub_strings[k])
76                     {
77                         OSRegex_FreeSubStrings(reg);
78                         return(NULL);
79                     }
80
81                     /* Set the next one to null */
82                     reg->prts_str[i][j+1][0] = str_char;
83                     k++;
84                     reg->sub_strings[k] = NULL;
85
86                     /* Go two by two */
87                     j+=2;
88                 }
89
90                 return(ret);
91             }
92             i++;
93         }
94
95         return(0);
96
97     }
98
99     /* If we don't need the sub strings */
100
101     /* Looping on all sub patterns */
102     while(reg->patterns[i])
103     {
104         if((ret = _OS_Regex(reg->patterns[i], str, NULL, NULL, reg->flags[i])))
105         {
106             return(ret);
107         }
108         i++;
109     }
110
111     return(NULL);
112 }
113
/* PRTS(x): when the character x points at is accepted by prts() (defined
 * outside this file; used here for parentheses), step x past it.
 * The expression itself always evaluates to 1.
 * x must be a modifiable pointer and is evaluated more than once.
 */
#define PRTS(x) ((prts(*(x)) && (x)++) || 1)
/* ENDOFFILE(x): true when x, after PRTS(x) skipped at most one such
 * character, points at the terminating '\0'. x may have been advanced
 * even when the result is false.
 */
#define ENDOFFILE(x) (PRTS(x) && (*(x) == '\0'))
116
117 /** int _OS_Regex(char *pattern, char *str, char **prts_closure,
118               char **prts_str, int flags) v0.1
119  * Perform the pattern matching on the pattern/string provided.
120  * Returns 1 on success and 0 on failure.
121  * If prts_closure is set, the parenthesis locations will be
122  * written on prts_str (which must not be NULL)
123  */
124 char *_OS_Regex(char *pattern, char *str, char **prts_closure,
125               char **prts_str, int flags)
126 {
127     char *r_code = NULL;
128
129     int ok_here;
130     int _regex_matched = 0;
131
132     int prts_int;
133
134     char *st = str;
135     char *st_error = NULL;
136
137     char *pt = pattern;
138     char *next_pt;
139
140     char *pt_error[4] = {NULL, NULL, NULL, NULL};
141     char *pt_error_str[4];
142
143
144     /* Will loop the whole string, trying to find a match */
145     do
146     {
147         switch(*pt)
148         {
149             case '\0':
150                 if(!(flags & END_SET) || (flags & END_SET && (*st == '\0')))
151                     return(r_code);
152                 break;
153
154                 /* If it is a parenthesis do not match against the character */
155             case '(':
156                 /* Find the closure for the parenthesis */
157                 if(prts_closure)
158                 {
159                     prts_int = 0;
160                     while(prts_closure[prts_int])
161                     {
162                         if(prts_closure[prts_int] == pt)
163                         {
164                             prts_str[prts_int] = st;
165                             break;
166                         }
167                         prts_int++;
168                     }
169                 }
170
171                 pt++;
172                 if(*pt == '\0')
173                 {
174                     if(!(flags & END_SET) || (flags & END_SET && (*st == '\0')))
175                         return(r_code);
176                 }
177                 break;
178         }
179
180         /* If it starts on Backslash (future regex) */
181         if(*pt == BACKSLASH)
182         {
183             if(Regex((uchar)*(pt+1), (uchar)*st))
184             {
185                 next_pt = pt+2;
186
187                 /* If we don't have a '+' or '*', we should skip
188                  * searching using this pattern.
189                  */
190                 if(!isPlus(*next_pt))
191                 {
192                     pt = next_pt;
193                     if(!st_error)
194                     {
195                         /* If st_error is not set, we need to set it here.
196                          * In case of error in the matching later, we need
197                          * to continue from here (it will be incremented in
198                          * the while loop)
199                          */
200                         st_error = st;
201                     }
202                     r_code = st;
203                     continue;
204                 }
205
206                 /* If it is a '*', we need to set the _regex_matched
207                  * for the first pattern even.
208                  */
209                 if(*next_pt == '*')
210                 {
211                     _regex_matched = 1;
212                 }
213
214
215                 /* If our regex matches and we have a "+" set, we will
216                  * try the next one to see if it matches. If yes, we
217                  * can jump to it, but saving our currently location
218                  * in case of error.
219                  * _regex_matched will set set to true after the first
220                  * round of matches
221                  */
222                 if(_regex_matched)
223                 {
224                     next_pt++;
225                     ok_here = -1;
226
227                     /* If it is a parenthesis, jump to the next and write
228                      * the location down if 'ok_here >= 0'
229                      */
230                     if(prts(*next_pt))
231                     {
232                         next_pt++;
233                     }
234
235                     if(*next_pt == '\0')
236                     {
237                         ok_here = 1;
238                     }
239                     else if(*next_pt == BACKSLASH)
240                     {
241                         if(Regex((uchar)*(next_pt+1), (uchar)*st))
242                         {
243                             /* If the next one does not have
244                              * a '+' or '*', we can set it as
245                              * being read and continue.
246                              */
247                             if(!isPlus(*(next_pt+2)))
248                             {
249                                 ok_here = 2;
250                             }
251                             else
252                             {
253                                 ok_here = 0;
254                             }
255                         }
256                     }
257                     else if(*next_pt == charmap[(uchar)*st])
258                     {
259                         _regex_matched = 0;
260                         ok_here = 1;
261                     }
262
263                     /* If the next character matches in here */
264                     if(ok_here >= 0)
265                     {
266                         if(prts_closure && prts(*(next_pt - 1)))
267                         {
268                             prts_int = 0;
269                             while(prts_closure[prts_int])
270                             {
271                                 if(prts_closure[prts_int] == (next_pt -1))
272                                 {
273                                     if(*(st+1) == '\0')
274                                         prts_str[prts_int] = st+1;
275                                     else
276                                         prts_str[prts_int] = st;
277                                     break;
278                                 }
279                                 prts_int++;
280                             }
281                         }
282
283
284                         /* If next_pt == \0, return the r_code */
285                         if(*next_pt == '\0')
286                         {
287                             continue;
288                         }
289
290
291                         /* Each "if" will increment the amount
292                          * necessary for the next pattern in ok_here
293                          */
294                         if(ok_here)
295                             next_pt+=ok_here;
296
297
298                         if(!pt_error[0])
299                         {
300                             pt_error[0] = pt;
301                             pt_error_str[0] = st;
302                         }
303                         else if(!pt_error[1])
304                         {
305                             pt_error[1] = pt;
306                             pt_error_str[1] = st;
307                         }
308                         else if(!pt_error[2])
309                         {
310                             pt_error[2] = pt;
311                             pt_error_str[2] = st;
312
313                         }
314                         else if(!pt_error[3])
315                         {
316                             pt_error[3] = pt;
317                             pt_error_str[3] = st;
318                         }
319
320                         pt = next_pt;
321                     }
322                 }
323                 else
324                 {
325                     next_pt++;
326
327                     /* If it is a parenthesis, mark the location */
328                     if(prts_closure && prts(*next_pt))
329                     {
330                         prts_int = 0;
331                         while(prts_closure[prts_int])
332                         {
333                             if(prts_closure[prts_int] == next_pt)
334                             {
335                                 if(*(st+1) == '\0')
336                                     prts_str[prts_int] = st +1;
337                                 else
338                                     prts_str[prts_int] = st;
339                                 break;
340                             }
341                             prts_int++;
342                         }
343                         next_pt++;
344                     }
345
346                     _regex_matched = 1;
347                 }
348
349                 r_code = st;
350                 continue;
351             }
352
353             else if((*(pt+3) == '\0') && (_regex_matched == 1)&&(r_code))
354             {
355                 r_code = st;
356                 if(!(flags & END_SET) || (flags & END_SET && (*st == '\0')))
357                     return(r_code);
358             }
359
360             /* If we didn't match regex, but _regex_matched == 1, jump
361              * to the next available pattern
362              */
363             else if((*(pt+2) == '+') && (_regex_matched == 1))
364             {
365                 pt+=3;
366                 st--;
367                 _regex_matched = 0;
368                 continue;
369             }
370             /* We may not match with '*' */
371             else if(*(pt+2) == '*')
372             {
373                 pt+=3;
374                 st--;
375                 r_code = st;
376                 _regex_matched = 0;
377                 continue;
378             }
379
380             _regex_matched = 0;
381         }
382         else if(*pt == charmap[(uchar)*st])
383         {
384             pt++;
385             if(!st_error)
386             {
387                 /* If st_error is not set, we need to set it here.
388                  * In case of error in the matching later, we need
389                  * to continue from here (it will be incremented in
390                  * the while loop)
391                  */
392                 st_error = st;
393             }
394             r_code = st;
395             continue;
396         }
397
398         /* Error Handling */
399             if(pt_error[3])
400             {
401                 pt = pt_error[3];
402                 st = pt_error_str[3];
403                 pt_error[3] = NULL;
404                 continue;
405             }
406             else if(pt_error[2])
407             {
408                 pt = pt_error[2];
409                 st = pt_error_str[2];
410                 pt_error[2] = NULL;
411                 continue;
412             }
413             else if(pt_error[1])
414             {
415                 pt = pt_error[1];
416                 st = pt_error_str[1];
417                 pt_error[1] = NULL;
418                 continue;
419             }
420             else if(pt_error[0])
421             {
422                 pt = pt_error[0];
423                 st = pt_error_str[0];
424                 pt_error[0] = NULL;
425                 continue;
426             }
427             else if(flags & BEGIN_SET)
428             {
429                 /* If we get an error and the "^" option is
430                  * set, we can return "not matched" in here.
431                  */
432                 return(NULL);
433             }
434             else if(st_error)
435             {
436                 st = st_error;
437                 st_error = NULL;
438             }
439             pt = pattern;
440             r_code = NULL;
441
442     }while(*(++st) != '\0');
443
444
445     /* Matching for a possible last parenthesis */
446     if(prts_closure)
447     {
448         while(!prts(*pt) && *pt != '\0')
449         {
450             if(*pt == BACKSLASH && *(pt+2) == '*')
451                 pt+=3;
452             else
453                 break;
454         }
455
456         if(prts(*pt))
457         {
458             prts_int = 0;
459             while(prts_closure[prts_int])
460             {
461                 if(prts_closure[prts_int] == pt)
462                 {
463                     prts_str[prts_int] = st;
464                     break;
465                 }
466                 prts_int++;
467             }
468         }
469     }
470
471     /* Cleaning up */
472     if(ENDOFFILE(pt) ||
473         (*pt == BACKSLASH &&
474         _regex_matched &&
475         (pt+=2) &&
476         isPlus(*pt) &&
477         (pt++) &&
478         ((ENDOFFILE(pt)) ||
479         ((*pt == BACKSLASH) &&
480         (pt+=2) &&
481         (*pt == '*') &&
482         (pt++) &&
483         (ENDOFFILE(pt)) ))) ||
484         (*pt == BACKSLASH &&
485         (pt+=2) &&
486         (*pt == '*') &&
487         (pt++) &&
488         ENDOFFILE(pt))
489         )
490     {
491         return(r_code);
492     }
493
494     return(NULL);
495 }
496
497
498 /* EOF */