03277a90b1c42a3656c1ea8741e8771a7bb72e44
[ossec-hids.git] / src / os_regex / os_regex_execute.c
1 /*   $OSSEC, os_regex.c, v0.3, 2005/04/05, Daniel B. Cid$   */
2
3 /* Copyright (C) 2009 Trend Micro Inc.
4  * All right reserved.
5  *
6  * This program is a free software; you can redistribute it
7  * and/or modify it under the terms of the GNU General Public
8  * License (version 2) as published by the FSF - Free Software
9  * Foundation
10  */
11
12
13 #include <stdio.h>
14 #include <string.h>
15 #include <stdlib.h>
16
17 #include "os_regex.h"
18 #include "os_regex_internal.h"
19
20
21 /** Internal prototypes: _OS_Regex() is the matcher defined further down in this file; OSRegex_Execute() calls it once per compiled sub pattern and receives a char pointer (NULL when there is no match). **/
22 char *_OS_Regex(char *pattern, char *str, char **prts_closure,
23               char **prts_str, int flags);
24
25
26
27 /** char *OSRegex_Execute(char *str, OSRegex *reg) v0.1
28  * Compare an already compiled regular expression with
29  * a not NULL string.
30  * Returns the end of the string on success or NULL on error.
31  * The error code is set on reg->error.
32  */
33 char *OSRegex_Execute(char *str, OSRegex *reg)
34 {
35     char *ret;
36     int i = 0;
37     
38     /* The string can't be NULL */
39     if(str == NULL)
40     {
41         reg->error = OS_REGEX_STR_NULL;
42         return(0);
43     }
44
45
46     /* If we need the sub strings */
47     if(reg->prts_closure)
48     {
49         int j = 0, k = 0, str_char = 0;
50
51         /* Looping on all sub patterns */
52         while(reg->patterns[i])
53         {
54             /* Cleaning the prts_str */
55             while(reg->prts_closure[i][j])
56             {
57                 reg->prts_str[i][j] = NULL;
58                 j++;
59             }
60
61             if((ret = _OS_Regex(reg->patterns[i], str, reg->prts_closure[i],
62                         reg->prts_str[i], reg->flags[i])))
63             {
64                 j = 0;
65
66                 /* We must always have the open and the close */
67                 while(reg->prts_str[i][j] && reg->prts_str[i][j+1])
68                 {
69                     str_char = reg->prts_str[i][j+1][0];
70
71                     reg->prts_str[i][j+1][0] = '\0';
72
73                     reg->sub_strings[k] = strdup(reg->prts_str[i][j]);
74                     if(!reg->sub_strings[k])
75                     {
76                         OSRegex_FreeSubStrings(reg);
77                         return(NULL);
78                     }
79                     
80                     /* Set the next one to null */
81                     reg->prts_str[i][j+1][0] = str_char;
82                     k++;
83                     reg->sub_strings[k] = NULL;
84
85                     /* Go two by two */
86                     j+=2;
87                 }
88
89                 return(ret);
90             }
91             i++;
92         }
93
94         return(0);
95
96     }
97    
98     /* If we don't need the sub strings */
99      
100     /* Looping on all sub patterns */
101     while(reg->patterns[i])
102     {
103         if((ret = _OS_Regex(reg->patterns[i], str, NULL, NULL, reg->flags[i]))) 
104         {
105             return(ret);
106         }
107         i++;
108     }
109
110     return(NULL);
111 }    
112
/* PRTS(x): when x points at a character accepted by prts(), advance x
 * by one. The expression itself always evaluates to 1 so it can be
 * chained with &&. x is evaluated more than once and may be modified:
 * pass a plain pointer lvalue only.
 */
#define PRTS(x) ((prts(*(x)) && (x)++) || 1)

/* ENDOFFILE(x): apply PRTS(x), then evaluate to non-zero when x points
 * at the terminating '\0'.
 */
#define ENDOFFILE(x) (PRTS(x) && (*(x) == '\0'))
115
116 /** char *_OS_Regex(char *pattern, char *str, char **prts_closure,
117               char **prts_str, int flags) v0.1
118  * Perform the pattern matching on the pattern/string provided.
119  * Returns r_code (a position in str recorded while matching) on success and NULL on failure.
120  * If prts_closure is set, the parenthesis locations will be
121  * written on prts_str (which must not be NULL)
122  */
123 char *_OS_Regex(char *pattern, char *str, char **prts_closure, 
124               char **prts_str, int flags)
125 {
126     char *r_code = NULL; /* value returned on a match; set to st as the string is consumed */
127
128     int ok_here; /* -1: next pattern element does not fit here; otherwise extra amount to advance next_pt */
129     int _regex_matched = 0; /* 1 while a '+'/'*' element is being repeated */
130
131     int prts_int; /* index used when searching prts_closure */
132
133     char *st = str; /* current position in the string */
134     char *st_error = NULL; /* string position where the current attempt started consuming */
135
136     char *pt = pattern; /* current position in the pattern */
137     char *next_pt; /* look-ahead position in the pattern */
138
139     char *pt_error[4] = {NULL, NULL, NULL, NULL}; /* up to four saved pattern positions to fall back to */
140     char *pt_error_str[4]; /* string positions saved together with pt_error[] */
141
142
143     /* Will loop the whole string, trying to find a match */
144     do
145     {
146         switch(*pt)
147         {
148             case '\0':
149                 if(!(flags & END_SET) || (flags & END_SET && (*st == '\0'))) /* pattern exhausted: match, unless END_SET is on and the string has not ended */
150                     return(r_code);
151                 break;
152
153                 /* If it is a parenthesis do not match against the character */
154             case '(':
155                 /* Find the closure for the parenthesis */
156                 if(prts_closure)
157                 {
158                     prts_int = 0;
159                     while(prts_closure[prts_int])
160                     {
161                         if(prts_closure[prts_int] == pt)
162                         {
163                             prts_str[prts_int] = st; /* record where this parenthesis falls in the string */
164                             break;
165                         }
166                         prts_int++;
167                     }
168                 }
169
170                 pt++;
171                 if(*pt == '\0')
172                 {
173                     if(!(flags & END_SET) || (flags & END_SET && (*st == '\0')))
174                         return(r_code);
175                 }
176                 break;
177         }
178
179         /* If it starts on Backslash (future regex) */
180         if(*pt == BACKSLASH)
181         {
182             if(Regex((uchar)*(pt+1), (uchar)*st))
183             {
184                 next_pt = pt+2;
185                 
186                 /* If we don't have a '+' or '*', we should skip
187                  * searching using this pattern.
188                  */
189                 if(!isPlus(*next_pt))
190                 {
191                     pt = next_pt;
192                     if(!st_error)
193                     {
194                         /* If st_error is not set, we need to set it here.
195                          * In case of error in the matching later, we need
196                          * to continue from here (it will be incremented in
197                          * the while loop)
198                          */
199                         st_error = st;
200                     }
201                     r_code = st;
202                     continue; /* in a do/while this jumps to the loop condition, which advances st */
203                 }
204                 
205                 /* If it is a '*', we need to set the _regex_matched
206                  * for the first pattern even.
207                  */
208                 if(*next_pt == '*')
209                 {
210                     _regex_matched = 1;
211                 }
212
213
214                 /* If our regex matches and we have a "+" set, we will
215                  * try the next one to see if it matches. If yes, we 
216                  * can jump to it, but saving our currently location
217                  * in case of error.
218                  * _regex_matched will set set to true after the first
219                  * round of matches
220                  */
221                 if(_regex_matched)
222                 {
223                     next_pt++;
224                     ok_here = -1;
225
226                     /* If it is a parenthesis, jump to the next and write
227                      * the location down if 'ok_here >= 0'
228                      */
229                     if(prts(*next_pt))
230                     {
231                         next_pt++;
232                     }
233                     
234                     if(*next_pt == '\0')
235                     {
236                         ok_here = 1;
237                     }
238                     else if(*next_pt == BACKSLASH)
239                     {
240                         if(Regex((uchar)*(next_pt+1), (uchar)*st))
241                         {
242                             /* If the next one does not have
243                              * a '+' or '*', we can set it as
244                              * being read and continue.
245                              */
246                             if(!isPlus(*(next_pt+2)))
247                             {
248                                 ok_here = 2;
249                             }
250                             else
251                             {
252                                 ok_here = 0;
253                             }
254                         }
255                     }
256                     else if(*next_pt == charmap[(uchar)*st])
257                     {
258                         _regex_matched = 0;
259                         ok_here = 1;
260                     }
261
262                     /* If the next character matches in here */
263                     if(ok_here >= 0)
264                     {
265                         if(prts_closure && prts(*(next_pt - 1)))
266                         {
267                             prts_int = 0;
268                             while(prts_closure[prts_int])
269                             {
270                                 if(prts_closure[prts_int] == (next_pt -1))
271                                 {
272                                     if(*(st+1) == '\0') /* last character of the string: mark the terminator instead */
273                                         prts_str[prts_int] = st+1;
274                                     else    
275                                         prts_str[prts_int] = st;
276                                     break;
277                                 }
278                                 prts_int++;
279                             }
280                         }
281
282
283                         /* If next_pt == \0, return the r_code */
284                         if(*next_pt == '\0')
285                         {
286                             continue;
287                         }
288
289                             
290                         /* Each "if" will increment the amount
291                          * necessary for the next pattern in ok_here
292                          */
293                         if(ok_here) 
294                             next_pt+=ok_here;
295                         
296                         
297                         if(!pt_error[0]) /* save the current position in the first free fallback slot; nothing is saved when all four are in use */
298                         {
299                             pt_error[0] = pt;
300                             pt_error_str[0] = st;
301                         }
302                         else if(!pt_error[1])
303                         {
304                             pt_error[1] = pt;
305                             pt_error_str[1] = st;
306                         }
307                         else if(!pt_error[2])
308                         {
309                             pt_error[2] = pt;
310                             pt_error_str[2] = st;
311
312                         }
313                         else if(!pt_error[3])
314                         {
315                             pt_error[3] = pt;
316                             pt_error_str[3] = st;
317                         }
318
319                         pt = next_pt;
320                     }
321                 }
322                 else
323                 {
324                     next_pt++;
325
326                     /* If it is a parenthesis, mark the location */
327                     if(prts_closure && prts(*next_pt))
328                     {
329                         prts_int = 0;
330                         while(prts_closure[prts_int])
331                         {
332                             if(prts_closure[prts_int] == next_pt)
333                             {
334                                 if(*(st+1) == '\0')
335                                     prts_str[prts_int] = st +1;
336                                 else
337                                     prts_str[prts_int] = st;
338                                 break;
339                             }
340                             prts_int++;
341                         }
342                         next_pt++;
343                     }
344
345                     _regex_matched = 1;
346                 }
347                 
348                 r_code = st;
349                 continue;
350             }
351             
352             else if((*(pt+3) == '\0') && (_regex_matched == 1)&&(r_code)) /* element stopped matching, but it already matched and nothing follows it in the pattern. NOTE(review): reads pt[3]; if the pattern can end right after the two-character element this looks past the terminator - confirm */
353             {
354                 r_code = st;
355                 if(!(flags & END_SET) || (flags & END_SET && (*st == '\0')))
356                     return(r_code);
357             }
358             
359             /* If we didn't match regex, but _regex_matched == 1, jump
360              * to the next available pattern
361              */
362             else if((*(pt+2) == '+') && (_regex_matched == 1))
363             {
364                 pt+=3;
365                 st--; /* cancel the advance done by the loop condition so this character is retried against the next element */
366                 _regex_matched = 0;
367                 continue;
368             }
369             /* We may not match with '*' */
370             else if(*(pt+2) == '*')
371             {
372                 pt+=3;
373                 st--; /* same retry as above; NOTE(review): when st == str this forms str-1 and stores it in r_code - confirm this is intended */
374                 r_code = st;
375                 _regex_matched = 0;
376                 continue;
377             }
378
379             _regex_matched = 0;
380         }
381         else if(*pt == charmap[(uchar)*st]) /* literal pattern character: compared against the charmap entry for the string character */
382         {
383             pt++;
384             if(!st_error)
385             {
386                 /* If st_error is not set, we need to set it here.
387                  * In case of error in the matching later, we need
388                  * to continue from here (it will be incremented in
389                  * the while loop)
390                  */
391                 st_error = st;
392             }
393             r_code = st;
394             continue;
395         }
396
397         /* Error Handling: nothing matched at st. Resume from the most recently saved fallback (pt_error[3] down to pt_error[0]); with none left, fail if BEGIN_SET is on, otherwise restart the pattern, from st_error when it is set. */
398             if(pt_error[3])
399             {
400                 pt = pt_error[3];
401                 st = pt_error_str[3];
402                 pt_error[3] = NULL;
403                 continue;
404             }
405             else if(pt_error[2])
406             {
407                 pt = pt_error[2];
408                 st = pt_error_str[2];
409                 pt_error[2] = NULL;
410                 continue;
411             }
412             else if(pt_error[1])
413             {
414                 pt = pt_error[1];
415                 st = pt_error_str[1];
416                 pt_error[1] = NULL;
417                 continue;
418             }
419             else if(pt_error[0])
420             {
421                 pt = pt_error[0];
422                 st = pt_error_str[0];
423                 pt_error[0] = NULL;
424                 continue;
425             }
426             else if(flags & BEGIN_SET)
427             {
428                 /* If we get an error and the "^" option is
429                  * set, we can return "not matched" in here.
430                  */
431                 return(NULL);
432             }
433             else if(st_error)
434             {
435                 st = st_error;
436                 st_error = NULL;
437             }
438             pt = pattern; /* start over from the beginning of the pattern */
439             r_code = NULL;
440         
441     }while(*(++st) != '\0'); /* NOTE(review): the body runs once even when str is empty, after which this reads str[1] - confirm callers never pass "" */
442
443
444     /* Matching for a possible last parenthesis */
445     if(prts_closure)
446     {
447         while(!prts(*pt) && *pt != '\0')
448         {
449             if(*pt == BACKSLASH && *(pt+2) == '*') /* skip elements marked '*': they may match nothing */
450                 pt+=3;
451             else
452                 break;    
453         }
454         
455         if(prts(*pt))
456         {
457             prts_int = 0;
458             while(prts_closure[prts_int])
459             {
460                 if(prts_closure[prts_int] == pt)
461                 {
462                     prts_str[prts_int] = st;
463                     break;
464                 }
465                 prts_int++;
466             }
467         }
468     }
469
470     /* Cleaning up: the string is exhausted. Accept when the rest of the pattern is empty (after an optional parenthesis), or is a '+'/'*' element that already matched, optionally followed by one '*' element, or is a single '*' element. The tests below advance pt as they go. */
471     if(ENDOFFILE(pt) || 
472         (*pt == BACKSLASH && 
473         _regex_matched && 
474         (pt+=2) && 
475         isPlus(*pt) && 
476         (pt++) &&
477         ((ENDOFFILE(pt)) || 
478         ((*pt == BACKSLASH) && 
479         (pt+=2) && 
480         (*pt == '*') && 
481         (pt++) && 
482         (ENDOFFILE(pt)) ))) ||
483         (*pt == BACKSLASH &&
484         (pt+=2) &&
485         (*pt == '*') &&
486         (pt++) &&
487         ENDOFFILE(pt))
488         ) 
489     {
490         return(r_code);
491     }
492    
493     return(NULL);
494 }
495
496
497 /* EOF */