1 /* Copyright (C) 2009 Trend Micro Inc.
4 * This program is a free software; you can redistribute it
5 * and/or modify it under the terms of the GNU General Public
6 * License (version 2) as published by the FSF - Free Software
10 #include "cleanevent.h"
13 #include "os_regex/os_regex.h"
14 #include "analysisd.h"
18 /* Maps a month number (int, 0 = Jan) to its three-letter abbreviation (char *) */
19 static const char *(month[]) = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
20 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
24 /* Format a received message in the Eventinfo structure */
25 int OS_CleanMSG(char *msg, Eventinfo *lf)
31 /* The message is formatted in the following way:
32 * id:location:message.
35 /* Ignore the id of the message in here */
38 /* Set pieces as the message */
39 pieces = strchr(msg, ':');
41 merror(FORMAT_ERROR, ARGV0);
45 /* Is this from an agent? */
47 { /* look past '->' for the first ':' */
48 pieces = strchr(strstr(msg, "->"), ':');
51 merror(FORMAT_ERROR, ARGV0);
59 os_strdup(msg, lf->location);
61 /* Get the log length */
62 loglen = strlen(pieces) + 1;
64 /* Assign the values in the structure (lf->full_log) */
65 os_malloc((2 * loglen) + 1, lf->full_log);
67 /* Set the whole message at full_log */
68 strncpy(lf->full_log, pieces, loglen);
70 /* Log is the one used for parsing in the decoders and rules */
71 lf->log = lf->full_log + loglen;
72 strncpy(lf->log, pieces, loglen);
74 /* check if month contains an umlaut and repair
75 * umlauts are non-ASCII and use 2 slots in the char array
76 * repair to only one slot so we can detect the correct date format in the next step
79 if (pieces[1] == (char) 195) {
80 if (pieces[2] == (char) 164) {
88 /* Check for the syslog date format
89 * ( ex: Dec 29 10:00:01
90 * or 2015-04-16 21:51:02,805 for proftpd 1.3.5
91 * or 2007-06-14T15:48:55-04:00 for syslog-ng isodate
92 * or 2007-06-14T15:48:55.3352-04:00 for syslog-ng isodate with up to 6 optional fraction of a second
93 * or 2009-05-22T09:36:46.214994-07:00 for rsyslog
94 * or 2015 Dec 29 10:00:01 )
97 ( /* ex: Dec 29 10:00:01 */
100 (pieces[6] == ' ') &&
101 (pieces[9] == ':') &&
102 (pieces[12] == ':') &&
103 (pieces[15] == ' ') && (lf->log += 16)
106 ( /* ex: 2015-04-16 21:51:02,805 */
108 (pieces[4] == '-') &&
109 (pieces[7] == '-') &&
110 (pieces[10] == ' ') &&
111 (pieces[13] == ':') &&
112 (pieces[16] == ':') &&
113 (pieces[19] == ',') &&
119 (pieces[4] == '-') &&
120 (pieces[7] == '-') &&
121 (pieces[10] == 'T') &&
122 (pieces[13] == ':') &&
123 (pieces[16] == ':') &&
124 ( /* ex: 2007-06-14T15:48:55-04:00 */
126 (pieces[22] == ':') &&
127 (pieces[25] == ' ') && (lf->log += 26)
130 /* ex: 2007-06-14T15:48:55.3-04:00 or 2009-05-22T09:36:46,214994-07:00 */
133 (pieces[19] == '.') || (pieces[19] == ',')
137 ( (pieces[24] == ':') && (lf->log += 27) ) ||
138 ( (pieces[25] == ':') && (lf->log += 28) ) ||
139 ( (pieces[26] == ':') && (lf->log += 29) ) ||
140 ( (pieces[27] == ':') && (lf->log += 30) ) ||
141 ( (pieces[28] == ':') && (lf->log += 31) ) ||
142 ( (pieces[29] == ':') && (lf->log += 32) )
148 ( /* ex: 2015 Dec 29 10:00:01 */
150 (isdigit(pieces[0])) &&
151 (pieces[4] == ' ') &&
152 (pieces[8] == ' ') &&
153 (pieces[11] == ' ') &&
154 (pieces[14] == ':') &&
155 (pieces[17] == ':') &&
156 (pieces[20] == ' ') && (lf->log += 21)
159 /* Check for an extra space in here */
160 if (*lf->log == ' ') {
166 pieces = lf->hostname = lf->log;
169 /* Check for a valid hostname */
170 while (isValidChar(*pieces) == 1) {
174 /* Check if it is a syslog without hostname (common on Solaris) */
175 if (*pieces == ':' && pieces[1] == ' ') {
176 /* Getting solaris 8/9 messages without hostname.
177 * In these cases, the process_name should be there.
178 * http://www.ossec.net/wiki/index.php/Log_Samples_Solaris
180 lf->program_name = lf->hostname;
183 /* End the program name string */
190 /* Extract the hostname */
191 else if (*pieces != ' ') {
192 /* Invalid hostname */
196 /* End the hostname string */
199 /* Move pieces to the beginning of the log message */
203 /* Get program_name */
204 lf->program_name = pieces;
206 /* Extract program_name */
210 * p_name[pid]: [ID xx facility.severity]
211 * auth|security:info p_name:
213 while (isValidChar(*pieces) == 1) {
217 /* Check for the first format: p_name: */
218 if ((*pieces == ':') && (pieces[1] == ' ')) {
223 /* Check for the second format: p_name[pid]: */
224 else if ((*pieces == '[') && (isdigit((int)pieces[1]))) {
227 while (isdigit((int)*pieces)) {
231 if ((*pieces == ']') && (pieces[1] == ':') && (pieces[2] == ' ')) {
234 /* Some systems are not terminating the program name with
235 * a ':'. Working around this in here...
237 else if ((*pieces == ']') && (pieces[1] == ' ')) {
240 /* Fix for some weird log formats */
242 while (isdigit((int)*pieces)) {
246 if (*pieces == '\0') {
250 lf->program_name = NULL;
254 else if ((*pieces == '|') && islower((int)pieces[1])) {
257 /* Remove facility */
258 while (isalnum((int)*pieces)) {
262 if (*pieces == ':') {
263 /* Remove severity */
265 while (isalnum((int)*pieces)) {
269 if (*pieces == ' ') {
271 lf->program_name = pieces;
274 /* Get program name again */
275 while (isValidChar(*pieces) == 1) {
279 /* Check for the first format: p_name: */
280 if ((*pieces == ':') && (pieces[1] == ' ')) {
285 /* Check for the second format: p_name[pid]: */
286 else if ((*pieces == '[') && (isdigit((int)pieces[1]))) {
289 while (isdigit((int)*pieces)) {
293 if ((*pieces == ']') && (pieces[1] == ':') &&
294 (pieces[2] == ' ')) {
302 lf->program_name = NULL;
308 lf->program_name = NULL;
312 lf->program_name = NULL;
316 /* Remove [ID xx facility.severity] */
318 /* Set log after program name */
321 if ((pieces[0] == '[') &&
322 (pieces[1] == 'I') &&
323 (pieces[2] == 'D') &&
324 (pieces[3] == ' ')) {
327 /* Going after the ] */
328 pieces = strchr(pieces, ']');
336 /* Get program name size */
337 if (lf->program_name) {
338 lf->p_name_size = strlen(lf->program_name);
342 /* xferlog date format
343 * Mon Apr 17 18:27:14 2006 1 64.160.42.130
345 else if ((loglen > 28) &&
346 (pieces[3] == ' ') &&
347 (pieces[7] == ' ') &&
348 (pieces[10] == ' ') &&
349 (pieces[13] == ':') &&
350 (pieces[16] == ':') &&
351 (pieces[19] == ' ') &&
352 (pieces[24] == ' ') &&
353 (pieces[26] == ' ')) {
354 /* Move log to the beginning of the message */
358 /* Check for snort date format
359 * ex: 01/28-09:13:16.240702 [**]
361 else if ( (loglen > 24) &&
362 (pieces[2] == '/') &&
363 (pieces[5] == '-') &&
364 (pieces[8] == ':') &&
365 (pieces[11] == ':') &&
366 (pieces[14] == '.') &&
367 (pieces[21] == ' ') ) {
371 /* Check for suricata (new) date format
372 * ex: 01/28/1979-09:13:16.240702 [**]
374 else if ( (loglen > 26) &&
375 (pieces[2] == '/') &&
376 (pieces[5] == '/') &&
377 (pieces[10] == '-') &&
378 (pieces[13] == ':') &&
379 (pieces[16] == ':') &&
380 (pieces[19] == '.') &&
381 (pieces[26] == ' ') ) {
386 /* Check for apache log format */
387 /* [Fri Feb 11 18:06:35 2004] [warn] */
388 else if ( (loglen > 27) &&
389 (pieces[0] == '[') &&
390 (pieces[4] == ' ') &&
391 (pieces[8] == ' ') &&
392 (pieces[11] == ' ') &&
393 (pieces[14] == ':') &&
394 (pieces[17] == ':') &&
395 (pieces[20] == ' ') &&
396 (pieces[25] == ']') ) {
400 /* Check for the osx asl log format.
402 * [Time 2006.12.28 15:53:55 UTC] [Facility auth] [Sender sshd] [PID 483] [Message error: PAM: Authentication failure for username from 192.168.0.2] [Level 3] [UID -2] [GID -2] [Host Hostname]
403 * [Time 2006.11.02 14:02:11 UTC] [Facility auth] [Sender sshd] [PID 856]
404 [Message refused connect from 59.124.44.34] [Level 4] [UID -2] [GID -2]
405 [Host robert-wyatts-emac]
407 else if ((loglen > 26) &&
408 (pieces[0] == '[') &&
409 (pieces[1] == 'T') &&
410 (pieces[5] == ' ') &&
411 (pieces[10] == '.') &&
412 (pieces[13] == '.') &&
413 (pieces[16] == ' ') &&
414 (pieces[19] == ':')) {
415 /* Do not read more than 1 message entry -> log tampering */
416 short unsigned int done_message = 0;
418 /* Remove the date */
421 /* Get the desired values */
422 pieces = strchr(lf->log, '[');
426 /* Get the sender (set to program name) */
427 if ((strncmp(pieces, "Sender ", 7) == 0) &&
428 (lf->program_name == NULL)) {
430 lf->program_name = pieces;
432 /* Get the closing brackets */
433 pieces = strchr(pieces, ']');
437 /* Set program_name size */
438 lf->p_name_size = strlen(lf->program_name);
442 /* Invalid program name */
444 lf->program_name = NULL;
450 else if ((strncmp(pieces, "Message ", 8) == 0) &&
451 (done_message == 0)) {
457 /* Get the closing brackets */
458 pieces = strchr(pieces, ']');
463 /* Invalid log closure */
470 else if (strncmp(pieces, "Host ", 5) == 0) {
472 lf->hostname = pieces;
474 /* Get the closing brackets */
475 pieces = strchr(pieces, ']');
481 /* Invalid hostname */
489 pieces = strchr(pieces, '[');
493 /* Check for squid date format
494 * 1140804070.368 11623
495 * seconds from 00:00:00 1970-01-01 UTC
497 else if ((loglen > 32) &&
498 (pieces[0] == '1') &&
499 (isdigit((int)pieces[1])) &&
500 (isdigit((int)pieces[2])) &&
501 (isdigit((int)pieces[3])) &&
502 (pieces[10] == '.') &&
503 (isdigit((int)pieces[13])) &&
504 (pieces[14] == ' ') &&
505 ((pieces[21] == ' ') || (pieces[22] == ' '))) {
508 /* We need to start at the size of the event */
509 while (*lf->log == ' ') {
514 /* Every message must be in the format
515 * hostname->location or
516 * (agent) ip->location.
519 /* Set hostname for local messages */
520 if (lf->location[0] == '(') {
521 /* Messages from an agent */
522 lf->hostname = lf->location;
523 } else if (lf->hostname == NULL) {
524 lf->hostname = __shost;
527 /* Set up the event data */
529 p = localtime(&c_time);
531 /* Assign hour, day, year and month values */
532 lf->day = p->tm_mday;
533 lf->year = p->tm_year + 1900;
534 strncpy(lf->mon, month[p->tm_mon], 3);
535 snprintf(lf->hour, 9, "%02d:%02d:%02d",
540 /* Set the global hour/weekday */
541 __crt_hour = p->tm_hour;
542 __crt_wday = p->tm_wday;
546 print_out("**Phase 1: Completed pre-decoding.");
547 print_out(" full event: '%s'", lf->full_log);
548 print_out(" hostname: '%s'", lf->hostname);
549 print_out(" program_name: '%s'", lf->program_name);
550 print_out(" log: '%s'", lf->log);