apache2/msc_util.c

   1 /*
   2  * ModSecurity for Apache 2.x, http://www.modsecurity.org/
   3  * Copyright (c) 2004-2009 Breach Security, Inc. (http://www.breach.com/)
   4  *
   5  * This product is released under the terms of the General Public Licence,
   6  * version 2 (GPLv2). Please refer to the file LICENSE (included with this
   7  * distribution) which contains the complete text of the licence.
   8  *
   9  * There are special exceptions to the terms and conditions of the GPL
  10  * as it is applied to this software. View the full text of the exception in
  11  * file MODSECURITY_LICENSING_EXCEPTION in the directory of this software
  12  * distribution.
  13  *
  14  * If any of the files related to licensing are missing or if you have any
  15  * other questions related to licensing please contact Breach Security, Inc.
  16  * directly using the email address support@breach.com.
  17  *
  18  */
  19 #include "msc_release.h"
  20 #include "msc_util.h"
  21
  22 #include <ctype.h>
  23 #include <fcntl.h>
  24 #include <stdlib.h>
  25 #include <sys/types.h>
  26 #include <sys/stat.h>
  27
  28 #include <apr_lib.h>
  29
  30 /**
  31  * NOTE: Be careful as these can ONLY be used on static values for X.
  32  * (i.e. VALID_HEX(c++) will NOT work)
  33  */
  34 #define VALID_HEX(X) (((X >= '0')&&(X <= '9')) || ((X >= 'a')&&(X <= 'f')) || ((X >= 'A')&&(X <= 'F')))
  35 #define ISODIGIT(X) ((X >= '0')&&(X <= '7'))
  36
  37 #if (defined(WIN32) || defined(NETWARE))
  38 /** Windows does not define all the octal modes */
  39 #define S_IXOTH 00001
  40 #define S_IWOTH 00002
  41 #define S_IROTH 00004
  42 #define S_IXGRP 00010
  43 #define S_IWGRP 00020
  44 #define S_IRGRP 00040
  45 #define S_IXUSR 00100
  46 #define S_IWUSR 00200
  47 #define S_IRUSR 00400
  48 #define S_ISVTX 01000
  49 #define S_ISGID 02000
  50 #define S_ISUID 04000
  51 #endif /* defined(WIN32 || NETWARE) */
  52
  53 /**
  54  *
  55  */
  56 int parse_boolean(const char *input) {
  57     if (input == NULL) return -1;
  58     if (strcasecmp(input, "on") == 0) return 1;
  59     if (strcasecmp(input, "true") == 0) return 1;
  60     if (strcasecmp(input, "1") == 0) return 1;
  61     if (strcasecmp(input, "off") == 0) return 0;
  62     if (strcasecmp(input, "false") == 0) return 0;
  63     if (strcasecmp(input, "0") == 0) return 0;
  64
  65     return -1;
  66 }
  67
  68 /**
  69  * Parses a string that contains a name-value pair in the form "name=value".
  70  * IMP1 It does not check for whitespace between tokens.
  71  */
  72 int parse_name_eq_value(apr_pool_t *mp, const char *input, char **name, char **value) {
  73     char *p = NULL;
  74
  75     if ((name == NULL)||(value == NULL)) return -1;
  76     if (input == NULL) return 0;
  77
  78     *name = NULL;
  79     *value = NULL;
  80     p = (char *)input;
  81
  82     while((*p != '=')&&(*p != '\0')) p++;
  83     if (*p == '\0') {
  84         *name = (char *)input;
  85         return 1;
  86     }
  87
  88     *name = apr_pstrmemdup(mp, input, p - input);
  89     if (*name == NULL) return -1;
  90     p++;
  91
  92     *value = apr_pstrdup(mp, p);
  93     if (*value == NULL) return -1;
  94
  95     return 1;
  96 }
  97
  98 /**
  99  *
 100  * IMP1 Assumes NUL-terminated
 101  */
 102 char *url_encode(apr_pool_t *mp, char *input, unsigned int input_len, int *changed) {
 103     char *rval, *d;
 104     unsigned int i, len;
 105
 106     *changed = 0;
 107
 108     len = input_len * 3 + 1;
 109     d = rval = apr_palloc(mp, len);
 110     if (rval == NULL) return NULL;
 111
 112     /* ENH Only encode the characters that really need to be encoded. */
 113
 114     for(i = 0; i < input_len; i++) {
 115         unsigned char c = input[i];
 116
 117         if (c == ' ') {
 118             *d++ = '+';
 119             *changed = 1;
 120         } else
 121         if ( (c == 42) || ((c >= 48)&&(c <= 57)) || ((c >= 65)&&(c <= 90))
 122             || ((c >= 97)&&(c <= 122))
 123         ) {
 124             *d++ = c;
 125         } else {
 126             *d++ = '%';
 127             c2x(c, (unsigned char *)d);
 128             d += 2;
 129             *changed = 1;
 130         }
 131     }
 132
 133     *d = '\0';
 134
 135     return rval;
 136 }
 137
 138 /**
 139  * Appends an URL-encoded version of the source string to the
 140  * destination string, but makes sure that no more than "maxlen"
 141  * bytes are added.
 142  */
 143 char *strnurlencat(char *destination, char *source, unsigned int maxlen) {
 144     char *s = source;
 145     char *d = destination;
 146
 147     /* ENH Only encode the characters that really need to be encoded. */
 148
 149     /* Advance to the end of destination string. */
 150     while(*d != '\0') d++;
 151
 152     /* Loop while there's bytes in the source string or
 153      * until we reach the output limit.
 154      */
 155     while((*s != '\0')&&(maxlen > 0)) {
 156         unsigned char c = *s;
 157
 158         if (c == ' ') {
 159             *d++ = '+';
 160             maxlen--;
 161         } else
 162         if ( (c == 42) || ((c >= 48)&&(c <= 57)) || ((c >= 65)&&(c <= 90))
 163             || ((c >= 97)&&(c <= 122))
 164         ) {
 165             *d++ = c;
 166             maxlen--;
 167         } else {
 168             if (maxlen >= 3) {
 169                 *d++ = '%';
 170                 c2x(c, (unsigned char *)d);
 171                 d += 2;
 172                 maxlen -= 3;
 173             } else {
 174                 /* If there's not enough room for the encoded
 175                  * byte we ignore it.
 176                  */
 177                 maxlen = 0;
 178             }
 179         }
 180
 181         s++;
 182     }
 183
 184     *d++ = '\0';
 185
 186     return destination;
 187 }
 188
 189 /**
 190  *
 191  */
 192 char *file_basename(apr_pool_t *mp, const char *filename) {
 193     char *d, *p;
 194
 195     if (filename == NULL) return NULL;
 196     d = apr_pstrdup(mp, filename);
 197     if (d == NULL) return NULL;
 198
 199     p = strrchr(d, '/');
 200     if (p != NULL) d = p + 1;
 201     p = strrchr(d, '\\');
 202     if (p != NULL) d = p + 1;
 203
 204     return d;
 205 }
 206
 207 /**
 208  *
 209  */
 210 #ifdef WIN32
 211 char *file_dirname(apr_pool_t *p, const char *filename) {
 212     char *b, *c, *d;
 213
 214     if (filename == NULL) return NULL;
 215     b = apr_pstrdup(p, filename);
 216     if (b == NULL) return NULL;
 217
 218     c = strrchr(b, '/');
 219     if (c != NULL) {
 220         d = strrchr(c, '\\');
 221         if (d != NULL) *d = '\0';
 222         else *c = '\0';
 223     } else {
 224         d = strrchr(b, '\\');
 225         if (d != NULL) *d = '\0';
 226     }
 227
 228     return b;
 229 }
 230 #else
 231 char *file_dirname(apr_pool_t *p, const char *filename) {
 232     char *b, *c;
 233
 234     if (filename == NULL) return NULL;
 235     b = apr_pstrdup(p, filename);
 236     if (b == NULL) return NULL;
 237
 238     c = strrchr(b, '/');
 239     if (c != NULL) *c = '\0';
 240
 241     return b;
 242 }
 243 #endif
 244
 245
 246 /**
 247  *
 248  */
 249 int hex2bytes_inplace(unsigned char *data, int len) {
 250     unsigned char *d = data;
 251     int i, count = 0;
 252
 253     if ((data == NULL)||(len == 0)) return 0;
 254
 255     for(i = 0; i <= len - 2; i += 2) {
 256         *d++ = x2c(&data[i]);
 257         count++;
 258     }
 259     *d = '\0';
 260
 261     return count;
 262 }
 263
 264 /**
 265  * Converts a series of bytes into its hexadecimal
 266  * representation.
 267  */
 268 char *bytes2hex(apr_pool_t *pool, unsigned char *data, int len) {
 269     static unsigned char b2hex[] = "0123456789abcdef";
 270     char *hex = NULL;
 271     int i, j;
 272
 273     hex = apr_palloc(pool, (len * 2) + 1);
 274     if (hex == NULL) return NULL;
 275
 276     j = 0;
 277     for(i = 0; i < len; i++) {
 278         hex[j++] = b2hex[data[i] >> 4];
 279         hex[j++] = b2hex[data[i] & 0x0f];
 280     }
 281     hex[j] = 0;
 282
 283     return hex;
 284 }
 285
 286 /**
 287  *
 288  */
 289 int is_token_char(unsigned char c) {
 290     /* ENH Is the performance important at all? We could use a table instead. */
 291
 292     /* CTLs not allowed */
 293     if ((c <= 32)||(c >= 127)) return 0;
 294
 295     switch(c) {
 296         case '(' :
 297         case ')' :
 298         case '<' :
 299         case '>' :
 300         case '@' :
 301         case ',' :
 302         case ';' :
 303         case ':' :
 304         case '\\' :
 305         case '"' :
 306         case '/' :
 307         case '[' :
 308         case ']' :
 309         case '?' :
 310         case '=' :
 311             return 0;
 312     }
 313
 314     return 1;
 315 }
 316
 317 /**
 318  *
 319  */
 320 int remove_lf_crlf_inplace(char *text) {
 321     char *p = text;
 322     int count = 0;
 323
 324     if (text == NULL) return -1;
 325
 326     while(*p != '\0') {
 327         count++;
 328         p++;
 329     }
 330
 331     if (count > 0) {
 332         if (*(p - 1) == '\n') {
 333             *(p - 1) = '\0';
 334             if (count > 1) {
 335                 if (*(p - 2) == '\r') {
 336                     *(p - 2) = '\0';
 337                 }
 338             }
 339         }
 340     }
 341
 342     return 1;
 343 }
 344
 345 /**
 346  * Converts a byte given as its hexadecimal representation
 347  * into a proper byte. Handles uppercase and lowercase letters
 348  * but does not check for overflows.
 349  */
 350 unsigned char x2c(unsigned char *what) {
 351     register unsigned char digit;
 352
 353     digit = (what[0] >= 'A' ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0'));
 354     digit *= 16;
 355     digit += (what[1] >= 'A' ? ((what[1] & 0xdf) - 'A') + 10 : (what[1] - '0'));
 356
 357     return digit;
 358 }
 359
 360 /**
 361  * Converts a single hexadecimal digit into a decimal value.
 362  */
 363 unsigned char xsingle2c(unsigned char *what) {
 364     register unsigned char digit;
 365
 366     digit = (what[0] >= 'A' ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0'));
 367
 368     return digit;
 369 }
 370
 371 /**
 372  *
 373  */
 374 char *guess_tmp_dir(apr_pool_t *p) {
 375     char *filename = NULL;
 376
 377     /* ENH Use apr_temp_dir_get instead. */
 378
 379     #ifdef WIN32
 380     filename = apr_pcalloc(p, 256);
 381     if (filename == NULL) return "";
 382     if (GetTempPath(255, filename) != 0) return filename;
 383     #endif
 384
 385     filename = getenv("TMPDIR");
 386     if (filename != NULL) return filename;
 387
 388     filename = getenv("TEMP");
 389     if (filename != NULL) return filename;
 390
 391     filename = getenv("TMP");
 392     if (filename != NULL) return filename;
 393
 394     #if defined NETWARE
 395     return("sys:/tmp/");
 396     #elif defined WIN32
 397     return("");
 398     #else
 399     return("/tmp/");
 400     #endif
 401 }
 402
 403 /**
 404  *
 405  */
 406 char *current_logtime(apr_pool_t *mp) {
 407     apr_time_exp_t t;
 408     char tstr[100];
 409     apr_size_t len;
 410
 411     apr_time_exp_lt(&t, apr_time_now());
 412
 413     apr_strftime(tstr, &len, 80, "%d/%b/%Y:%H:%M:%S ", &t);
 414     apr_snprintf(tstr + strlen(tstr), 80 - strlen(tstr), "%c%.2d%.2d",
 415         t.tm_gmtoff < 0 ? '-' : '+',
 416         t.tm_gmtoff / (60 * 60), t.tm_gmtoff % (60 * 60));
 417     return apr_pstrdup(mp, tstr);
 418 }
 419
 420 /**
 421  *
 422  */
 423 char *current_filetime(apr_pool_t *mp) {
 424     apr_time_exp_t t;
 425     char tstr[100];
 426     apr_size_t len;
 427
 428     apr_time_exp_lt(&t, apr_time_now());
 429
 430     apr_strftime(tstr, &len, 80, "%Y%m%d-%H%M%S", &t);
 431     return apr_pstrdup(mp, tstr);
 432 }
 433
 434 /**
 435  *
 436  */
 437 int msc_mkstemp_ex(char *template, int mode) {
 438     /* ENH Use apr_file_mktemp instead. */
 439
 440     #if !(defined(WIN32)||defined(NETWARE))
 441     return mkstemp(template);
 442     #else
 443     if (mktemp(template) == NULL) return -1;
 444     return open(template, O_WRONLY | O_APPEND | O_CREAT | O_BINARY, mode);
 445     #endif
 446 }
 447
 448 /**
 449  *
 450  */
 451 int msc_mkstemp(char *template) {
 452     return msc_mkstemp_ex(template, CREATEMODE_UNISTD);
 453 }
 454
 455 /**
 456  * Converts the input string to lowercase (in-place).
 457  */
 458 char *strtolower_inplace(unsigned char *str) {
 459     unsigned char *c = str;
 460
 461     if (str == NULL) return NULL;
 462
 463     while(*c != 0) {
 464         *c = tolower(*c);
 465         c++;
 466     }
 467
 468     return (char *)str;
 469 }
 470
 471 /**
 472  * Converts a single byte into its hexadecimal representation.
 473  * Will overwrite two bytes at the destination.
 474  */
 475 unsigned char *c2x(unsigned what, unsigned char *where) {
 476     static const char c2x_table[] = "0123456789abcdef";
 477
 478     what = what & 0xff;
 479     *where++ = c2x_table[what >> 4];
 480     *where++ = c2x_table[what & 0x0f];
 481
 482     return where;
 483 }
 484
 485 char *log_escape(apr_pool_t *mp, const char *text) {
 486     return _log_escape(mp, (const unsigned char *)text, text ? strlen(text) : 0, 1, 0);
 487 }
 488
 489 char *log_escape_nq(apr_pool_t *mp, const char *text) {
 490     return _log_escape(mp, (const unsigned char *)text, text ? strlen(text) : 0, 0, 0);
 491 }
 492
 493 char *log_escape_ex(apr_pool_t *mp, const char *text, unsigned long int text_length) {
 494     return _log_escape(mp, (const unsigned char *)text, text_length, 1, 0);
 495 }
 496
 497 char *log_escape_nq_ex(apr_pool_t *mp, const char *text, unsigned long int text_length) {
 498     return _log_escape(mp, (const unsigned char *)text, text_length, 0, 0);
 499 }
 500
 501 char *log_escape_header_name(apr_pool_t *mp, const char *text) {
 502     return _log_escape(mp, (const unsigned char *)text, text ? strlen(text) : 0, 0, 1);
 503 }
 504
 505 char *log_escape_raw(apr_pool_t *mp, const unsigned char *text, unsigned long int text_length) {
 506     unsigned char *ret = apr_palloc(mp, text_length * 4 + 1);
 507     unsigned long int i, j;
 508
 509     for (i = 0, j = 0; i < text_length; i++, j += 4) {
 510         ret[j] = '\\';
 511         ret[j+1] = 'x';
 512         c2x(text[i], ret+j+2);
 513     }
 514     ret[text_length * 4] = '\0';
 515
 516     return (char *)ret;
 517 }
 518
 519 /**
 520  * Transform text to ASCII printable or hex escaped
 521  */
 522 char *log_escape_hex(apr_pool_t *mp, const unsigned char *text, unsigned long int text_length) {
 523     unsigned char *ret = apr_palloc(mp, text_length * 4 + 1);
 524     unsigned long int i, j;
 525
 526     for (i = 0, j = 0; i < text_length; i++) {
 527         if (  (text[i] == '"')
 528             ||(text[i] == '\\')
 529             ||(text[i] <= 0x1f)
 530             ||(text[i] >= 0x7f))
 531         {
 532             ret[j] = '\\';
 533             ret[j+1] = 'x';
 534             c2x(text[i], ret+j+2);
 535             j += 4;
 536         }
 537         else {
 538             ret[j] = text[i];
 539             j ++;
 540         }
 541     }
 542     ret[j] = '\0';
 543
 544     return (char *)ret;
 545 }
 546
 547 /**
 548  * Transform input into a form safe for logging.
 549  */
 550 char *_log_escape(apr_pool_t *mp, const unsigned char *input, unsigned long int input_len,
 551     int escape_quotes, int escape_colon)
 552 {
 553     unsigned char *d = NULL;
 554     char *ret = NULL;
 555     unsigned long int i;
 556
 557     if (input == NULL) return NULL;
 558
 559     ret = apr_palloc(mp, input_len * 4 + 1);
 560     if (ret == NULL) return NULL;
 561     d = (unsigned char *)ret;
 562
 563     i = 0;
 564     while(i < input_len) {
 565         switch(input[i]) {
 566             case ':' :
 567                 if (escape_colon) {
 568                     *d++ = '\\';
 569                     *d++ = ':';
 570                 } else {
 571                     *d++ = input[i];
 572                 }
 573                 break;
 574             case '"' :
 575                 if (escape_quotes) {
 576                     *d++ = '\\';
 577                     *d++ = '"';
 578                 } else {
 579                     *d++ = input[i];
 580                 }
 581                 break;
 582             case '\b' :
 583                 *d++ = '\\';
 584                 *d++ = 'b';
 585                 break;
 586             case '\n' :
 587                 *d++ = '\\';
 588                 *d++ = 'n';
 589                 break;
 590             case '\r' :
 591                 *d++ = '\\';
 592                 *d++ = 'r';
 593                 break;
 594             case '\t' :
 595                 *d++ = '\\';
 596                 *d++ = 't';
 597                 break;
 598             case '\v' :
 599                 *d++ = '\\';
 600                 *d++ = 'v';
 601                 break;
 602             case '\\' :
 603                 *d++ = '\\';
 604                 *d++ = '\\';
 605                 break;
 606             default :
 607                 if ((input[i] <= 0x1f)||(input[i] >= 0x7f)) {
 608                     *d++ = '\\';
 609                     *d++ = 'x';
 610                     c2x(input[i], d);
 611                     d += 2;
 612                 } else {
 613                     *d++ = input[i];
 614                 }
 615                 break;
 616         }
 617
 618         i++;
 619     }
 620
 621     *d = 0;
 622
 623     return ret;
 624 }
 625
 626 /**
 627  * JavaScript decoding.
 628  * IMP1 Assumes NUL-terminated
 629  */
 630
 631 int js_decode_nonstrict_inplace(unsigned char *input, long int input_len) {
 632     unsigned char *d = (unsigned char *)input;
 633     long int i, count;
 634
 635     if (input == NULL) return -1;
 636
 637     i = count = 0;
 638     while (i < input_len) {
 639         if (input[i] == '\\') {
 640             /* Character is an escape. */
 641
 642             if (   (i + 5 < input_len) && (input[i + 1] == 'u')
 643                 && (VALID_HEX(input[i + 2])) && (VALID_HEX(input[i + 3]))
 644                 && (VALID_HEX(input[i + 4])) && (VALID_HEX(input[i + 5])) )
 645             {
 646                 /* \uHHHH */
 647
 648                 /* Use only the lower byte. */
 649                 *d = x2c(&input[i + 4]);
 650
 651                 /* Full width ASCII (ff01 - ff5e) needs 0x20 added */
 652                 if (   (*d > 0x00) && (*d < 0x5f)
 653                     && ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
 654                     && ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
 655                 {
 656                     (*d) += 0x20;
 657                 }
 658
 659                 d++;
 660                 count++;
 661                 i += 6;
 662             }
 663             else if (   (i + 3 < input_len) && (input[i + 1] == 'x')
 664                      && VALID_HEX(input[i + 2]) && VALID_HEX(input[i + 3])) {
 665                 /* \xHH */
 666                 *d++ = x2c(&input[i + 2]);
 667                 count++;
 668                 i += 4;
 669             }
 670             else if ((i + 1 < input_len) && ISODIGIT(input[i + 1])) {
 671                 /* \OOO (only one byte, \000 - \377) */
 672                 char buf[4];
 673                 int j = 0;
 674
 675                 while((i + 1 + j < input_len)&&(j < 3)) {
 676                     buf[j] = input[i + 1 + j];
 677                     j++;
 678                     if (!ISODIGIT(input[i + 1 + j])) break;
 679                 }
 680                 buf[j] = '\0';
 681
 682                 if (j > 0) {
 683                     /* Do not use 3 characters if we will be > 1 byte */
 684                     if ((j == 3) && (buf[0] > '3')) {
 685                         j = 2;
 686                         buf[j] = '\0';
 687                     }
 688                     *d++ = (unsigned char)strtol(buf, NULL, 8);
 689                     i += 1 + j;
 690                     count++;
 691                 }
 692             }
 693             else if (i + 1 < input_len) {
 694                 /* \C */
 695                 unsigned char c = input[i + 1];
 696                 switch(input[i + 1]) {
 697                     case 'a' :
 698                         c = '\a';
 699                         break;
 700                     case 'b' :
 701                         c = '\b';
 702                         break;
 703                     case 'f' :
 704                         c = '\f';
 705                         break;
 706                     case 'n' :
 707                         c = '\n';
 708                         break;
 709                     case 'r' :
 710                         c = '\r';
 711                         break;
 712                     case 't' :
 713                         c = '\t';
 714                         break;
 715                     case 'v' :
 716                         c = '\v';
 717                         break;
 718                     /* The remaining (\?,\\,\',\") are just a removal
 719                      * of the escape char which is default.
 720                      */
 721                 }
 722
 723                 *d++ = c;
 724                 i += 2;
 725                 count++;
 726             }
 727             else {
 728                 /* Not enough bytes */
 729                 while(i < input_len) {
 730                     *d++ = input[i++];
 731                     count++;
 732                 }
 733             }
 734         }
 735         else {
 736             *d++ = input[i++];
 737             count++;
 738         }
 739     }
 740
 741     *d = '\0';
 742
 743     return count;
 744 }
 745
 746 /**
 747  *
 748  * IMP1 Assumes NUL-terminated
 749  */
 750 int urldecode_uni_nonstrict_inplace_ex(unsigned char *input, long int input_len, int *changed) {
 751     unsigned char *d = input;
 752     long int i, count;
 753
 754     *changed = 0;
 755
 756     if (input == NULL) return -1;
 757
 758     i = count = 0;
 759     while (i < input_len) {
 760         if (input[i] == '%') {
 761             /* Character is a percent sign. */
 762
 763             if ((i + 1 < input_len)&&( (input[i + 1] == 'u')||(input[i + 1] == 'U') )) {
 764                 /* IIS-specific %u encoding. */
 765                 if (i + 5 < input_len) {
 766                     /* We have at least 4 data bytes. */
 767                     if (  (VALID_HEX(input[i + 2]))&&(VALID_HEX(input[i + 3]))
 768                         &&(VALID_HEX(input[i + 4]))&&(VALID_HEX(input[i + 5])) )
 769                     {
 770                         /* We first make use of the lower byte here, ignoring the higher byte. */
 771                         *d = x2c(&input[i + 4]);
 772
 773                         /* Full width ASCII (ff01 - ff5e) needs 0x20 added */
 774                         if (   (*d > 0x00) && (*d < 0x5f)
 775                             && ((input[i + 2] == 'f') || (input[i + 2] == 'F'))
 776                             && ((input[i + 3] == 'f') || (input[i + 3] == 'F')))
 777                         {
 778                             (*d) += 0x20;
 779                         }
 780
 781                         d++;
 782                         count++;
 783                         i += 6;
 784                         *changed = 1;
 785                     } else {
 786                         /* Invalid data, skip %u. */
 787                         *d++ = input[i++];
 788                         *d++ = input[i++];
 789                         count += 2;
 790                     }
 791                 } else {
 792                     /* Not enough bytes (4 data bytes), skip %u. */
 793                     *d++ = input[i++];
 794                     *d++ = input[i++];
 795                     count += 2;
 796                 }
 797             }
 798             else {
 799                 /* Standard URL encoding. */
 800
 801                 /* Are there enough bytes available? */
 802                 if (i + 2 < input_len) {
 803                     /* Yes. */
 804
 805                     /* Decode a %xx combo only if it is valid.
 806                      */
 807                     char c1 = input[i + 1];
 808                     char c2 = input[i + 2];
 809
 810                     if (VALID_HEX(c1) && VALID_HEX(c2)) {
 811                         *d++ = x2c(&input[i + 1]);
 812                         count++;
 813                         i += 3;
 814                         *changed = 1;
 815                     } else {
 816                         /* Not a valid encoding, skip this % */
 817                         *d++ = input[i++];
 818                         count++;
 819                     }
 820                 } else {
 821                     /* Not enough bytes available, skip this % */
 822                     *d++ = input[i++];
 823                     count++;
 824                 }
 825             }
 826         }
 827         else {
 828             /* Character is not a percent sign. */
 829             if (input[i] == '+') {
 830                 *d++ = ' ';
 831                 *changed = 1;
 832             } else {
 833                 *d++ = input[i];
 834             }
 835
 836             count++;
 837             i++;
 838         }
 839     }
 840
 841     *d = '\0';
 842
 843     return count;
 844 }
 845
 846 /**
 847  *
 848  * IMP1 Assumes NUL-terminated
 849  */
 850 int urldecode_nonstrict_inplace_ex(unsigned char *input, long int input_len, int *invalid_count, int *changed) {
 851     unsigned char *d = (unsigned char *)input;
 852     long int i, count;
 853
 854     *changed = 0;
 855
 856     if (input == NULL) return -1;
 857
 858     i = count = 0;
 859     while (i < input_len) {
 860         if (input[i] == '%') {
 861             /* Character is a percent sign. */
 862
 863             /* Are there enough bytes available? */
 864             if (i + 2 < input_len) {
 865                 char c1 = input[i + 1];
 866                 char c2 = input[i + 2];
 867
 868                 if (VALID_HEX(c1) && VALID_HEX(c2)) {
 869                     /* Valid encoding - decode it. */
 870                     *d++ = x2c(&input[i + 1]);
 871                     count++;
 872                     i += 3;
 873                     *changed = 1;
 874                 } else {
 875                     /* Not a valid encoding, skip this % */
 876                     *d++ = input[i++];
 877                     count ++;
 878                     (*invalid_count)++;
 879                 }
 880             } else {
 881                 /* Not enough bytes available, copy the raw bytes. */
 882                 *d++ = input[i++];
 883                 count ++;
 884                 (*invalid_count)++;
 885             }
 886         } else {
 887             /* Character is not a percent sign. */
 888             if (input[i] == '+') {
 889                 *d++ = ' ';
 890                 *changed = 1;
 891             } else {
 892                 *d++ = input[i];
 893             }
 894             count++;
 895             i++;
 896         }
 897     }
 898
 899     *d = '\0';
 900
 901     return count;
 902 }
 903
 904 /**
 905  *
 906  * IMP1 Assumes NUL-terminated
 907  */
 908 int html_entities_decode_inplace(apr_pool_t *mp, unsigned char *input, int input_len) {
 909     unsigned char *d = input;
 910     int i, count;
 911
 912     if ((input == NULL)||(input_len <= 0)) return 0;
 913
 914     i = count = 0;
 915     while((i < input_len)&&(count < input_len)) {
 916         int z, copy = 1;
 917
 918         /* Require an ampersand and at least one character to
 919          * start looking into the entity.
 920          */
 921         if ((input[i] == '&')&&(i + 1 < input_len)) {
 922             int k, j = i + 1;
 923
 924             if (input[j] == '#') {
 925                 /* Numerical entity. */
 926                 copy++;
 927
 928                 if (!(j + 1 < input_len)) goto HTML_ENT_OUT; /* Not enough bytes. */
 929                 j++;
 930
 931                 if ((input[j] == 'x')||(input[j] == 'X')) {
 932                     /* Hexadecimal entity. */
 933                     copy++;
 934
 935                     if (!(j + 1 < input_len)) goto HTML_ENT_OUT; /* Not enough bytes. */
 936                     j++; /* j is the position of the first digit now. */
 937
 938                     k = j;
 939                     while((j < input_len)&&(isxdigit(input[j]))) j++;
 940                     if (j > k) { /* Do we have at least one digit? */
 941                         /* Decode the entity. */
 942                         char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k);
 943                         *d++ = (unsigned char)strtol(x, NULL, 16);
 944                         count++;
 945
 946                         /* Skip over the semicolon if it's there. */
 947                         if ((j < input_len)&&(input[j] == ';')) i = j + 1;
 948                         else i = j;
 949
 950                         continue;
 951                     } else {
 952                         goto HTML_ENT_OUT;
 953                     }
 954                 } else {
 955                     /* Decimal entity. */
 956                     k = j;
 957                     while((j < input_len)&&(isdigit(input[j]))) j++;
 958                     if (j > k) { /* Do we have at least one digit? */
 959                         /* Decode the entity. */
 960                         char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k);
 961                         *d++ = (unsigned char)strtol(x, NULL, 10);
 962                         count++;
 963
 964                         /* Skip over the semicolon if it's there. */
 965                         if ((j < input_len)&&(input[j] == ';')) i = j + 1;
 966                         else i = j;
 967
 968                         continue;
 969                     } else {
 970                         goto HTML_ENT_OUT;
 971                     }
 972                 }
 973             } else {
 974                 /* Text entity. */
 975
 976                 k = j;
 977                 while((j < input_len)&&(isalnum(input[j]))) j++;
 978                 if (j > k) { /* Do we have at least one digit? */
 979                     char *x = apr_pstrmemdup(mp, (const char *)&input[k], j - k);
 980
 981                     /* Decode the entity. */
 982                     /* ENH What about others? */
 983                     if (strcasecmp(x, "quot") == 0) *d++ = '"';
 984                     else
 985                     if (strcasecmp(x, "amp") == 0) *d++ = '&';
 986                     else
 987                     if (strcasecmp(x, "lt") == 0) *d++ = '<';
 988                     else
 989                     if (strcasecmp(x, "gt") == 0) *d++ = '>';
 990                     else
 991                     if (strcasecmp(x, "nbsp") == 0) *d++ = NBSP;
 992                     else {
 993                         /* We do no want to convert this entity, copy the raw data over. */
 994                         copy = j - k + 1;
 995                         goto HTML_ENT_OUT;
 996                     }
 997
 998                     count++;
 999
1000                     /* Skip over the semicolon if it's there. */
1001                     if ((j < input_len)&&(input[j] == ';')) i = j + 1;
1002                     else i = j;
1003
1004                     continue;
1005                 }
1006             }
1007         }
1008
1009         HTML_ENT_OUT:
1010
1011         for(z = 0; ((z < copy) && (count < input_len)); z++) {
1012             *d++ = input[i++];
1013             count++;
1014         }
1015     }
1016
1017     *d = '\0';
1018
1019     return count;
1020 }
1021
/**
 * Decode ANSI C escape sequences in place.
 *
 * Recognised sequences:
 *   - simple escapes: \a \b \f \n \r \t \v \\ \? \' \"
 *   - hexadecimal:    \xHH or \XHH (exactly two hex digits)
 *   - octal:          \o, \oo or \ooo (one to three octal digits)
 *
 * Any other backslash sequence (including \x not followed by two hex
 * digits) loses the backslash and keeps the byte that followed it.
 * A backslash that is the very last input byte is copied unchanged.
 *
 * Octal values above 0377 do not fit in one byte; only the low eight
 * bits are stored.
 *
 * The decoded data is never longer than the input. A NUL terminator is
 * written after the decoded data, so the buffer must have room for
 * input_len + 1 bytes.
 *
 * @param input      buffer to decode; overwritten with the result
 * @param input_len  number of bytes to decode
 * @return number of bytes written, not counting the NUL terminator
 */
int ansi_c_sequences_decode_inplace(unsigned char *input, int input_len) {
    unsigned char *d = input;
    int i, count;

    i = count = 0;
    while(i < input_len) {
        if ((input[i] == '\\')&&(i + 1 < input_len)) {
            int c = -1;

            /* Simple, single-character escapes. */
            switch(input[i + 1]) {
                case 'a' :
                    c = '\a';
                    break;
                case 'b' :
                    c = '\b';
                    break;
                case 'f' :
                    c = '\f';
                    break;
                case 'n' :
                    c = '\n';
                    break;
                case 'r' :
                    c = '\r';
                    break;
                case 't' :
                    c = '\t';
                    break;
                case 'v' :
                    c = '\v';
                    break;
                case '\\' :
                    c = '\\';
                    break;
                case '?' :
                    c = '?';
                    break;
                case '\'' :
                    c = '\'';
                    break;
                case '"' :
                    c = '"';
                    break;
                default :
                    /* Not a simple escape; try hex/octal below. */
                    break;
            }

            if (c != -1) {
                /* Consume the backslash and the escape character. */
                i += 2;
            }
            else
            if ((input[i + 1] == 'x')||(input[i + 1] == 'X')) {
                /* Hexadecimal: both digits must be present and valid. */
                if ((i + 3 < input_len)&&(isxdigit(input[i + 2]))&&(isxdigit(input[i + 3]))) {
                    c = x2c(&input[i + 2]);
                    i += 4;
                } else {
                    /* Invalid encoding, handled as unrecognised below. */
                }
            }
            else
            if (ISODIGIT(input[i + 1])) { /* Octal. */
                char buf[4];
                int j = 0;

                /* Collect up to three octal digits. The bounds check comes
                 * before the digit test so that nothing at or beyond
                 * input[input_len] is ever read.
                 */
                while((j < 3)&&(i + 1 + j < input_len)&&(ISODIGIT(input[i + 1 + j]))) {
                    buf[j] = input[i + 1 + j];
                    j++;
                }
                buf[j] = '\0';

                if (j > 0) {
                    c = strtol(buf, NULL, 8);
                    i += 1 + j;
                }
            }

            if (c == -1) {
                /* Didn't recognise the encoding: drop the backslash and
                 * keep the byte that followed it.
                 */
                *d++ = input[i + 1];
                count++;
                i += 2;
            } else {
                /* Converted the encoding. */
                *d++ = c;
                count++;
            }
        } else {
            /* Not a backslash (or a backslash at the very end), copy it. */
            *d++ = input[i++];
            count++;
        }
    }

    *d = '\0';

    return count;
}
1125
/**
 * Normalise a path in place.
 *
 * Processing is triggered each time a forward slash is about to be
 * written:
 *   - "/../" drops the preceding path segment, provided a forward slash
 *     is found when scanning backwards;
 *   - "/./" is reduced to "/";
 *   - runs of forward slashes are reduced to a single one.
 * When win is non-zero every backslash is first turned into a forward
 * slash.
 *
 * A NUL terminator is written after the result, so the buffer must have
 * room for input_len + 1 bytes.
 *
 * @param input      buffer holding the path; overwritten with the result
 * @param input_len  number of bytes in the path
 * @param win        non-zero to treat backslash as a path separator
 * @param changed    set to 1 if any rewrite was applied, otherwise 0
 * @return length of the normalised path, not counting the NUL terminator
 */
int normalise_path_inplace(unsigned char *input, int input_len, int win, int *changed) {
    unsigned char *out = input;
    int written = 0;
    int pos;

    *changed = 0;

    for (pos = 0; (pos < input_len) && (written < input_len); pos++) {
        char ch = input[pos];

        /* Backslash becomes forward slash, but only in Windows mode. */
        if (win && (ch == '\\')) {
            ch = '/';
            *changed = 1;
        }

        if (ch == '/') {
            if ((written >= 5) && (out[-1] == '.') && (out[-2] == '.') && (out[-3] == '/')) {
                /* Directory back-reference. At least 5 bytes of prior
                 * output are required here, and that is on purpose.
                 */
                unsigned char *scan = out - 4;
                int scan_len = written - 4;

                *changed = 1;

                /* Walk back to the previous forward slash, or to the
                 * start of the buffer if there is none.
                 */
                for (; (scan_len > 0) && (*scan != '/'); scan_len--) {
                    scan--;
                }

                /* Only rewind if a forward slash was actually found. */
                if (*scan == '/') {
                    out = scan;
                    written = scan_len;
                }
            }
            else if ((written >= 2) && (out[-1] == '.') && (out[-2] == '/')) {
                /* Directory self-reference: discard the "/." just written. */
                out -= 2;
                written -= 2;
                *changed = 1;
            }
            else if ((written >= 1) && (out[-1] == '/')) {
                /* Repeated forward slash: discard the previous one. */
                out--;
                written--;
                *changed = 1;
            }
        }

        /* Emit the (possibly converted) byte. */
        *out++ = ch;
        written++;
    }

    *out = '\0';

    return written;
}
1194
1195 char *modsec_build(apr_pool_t *mp) {
1196     return apr_psprintf(mp, "%02i%02i%02i%1i%02i",
1197                             atoi(MODSEC_VERSION_MAJOR),
1198                             atoi(MODSEC_VERSION_MINOR),
1199                             atoi(MODSEC_VERSION_MAINT),
1200                             get_modsec_build_type(NULL),
1201                             atoi(MODSEC_VERSION_RELEASE));
1202 }
1203
/**
 * Test whether a string is NULL, empty, or consists only of whitespace.
 *
 * @param string  NUL-terminated string to examine; may be NULL
 * @return 1 if string is NULL or contains no character for which
 *         isspace() is false, 0 otherwise
 */
int is_empty_string(const char *string) {
    const unsigned char *p;

    if (string == NULL) return 1;

    /* Walk the string as unsigned char: passing a negative value (a byte
     * >= 0x80 where plain char is signed) to isspace() is undefined.
     */
    for(p = (const unsigned char *)string; *p != '\0'; p++) {
        if (!isspace(*p)) {
            return 0;
        }
    }

    return 1;
}
1217
1218 char *resolve_relative_path(apr_pool_t *pool, const char *parent_filename, const char *filename) {
1219     if (filename == NULL) return NULL;
1220     // TODO Support paths on operating systems other than Unix.
1221     if (filename[0] == '/') return (char *)filename;
1222
1223     return apr_pstrcat(pool, apr_pstrndup(pool, parent_filename,
1224         strlen(parent_filename) - strlen(apr_filepath_name_get(parent_filename))),
1225         filename, NULL);
1226 }
1227
/**
 * Decode CSS-escaped characters in place.
 *
 * Handling of a backslash:
 *   - followed by 1-6 hexadecimal digits: one byte is produced from the
 *     last two digits (or from the single digit). For 4 digits, 5 digits
 *     starting with '0', or 6 digits starting with "00", a full-width
 *     ASCII check is made: if the two digits before the last two are
 *     both 'f'/'F' and the produced byte is in 0x01-0x5e, 0x20 is added
 *     to it. One whitespace character directly after the digits is
 *     consumed.
 *   - followed by a newline: both are dropped.
 *   - followed by any other byte: that byte is copied as is.
 *   - as the last input byte: dropped.
 * All other bytes are copied unchanged. A NUL terminator is written
 * after the result.
 *
 * References:
 *     http://www.w3.org/TR/REC-CSS2/syndata.html#q4
 *     http://www.unicode.org/roadmaps/
 *
 * @param input      buffer to decode; overwritten with the result
 * @param input_len  number of bytes to decode
 * @return number of bytes written (not counting the NUL terminator),
 *         or -1 if input is NULL
 */
int css_decode_inplace(unsigned char *input, long int input_len) {
    unsigned char *out = (unsigned char *)input;
    long int pos, ndigits, written;
    int fullwidth;

    if (input == NULL) return -1;

    pos = 0;
    written = 0;

    while (pos < input_len) {

        /* Anything but a backslash is copied straight through. */
        if (input[pos] != '\\') {
            *out++ = input[pos++];
            written++;
            continue;
        }

        /* A backslash with nothing after it is left out of the output
         * (continuation to nothing).
         */
        if (pos + 1 >= input_len) {
            pos++;
            continue;
        }

        /* The backslash itself is never needed. */
        pos++;

        /* Count the hexadecimal digits that follow, six at most. */
        ndigits = 0;
        while (    (ndigits < 6)
                && (pos + ndigits < input_len)
                && (VALID_HEX(input[pos + ndigits])))
        {
            ndigits++;
        }

        if (ndigits == 0) {
            if (input[pos] == '\n') {
                /* A newline following a backslash is ignored. */
                pos++;
            }
            else {
                /* Any other character is used as is. */
                *out++ = input[pos++];
                written++;
            }
            continue;
        }

        if (ndigits == 1) {
            /* A lone digit is converted on its own. */
            *out++ = xsingle2c(&input[pos]);
        }
        else {
            /* For now only the last two digits make up the byte. */
            *out = x2c(&input[pos + ndigits - 2]);

            /* Decide whether the full-width check applies: the digits
             * in front of the last four must all be '0' (trivially so
             * when there are exactly four digits).
             */
            fullwidth = 0;
            if (ndigits == 4) {
                fullwidth = 1;
            }
            else if (ndigits == 5) {
                fullwidth = (input[pos] == '0');
            }
            else if (ndigits == 6) {
                fullwidth = ((input[pos] == '0') && (input[pos + 1] == '0'));
            }

            /* Full width ASCII (0xff01 - 0xff5e) needs 0x20 added */
            if (   fullwidth
                && (*out > 0x00) && (*out < 0x5f)
                && ((input[pos + ndigits - 3] == 'f') ||
                    (input[pos + ndigits - 3] == 'F'))
                && ((input[pos + ndigits - 4] == 'f') ||
                    (input[pos + ndigits - 4] == 'F')))
            {
                (*out) += 0x20;
            }

            out++;
        }

        /* A single whitespace after a hex escape must be ignored. */
        if ((pos + ndigits < input_len) && isspace(input[pos + ndigits])) {
            ndigits++;
        }

        /* Step over everything that was consumed. */
        written++;
        pos += ndigits;
    }

    /* Terminate output string. */
    *out = '\0';

    return written;
}
1377
1378 /**
1379  * Translate UNIX octal umask/mode to APR apr_fileperms_t
1380  */
1381 apr_fileperms_t mode2fileperms(int mode) {
1382     apr_fileperms_t perms = 0;
1383
1384     if (mode & S_IXOTH) perms |= APR_WEXECUTE;
1385     if (mode & S_IWOTH) perms |= APR_WWRITE;
1386     if (mode & S_IROTH) perms |= APR_WREAD;
1387     if (mode & S_IXGRP) perms |= APR_GEXECUTE;
1388     if (mode & S_IWGRP) perms |= APR_GWRITE;
1389     if (mode & S_IRGRP) perms |= APR_GREAD;
1390     if (mode & S_IXUSR) perms |= APR_UEXECUTE;
1391     if (mode & S_IWUSR) perms |= APR_UWRITE;
1392     if (mode & S_IRUSR) perms |= APR_UREAD;
1393     if (mode & S_ISVTX) perms |= APR_WSTICKY;
1394     if (mode & S_ISGID) perms |= APR_GSETID;
1395     if (mode & S_ISUID) perms |= APR_USETID;
1396
1397     return perms;
1398 }
1399