2 * ModSecurity for Apache 2.x, http://www.modsecurity.org/
3 * Copyright (c) 2004-2009 Breach Security, Inc. (http://www.breach.com/)
5 * This product is released under the terms of the General Public Licence,
6 * version 2 (GPLv2). Please refer to the file LICENSE (included with this
7 * distribution) which contains the complete text of the licence.
9 * There are special exceptions to the terms and conditions of the GPL
10 * as it is applied to this software. View the full text of the exception in
11 * file MODSECURITY_LICENSING_EXCEPTION in the directory of this software
14 * If any of the files related to licensing are missing or if you have any
15 * other questions related to licensing please contact Breach Security, Inc.
16 * directly using the email address support@breach.com.
19 #include "modsecurity.h"
21 #include "pdf_protect.h"
26 #define DEFAULT_TIMEOUT 10
27 #define DEFAULT_TOKEN_NAME "PDFPTOKEN"
28 #define ATTACHMENT_MIME_TYPE "application/octet-stream"
29 #define NOTE_TWEAK_HEADERS "pdfp-note-tweakheaders"
30 #define NOTE_DONE "pdfp-note-done"
31 #define REDIRECT_STATUS HTTP_TEMPORARY_REDIRECT
32 #define DISPOSITION_VALUE "attachment;"
34 // TODO We need ID and REV values for the PDF XSS alert.
36 // TODO It would be nice if the user could choose the ID/REV/SEVERITY/MESSAGE, etc.
38 static char *encode_sha1_base64(apr_pool_t *mp, const char *data) {
39 unsigned char digest[APR_SHA1_DIGESTSIZE];
40 apr_sha1_ctx_t context;
42 /* Calculate the hash first. */
43 apr_sha1_init(&context);
44 apr_sha1_update(&context, data, strlen(data));
45 apr_sha1_final(digest, &context);
47 /* Now transform with transport-friendly hex encoding. */
48 return bytes2hex(mp, digest, APR_SHA1_DIGESTSIZE);
51 static char *create_hash(modsec_rec *msr,
52 const char *time_string)
54 const char *content = NULL;
56 if (msr->txcfg->pdfp_secret == NULL) {
57 msr_log(msr, 1, "PdfProtect: Unable to generate hash. Please configure SecPdfProtectSecret.");
61 /* Our protection token is made out of the client's IP
62 * address, the secret key, and the token expiry time.
64 content = apr_pstrcat(msr->mp, msr->remote_addr, msr->txcfg->pdfp_secret,
66 if (content == NULL) return NULL;
68 return encode_sha1_base64(msr->mp, content);
74 static char *create_token(modsec_rec *msr) {
75 apr_time_t current_time;
76 const char *time_string = NULL;
77 const char *hash = NULL;
78 int timeout = DEFAULT_TIMEOUT;
80 if (msr->txcfg->pdfp_timeout != -1) {
81 timeout = msr->txcfg->pdfp_timeout;
84 current_time = apr_time_sec(apr_time_now());
85 time_string = apr_psprintf(msr->mp, "%" APR_TIME_T_FMT, (apr_time_t)(current_time + timeout));
86 if (time_string == NULL) return NULL;
88 hash = create_hash(msr, time_string);
89 if (hash == NULL) return NULL;
91 return apr_pstrcat(msr->mp, hash, "|", time_string, NULL);
97 static char *construct_new_uri(modsec_rec *msr) {
98 const char *token_parameter = NULL;
99 const char *new_uri = NULL;
100 const char *token = NULL;
101 const char *token_name = DEFAULT_TOKEN_NAME;
103 token = create_token(msr);
104 if (token == NULL) return NULL;
106 if (msr->txcfg->pdfp_token_name != NULL) {
107 token_name = msr->txcfg->pdfp_token_name;
110 token_parameter = apr_pstrcat(msr->mp, token_name, "=", token, NULL);
111 if (token_parameter == NULL) return NULL;
113 if (msr->r->args == NULL) { /* No other parameters. */
114 new_uri = apr_pstrcat(msr->mp, msr->r->uri, "?", token_parameter, "#PDFP", NULL);
115 } else { /* Preserve existing paramters. */
116 new_uri = apr_pstrcat(msr->mp, msr->r->uri, "?", msr->r->args, "&",
117 token_parameter, "#PDFP", NULL);
120 return (char *)new_uri;
126 static char *extract_token(modsec_rec *msr) {
127 char *search_string = NULL;
128 char *p = NULL, *t = NULL;
129 const char *token_name = DEFAULT_TOKEN_NAME;
131 if ((msr->r == NULL)||(msr->r->args == NULL)) {
135 if (msr->txcfg->pdfp_token_name != NULL) {
136 token_name = msr->txcfg->pdfp_token_name;
139 search_string = apr_pstrcat(msr->mp, msr->txcfg->pdfp_token_name, "=", NULL);
140 if (search_string == NULL) return NULL;
142 p = strstr(msr->r->args, search_string);
143 if (p == NULL) return NULL;
145 t = p = p + strlen(search_string);
146 while ((*t != '\0')&&(*t != '&')) t++;
148 return apr_pstrmemdup(msr->mp, p, t - p);
/**
 * Check that a time string consists of decimal digits only.
 *
 * @param time_string  NUL-terminated candidate string; may be NULL
 * @return             1 if the string is non-empty and every character is
 *                     a decimal digit, 0 otherwise (including NULL and
 *                     the empty string)
 */
static int validate_time_string(const char *time_string) {
    /* Walk the string as unsigned char: passing a negative char value
     * to isdigit() is undefined behaviour.
     */
    const unsigned char *p = (const unsigned char *)time_string;

    /* No digits at all is not a valid time. */
    if ((p == NULL)||(*p == '\0')) return 0;

    while (*p != '\0') {
        if (!isdigit(*p)) return 0;
        p++;
    }

    return 1;
}
168 static int verify_token(modsec_rec *msr, const char *token, char **error_msg) {
169 unsigned int current_time, expiry_time;
170 const char *time_string = NULL;
171 const char *given_hash = NULL;
172 const char *hash = NULL;
173 const char *p = NULL;
175 if (error_msg == NULL) return 0;
178 /* Split token into its parts - hash and expiry time. */
179 p = strstr(token, "|");
180 if (p == NULL) return 0;
182 given_hash = apr_pstrmemdup(msr->mp, token, p - token);
184 if (!validate_time_string(time_string)) {
185 *error_msg = apr_psprintf(msr->mp, "PdfProtect: Invalid time string: %s",
186 log_escape_nq(msr->mp, time_string));
189 expiry_time = atoi(time_string);
191 /* Check the hash. */
192 hash = create_hash(msr, time_string);
193 if (strcmp(given_hash, hash) != 0) {
194 *error_msg = apr_psprintf(msr->mp, "PdfProtect: Invalid hash: %s (expected %s)",
195 log_escape_nq(msr->mp, given_hash), log_escape_nq(msr->mp, hash));
200 current_time = apr_time_sec(apr_time_now());
201 if (current_time > expiry_time) {
202 *error_msg = apr_psprintf(msr->mp, "PdfProtect: Token has expired.");
212 apr_status_t pdfp_output_filter(ap_filter_t *f, apr_bucket_brigade *bb_in) {
213 modsec_rec *msr = (modsec_rec *)f->ctx;
216 ap_log_error(APLOG_MARK, APLOG_ERR | APLOG_NOERRNO, 0, f->r->server,
217 "ModSecurity: Internal Error: Unable to retrieve context in PDF output filter.");
219 ap_remove_output_filter(f);
221 return send_error_bucket(msr, f, HTTP_INTERNAL_SERVER_ERROR);
224 if (msr->txcfg->pdfp_enabled == 1) {
225 // TODO Should we look at err_headers_out too?
226 const char *h_content_type = apr_table_get(f->r->headers_out, "Content-Type");
228 if (msr->txcfg->debuglog_level >= 9) {
229 msr_log(msr, 9, "PdfProtect: r->content_type=%s, header C-T=%s",
230 log_escape_nq(msr->mp, f->r->content_type),
231 log_escape_nq(msr->mp, h_content_type));
234 /* Have we been asked to tweak the headers? */
235 if (apr_table_get(f->r->notes, NOTE_TWEAK_HEADERS) != NULL) {
237 apr_table_set(f->r->headers_out, "Content-Disposition", DISPOSITION_VALUE);
238 f->r->content_type = ATTACHMENT_MIME_TYPE;
241 /* Check if we've already done the necessary work in
244 if (apr_table_get(f->r->notes, NOTE_DONE) != NULL) {
245 /* We have, so return straight away. */
246 ap_remove_output_filter(f);
247 return ap_pass_brigade(f->next, bb_in);
250 /* Proceed to detect dynamically-generated PDF files. */
252 // TODO application/x-pdf, application/vnd.fdf, application/vnd.adobe.xfdf,
253 // application/vnd.adobe.xdp+xml, application/vnd.adobe.xfd+xml, application/vnd.pdf
254 // application/acrobat, text/pdf, text/x-pdf ???
255 if (((f->r->content_type != NULL)&&(strcasecmp(f->r->content_type, "application/pdf") == 0))
256 || ((h_content_type != NULL)&&(strcasecmp(h_content_type, "application/pdf") == 0)))
258 request_rec *r = f->r;
259 const char *token = NULL;
261 if (msr->txcfg->debuglog_level >= 9) {
262 msr_log(msr, 9, "PdfProtect: Detected a dynamically-generated PDF in %s",
263 log_escape_nq(msr->mp, r->uri));
266 /* If we are configured with ForcedDownload protection method then we
267 * can do our thing here and finish early.
269 if (msr->txcfg->pdfp_method == PDF_PROTECT_METHOD_FORCED_DOWNLOAD) {
270 if (msr->txcfg->debuglog_level >= 9) {
271 msr_log(msr, 9, "PdfProtect: Forcing download of a dynamically "
272 "generated PDF file.");
275 apr_table_set(f->r->headers_out, "Content-Disposition", DISPOSITION_VALUE);
276 f->r->content_type = ATTACHMENT_MIME_TYPE;
278 ap_remove_output_filter(f);
280 return ap_pass_brigade(f->next, bb_in);
283 /* If we are here that means TokenRedirection is the desired protection method. */
285 /* Is this a non-GET request? */
286 if ((f->r->method_number != M_GET)&&
287 ((msr->txcfg->pdfp_only_get == 1)||(msr->txcfg->pdfp_only_get == -1))
289 /* This is a non-GET request and we have been configured
290 * not to intercept it. So we are going to tweak the headers
293 if (msr->txcfg->debuglog_level >= 9) {
294 msr_log(msr, 9, "PdfProtect: Forcing download of a dynamically "
295 "generated PDF file and non-GET method.");
298 apr_table_set(f->r->headers_out, "Content-Disposition", DISPOSITION_VALUE);
299 f->r->content_type = ATTACHMENT_MIME_TYPE;
301 ap_remove_output_filter(f);
303 return ap_pass_brigade(f->next, bb_in);
306 /* Locate the protection token. */
307 token = extract_token(msr);
309 if (token == NULL) { /* No token. */
310 char *new_uri = NULL;
312 /* Create a new URI with the protection token inside. */
313 new_uri = construct_new_uri(msr);
314 if (new_uri != NULL) {
315 /* Redirect user to the new URI. */
316 if (msr->txcfg->debuglog_level >= 9) {
317 msr_log(msr, 9, "PdfProtect: PDF request without a token - "
318 "redirecting to %s.", log_escape_nq(msr->mp, new_uri));
321 apr_table_set(r->headers_out, "Location", new_uri);
323 ap_remove_output_filter(f);
325 return send_error_bucket(msr, f, REDIRECT_STATUS);
327 } else { /* Token found. */
328 char *my_error_msg = NULL;
330 /* Verify the token is valid. */
332 if (verify_token(msr, token, &my_error_msg)) { /* Valid. */
333 /* Do nothing - serve the PDF file. */
334 if (msr->txcfg->debuglog_level >= 9) {
335 msr_log(msr, 9, "PdfProtect: PDF request with a valid token "
336 "- serving PDF file normally.");
340 } else { /* Not valid. */
341 /* The token is not valid. We will tweak the response
342 * to prevent the PDF file from opening in the browser.
344 if (msr->txcfg->debuglog_level >= 4) {
345 msr_log(msr, 9, "PdfProtect: PDF request with an invalid token "
346 "- serving file as attachment.");
349 apr_table_set(r->headers_out, "Content-Disposition", DISPOSITION_VALUE);
350 r->content_type = ATTACHMENT_MIME_TYPE;
358 ap_remove_output_filter(f);
360 return ap_pass_brigade(f->next, bb_in);
366 int pdfp_check(modsec_rec *msr) {
367 const char *token = NULL;
371 if (msr->txcfg->pdfp_enabled != 1) {
372 if (msr->txcfg->debuglog_level >= 4) {
373 msr_log(msr, 4, "PdfProtect: Not enabled here.");
379 if (msr->txcfg->pdfp_method != PDF_PROTECT_METHOD_TOKEN_REDIRECTION) {
380 if (msr->txcfg->debuglog_level >= 4) {
381 msr_log(msr, 4, "PdfProtect: Configured with ForcedDownload as protection method, "
382 "skipping detection on the inbound.");
388 /* Then determine whether we need to act at
389 * all. If the request is not for a PDF file
390 * return straight away.
393 if (msr->r->uri == NULL) {
394 if (msr->txcfg->debuglog_level >= 4) {
395 msr_log(msr, 4, "PdfProtect: Unable to inspect URI because it is NULL.");
398 return -1; /* Error. */
401 if (msr->txcfg->debuglog_level >= 9) {
402 msr_log(msr, 9, "PdfProtect: URI=%s, filename=%s, QUERY_STRING=%s.",
403 log_escape_nq(msr->mp, msr->r->uri), log_escape_nq(msr->mp, msr->r->filename),
404 log_escape_nq(msr->mp, msr->r->args));
407 uri = apr_pstrdup(msr->mp, msr->r->uri);
408 if (uri == NULL) return -1; /* Error. */
411 /* Attempt to figure out if this is a request for a PDF file. We are
412 * going to be liberal and look for the extension anywhere in the URI,
413 * not just at the end.
415 p = strstr(uri, ".pdf");
417 /* We do not think this is a PDF file. */
418 if (msr->txcfg->debuglog_level >= 4) {
419 msr_log(msr, 4, "PdfProtect: No indication in the URI this is a "
420 "request for a PDF file.");
426 /* Ignore request methods other than GET and HEAD if
427 * configured to do so.
429 * TODO: Code here is only GET, not GET|HEAD as comment states
431 if ((msr->r->method_number != M_GET)&&(msr->txcfg->pdfp_only_get != 0)) {
432 if (msr->txcfg->debuglog_level >= 4) {
433 msr_log(msr, 4, "PdfProtect: Not intercepting request "
434 "(method=%s/%d).", log_escape_nq(msr->mp, msr->r->method), msr->r->method_number);
440 /* We make a note for ourselves that we've already handled
443 apr_table_set(msr->r->notes, NOTE_DONE, "1");
445 /* Locate the protection token. */
446 token = extract_token(msr);
448 if (token == NULL) { /* No token. */
449 char *new_uri = NULL;
451 /* Create a new URI with the protection token inside. */
452 new_uri = construct_new_uri(msr);
453 if (new_uri == NULL) return DECLINED;
455 /* Redirect user to the new URI. */
456 if (msr->txcfg->debuglog_level >= 9) {
457 msr_log(msr, 9, "PdfProtect: PDF request without a token - redirecting to %s.",
458 log_escape_nq(msr->mp, new_uri));
461 apr_table_set(msr->r->headers_out, "Location", new_uri);
463 return REDIRECT_STATUS;
464 } else { /* Token found. */
465 char *my_error_msg = NULL;
467 /* Verify the token is valid. */
468 if (verify_token(msr, token, &my_error_msg)) { /* Valid. */
469 /* Do nothing - serve the PDF file. */
470 if (msr->txcfg->debuglog_level >= 9) {
471 msr_log(msr, 9, "PdfProtect: PDF request with a valid token - "
472 "serving PDF file normally.");
476 } else { /* Not valid. */
477 /* The token is not valid. We will tweak the response
478 * to prevent the PDF file from opening in the browser.
483 /* Changing response headers before response generation phase takes
484 * place is not really reliable. Although we do this we also make
485 * a note for ourselves (in the output filter) to check the headers
486 * again just before they are sent back to the end user.
488 apr_table_set(msr->r->headers_out, "Content-Disposition", DISPOSITION_VALUE);
489 msr->r->content_type = ATTACHMENT_MIME_TYPE;
490 apr_table_set(msr->r->notes, NOTE_TWEAK_HEADERS, "1");
492 /* Proceed with response (PDF) generation. */