1 /*************************************************
3 *************************************************/
5 /* This is a grep program that uses the 8-bit PCRE regular expression library
6 via the PCRE2 updated API to do its pattern matching. On Unix-like, Windows,
7 and native z/OS systems it can recurse into directories, and in z/OS it can
10 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
11 additional header is required. That header is not included in the main PCRE2
12 distribution because other apparatus is needed to compile pcre2grep for z/OS.
13 The header can be found in the special z/OS distribution, which is available
14 from www.zaconsultants.net or from www.cbttape.org.
16 Copyright (c) 1997-2018 University of Cambridge
18 -----------------------------------------------------------------------------
19 Redistribution and use in source and binary forms, with or without
20 modification, are permitted provided that the following conditions are met:
22 * Redistributions of source code must retain the above copyright notice,
23 this list of conditions and the following disclaimer.
25 * Redistributions in binary form must reproduce the above copyright
26 notice, this list of conditions and the following disclaimer in the
27 documentation and/or other materials provided with the distribution.
29 * Neither the name of the University of Cambridge nor the names of its
30 contributors may be used to endorse or promote products derived from
31 this software without specific prior written permission.
33 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43 POSSIBILITY OF SUCH DAMAGE.
44 -----------------------------------------------------------------------------
58 #include <sys/types.h>
61 #if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) \
62 && !defined WIN32 && !defined(__CYGWIN__)
66 /* Some versions of CMake still define it */
67 #if defined(__CYGWIN__) && defined(WIN32)
72 #include <io.h> /* For _setmode() */
73 #include <fcntl.h> /* For _O_BINARY */
76 #ifdef SUPPORT_PCRE2GREP_CALLOUT
96 #define PCRE2_CODE_UNIT_WIDTH 8
99 /* Older versions of MSVC lack snprintf(). This define allows for
100 warning/error-free compilation and testing with MSVC compilers back to at least
101 MSVC 10/2010. Except for VC6 (which is missing some fundamentals and fails). */
103 #if defined(_MSC_VER) && (_MSC_VER < 1900)
104 #define snprintf _snprintf
112 #define OFFSET_SIZE 33
115 #define MAXPATLEN BUFSIZ
117 #define MAXPATLEN 8192
120 #define FNBUFSIZ 2048
121 #define ERRBUFSIZ 256
123 /* Values for the "filenames" variable, which specifies options for file name
124 output. The order is important; it is assumed that a file name is wanted for
125 all values greater than FN_DEFAULT. */
127 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
129 /* File reading styles */
131 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
133 /* Actions for the -d and -D options */
/* The lower-case dee_* values are the ones stored in dee_action and the
upper-case DEE_* values are the ones stored in DEE_action. */
135 enum { dee_READ, dee_SKIP, dee_RECURSE };
136 enum { DEE_READ, DEE_SKIP };
138 /* Actions for special processing options (flag bits) */
140 #define PO_WORD_MATCH 0x0001
141 #define PO_LINE_MATCH 0x0002
142 #define PO_FIXED_STRINGS 0x0004
144 /* Binary file options */
146 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
148 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
149 environments), a warning is issued if the value of fwrite() is ignored.
150 Unfortunately, casting to (void) does not suppress the warning. To get round
151 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
152 apply to fprintf(). */
/* Calls fwrite() and deliberately discards its result. The result is consumed
by an empty "if" so that compilers which warn about an ignored fwrite() value
stay quiet. The do { } while (0) wrapper makes the expansion a single statement,
so the macro can safely be used as the body of an "if" that has an "else". Call
sites must supply the terminating semicolon. */
#define FWRITE_IGNORE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
156 /* Under Windows, we have to set stdout to be binary, so that it does not
157 convert \r\n at the ends of output lines to \r\r\n. However, that means that
158 any messages written to stdout must have \r\n as their line terminator. This is
159 handled by using STDOUT_NL as the newline string. We also use a normal double
160 quote for the example, as single quotes aren't usually available. */
163 #define STDOUT_NL "\r\n"
166 #define STDOUT_NL "\n"
172 /*************************************************
174 *************************************************/
176 /* Jeffrey Friedl has some debugging requirements that are not part of the
180 static int S_arg = -1;
181 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
182 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
183 static const char *jfriedl_prefix = "";
184 static const char *jfriedl_postfix = "";
187 static const char *colour_string = "1;31";
188 static const char *colour_option = NULL;
189 static const char *dee_option = NULL;
190 static const char *DEE_option = NULL;
191 static const char *locale = NULL;
192 static const char *newline_arg = NULL;
193 static const char *om_separator = NULL;
194 static const char *stdin_name = "(standard input)";
195 static const char *output_text = NULL;
197 static char *main_buffer = NULL;
199 static int after_context = 0;
200 static int before_context = 0;
201 static int binary_files = BIN_BINARY;
202 static int both_context = 0;
203 static int bufthird = PCRE2GREP_BUFSIZE;
204 static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
205 static int bufsize = 3*PCRE2GREP_BUFSIZE;
206 static int endlinetype;
208 static unsigned long int total_count = 0;
209 static unsigned long int counts_printed = 0;
212 static int dee_action = dee_SKIP;
214 static int dee_action = dee_READ;
217 static int DEE_action = DEE_READ;
218 static int error_count = 0;
219 static int filenames = FN_DEFAULT;
221 #ifdef SUPPORT_PCRE2GREP_JIT
222 static BOOL use_jit = TRUE;
224 static BOOL use_jit = FALSE;
227 static const uint8_t *character_tables = NULL;
229 static uint32_t pcre2_options = 0;
230 static uint32_t extra_options = 0;
231 static PCRE2_SIZE heap_limit = PCRE2_UNSET;
232 static uint32_t match_limit = 0;
233 static uint32_t depth_limit = 0;
235 static pcre2_compile_context *compile_context;
236 static pcre2_match_context *match_context;
237 static pcre2_match_data *match_data;
238 static PCRE2_SIZE *offsets;
240 static BOOL count_only = FALSE;
241 static BOOL do_colour = FALSE;
243 static BOOL do_ansi = FALSE;
245 static BOOL file_offsets = FALSE;
246 static BOOL hyphenpending = FALSE;
247 static BOOL invert = FALSE;
248 static BOOL line_buffered = FALSE;
249 static BOOL line_offsets = FALSE;
250 static BOOL multiline = FALSE;
251 static BOOL number = FALSE;
252 static BOOL omit_zero_count = FALSE;
253 static BOOL resource_error = FALSE;
254 static BOOL quiet = FALSE;
255 static BOOL show_total_count = FALSE;
256 static BOOL silent = FALSE;
257 static BOOL utf = FALSE;
259 /* Structure for list of --only-matching capturing numbers. */
261 typedef struct omstr {
266 static omstr *only_matching = NULL;
267 static omstr *only_matching_last = NULL;
268 static int only_matching_count;
270 /* Structure for holding the two variables that describe a number chain. */
272 typedef struct omdatastr {
277 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
279 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
281 typedef struct fnstr {
286 static fnstr *exclude_from = NULL;
287 static fnstr *exclude_from_last = NULL;
288 static fnstr *include_from = NULL;
289 static fnstr *include_from_last = NULL;
291 static fnstr *file_lists = NULL;
292 static fnstr *file_lists_last = NULL;
293 static fnstr *pattern_files = NULL;
294 static fnstr *pattern_files_last = NULL;
296 /* Structure for holding the two variables that describe a file name chain. */
298 typedef struct fndatastr {
303 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
304 static fndatastr include_from_data = { &include_from, &include_from_last };
305 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
306 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
308 /* Structure for pattern and its compiled form; used for matching patterns and
309 also for include/exclude patterns. */
311 typedef struct patstr {
315 pcre2_code *compiled;
318 static patstr *patterns = NULL;
319 static patstr *patterns_last = NULL;
320 static patstr *include_patterns = NULL;
321 static patstr *include_patterns_last = NULL;
322 static patstr *exclude_patterns = NULL;
323 static patstr *exclude_patterns_last = NULL;
324 static patstr *include_dir_patterns = NULL;
325 static patstr *include_dir_patterns_last = NULL;
326 static patstr *exclude_dir_patterns = NULL;
327 static patstr *exclude_dir_patterns_last = NULL;
329 /* Structure holding the two variables that describe a pattern chain. A pointer
330 to such structures is used for each appropriate option. */
332 typedef struct patdatastr {
337 static patdatastr match_patdata = { &patterns, &patterns_last };
338 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
339 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
340 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
341 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
343 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
344 &include_dir_patterns, &exclude_dir_patterns };
346 static const char *incexname[4] = { "--include", "--exclude",
347 "--include-dir", "--exclude-dir" };
349 /* Structure for options and list of them */
351 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_U32NUMBER, OP_SIZE,
352 OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
354 typedef struct option_item {
358 const char *long_name;
359 const char *help_text;
362 /* Options without a single-letter equivalent get a negative value. This can be
363 used to identify them. */
365 #define N_COLOUR (-1)
366 #define N_EXCLUDE (-2)
367 #define N_EXCLUDE_DIR (-3)
369 #define N_INCLUDE (-5)
370 #define N_INCLUDE_DIR (-6)
372 #define N_LOCALE (-8)
374 #define N_LOFFSETS (-10)
375 #define N_FOFFSETS (-11)
376 #define N_LBUFFER (-12)
377 #define N_H_LIMIT (-13)
378 #define N_M_LIMIT (-14)
379 #define N_M_LIMIT_DEP (-15)
380 #define N_BUFSIZE (-16)
381 #define N_NOJIT (-17)
382 #define N_FILE_LIST (-18)
383 #define N_BINARY_FILES (-19)
384 #define N_EXCLUDE_FROM (-20)
385 #define N_INCLUDE_FROM (-21)
386 #define N_OM_SEPARATOR (-22)
387 #define N_MAX_BUFSIZE (-23)
389 static option_item optionlist[] = {
390 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
391 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
392 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
393 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
394 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
395 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
396 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer starting size" },
397 { OP_NUMBER, N_MAX_BUFSIZE,&max_bufthird, "max-buffer-size=number", "set processing buffer maximum size" },
398 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
399 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
400 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
401 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
402 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
403 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
404 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
405 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
406 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
407 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
408 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
409 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
410 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
411 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
412 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
413 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
414 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
415 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
416 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
417 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
418 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
419 { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
420 { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
421 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
422 { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
423 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
424 { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
425 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
426 #ifdef SUPPORT_PCRE2GREP_JIT
427 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
429 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
431 { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
432 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
433 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
434 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
435 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
436 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
437 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
438 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
439 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
440 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
441 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
443 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
445 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
446 { OP_NODATA, 't', NULL, "total-count", "print total count of matching lines" },
447 { OP_NODATA, 'u', NULL, "utf", "use UTF mode" },
448 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
449 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
450 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
451 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
452 { OP_NODATA, 0, NULL, NULL, NULL }
455 /* Table of names for newline types. Must be kept in step with the definitions
456 of PCRE2_NEWLINE_xx in pcre2.h. */
458 static const char *newlines[] = {
459 "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
461 /* UTF-8 tables - used only when the newline setting is "any". */
/* NOTE(review): unlike the other file-scope data visible in this file, these
two tables are not declared static - confirm whether external linkage is
needed. */
/* Six mask values, each a run of low-order one bits (8, 5, 4, 3, 2 and 1 bits
wide respectively). */
463 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Sixty-four small counts in the range 1 to 5, laid out as four rows of
sixteen. */
465 const char utf8_table4[] = {
466 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
467 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
468 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
469 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
472 #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
473 /*************************************************
474 * Emulated memmove() for systems without it *
475 *************************************************/
477 /* This function can make use of bcopy() if it is available. Otherwise do it by
478 steam, as there are some non-Unix environments that lack both memmove() and
482 emulated_memmove(void *d, const void *s, size_t n)
/* Work on byte pointers so that the copy can be done one byte at a time. */
489 unsigned char *dest = (unsigned char *)d;
490 const unsigned char *src = (const unsigned char *)s;
/* Copy working downwards: both pointers are decremented before each byte is
moved. */
495 for (i = 0; i < n; ++i) *(--dest) = *(--src);
/* Copy working upwards: both pointers are incremented after each byte is
moved. */
500 for (i = 0; i < n; ++i) *dest++ = *src++;
/* dest has been advanced by n bytes here, so dest - n is the original
destination pointer. */
501 return (void *)(dest - n);
503 #endif /* not HAVE_BCOPY */
506 #define memmove(d,s,n) emulated_memmove(d,s,n)
507 #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
510 /*************************************************
511 * Case-independent string compare *
512 *************************************************/
/* Compares two strings after folding each character with tolower(). The loop
continues while either string still has characters left. */
515 strcmpic(const char *str1, const char *str2)
518 while (*str1 != '\0' || *str2 != '\0')
/* NOTE(review): plain char values are passed to tolower() without a cast to
unsigned char, unlike the isspace() and isdigit() calls in decode_number() -
confirm whether bytes with the top bit set can reach this function. */
520 c1 = tolower(*str1++);
521 c2 = tolower(*str2++);
/* On the first difference the result is +1 if c1 > c2 and -1 otherwise:
((c1 > c2) << 1) - 1 is 1 when the comparison is true and -1 when it is
false. */
522 if (c1 != c2) return ((c1 > c2) << 1) - 1;
528 /*************************************************
529 * Parse GREP_COLORS *
530 *************************************************/
532 /* Extract ms or mt from GREP_COLORS.
534 Argument: the string, possibly NULL
535 Returns: the value of ms or mt, or NULL if neither present
539 parse_grep_colors(const char *gc)
544 if (gc == NULL) return NULL;
545 col = strstr(gc, "ms=");
546 if (col == NULL) col = strstr(gc, "mt=");
547 if (col == NULL) return NULL;
550 while (*col != ':' && *col != 0 && len < sizeof(seq)-1)
557 /*************************************************
558 * Exit from the program *
559 *************************************************/
561 /* If there has been a resource error, give a suitable message.
563 Argument: the return code
564 Returns: does not return
568 pcre2grep_exit(int rc)
570 /* VMS does exit codes differently: both exit(1) and exit(0) return with a
571 status of 1, which is not helpful. To help with this problem, define a symbol
572 (akin to an environment variable) called "PCRE2GREP_RC" and put the exit code
577 #include lib$routines
579 $DESCRIPTOR(sym_nam, "PCRE2GREP_RC");
580 $DESCRIPTOR(sym_val, val_buf);
581 sprintf(val_buf, "%d", rc);
582 sym_val.dsc$w_length = strlen(val_buf);
583 lib$set_symbol(&sym_nam, &sym_val);
588 fprintf(stderr, "pcre2grep: Error %d, %d, %d or %d means that a resource "
589 "limit was exceeded.\n", PCRE2_ERROR_JIT_STACKLIMIT, PCRE2_ERROR_MATCHLIMIT,
590 PCRE2_ERROR_DEPTHLIMIT, PCRE2_ERROR_HEAPLIMIT);
591 fprintf(stderr, "pcre2grep: Check your regex for nested unlimited loops.\n");
597 /*************************************************
598 * Add item to chain of patterns *
599 *************************************************/
601 /* Used to add an item onto a chain, or just return an unconnected item if the
602 "after" argument is NULL.
605 s pattern string to add
606 patlen length of pattern
607 after if not NULL points to item to insert after
609 Returns: new pattern block or NULL on error
613 add_pattern(char *s, PCRE2_SIZE patlen, patstr *after)
615 patstr *p = (patstr *)malloc(sizeof(patstr));
618 fprintf(stderr, "pcre2grep: malloc failed\n");
621 if (patlen > MAXPATLEN)
623 fprintf(stderr, "pcre2grep: pattern is too long (limit is %d bytes)\n",
635 p->next = after->next;
642 /*************************************************
643 * Free chain of patterns *
644 *************************************************/
646 /* Used for several chains of patterns.
648 Argument: pointer to start of chain
653 free_pattern_chain(patstr *pc)
659 if (p->compiled != NULL) pcre2_code_free(p->compiled);
665 /*************************************************
666 * Free chain of file names *
667 *************************************************/
670 Argument: pointer to start of chain
675 free_file_chain(fnstr *fn)
686 /*************************************************
687 * OS-specific functions *
688 *************************************************/
690 /* These definitions are needed in all Windows environments, even those where
691 Unix-style directory scanning can be used (see below). */
698 #ifndef WIN32_LEAN_AND_MEAN
699 # define WIN32_LEAN_AND_MEAN
704 #define iswild(name) (strpbrk(name, "*?") != NULL)
706 /* Convert ANSI BGR format to RGB used by Windows */
/* Swaps bit 0 and bit 2 of x and leaves bit 1 in place. The argument is
parenthesised at each use so that an expression argument is treated as a whole
regardless of operator precedence. Note that x is evaluated three times, so it
must not have side effects. */
#define BGR_RGB(x) (((x) & 1 ? 4 : 0) | ((x) & 2) | ((x) & 4 ? 1 : 0))
709 static HANDLE hstdout;
710 static CONSOLE_SCREEN_BUFFER_INFO csbi;
711 static WORD match_colour;
714 decode_ANSI_colour(const char *cs)
/* Start from the attributes recorded in csbi; each recognised code below then
modifies this value. */
716 WORD result = csbi.wAttributes;
/* Codes 1, 4 and 5 each set a single attribute bit (presumably foreground
intensity, underscore and background intensity respectively - confirm against
the Windows console attribute definitions). */
722 if (code == 1) result |= 0x08;
723 else if (code == 4) result |= 0x8000;
724 else if (code == 5) result |= 0x80;
/* 30-37 replace bits 0-2 and 40-47 replace bits 4-6, using BGR_RGB() to swap
bit 0 with bit 2 of the colour number; 39 and 49 restore the low or high nibble
of the low byte from csbi.
NOTE(review): every mask in the colour branches is at most 8 bits wide, so a bit
above 0xFF that is already set in result (such as the 0x8000 from code 4) is
cleared whenever one of these branches runs - confirm that this is intended. */
725 else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
726 else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
727 else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
728 else if (code == 49) result = (result & 0x0F) | (csbi.wAttributes & 0xF0);
729 /* aixterm high intensity colour codes */
730 else if (code >= 90 && code <= 97) result = (result & 0xF0) | BGR_RGB(code - 90) | 0x08;
731 else if (code >= 100 && code <= 107) result = (result & 0x0F) | (BGR_RGB(code - 100) << 4) | 0x80;
/* Step over the digits of the current code.
NOTE(review): *cs is passed to isdigit() without a cast to unsigned char. */
733 while (isdigit(*cs)) cs++;
746 hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
747 /* This fails when redirected to con; try again if so. */
748 if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
750 HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
751 FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
752 GetConsoleScreenBufferInfo(hcon, &csbi);
755 match_colour = decode_ANSI_colour(colour_string);
756 /* No valid colour found - turn off colouring */
757 if (!match_colour) do_colour = FALSE;
764 /* The following sets of functions are defined so that they can be made system
765 specific. At present there are versions for Unix-style environments, Windows,
766 native z/OS, and "no support". */
769 /************* Directory scanning Unix-style and z/OS ***********/
771 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
772 #include <sys/types.h>
773 #include <sys/stat.h>
776 #if defined NATIVE_ZOS
777 /************* Directory and PDS/E scanning for z/OS ***********/
778 /************* z/OS looks mostly like Unix with USS ************/
779 /* However, z/OS needs the #include statements in this header */
780 #include "pcrzosfs.h"
781 /* That header is not included in the main PCRE distribution because
782 other apparatus is needed to compile pcre2grep for z/OS. The header
783 can be found in the special z/OS distribution, which is available
784 from www.zaconsultants.net or from www.cbttape.org. */
787 typedef DIR directory_type;
791 isdirectory(char *filename)
794 if (stat(filename, &statbuf) < 0)
795 return 0; /* In the expectation that opening as a file will fail */
796 return S_ISDIR(statbuf.st_mode);
799 static directory_type *
800 opendirectory(char *filename)
802 return opendir(filename);
806 readdirectory(directory_type *dir)
810 struct dirent *dent = readdir(dir);
811 if (dent == NULL) return NULL;
812 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
815 /* Control never reaches here */
819 closedirectory(directory_type *dir)
825 /************* Test for regular file, Unix-style **********/
828 isregfile(char *filename)
831 if (stat(filename, &statbuf) < 0)
832 return 1; /* In the expectation that opening as a file will fail */
833 return S_ISREG(statbuf.st_mode);
837 #if defined NATIVE_ZOS
838 /************* Test for a terminal in z/OS **********/
839 /* isatty() does not work in a TSO environment, so always give FALSE.*/
854 /************* Test for a terminal, Unix-style **********/
860 return isatty(fileno(stdout));
866 return isatty(fileno(f));
871 /************* Print optionally coloured match Unix-style and z/OS **********/
/* Writes length bytes from buf to stdout. When do_colour is set, the text is
preceded by ESC [ colour_string m and followed by ESC [ 0 m. */
874 print_match(const void *buf, int length)
/* Nothing at all is written for an empty match, not even the escape
sequences. */
876 if (length == 0) return;
877 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
878 FWRITE_IGNORE(buf, 1, length, stdout);
879 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
882 /* End of Unix-style or native z/OS environment functions. */
885 /************* Directory scanning in Windows ***********/
887 /* I (Philip Hazel) have no means of testing this code. It was contributed by
888 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
889 when it did not exist. David Byron added a patch that moved the #include of
890 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
895 #ifndef INVALID_FILE_ATTRIBUTES
896 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
899 typedef struct directory_type
903 WIN32_FIND_DATA data;
909 isdirectory(char *filename)
911 DWORD attr = GetFileAttributes(filename);
912 if (attr == INVALID_FILE_ATTRIBUTES)
914 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
918 opendirectory(char *filename)
924 len = strlen(filename);
925 pattern = (char *)malloc(len + 3);
926 dir = (directory_type *)malloc(sizeof(*dir));
927 if ((pattern == NULL) || (dir == NULL))
929 fprintf(stderr, "pcre2grep: malloc failed\n");
932 memcpy(pattern, filename, len);
933 if (iswild(filename))
936 memcpy(&(pattern[len]), "\\*", 3);
937 dir->handle = FindFirstFile(pattern, &(dir->data));
938 if (dir->handle != INVALID_HANDLE_VALUE)
944 err = GetLastError();
947 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
952 readdirectory(directory_type *dir)
958 if (!FindNextFile(dir->handle, &(dir->data)))
965 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
966 return dir->data.cFileName;
969 return NULL; /* Keep compiler happy; never executed */
974 closedirectory(directory_type *dir)
976 FindClose(dir->handle);
981 /************* Test for regular file in Windows **********/
983 /* I don't know how to do this, or if it can be done; assume all paths are
984 regular if they are not directories. */
986 int isregfile(char *filename)
988 return !isdirectory(filename);
992 /************* Test for a terminal in Windows **********/
997 return _isatty(_fileno(stdout));
1001 is_file_tty(FILE *f)
1003 return _isatty(_fileno(f));
1007 /************* Print optionally coloured match in Windows **********/
1010 print_match(const void *buf, int length)
1012 if (length == 0) return;
1015 if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1016 else SetConsoleTextAttribute(hstdout, match_colour);
1018 FWRITE_IGNORE(buf, 1, length, stdout);
1021 if (do_ansi) fprintf(stdout, "%c[0m", 0x1b);
1022 else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
1026 /* End of Windows functions */
1029 /************* Directory scanning when we can't do it ***********/
1031 /* The type is void, and apart from isdirectory(), the functions do nothing. */
/* Placeholder type: no real directory handle exists in this configuration. */
1036 typedef void directory_type;
/* Always reports that the path is not a directory. */
1038 int isdirectory(char *filename) { return 0; }
/* Always returns a null pointer. */
1039 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
/* Always returns a null pointer, i.e. no entry. */
1040 char *readdirectory(directory_type *dir) { return (char*)0;}
/* Does nothing. */
1041 void closedirectory(directory_type *dir) {}
1044 /************* Test for regular file when we can't do it **********/
1046 /* Assume all files are regular. */
/* Always returns 1, whatever the path. */
1048 int isregfile(char *filename) { return 1; }
1051 /************* Test for a terminal when we can't do it **********/
1060 is_file_tty(FILE *f)
1066 /************* Print optionally coloured match when we can't do it **********/
1069 print_match(const void *buf, int length)
1071 if (length == 0) return;
1072 FWRITE_IGNORE(buf, 1, length, stdout);
1075 #endif /* End of system-specific functions */
1079 #ifndef HAVE_STRERROR
1080 /*************************************************
1081 * Provide strerror() for non-ANSI libraries *
1082 *************************************************/
1084 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1085 in their libraries, but can provide the same facility by this simple
1086 alternative function. */
1088 extern int sys_nerr;
1089 extern char *sys_errlist[];
1094 if (n < 0 || n >= sys_nerr) return "unknown error number";
1095 return sys_errlist[n];
1097 #endif /* HAVE_STRERROR */
1101 /*************************************************
1103 *************************************************/
1109 fprintf(stderr, "Usage: pcre2grep [-");
1110 for (op = optionlist; op->one_char != 0; op++)
1112 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1114 fprintf(stderr, "] [long options] [pattern] [files]\n");
1115 fprintf(stderr, "Type \"pcre2grep --help\" for more information and the long "
1122 /*************************************************
1124 *************************************************/
1131 printf("Usage: pcre2grep [OPTION]... [PATTERN] [FILE1 FILE2 ...]" STDOUT_NL);
1132 printf("Search for PATTERN in each FILE or standard input." STDOUT_NL);
1133 printf("PATTERN must be present if neither -e nor -f is used." STDOUT_NL);
1135 #ifdef SUPPORT_PCRE2GREP_CALLOUT
1136 printf("Callout scripts in patterns are supported." STDOUT_NL);
1138 printf("Callout scripts are not supported in this pcre2grep." STDOUT_NL);
1141 printf("\"-\" can be used as a file name to mean STDIN." STDOUT_NL);
1144 printf("Files whose names end in .gz are read using zlib." STDOUT_NL);
1147 #ifdef SUPPORT_LIBBZ2
1148 printf("Files whose names end in .bz2 are read using bzlib2." STDOUT_NL);
1151 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1152 printf("Other files and the standard input are read as plain files." STDOUT_NL STDOUT_NL);
1154 printf("All files are read as plain files, without any interpretation." STDOUT_NL STDOUT_NL);
1157 printf("Example: pcre2grep -i " QUOT "hello.*world" QUOT " menu.h main.c" STDOUT_NL STDOUT_NL);
1158 printf("Options:" STDOUT_NL);
1160 for (op = optionlist; op->one_char != 0; op++)
1165 if (op->one_char > 0 && (op->long_name)[0] == 0)
1166 n = 31 - printf(" -%c", op->one_char);
1169 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
1170 else strcpy(s, " ");
1171 n = 31 - printf(" %s --%s", s, op->long_name);
1175 printf("%.*s%s" STDOUT_NL, n, " ", op->help_text);
1178 printf(STDOUT_NL "Numbers may be followed by K or M, e.g. --max-buffer-size=100K." STDOUT_NL);
1179 printf("The default value for --buffer-size is %d." STDOUT_NL, PCRE2GREP_BUFSIZE);
1180 printf("The default value for --max-buffer-size is %d." STDOUT_NL, PCRE2GREP_MAX_BUFSIZE);
1181 printf("When reading patterns or file names from a file, trailing white" STDOUT_NL);
1182 printf("space is removed and blank lines are ignored." STDOUT_NL);
1183 printf("The maximum size of any pattern is %d bytes." STDOUT_NL, MAXPATLEN);
1185 printf(STDOUT_NL "With no FILEs, read standard input. If fewer than two FILEs given, assume -h." STDOUT_NL);
1186 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble." STDOUT_NL);
1191 /*************************************************
1192 * Test exclude/includes *
1193 *************************************************/
1195 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
1196 there are no includes, the path must match an include pattern.
1199 path the path to be matched
1200 ip the chain of include patterns
1201 ep the chain of exclude patterns
1203 Returns: TRUE if the path is not excluded
1207 test_incexc(char *path, patstr *ip, patstr *ep)
1209 int plen = strlen((const char *)path); /* Path length, computed once for every match below */
1211 for (; ep != NULL; ep = ep->next) /* Try each exclude pattern in turn */
1213 if (pcre2_match(ep->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) /* Non-negative result: the pattern matched */
1217 if (ip == NULL) return TRUE; /* No include patterns, so nothing more to check */
1219 for (; ip != NULL; ip = ip->next) /* Try each include pattern in turn */
1221 if (pcre2_match(ip->compiled, (PCRE2_SPTR)path, plen, 0, 0, match_data, NULL) >= 0) /* Non-negative result: the pattern matched */
1230 /*************************************************
1231 * Decode integer argument value *
1232 *************************************************/
1234 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
1235 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
1236 just keep it simple.
1239 option_data the option data string
1240 op the option item (for error messages)
1241 longop TRUE if option given in long form
1243 Returns: a long integer
1247 decode_number(char *option_data, option_item *op, BOOL longop)
1249 unsigned long int n = 0; /* Accumulates the decoded value */
1250 char *endptr = option_data; /* Scanning position within the argument */
1251 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++; /* Skip leading white space */
1252 while (isdigit((unsigned char)(*endptr))) /* Accumulate the decimal digits */
1253 n = n * 10 + (int)(*endptr++ - '0');
1254 if (toupper((unsigned char)(*endptr)) == 'K') /* Optional K suffix; cast keeps the <ctype.h> argument in range */
1259 else if (toupper((unsigned char)(*endptr)) == 'M') /* Optional M suffix; cast keeps the <ctype.h> argument in range */
1265 if (*endptr != 0) /* Error */
1269 char *equals = strchr(op->long_name, '='); /* long_name may contain '='; only the part before it is printed */
1270 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1271 (int)(equals - op->long_name);
1272 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after --%.*s\n",
1273 option_data, nlen, op->long_name);
1276 fprintf(stderr, "pcre2grep: Malformed number \"%s\" after -%c\n",
1277 option_data, op->one_char);
1278 pcre2grep_exit(usage(2)); /* Leave via usage() with code 2 */
1286 /*************************************************
1287 * Add item to a chain of numbers *
1288 *************************************************/
1290 /* Used to add an item onto a chain, or just return an unconnected item if the
1291 "after" argument is NULL.
1295 after if not NULL points to item to insert after
1297 Returns: new number block
1301 add_number(int n, omstr *after)
1303 omstr *om = (omstr *)malloc(sizeof(omstr)); /* Allocate the new chain item */
1307 fprintf(stderr, "pcre2grep: malloc failed\n"); /* Diagnostic for an allocation failure */
1315 om->next = after->next; /* The new item takes over whatever followed "after" */
1323 /*************************************************
1324 * Read one line of input *
1325 *************************************************/
1327 /* Normally, input that is to be scanned is read using fread() (or gzread, or
1328 BZ2_read) into a large buffer, so many lines may be read at once. However,
1329 doing this for tty input means that no output appears until a lot of input has
1330 been typed. Instead, tty input is handled line by line. We cannot use fgets()
1331 for this, because it does not stop at a binary zero, and therefore there is no
1332 way of telling how many characters it has read, because there may be binary
1333 zeros embedded in the data. This function is also used for reading patterns
1334 from files (the -f option).
1337 buffer the buffer to read into
1338 length the maximum number of characters to read
1341 Returns: the number of characters read, zero at end of file
1345 read_one_line(char *buffer, int length, FILE *f) /* f is the stream to read from */
1349 while ((c = fgetc(f)) != EOF) /* One byte at a time, until end of file */
1351 buffer[yield++] = c; /* Store every byte, binary zeros included */
1352 if (c == '\n' || yield >= length) break; /* Stop after a newline or when the buffer is full */
1359 /*************************************************
1360 * Find end of line *
1361 *************************************************/
1363 /* The length of the endline sequence that is found is set via lenptr. This may
1364 be zero at the very end of the file if there is no line-ending sequence there.
1367 p current position in line
1368 endptr end of available data
1369 lenptr where to put the length of the eol sequence
1371 Returns: pointer after the last byte of the line,
1372 including the newline byte(s)
1376 end_of_line(char *p, char *endptr, int *lenptr)
1380 default: /* Just in case */
1381 case PCRE2_NEWLINE_LF:
1382 while (p < endptr && *p != '\n') p++; /* Stop at an LF or at the end of the data */
1391 case PCRE2_NEWLINE_CR:
1392 while (p < endptr && *p != '\r') p++; /* Stop at a CR or at the end of the data */
1401 case PCRE2_NEWLINE_NUL:
1402 while (p < endptr && *p != '\0') p++; /* Stop at a binary zero or at the end of the data */
1411 case PCRE2_NEWLINE_CRLF:
1414 while (p < endptr && *p != '\r') p++; /* Find a candidate CR */
1428 case PCRE2_NEWLINE_ANYCRLF:
1432 int c = *((unsigned char *)p); /* Read the byte as unsigned */
1434 if (utf && c >= 0xc0) /* Lead byte of a multibyte UTF-8 character */
1437 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1439 c = (c & utf8_table3[extra]) << gcss; /* Keep the data bits of the lead byte */
1440 for (gcii = 1; gcii <= extra; gcii++)
1443 c |= (p[gcii] & 0x3f) << gcss; /* Merge six bits from each following byte */
1456 if (p < endptr && *p == '\n') /* Is the next byte, if any, an LF? */
1467 } /* End of loop for ANYCRLF case */
1469 *lenptr = 0; /* Must have hit the end */
1472 case PCRE2_NEWLINE_ANY:
1476 int c = *((unsigned char *)p); /* Read the byte as unsigned */
1478 if (utf && c >= 0xc0) /* Lead byte of a multibyte UTF-8 character */
1481 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1483 c = (c & utf8_table3[extra]) << gcss; /* Keep the data bits of the lead byte */
1484 for (gcii = 1; gcii <= extra; gcii++)
1487 c |= (p[gcii] & 0x3f) << gcss; /* Merge six bits from each following byte */
1502 if (p < endptr && *p == '\n') /* Is the next byte, if any, an LF? */
1511 case 0x85: /* Unicode NEL */
1512 *lenptr = utf? 2 : 1; /* NEL occupies two bytes in UTF-8 mode, one otherwise */
1515 case 0x2028: /* Unicode LS */
1516 case 0x2029: /* Unicode PS */
1519 #endif /* Not EBCDIC */
1524 } /* End of loop for ANY case */
1526 *lenptr = 0; /* Must have hit the end */
1528 } /* End of overall switch */
1533 /*************************************************
1534 * Find start of previous line *
1535 *************************************************/
1537 /* This is called when looking back for before lines to print.
1540 p start of the subsequent line
1541 startptr start of available data
1543 Returns: pointer to the start of the previous line
1547 previous_line(char *p, char *startptr)
1551 default: /* Just in case */
1552 case PCRE2_NEWLINE_LF:
1554 while (p > startptr && p[-1] != '\n') p--; /* Stop just after the previous LF */
1557 case PCRE2_NEWLINE_CR:
1559 while (p > startptr && p[-1] != '\r') p--; /* Stop just after the previous CR, the terminator in this mode */
1562 case PCRE2_NEWLINE_NUL:
1564 while (p > startptr && p[-1] != '\0') p--; /* Stop just after the previous binary zero */
1567 case PCRE2_NEWLINE_CRLF:
1571 while (p > startptr && p[-1] != '\n') p--; /* Find a candidate LF */
1572 if (p <= startptr + 1 || p[-2] == '\r') return p; /* Accept it only if a CR precedes it, or the start was reached */
1574 /* Control can never get here */
1576 case PCRE2_NEWLINE_ANY:
1577 case PCRE2_NEWLINE_ANYCRLF:
1578 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--; /* Step back over the ending, treating CRLF as one unit */
1579 if (utf) while ((*p & 0xc0) == 0x80) p--; /* Move to the first byte of a UTF-8 character */
1581 while (p > startptr)
1589 while ((*pp & 0xc0) == 0x80) pp--; /* Skip back over UTF-8 continuation bytes */
1590 c = *((unsigned char *)pp);
1594 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1596 c = (c & utf8_table3[extra]) << gcss; /* Keep the data bits of the lead byte */
1597 for (gcii = 1; gcii <= extra; gcii++)
1600 c |= (pp[gcii] & 0x3f) << gcss; /* Merge six bits from each following byte */
1604 else c = *((unsigned char *)pp);
1606 if (endlinetype == PCRE2_NEWLINE_ANYCRLF) switch (c)
1623 case 0x85: /* Unicode NEL */
1624 case 0x2028: /* Unicode LS */
1625 case 0x2029: /* Unicode PS */
1626 #endif /* Not EBCDIC */
1633 p = pp; /* Back one character */
1634 } /* End of loop for ANY case */
1636 return startptr; /* Hit start of data */
1637 } /* End of overall switch */
1642 /*************************************************
1643 * Print the previous "after" lines *
1644 *************************************************/
1646 /* This is called if we are about to lose said lines because of buffer filling,
1647 and at the end of the file. The data in the line is written using fwrite() so
1648 that a binary zero does not terminate it.
1651 lastmatchnumber the number of the last matching line, plus one
1652 lastmatchrestart where we restarted after the last match
1653 endptr end of available data
1654 printname filename for printing
1660 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1661 char *endptr, const char *printname)
1663 if (after_context > 0 && lastmatchnumber > 0) /* Only when after-context is wanted and there has been a match */
1666 while (lastmatchrestart < endptr && count < after_context) /* Print at most after_context lines */
1669 char *pp = end_of_line(lastmatchrestart, endptr, &ellength); /* pp points past the line and its ending */
1670 if (ellength == 0 && pp == main_buffer + bufsize) break; /* Unterminated line reaching the end of the buffer */
1671 if (printname != NULL) fprintf(stdout, "%s-", printname); /* File name prefix, written with "-" here */
1672 if (number) fprintf(stdout, "%lu-", lastmatchnumber++); /* Line number prefix, written with "-" here */
1673 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout); /* Write the line together with its ending */
1674 lastmatchrestart = pp; /* Continue from the next line */
1677 if (count > 0) hyphenpending = TRUE; /* Record that some lines were printed */
1683 /*************************************************
1684 * Apply patterns to subject till one matches *
1685 *************************************************/
1687 /* This function is called to run through all patterns, looking for a match. It
1688 is used multiple times for the same subject when colouring is enabled, in order
1689 to find all possible matches.
1692 matchptr the start of the subject
1693 length the length of the subject to match
1694 options options for pcre2_match()
1695 startoffset where to start matching
1696 mrc address of where to put the result of pcre2_match()
1698 Returns: TRUE if there was a match
1699 FALSE if there was no match
1700 invert if there was a non-fatal error
1704 match_patterns(char *matchptr, PCRE2_SIZE length, unsigned int options,
1705 PCRE2_SIZE startoffset, int *mrc)
1708 PCRE2_SIZE slen = length; /* Number of subject bytes shown in an error message */
1709 patstr *p = patterns; /* Start of the pattern chain */
1710 const char *msg = "this text:\n\n"; /* Default wording for the error message */
1715 msg = "text that starts:\n\n"; /* Alternative wording; the condition selecting it is not visible in this listing */
1717 for (i = 1; p != NULL; p = p->next, i++) /* i numbers the patterns from 1 for messages */
1719 *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
1720 startoffset, options, match_data, match_context); /* NOTE(review): the (int) cast narrows a PCRE2_SIZE length - confirm it cannot exceed INT_MAX */
1721 if (*mrc >= 0) return TRUE; /* The first pattern that matches ends the search */
1722 if (*mrc == PCRE2_ERROR_NOMATCH) continue; /* Not an error: try the next pattern */
1723 fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc);
1724 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i); /* Identify the pattern only when there are several */
1725 fprintf(stderr, "%s", msg);
1726 FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
1727 fprintf(stderr, "\n\n");
1728 if (*mrc == PCRE2_ERROR_MATCHLIMIT || *mrc == PCRE2_ERROR_DEPTHLIMIT ||
1729 *mrc == PCRE2_ERROR_HEAPLIMIT || *mrc == PCRE2_ERROR_JIT_STACKLIMIT)
1730 resource_error = TRUE; /* Record that a resource limit was hit */
1731 if (error_count++ > 20) /* Compares the count before this error is added */
1733 fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
1736 return invert; /* No more matching; don't show the line again */
1739 return FALSE; /* No match, no errors */
1743 /*************************************************
1744 * Check output text for errors *
1745 *************************************************/
1748 syntax_check_output_text(PCRE2_SPTR string, BOOL callout) /* Checks the $ escapes in an output string, reporting an error on stderr */
1750 PCRE2_SPTR begin = string; /* Start of the text, kept for computing error offsets */
1751 for (; *string != 0; string++)
1755 PCRE2_SIZE capture_id = 0;
1760 /* Syntax error: a character must be present after $. */
1764 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1765 (int)(string - begin), "no character after $");
1771 /* Must be a decimal number in braces, e.g: {5} or {38} */
1777 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0')) /* A leading zero is accepted only when callout is FALSE */
1781 /* Maximum capture id is 65535. */
1782 if (capture_id <= 65535)
1783 capture_id = capture_id * 10 + (*string - '0');
1787 while (*string >= '0' && *string <= '9');
1791 /* Syntax error: closing brace is missing. */
1795 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1796 (int)(string - begin), "missing closing brace");
1802 /* To negate the effect of the string++ in the for loop header. */
1808 /* Syntax error: a decimal number required. */
1810 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1811 (int)(string - begin), "decimal number expected");
1814 else if (*string == 'o')
1818 if (*string < '0' || *string > '7')
1820 /* Syntax error: an octal number required. */
1822 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1823 (int)(string - begin), "octal number expected");
1827 else if (*string == 'x')
1831 if (!isxdigit((unsigned char)*string))
1833 /* Syntax error: a hexadecimal number required. */
1835 fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
1836 (int)(string - begin), "hexadecimal number expected");
1847 /*************************************************
1848 * Display output text *
1849 *************************************************/
1851 /* Display the output text, which is assumed to have already been syntax
1852 checked. Output may contain escape sequences started by the dollar sign. The
1853 escape sequences are substituted as follows:
1855 $<digits> or ${<digits>} is replaced by the captured substring of the given
1856 decimal number; zero will substitute the whole match. If the number is
1857 greater than the number of capturing substrings, or if the capture is unset,
1858 the replacement is empty.
1860 $a is replaced by bell.
1861 $b is replaced by backspace.
1862 $e is replaced by escape.
1863 $f is replaced by form feed.
1864 $n is replaced by newline.
1865 $r is replaced by carriage return.
1866 $t is replaced by tab.
1867 $v is replaced by vertical tab.
1869 $o<digits> is replaced by the character represented by the given octal
1870 number; up to three digits are processed.
1872 $x<digits> is replaced by the character represented by the given hexadecimal
1873 number; up to two digits are processed.
1875 Any other character is substituted by itself. E.g: $$ is replaced by a single
1879 string: the output text
1880 callout: TRUE for the builtin callout, FALSE for --output
1881 subject the start of the subject
1882 ovector: capture offsets
1883 capture_top: number of captures
1885 Returns: TRUE if something was output, other than newline
1886 FALSE if nothing was output, or newline was last output
1890 display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
1891 PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
1893 BOOL printed = FALSE; /* Nothing has been output yet */
1895 for (; *string != 0; string++)
1900 PCRE2_SIZE capture_id = 0;
1907 /* Must be a decimal number in braces, e.g: {5} or {38} */
1913 if ((*string >= '1' && *string <= '9') || (!callout && *string == '0')) /* A leading zero is accepted only when callout is FALSE */
1917 /* Maximum capture id is 65535. */
1918 if (capture_id <= 65535)
1919 capture_id = capture_id * 10 + (*string - '0');
1923 while (*string >= '0' && *string <= '9');
1927 /* To negate the effect of the for. */
1931 if (capture_id < capture_top) /* Numbers at or beyond capture_top output nothing */
1933 PCRE2_SIZE capturesize;
1936 capturesize = ovector[capture_id + 1] - ovector[capture_id]; /* End offset minus start offset */
1937 if (capturesize > 0) /* An empty capture outputs nothing */
1939 print_match(subject + ovector[capture_id], capturesize);
1944 else if (*string == 'a') ch = '\a';
1945 else if (*string == 'b') ch = '\b';
1947 else if (*string == 'e') ch = '\033'; /* Octal 033; presumably the non-EBCDIC escape - the selecting #if is not visible */
1949 else if (*string == 'e') ch = '\047'; /* Octal 047; presumably the EBCDIC escape - the selecting #if is not visible */
1951 else if (*string == 'f') ch = '\f';
1952 else if (*string == 'r') ch = '\r';
1953 else if (*string == 't') ch = '\t';
1954 else if (*string == 'v') ch = '\v';
1955 else if (*string == 'n')
1957 fprintf(stdout, STDOUT_NL); /* $n writes the STDOUT_NL sequence */
1960 else if (*string == 'o')
1965 if (string[1] >= '0' && string[1] <= '7') /* Is there another octal digit? */
1968 ch = ch * 8 + (*string - '0'); /* Accumulate one octal digit */
1970 if (string[1] >= '0' && string[1] <= '7') /* Is there another octal digit? */
1973 ch = ch * 8 + (*string - '0'); /* Accumulate one octal digit */
1976 else if (*string == 'x')
1980 if (*string >= '0' && *string <= '9')
1983 ch = (*string | 0x20) - 'a' + 10; /* Letter digit: force lower case, then a-f gives 10-15 */
1984 if (isxdigit((unsigned char)string[1])) /* Is there a second hex digit? */
1988 if (*string >= '0' && *string <= '9')
1989 ch += *string - '0';
1991 ch += (*string | 0x20) - 'a' + 10; /* Letter digit: force lower case, then a-f gives 10-15 */
2005 fprintf(stdout, "%c", ch); /* Output the single resulting character */
2014 #ifdef SUPPORT_PCRE2GREP_CALLOUT
2016 /*************************************************
2017 * Parse and execute callout scripts *
2018 *************************************************/
2020 /* This function parses a callout string block and executes the
2021 program specified by the string. The string is a list of substrings
2022 separated by pipe characters. The first substring represents the
2023 executable name, and the following substrings specify the arguments:
2025 program_name|param1|param2|...
2027 Any substring (including the program name) can contain escape sequences
2028 started by the dollar character. The escape sequences are substituted as
2031 $<digits> or ${<digits>} is replaced by the captured substring of the given
2032 decimal number, which must be greater than zero. If the number is greater
2033 than the number of capturing substrings, or if the capture is unset, the
2034 replacement is empty.
2036 Any other character is substituted by itself. E.g: $$ is replaced by a single
2037 dollar or $| replaced by a pipe character.
2039 Alternatively, if string starts with pipe, the remainder is taken as an output
2040 string, same as --output. In this case, --om-separator is used to separate each
2041 callout, defaulting to newline.
2045 echo -e "abcde\n12345" | pcre2grep \
2046 '(.)(..(.))(?C"/bin/echo|Arg1: [$1] [$2] [$3]|Arg2: $|${1}$| ($4)")()' -
2050 Arg1: [a] [bcd] [d] Arg2: |a| ()
2052 Arg1: [1] [234] [4] Arg2: |1| ()
2056 calloutptr the callout block
2058 Returns: currently it always returns with 0
2062 pcre2grep_callout(pcre2_callout_block *calloutptr, void *unused)
2064 PCRE2_SIZE length = calloutptr->callout_string_length;
2065 PCRE2_SPTR string = calloutptr->callout_string;
2066 PCRE2_SPTR subject = calloutptr->subject;
2067 PCRE2_SIZE *ovector = calloutptr->offset_vector;
2068 PCRE2_SIZE capture_top = calloutptr->capture_top;
2069 PCRE2_SIZE argsvectorlen = 2; /* Initial count; presumably the program name plus the final NULL */
2070 PCRE2_SIZE argslen = 1; /* Initial byte count; presumably room for a terminating zero */
2074 char **argsvectorptr;
2080 (void)unused; /* Avoid compiler warning */
2082 /* Only callouts with strings are supported. */
2083 if (string == NULL || length == 0) return 0;
2085 /* If there's no command, output the remainder directly. */
2090 if (!syntax_check_output_text(string, TRUE)) return 0; /* TRUE selects the callout rules */
2091 (void)display_output_text(string, TRUE, subject, ovector, capture_top);
2095 /* Check the syntax and compute the number of string fragments. Callout strings
2096 are ignored in case of a syntax error. */
2104 /* Maximum 10000 arguments allowed. */
2105 if (argsvectorlen > 10000) return 0;
2107 else if (*string == '$')
2109 PCRE2_SIZE capture_id = 0;
2114 /* Syntax error: a character must be present after $. */
2115 if (length == 0) return 0;
2117 if (*string >= '1' && *string <= '9')
2121 /* Maximum capture id is 65535. */
2122 if (capture_id <= 65535)
2123 capture_id = capture_id * 10 + (*string - '0');
2128 while (length > 0 && *string >= '0' && *string <= '9');
2130 /* To negate the effect of string++ below. */
2134 else if (*string == '{')
2136 /* Must be a decimal number in braces, e.g: {5} or {38} */
2140 /* Syntax error: a decimal number required. */
2141 if (length == 0) return 0;
2142 if (*string < '1' || *string > '9') return 0;
2146 /* Maximum capture id is 65535. */
2147 if (capture_id <= 65535)
2148 capture_id = capture_id * 10 + (*string - '0');
2153 /* Syntax error: no more characters */
2154 if (length == 0) return 0;
2156 while (*string >= '0' && *string <= '9');
2158 /* Syntax error: closing brace is missing. */
2159 if (*string != '}') return 0;
2164 if (capture_id < capture_top) /* Only captures below capture_top contribute */
2167 argslen += ovector[capture_id + 1] - ovector[capture_id]; /* Add the length of the captured text */
2170 /* To negate the effect of argslen++ below. */
2180 args = (char*)malloc(argslen); /* One buffer holds the text of all the arguments */
2181 if (args == NULL) return 0; /* Out of memory: the callout is ignored */
2183 argsvector = (char**)malloc(argsvectorlen * sizeof(char*)); /* Vector of pointers to the arguments */
2184 if (argsvector == NULL)
2191 argsvectorptr = argsvector;
2193 *argsvectorptr++ = argsptr; /* The first entry points at the start of the text */
2195 length = calloutptr->callout_string_length; /* Rescan the callout string from the start */
2196 string = calloutptr->callout_string;
2203 *argsvectorptr++ = argsptr; /* Start of the next argument */
2205 else if (*string == '$')
2210 if ((*string >= '1' && *string <= '9') || *string == '{')
2212 PCRE2_SIZE capture_id = 0;
2218 /* Maximum capture id is 65535. */
2219 if (capture_id <= 65535)
2220 capture_id = capture_id * 10 + (*string - '0');
2225 while (length > 0 && *string >= '0' && *string <= '9');
2227 /* To negate the effect of string++ below. */
2238 /* Maximum capture id is 65535. */
2239 if (capture_id <= 65535)
2240 capture_id = capture_id * 10 + (*string - '0');
2245 while (*string != '}');
2248 if (capture_id < capture_top)
2250 PCRE2_SIZE capturesize;
2253 capturesize = ovector[capture_id + 1] - ovector[capture_id];
2254 memcpy(argsptr, subject + ovector[capture_id], capturesize); /* Copy the captured text into the argument */
2255 argsptr += capturesize;
2260 *argsptr++ = *string;
2265 *argsptr++ = *string;
2273 *argsvectorptr = NULL; /* The vector ends with a NULL pointer */
2276 result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
2282 (void)execv(argsvector[0], argsvector); /* execv() does not search PATH for the program */
2283 /* Control gets here if there is an error, e.g. a non-existent program */
2287 (void)waitpid(pid, &result, 0); /* Wait for the child and collect its status */
2293 /* Currently negative return values are not supported, only zero (match
2294 continues) or non-zero (match fails). */
2303 /*************************************************
2304 * Read a portion of the file into buffer *
2305 *************************************************/
2308 fill_buffer(void *handle, int frtype, char *buffer, int length,
2309 BOOL input_line_buffered) /* Reads up to length bytes from handle into buffer, using the reader chosen by frtype */
2311 (void)frtype; /* Avoid warning when not used */
2314 if (frtype == FR_LIBZ) /* Input is being read via zlib */
2315 return gzread((gzFile)handle, buffer, length);
2319 #ifdef SUPPORT_LIBBZ2
2320 if (frtype == FR_LIBBZ2) /* Input is being read via bzlib */
2321 return BZ2_bzread((BZFILE *)handle, buffer, length);
2325 return (input_line_buffered ? /* Otherwise handle is a FILE: one line at a time, or one bulk read */
2326 read_one_line(buffer, length, (FILE *)handle) :
2327 fread(buffer, 1, length, (FILE *)handle));
2332 /*************************************************
2333 * Grep an individual file *
2334 *************************************************/
2336 /* This is called from grep_or_recurse() below. It uses a buffer that is three
2337 times the value of bufthird. The matching point is never allowed to stray into
2338 the top third of the buffer, thus keeping more of the file available for
2339 context printing or for multiline scanning. For large files, the pointer will
2340 be in the middle third most of the time, so the bottom third is available for
2341 "before" context printing.
2344 handle the fopened FILE stream for a normal file
2345 the gzFile pointer when reading is via libz
2346 the BZFILE pointer when reading is via libbz2
2347 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
2348 filename the file name or NULL (for errors)
2349 printname the file name if it is to be printed for each match
2350 or NULL if the file name is not to be printed
2351 it cannot be NULL if filenames[_nomatch]_only is set
2353 Returns: 0 if there was at least one match
2354 1 otherwise (no matches)
2355 2 if an overlong line is encountered
2356 3 if there is a read error on a .bz2 file
2360 pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
2364 unsigned long int linenumber = 1;
2365 unsigned long int lastmatchnumber = 0;
2366 unsigned long int count = 0;
2367 char *lastmatchrestart = main_buffer;
2368 char *ptr = main_buffer;
2370 PCRE2_SIZE bufflength;
2371 BOOL binary = FALSE;
2372 BOOL endhyphenpending = FALSE;
2373 BOOL input_line_buffered = line_buffered;
2374 FILE *in = NULL; /* Ensure initialized */
2376 /* Do the first read into the start of the buffer and set up the pointer to end
2377 of what we have. In the case of libz, a non-zipped .gz file will be read as a
2378 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
2381 if (frtype != FR_LIBZ && frtype != FR_LIBBZ2)
2383 in = (FILE *)handle;
2384 if (is_file_tty(in)) input_line_buffered = TRUE;
2386 else input_line_buffered = FALSE;
2388 bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
2389 input_line_buffered);
2391 #ifdef SUPPORT_LIBBZ2
2392 if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */
2395 endptr = main_buffer + bufflength;
2397 /* Unless binary-files=text, see if we have a binary file. This uses the same
2398 rule as GNU grep, namely, a search for a binary zero byte near the start of the
2399 file. However, when the newline convention is binary zero, we can't do this. */
2401 if (binary_files != BIN_TEXT)
2403 if (endlinetype != PCRE2_NEWLINE_NUL)
2404 binary = memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength)
2406 if (binary && binary_files == BIN_NOMATCH) return 1;
2409 /* Loop while the current pointer is not at the end of the file. For large
2410 files, endptr will be at the end of the buffer when we are in the middle of the
2411 file, but ptr will never get there, because as soon as it gets over 2/3 of the
2412 way, the buffer is shifted left and re-filled. */
2414 while (ptr < endptr)
2418 unsigned int options = 0;
2421 PCRE2_SIZE length, linelength;
2422 PCRE2_SIZE startoffset = 0;
2424 /* At this point, ptr is at the start of a line. We need to find the length
2425 of the subject string to pass to pcre2_match(). In multiline mode, it is the
2426 length remainder of the data in the buffer. Otherwise, it is the length of
2427 the next line, excluding the terminating newline. After matching, we always
2428 advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
2429 option is used for compiling, so that any match is constrained to be in the
2432 t = end_of_line(t, endptr, &endlinelength);
2433 linelength = t - ptr - endlinelength;
2434 length = multiline? (PCRE2_SIZE)(endptr - ptr) : linelength;
2436 /* Check to see if the line we are looking at extends right to the very end
2437 of the buffer without a line terminator. This means the line is too long to
2438 handle at the current buffer size. Until the buffer reaches its maximum size,
2439 try doubling it and reading more data. */
2441 if (endlinelength == 0 && t == main_buffer + bufsize)
2443 if (bufthird < max_bufthird)
2446 int new_bufthird = 2*bufthird;
2448 if (new_bufthird > max_bufthird) new_bufthird = max_bufthird;
2449 new_buffer = (char *)malloc(3*new_bufthird);
2451 if (new_buffer == NULL)
2454 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2455 "pcre2grep: not enough memory to increase the buffer size to %d\n",
2457 (filename == NULL)? "" : " of file ",
2458 (filename == NULL)? "" : filename,
2463 /* Copy the data and adjust pointers to the new buffer location. */
2465 memcpy(new_buffer, main_buffer, bufsize);
2466 bufthird = new_bufthird;
2467 bufsize = 3*bufthird;
2468 ptr = new_buffer + (ptr - main_buffer);
2469 lastmatchrestart = new_buffer + (lastmatchrestart - main_buffer);
2471 main_buffer = new_buffer;
2473 /* Read more data into the buffer and then try to find the line ending
2476 bufflength += fill_buffer(handle, frtype, main_buffer + bufflength,
2477 bufsize - bufflength, input_line_buffered);
2478 endptr = main_buffer + bufflength;
2484 "pcre2grep: line %lu%s%s is too long for the internal buffer\n"
2485 "pcre2grep: the maximum buffer size is %d\n"
2486 "pcre2grep: use the --max-buffer-size option to change it\n",
2488 (filename == NULL)? "" : " of file ",
2489 (filename == NULL)? "" : filename,
2495 /* Extra processing for Jeffrey Friedl's debugging. */
2497 #ifdef JFRIEDL_DEBUG
2498 if (jfriedl_XT || jfriedl_XR)
2500 # include <sys/time.h>
2502 struct timeval start_time, end_time;
2503 struct timezone dummy;
2508 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
2509 const char *orig = ptr;
2510 ptr = malloc(newlen + 1);
2512 printf("out of memory");
2516 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
2517 for (i = 0; i < jfriedl_XT; i++) {
2518 strncpy(endptr, orig, length);
2521 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
2525 if (gettimeofday(&start_time, &dummy) != 0)
2526 perror("bad gettimeofday");
2529 for (i = 0; i < jfriedl_XR; i++)
2530 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
2531 PCRE2_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
2533 if (gettimeofday(&end_time, &dummy) != 0)
2534 perror("bad gettimeofday");
2536 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
2538 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
2540 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
2545 /* We come back here after a match when only_matching_count is non-zero, in
2546 order to find any further matches in the same line. This applies to
2547 --only-matching, --file-offsets, and --line-offsets. */
2549 ONLY_MATCHING_RESTART:
2551 /* Run through all the patterns until one matches or there is an error other
2552 than NOMATCH. This code is in a subroutine so that it can be re-used for
2553 finding subsequent matches when colouring matched lines. After finding one
2554 match, set PCRE2_NOTEMPTY to disable any further matches of null strings in
2557 match = match_patterns(ptr, length, options, startoffset, &mrc);
2558 options = PCRE2_NOTEMPTY;
2560 /* If it's a match or a not-match (as required), do what's wanted. NOTE: Use
2561 only FWRITE_IGNORE() - which is just a packaged fwrite() that ignores its
2562 return code - to output data lines, so that binary zeroes are treated as just
2563 another data character. */
2565 if (match != invert)
2567 BOOL hyphenprinted = FALSE;
2569 /* We've failed if we want a file that doesn't have any matches. */
2571 if (filenames == FN_NOMATCH_ONLY) return 1;
2573 /* If all we want is a yes/no answer, we can return immediately. */
2575 if (quiet) return 0;
2577 /* Just count if just counting is wanted. */
2579 else if (count_only || show_total_count) count++;
2581 /* When handling a binary file and binary-files==binary, the "binary"
2582 variable will be set true (it's false in all other cases). In this
2583 situation we just want to output the file name. No need to scan further. */
2587 fprintf(stdout, "Binary file %s matches" STDOUT_NL, filename);
2591 /* Likewise, if all we want is a file name, there is no need to scan any
2592 more lines in the file. */
2594 else if (filenames == FN_MATCH_ONLY)
2596 fprintf(stdout, "%s" STDOUT_NL, printname);
2600 /* The --only-matching option prints just the substring that matched,
2601 and/or one or more captured portions of it, as long as these strings are
2602 not empty. The --file-offsets and --line-offsets options output offsets for
2603 the matching substring (all three set only_matching_count non-zero). None
2604 of these mutually exclusive options prints any context. Afterwards, adjust
2605 the start and then jump back to look for further matches in the same line.
2606 If we are in invert mode, however, nothing is printed and we do not restart
2607 - this could still be useful because the return code is set. */
2609 else if (only_matching_count != 0)
2613 PCRE2_SIZE oldstartoffset;
2615 if (printname != NULL) fprintf(stdout, "%s:", printname);
2616 if (number) fprintf(stdout, "%lu:", linenumber);
2618 /* Handle --line-offsets */
2621 fprintf(stdout, "%d,%d" STDOUT_NL, (int)(ptr + offsets[0] - ptr),
2622 (int)(offsets[1] - offsets[0]));
2624 /* Handle --file-offsets */
2626 else if (file_offsets)
2627 fprintf(stdout, "%d,%d" STDOUT_NL,
2628 (int)(filepos + ptr + offsets[0] - ptr),
2629 (int)(offsets[1] - offsets[0]));
2631 /* Handle --output (which has already been syntax checked) */
2633 else if (output_text != NULL)
2635 if (display_output_text((PCRE2_SPTR)output_text, FALSE,
2636 (PCRE2_SPTR)ptr, offsets, mrc) || printname != NULL ||
2638 fprintf(stdout, STDOUT_NL);
2641 /* Handle --only-matching, which may occur many times */
2645 BOOL printed = FALSE;
2648 for (om = only_matching; om != NULL; om = om->next)
2650 int n = om->groupnum;
2653 int plen = offsets[2*n + 1] - offsets[2*n];
2656 if (printed && om_separator != NULL)
2657 fprintf(stdout, "%s", om_separator);
2658 print_match(ptr + offsets[n*2], plen);
2664 if (printed || printname != NULL || number)
2665 fprintf(stdout, STDOUT_NL);
2668 /* Prepare to repeat to find the next match in the line. */
2671 if (line_buffered) fflush(stdout);
2672 rc = 0; /* Had some success */
2674 /* If the pattern contained a lookbehind that included \K, it is
2675 possible that the end of the match might be at or before the actual
2676 starting offset we have just used. In this case, start one character
2679 startoffset = offsets[1]; /* Restart after the match */
2680 oldstartoffset = pcre2_get_startchar(match_data);
2681 if (startoffset <= oldstartoffset)
2683 if (startoffset >= length) goto END_ONE_MATCH; /* Were at end */
2684 startoffset = oldstartoffset + 1;
2685 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2688 /* If the current match ended past the end of the line (only possible
2689 in multiline mode), we must move on to the line in which it did end
2690 before searching for more matches. */
2692 while (startoffset > linelength)
2694 ptr += linelength + endlinelength;
2695 filepos += (int)(linelength + endlinelength);
2697 startoffset -= (int)(linelength + endlinelength);
2698 t = end_of_line(ptr, endptr, &endlinelength);
2699 linelength = t - ptr - endlinelength;
2700 length = (PCRE2_SIZE)(endptr - ptr);
2703 goto ONLY_MATCHING_RESTART;
2707 /* This is the default case when none of the above options is set. We print
2708 the matching line(s), possibly preceded and/or followed by other lines of
2713 /* See if there is a requirement to print some "after" lines from a
2714 previous match. We never print any overlaps. */
2716 if (after_context > 0 && lastmatchnumber > 0)
2720 char *p = lastmatchrestart;
2722 while (p < ptr && linecount < after_context)
2724 p = end_of_line(p, ptr, &ellength);
2728 /* It is important to advance lastmatchrestart during this printing so
2729 that it interacts correctly with any "before" printing below. Print
2730 each line's data using fwrite() in case there are binary zeroes. */
2732 while (lastmatchrestart < p)
2734 char *pp = lastmatchrestart;
2735 if (printname != NULL) fprintf(stdout, "%s-", printname);
2736 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
2737 pp = end_of_line(pp, endptr, &ellength);
2738 FWRITE_IGNORE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
2739 lastmatchrestart = pp;
2741 if (lastmatchrestart != ptr) hyphenpending = TRUE;
2744 /* If there were non-contiguous lines printed above, insert hyphens. */
2748 fprintf(stdout, "--" STDOUT_NL);
2749 hyphenpending = FALSE;
2750 hyphenprinted = TRUE;
2753 /* See if there is a requirement to print some "before" lines for this
2754 match. Again, don't print overlaps. */
2756 if (before_context > 0)
2761 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
2762 linecount < before_context)
2765 p = previous_line(p, main_buffer);
2768 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
2769 fprintf(stdout, "--" STDOUT_NL);
2775 if (printname != NULL) fprintf(stdout, "%s-", printname);
2776 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
2777 pp = end_of_line(pp, endptr, &ellength);
2778 FWRITE_IGNORE(p, 1, pp - p, stdout);
2783 /* Now print the matching line(s); ensure we set hyphenpending at the end
2784 of the file if any context lines are being output. */
2786 if (after_context > 0 || before_context > 0)
2787 endhyphenpending = TRUE;
2789 if (printname != NULL) fprintf(stdout, "%s:", printname);
2790 if (number) fprintf(stdout, "%lu:", linenumber);
2792 /* This extra option, for Jeffrey Friedl's debugging requirements,
2793 replaces the matched string, or a specific captured string if it exists,
2794 with X. When this happens, colouring is ignored. */
2796 #ifdef JFRIEDL_DEBUG
2797 if (S_arg >= 0 && S_arg < mrc)
2799 int first = S_arg * 2;
2800 int last = first + 1;
2801 FWRITE_IGNORE(ptr, 1, offsets[first], stdout);
2802 fprintf(stdout, "X");
2803 FWRITE_IGNORE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
2808 /* In multiline mode, or if colouring, we have to split the line(s) up
2809 and search for further matches, but not of course if the line is a
2810 non-match. In multiline mode this is necessary in case there is another
2811 match that spans the end of the current line. When colouring we want to
2812 colour all matches. */
2814 if ((multiline || do_colour) && !invert)
2817 PCRE2_SIZE endprevious;
2819 /* The use of \K may make the end offset earlier than the start. In
2820 this situation, swap them round. */
2822 if (offsets[0] > offsets[1])
2824 PCRE2_SIZE temp = offsets[0];
2825 offsets[0] = offsets[1];
2829 FWRITE_IGNORE(ptr, 1, offsets[0], stdout);
2830 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2834 PCRE2_SIZE oldstartoffset = pcre2_get_startchar(match_data);
2836 endprevious = offsets[1];
2837 startoffset = endprevious; /* Advance after previous match. */
2839 /* If the pattern contained a lookbehind that included \K, it is
2840 possible that the end of the match might be at or before the actual
2841 starting offset we have just used. In this case, start one character
2844 if (startoffset <= oldstartoffset)
2846 startoffset = oldstartoffset + 1;
2847 if (utf) while ((ptr[startoffset] & 0xc0) == 0x80) startoffset++;
2850 /* If the current match ended past the end of the line (only possible
2851 in multiline mode), we must move on to the line in which it did end
2852 before searching for more matches. Because the PCRE2_FIRSTLINE option
2853 is set, the start of the match will always be before the first
2854 newline sequence. */
2856 while (startoffset > linelength + endlinelength)
2858 ptr += linelength + endlinelength;
2859 filepos += (int)(linelength + endlinelength);
2861 startoffset -= (int)(linelength + endlinelength);
2862 endprevious -= (int)(linelength + endlinelength);
2863 t = end_of_line(ptr, endptr, &endlinelength);
2864 linelength = t - ptr - endlinelength;
2865 length = (PCRE2_SIZE)(endptr - ptr);
2868 /* If startoffset is at the exact end of the line it means this
2869 complete line was the final part of the match, so there is nothing
2872 if (startoffset == linelength + endlinelength) break;
2874 /* Otherwise, run a match from within the final line, and if found,
2875 loop for any that may follow. */
2877 if (!match_patterns(ptr, length, options, startoffset, &mrc)) break;
2879 /* The use of \K may make the end offset earlier than the start. In
2880 this situation, swap them round. */
2882 if (offsets[0] > offsets[1])
2884 PCRE2_SIZE temp = offsets[0];
2885 offsets[0] = offsets[1];
2889 FWRITE_IGNORE(ptr + endprevious, 1, offsets[0] - endprevious, stdout);
2890 print_match(ptr + offsets[0], offsets[1] - offsets[0]);
2893 /* In multiline mode, we may have already printed the complete line
2894 and its line-ending characters (if they matched the pattern), so there
2895 may be no more to print. */
2897 plength = (int)((linelength + endlinelength) - endprevious);
2898 if (plength > 0) FWRITE_IGNORE(ptr + endprevious, 1, plength, stdout);
2901 /* Not colouring or multiline; no need to search for further matches. */
2903 else FWRITE_IGNORE(ptr, 1, linelength + endlinelength, stdout);
2906 /* End of doing what has to be done for a match. If --line-buffered was
2907 given, flush the output. */
2909 if (line_buffered) fflush(stdout);
2910 rc = 0; /* Had some success */
2912 /* Remember where the last match happened for after_context. We remember
2913 where we are about to restart, and that line's number. */
2915 lastmatchrestart = ptr + linelength + endlinelength;
2916 lastmatchnumber = linenumber + 1;
2919 /* For a match in multiline inverted mode (which of course did not cause
2920 anything to be printed), we have to move on to the end of the match before
2923 if (multiline && invert && match)
2926 char *endmatch = ptr + offsets[1];
2928 while (t < endmatch)
2930 t = end_of_line(t, endptr, &ellength);
2931 if (t <= endmatch) linenumber++; else break;
2933 endmatch = end_of_line(endmatch, endptr, &ellength);
2934 linelength = endmatch - ptr - ellength;
2937 /* Advance to after the newline and increment the line number. The file
2938 offset to the current line is maintained in filepos. */
2941 ptr += linelength + endlinelength;
2942 filepos += (int)(linelength + endlinelength);
2945 /* If input is line buffered, and the buffer is not yet full, read another
2946 line and add it into the buffer. */
2948 if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
2950 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2955 /* If we haven't yet reached the end of the file (the buffer is full), and
2956 the current point is in the top 1/3 of the buffer, slide the buffer down by
2957 1/3 and refill it. Before we do this, if some unprinted "after" lines are
2958 about to be lost, print them. */
2960 if (bufflength >= (PCRE2_SIZE)bufsize && ptr > main_buffer + 2*bufthird)
2962 if (after_context > 0 &&
2963 lastmatchnumber > 0 &&
2964 lastmatchrestart < main_buffer + bufthird)
2966 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2967 lastmatchnumber = 0; /* Indicates no after lines pending */
2970 /* Now do the shuffle */
2972 (void)memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2975 bufflength = 2*bufthird + fill_buffer(handle, frtype,
2976 main_buffer + 2*bufthird, bufthird, input_line_buffered);
2977 endptr = main_buffer + bufflength;
2979 /* Adjust any last match point */
2981 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2983 } /* Loop through the whole file */
2985 /* End of file; print final "after" lines if wanted; do_after_lines sets
2986 hyphenpending if it prints something. */
2988 if (only_matching_count == 0 && !(count_only|show_total_count))
2990 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2991 hyphenpending |= endhyphenpending;
2994 /* Print the file name if we are looking for those without matches and there
2995 were none. If we found a match, we won't have got this far. */
2997 if (filenames == FN_NOMATCH_ONLY)
2999 fprintf(stdout, "%s" STDOUT_NL, printname);
3003 /* Print the match count if wanted */
3005 if (count_only && !quiet)
3007 if (count > 0 || !omit_zero_count)
3009 if (printname != NULL && filenames != FN_NONE)
3010 fprintf(stdout, "%s:", printname);
3011 fprintf(stdout, "%lu" STDOUT_NL, count);
3016 total_count += count; /* Can be set without count_only */
3022 /*************************************************
3023 * Grep a file or recurse into a directory *
3024 *************************************************/
/* Overview of the visible flow: the path "-" is read from stdin; a directory
is tested against dee_action and the include_dir/exclude_dir patterns and, when
dee_action is dee_RECURSE, each entry in it is processed by a recursive call;
anything else is opened (with gzopen() or BZ2_bzopen() for names ending in .gz
or .bz2 when that support is compiled in, otherwise with fopen()) and handed to
pcre2grep(). */
3026 /* Given a path name, if it's a directory, scan all the files if we are
3027 recursing; if it's a file, grep it.
3030 pathname the path to investigate
3031 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
3032 only_one_at_top TRUE if the path is the only one at toplevel
3034 Returns: -1 the file/directory was skipped
3035 0 if there was at least one match
3036 1 if there were no matches
3037 2 there was some kind of error
3039 However, file opening failures are suppressed if "silent" is set.
3043 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
3049 FILE *in = NULL; /* Ensure initialized */
3055 #ifdef SUPPORT_LIBBZ2
3056 BZFILE *inbz2 = NULL;
3059 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3063 #if defined NATIVE_ZOS
3065 FILE *zos_test_file;
3068 /* If the file name is "-" we scan stdin */
3070 if (strcmp(pathname, "-") == 0)
3072 return pcre2grep(stdin, FR_PLAIN, stdin_name,
3073 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
3077 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
3078 directories, whereas --include and --exclude apply to everything else. The test
3079 is against the final component of the path. */
3081 lastcomp = strrchr(pathname, FILESEP);
3082 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
3084 /* If the file is a directory, skip if not recursing or if explicitly excluded.
3085 Otherwise, scan the directory and recurse for each path within it. The scanning
3086 code is localized so it can be made system-specific. */
3089 /* For z/OS, determine the file type. */
3091 #if defined NATIVE_ZOS
3092 zos_test_file = fopen(pathname,"rb");
3094 if (zos_test_file == NULL)
/* NOTE(review): the format string below contains a single %s but two
arguments are passed, so the strerror(errno) text is never printed. */
3096 if (!silent) fprintf(stderr, "pcre2grep: failed to test next file %s\n",
3097 pathname, strerror(errno));
3100 zos_type = identifyzosfiletype (zos_test_file);
3101 fclose (zos_test_file);
3103 /* Handle a PDS in separate code */
3105 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
3107 return travelonpdsdir (pathname, only_one_at_top);
3110 /* Deal with regular files in the normal way below. These types are:
3111 zos_type == __ZOS_PDS_MEMBER
3112 zos_type == __ZOS_PS
3113 zos_type == __ZOS_VSAM_KSDS
3114 zos_type == __ZOS_VSAM_ESDS
3115 zos_type == __ZOS_VSAM_RRDS
3118 /* Handle a z/OS directory using common code. */
3120 else if (zos_type == __ZOS_HFS)
3122 #endif /* NATIVE_ZOS */
3125 /* Handle directories: common code for all OS */
3127 if (isdirectory(pathname))
3129 if (dee_action == dee_SKIP ||
3130 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
3133 if (dee_action == dee_RECURSE)
3135 char buffer[FNBUFSIZ];
3137 directory_type *dir = opendirectory(pathname);
3142 fprintf(stderr, "pcre2grep: Failed to open directory %s: %s\n", pathname,
3147 while ((nextfile = readdirectory(dir)) != NULL)
/* The +2 allows for the separator character and the terminating zero that
the sprintf() below writes into buffer[FNBUFSIZ]. */
3150 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
3151 if (fnlength > FNBUFSIZ)
3153 fprintf(stderr, "pcre2grep: recursive filename is too long\n");
3157 sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
3158 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
/* Merge the result for this entry: a value greater than 1 (an error)
replaces rc; a match (0) turns a "no matches" rc of 1 into 0. */
3159 if (frc > 1) rc = frc;
3160 else if (frc == 0 && rc == 1) rc = 0;
3163 closedirectory(dir);
3169 if (iswild(pathname))
3174 directory_type *dir = opendirectory(pathname);
/* Leave "name" pointing just after the last slash or backslash in
pathname. */
3179 for (nextfile = name = pathname; *nextfile != 0; nextfile++)
3180 if (*nextfile == '/' || *nextfile == '\\')
3181 name = nextfile + 1;
3184 while ((nextfile = readdirectory(dir)) != NULL)
/* The precisions in the format cap how many bytes of each string are copied
into buffer. NOTE(review): the declaration of this buffer is not visible in
this excerpt - confirm that it holds at least 512 + 128 + 1 bytes. */
3187 sprintf(buffer, "%.512s%.128s", pathname, nextfile);
3188 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
3189 if (frc > 1) rc = frc;
3190 else if (frc == 0 && rc == 1) rc = 0;
3193 closedirectory(dir);
3198 #if defined NATIVE_ZOS
3202 /* If the file is not a directory, check for a regular file, and if it is not,
3203 skip it if that's been requested. Otherwise, check for an explicit inclusion or
3207 #if defined NATIVE_ZOS
3208 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
3209 #else /* all other OS */
3210 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
3212 !test_incexc(lastcomp, include_patterns, exclude_patterns))
3213 return -1; /* File skipped */
3215 /* Control reaches here if we have a regular file, or if we have a directory
3216 and recursion or skipping was not requested, or if we have anything else and
3217 skipping was not requested. The scan proceeds. If this is the first and only
3218 argument at top level, we don't show the file name, unless we are only showing
3219 the file name, or the filename was forced (-H). */
3221 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
3222 pathlen = (int)(strlen(pathname));
3225 /* Open using zlib if it is supported and the file name ends with .gz. */
3228 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
3230 ingz = gzopen(pathname, "rb");
3234 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3238 handle = (void *)ingz;
3244 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
3246 #ifdef SUPPORT_LIBBZ2
3247 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
3249 inbz2 = BZ2_bzopen(pathname, "rb");
3250 handle = (void *)inbz2;
3256 /* Otherwise use plain fopen(). The label is so that we can come back here if
3257 an attempt to read a .bz2 file indicates that it really is a plain file. */
3259 #ifdef SUPPORT_LIBBZ2
3263 in = fopen(pathname, "rb");
3264 handle = (void *)in;
3268 /* All the opening methods return errno when they fail. */
3273 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", pathname,
3278 /* Now grep the file */
/* The final argument is the name to show with the output: it is NULL unless
filenames is above FN_DEFAULT, or equals FN_DEFAULT and this was not the only
path at top level. */
3280 rc = pcre2grep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
3281 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
3283 /* Close in an appropriate manner. */
3286 if (frtype == FR_LIBZ)
3291 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
3292 read failed. If the error indicates that the file isn't in fact bzipped, try
3293 again as a normal file. */
3295 #ifdef SUPPORT_LIBBZ2
3296 if (frtype == FR_LIBBZ2)
3301 const char *err = BZ2_bzerror(inbz2, &errnum);
3302 if (errnum == BZ_DATA_ERROR_MAGIC)
3308 fprintf(stderr, "pcre2grep: Failed to read %s using bzlib: %s\n",
3310 rc = 2; /* The normal "something went wrong" code */
3317 /* Normal file close */
3321 /* Pass back the yield from pcre2grep(). */
3328 /*************************************************
3329 * Handle a single-letter, no data option *
3330 *************************************************/
/* Apply one option that takes no data. Most cases simply set a file-scope
variable. Cases 'F', 'i', 'M' and 'u' also OR PCRE2 compile flags into
"options", while 'w' and 'x' OR flags into the file-scope extra_options. An
unrecognized letter is reported on stderr and the program exits through
pcre2grep_exit(usage(2)).

Arguments:
  letter     the option letter, or one of the N_xxx codes (N_FOFFSETS,
               N_HELP, N_LBUFFER, N_LOFFSETS, N_NOJIT)
  options    the PCRE2 compile options accumulated so far

NOTE(review): the return statement is not visible in this excerpt. The callers
in main() assign the result to pcre2_options, so it presumably returns the
updated "options" value - confirm. */
3333 handle_option(int letter, int options)
3337 case N_FOFFSETS: file_offsets = TRUE; break;
3338 case N_HELP: help(); pcre2grep_exit(0); break; /* Stops compiler warning */
3339 case N_LBUFFER: line_buffered = TRUE; break;
/* Setting line_offsets also forces line numbers on. */
3340 case N_LOFFSETS: line_offsets = number = TRUE; break;
3341 case N_NOJIT: use_jit = FALSE; break;
3342 case 'a': binary_files = BIN_TEXT; break;
3343 case 'c': count_only = TRUE; break;
3344 case 'F': options |= PCRE2_LITERAL; break;
3345 case 'H': filenames = FN_FORCE; break;
3346 case 'I': binary_files = BIN_NOMATCH; break;
3347 case 'h': filenames = FN_NONE; break;
3348 case 'i': options |= PCRE2_CASELESS; break;
3349 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
3350 case 'L': filenames = FN_NOMATCH_ONLY; break;
3351 case 'M': multiline = TRUE; options |= PCRE2_MULTILINE|PCRE2_FIRSTLINE; break;
3352 case 'n': number = TRUE; break;
/* NOTE(review): the case label for the next two lines is not visible in this
excerpt. They append the number 0 to the only_matching chain and, if the chain
was empty, make the new entry its anchor. */
3355 only_matching_last = add_number(0, only_matching_last);
3356 if (only_matching == NULL) only_matching = only_matching_last;
3359 case 'q': quiet = TRUE; break;
3360 case 'r': dee_action = dee_RECURSE; break;
3361 case 's': silent = TRUE; break;
3362 case 't': show_total_count = TRUE; break;
3363 case 'u': options |= PCRE2_UTF; utf = TRUE; break;
3364 case 'v': invert = TRUE; break;
3365 case 'w': extra_options |= PCRE2_EXTRA_MATCH_WORD; break;
3366 case 'x': extra_options |= PCRE2_EXTRA_MATCH_LINE; break;
/* Ask the library for its version string (PCRE2_CONFIG_VERSION) and print
it. The case label is not visible in this excerpt. */
3370 unsigned char buffer[128];
3371 (void)pcre2_config(PCRE2_CONFIG_VERSION, buffer);
3372 fprintf(stdout, "pcre2grep version %s" STDOUT_NL, buffer);
3378 fprintf(stderr, "pcre2grep: Unknown option -%c\n", letter);
3379 pcre2grep_exit(usage(2));
3387 /*************************************************
3388 * Construct printed ordinal *
3389 *************************************************/
3391 /* This turns a number into "1st", "3rd", etc. */
/* The digits are written with sprintf() and a two-letter suffix is then
appended at the end of them. Values 11 to 13 take "th" rather than "st", "nd"
or "rd"; the n = 0 assignment below arranges that by steering the switch to
its default case. The buffer is static, so (assuming p points into it, which
is not shown here) each call overwrites the text produced by the previous one.

NOTE(review): the function header, the initialisation of p, any reduction of n
before the 11..13 test, and the switch expression are not visible in this
excerpt. The 14-byte buffer presumably allows for the digits of a 32-bit int,
the suffix and a terminating zero - confirm. */
3396 static char buffer[14];
3398 sprintf(p, "%d", n);
3399 while (*p != 0) p++;
3401 if (n >= 11 && n <= 13) n = 0;
3404 case 1: strcpy(p, "st"); break;
3405 case 2: strcpy(p, "nd"); break;
3406 case 3: strcpy(p, "rd"); break;
3407 default: strcpy(p, "th"); break;
3414 /*************************************************
3415 * Compile a single pattern *
3416 *************************************************/
/* Summary of the visible flow: a block that is already compiled returns TRUE
at once. When PCRE2_LITERAL is set, the end of the first line of the pattern
text is located with end_of_line(); only that first part is compiled here and
the remainder is handed to add_pattern() as a new block after p (the condition
guarding this is not visible in this excerpt). FALSE is returned if
add_pattern() fails. After a successful pcre2_compile(), JIT compilation is
attempted when SUPPORT_PCRE2GREP_JIT is defined and use_jit is set, and its
result is ignored. After a failed compile, the error offset is clamped to the
pattern length and a message is written to stderr. */
3418 /* Do nothing if the pattern has already been compiled. This is the case for
3419 include/exclude patterns read from a file.
3421 When the -F option has been used, each "pattern" may be a list of strings,
3422 separated by line breaks. They will be matched literally. We split such a
3423 string and compile the first substring, inserting an additional block into the
3427 p points to the pattern block
3428 options the PCRE options
3429 fromfile TRUE if the pattern was read from a file
3430 fromtext file name or identifying text (e.g. "include")
3431 count 0 if this is the only command line pattern, or
3432 number of the command line pattern, or
3433 linenumber for a pattern from a file
3435 Returns: TRUE on success, FALSE after an error
3439 compile_pattern(patstr *p, int options, int fromfile, const char *fromtext,
3444 PCRE2_SIZE patlen, erroffset;
3445 PCRE2_UCHAR errmessbuffer[ERRBUFSIZ];
3447 if (p->compiled != NULL) return TRUE;
3451 if ((options & PCRE2_LITERAL) != 0)
3454 char *eop = ps + patlen;
3455 char *pe = end_of_line(ps, eop, &ellength);
3459 patlen = pe - ps - ellength;
3460 if (add_pattern(pe, p->length-patlen-ellength, p) == NULL) return FALSE;
3464 p->compiled = pcre2_compile((PCRE2_SPTR)ps, patlen, options, &errcode,
3465 &erroffset, compile_context);
3467 /* Handle successful compile. Try JIT-compiling if supported and enabled. We
3468 ignore any JIT compiler errors, relying on falling back to interpreting if
3469 anything goes wrong with JIT. */
3471 if (p->compiled != NULL)
3473 #ifdef SUPPORT_PCRE2GREP_JIT
3474 if (use_jit) (void)pcre2_jit_compile(p->compiled, PCRE2_JIT_COMPLETE);
3479 /* Handle compile errors */
/* Clamp the reported offset so that it never points past the end of the
pattern text. */
3481 if (erroffset > patlen) erroffset = patlen;
3482 pcre2_get_error_message(errcode, errmessbuffer, sizeof(errmessbuffer));
/* Three message forms follow; the conditions that choose between them are not
visible in this excerpt. The first reports "line <count> of <fromtext>", the
second omits any number, and the third prefixes the ordinal from
ordin(count). */
3486 fprintf(stderr, "pcre2grep: Error in regex in line %d of %s "
3487 "at offset %d: %s\n", count, fromtext, (int)erroffset, errmessbuffer);
3492 fprintf(stderr, "pcre2grep: Error in %s regex at offset %d: %s\n",
3493 fromtext, (int)erroffset, errmessbuffer);
3495 fprintf(stderr, "pcre2grep: Error in %s %s regex at offset %d: %s\n",
3496 ordin(count), fromtext, (int)erroffset, errmessbuffer);
3504 /*************************************************
3505 * Read and compile a file of patterns *
3506 *************************************************/
/* Summary of the visible flow: each line is read into a local buffer with
read_one_line(), trailing white space is removed, and blank lines are skipped.
The remaining text is appended to the chain with add_pattern() and compiled
straight away with compile_pattern(), passing the file name for use in error
messages. Every exit path visible here closes the file unless it is stdin.
NOTE(review): the return statements are not visible in this excerpt. */
3508 /* This is used for --filelist, --include-from, and --exclude-from.
3511 name the name of the file; "-" is stdin
3512 patptr pointer to the pattern chain anchor
3513 patlastptr pointer to the last pattern pointer
3515 Returns: TRUE if all went well
3519 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr)
3524 const char *filename;
3525 char buffer[MAXPATLEN+20];
3527 if (strcmp(name, "-") == 0)
3530 filename = stdin_name;
3534 f = fopen(name, "r");
3537 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", name, strerror(errno));
3543 while ((patlen = read_one_line(buffer, sizeof(buffer), f)) > 0)
3545 while (patlen > 0 && isspace((unsigned char)(buffer[patlen-1]))) patlen--;
3547 if (patlen == 0) continue; /* Skip blank lines */
3549 /* Note: this call to add_pattern() puts a pointer to the local variable
3550 "buffer" into the pattern chain. However, that pointer is used only when
3551 compiling the pattern, which happens immediately below, so we flatten it
3552 afterwards, as a precaution against any later code trying to use it. */
3554 *patlastptr = add_pattern(buffer, patlen, *patlastptr);
3555 if (*patlastptr == NULL)
3557 if (f != stdin) fclose(f);
/* If the chain was empty, the block just added becomes its anchor. */
3560 if (*patptr == NULL) *patptr = *patlastptr;
3562 /* This loop is needed because compiling a "pattern" when -F is set may add
3563 on additional literal patterns if the original contains a newline. In the
3564 common case, it never will, because read_one_line() stops at a newline.
3565 However, the -N option can be used to give pcre2grep a different newline
3570 if (!compile_pattern(*patlastptr, pcre2_options, TRUE, filename,
3573 if (f != stdin) fclose(f);
3576 (*patlastptr)->string = NULL; /* Insurance */
/* Stop at the end of the chain; otherwise step on to the next block, so that
*patlastptr finishes on the last block in the chain. */
3577 if ((*patlastptr)->next == NULL) break;
3578 *patlastptr = (*patlastptr)->next;
3582 if (f != stdin) fclose(f);
3588 /*************************************************
3590 *************************************************/
3592 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
3595 main(int argc, char **argv)
3599 BOOL only_one_at_top;
3602 const char *locale_from = "--locale";
3604 #ifdef SUPPORT_PCRE2GREP_JIT
3605 pcre2_jit_stack *jit_stack = NULL;
3608 /* In Windows, stdout is set up as a text stream, which means that \n is
3609 converted to \r\n. This causes output lines that are copied from the input to
3610 change from ....\r\n to ....\r\r\n, which is not right. We therefore ensure
3611 that stdout is a binary stream. Note that this means all other output to stdout
3612 must use STDOUT_NL to terminate lines. */
3615 _setmode(_fileno(stdout), _O_BINARY);
3618 /* Set up default compile and match contexts and a match data block. */
3620 compile_context = pcre2_compile_context_create(NULL);
3621 match_context = pcre2_match_context_create(NULL);
3622 match_data = pcre2_match_data_create(OFFSET_SIZE, NULL);
3623 offsets = pcre2_get_ovector_pointer(match_data);
3625 /* If string (script) callouts are supported, set up the callout processing
3628 #ifdef SUPPORT_PCRE2GREP_CALLOUT
3629 pcre2_set_callout(match_context, pcre2grep_callout, NULL);
3632 /* Process the options */
3634 for (i = 1; i < argc; i++)
3636 option_item *op = NULL;
3637 char *option_data = (char *)""; /* default to keep compiler happy */
3639 BOOL longopwasequals = FALSE;
3641 if (argv[i][0] != '-') break;
3643 /* If we hit an argument that is just "-", it may be a reference to STDIN,
3644 but only if we have previously had -e or -f to define the patterns. */
3646 if (argv[i][1] == 0)
3648 if (pattern_files != NULL || patterns != NULL) break;
3649 else pcre2grep_exit(usage(2));
3652 /* Handle a long name option, or -- to terminate the options */
3654 if (argv[i][1] == '-')
3656 char *arg = argv[i] + 2;
3657 char *argequals = strchr(arg, '=');
3659 if (*arg == 0) /* -- terminates options */
3662 break; /* out of the options-handling loop */
3667 /* Some long options have data that follows after =, for example file=name.
3668 Some options have variations in the long name spelling: specifically, we
3669 allow "regexp" because GNU grep allows it, though I personally go along
3670 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
3671 These options are entered in the table as "regex(p)". Options can be in
3672 both these categories. */
3674 for (op = optionlist; op->one_char != 0; op++)
3676 char *opbra = strchr(op->long_name, '(');
3677 char *equals = strchr(op->long_name, '=');
3679 /* Handle options with only one spelling of the name */
3681 if (opbra == NULL) /* Does not contain '(' */
3683 if (equals == NULL) /* Not thing=data case */
3685 if (strcmp(arg, op->long_name) == 0) break;
3687 else /* Special case xxx=data */
3689 int oplen = (int)(equals - op->long_name);
3690 int arglen = (argequals == NULL)?
3691 (int)strlen(arg) : (int)(argequals - arg);
3692 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
3694 option_data = arg + arglen;
3695 if (*option_data == '=')
3698 longopwasequals = TRUE;
3705 /* Handle options with an alternate spelling of the name */
3713 int baselen = (int)(opbra - op->long_name);
3714 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
3715 int arglen = (argequals == NULL || equals == NULL)?
3716 (int)strlen(arg) : (int)(argequals - arg);
3718 if ((ret = snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name),
3719 ret < 0 || ret > (int)sizeof(buff1)) ||
3720 (ret = snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
3721 fulllen - baselen - 2, opbra + 1),
3722 ret < 0 || ret > (int)sizeof(buff2)))
3724 fprintf(stderr, "pcre2grep: Buffer overflow when parsing %s option\n",
3729 if (strncmp(arg, buff1, arglen) == 0 ||
3730 strncmp(arg, buff2, arglen) == 0)
3732 if (equals != NULL && argequals != NULL)
3734 option_data = argequals;
3735 if (*option_data == '=')
3738 longopwasequals = TRUE;
3746 if (op->one_char == 0)
3748 fprintf(stderr, "pcre2grep: Unknown option %s\n", argv[i]);
3749 pcre2grep_exit(usage(2));
3753 /* Jeffrey Friedl's debugging harness uses these additional options which
3754 are not in the right form for putting in the option table because they use
3755 only one hyphen, yet are more than one character long. By putting them
3756 separately here, they will not get displayed as part of the help() output,
3757 but I don't think Jeffrey will care about that. */
3759 #ifdef JFRIEDL_DEBUG
3760 else if (strcmp(argv[i], "-pre") == 0) {
3761 jfriedl_prefix = argv[++i];
3763 } else if (strcmp(argv[i], "-post") == 0) {
3764 jfriedl_postfix = argv[++i];
3766 } else if (strcmp(argv[i], "-XT") == 0) {
3767 sscanf(argv[++i], "%d", &jfriedl_XT);
3769 } else if (strcmp(argv[i], "-XR") == 0) {
3770 sscanf(argv[++i], "%d", &jfriedl_XR);
3776 /* One-char options; many that have no data may be in a single argument; we
3777 continue till we hit the last one or one that needs data. */
3781 char *s = argv[i] + 1;
3786 for (op = optionlist; op->one_char != 0; op++)
3788 if (*s == op->one_char) break;
3790 if (op->one_char == 0)
3792 fprintf(stderr, "pcre2grep: Unknown option letter '%c' in \"%s\"\n",
3794 pcre2grep_exit(usage(2));
3799 /* Break out if this is the last character in the string; it's handled
3800 below like a single multi-char option. */
3802 if (*option_data == 0) break;
3804 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
3805 are used for ones that either have a numerical number or defaults, i.e.
3806 the data is optional. If a digit follows, there is data; if not, carry on
3807 with other single-character options in the same string. */
3809 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
3811 if (isdigit((unsigned char)s[1])) break;
3813 else /* Check for an option with data */
3815 if (op->type != OP_NODATA) break;
3818 /* Handle a single-character option with no data, then loop for the
3819 next character in the string. */
3821 pcre2_options = handle_option(*s++, pcre2_options);
3825 /* At this point we should have op pointing to a matched option. If the type
3826 is NO_DATA, it means that there is no data, and the option might set
3827 something in the PCRE options. */
3829 if (op->type == OP_NODATA)
3831 pcre2_options = handle_option(op->one_char, pcre2_options);
3835 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
3836 either has a value or defaults to something. It cannot have data in a
3837 separate item. At the moment, the only such options are "colo(u)r",
3838 "only-matching", and Jeffrey Friedl's special -S debugging option. */
3840 if (*option_data == 0 &&
3841 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
3842 op->type == OP_OP_NUMBERS))
3844 switch (op->one_char)
3847 colour_option = "auto";
3851 only_matching_last = add_number(0, only_matching_last);
3852 if (only_matching == NULL) only_matching = only_matching_last;
3855 #ifdef JFRIEDL_DEBUG
3864 /* Otherwise, find the data string for the option. */
3866 if (*option_data == 0)
3868 if (i >= argc - 1 || longopwasequals)
3870 fprintf(stderr, "pcre2grep: Data missing after %s\n", argv[i]);
3871 pcre2grep_exit(usage(2));
3873 option_data = argv[++i];
3876 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
3877 added to a chain of numbers. */
3879 if (op->type == OP_OP_NUMBERS)
3881 unsigned long int n = decode_number(option_data, op, longop);
3882 omdatastr *omd = (omdatastr *)op->dataptr;
3883 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
3884 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
3887 /* If the option type is OP_PATLIST, it's the -e option, or one of the
3888 include/exclude options, which can be called multiple times to create lists
3891 else if (op->type == OP_PATLIST)
3893 patdatastr *pd = (patdatastr *)op->dataptr;
3894 *(pd->lastptr) = add_pattern(option_data, (PCRE2_SIZE)strlen(option_data),
3896 if (*(pd->lastptr) == NULL) goto EXIT2;
3897 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
3900 /* If the option type is OP_FILELIST, it's one of the options that names a
3903 else if (op->type == OP_FILELIST)
3905 fndatastr *fd = (fndatastr *)op->dataptr;
3906 fn = (fnstr *)malloc(sizeof(fnstr));
3909 fprintf(stderr, "pcre2grep: malloc failed\n");
3913 fn->name = option_data;
3914 if (*(fd->anchor) == NULL)
3917 (*(fd->lastptr))->next = fn;
3918 *(fd->lastptr) = fn;
3921 /* Handle OP_BINFILES */
3923 else if (op->type == OP_BINFILES)
3925 if (strcmp(option_data, "binary") == 0)
3926 binary_files = BIN_BINARY;
3927 else if (strcmp(option_data, "without-match") == 0)
3928 binary_files = BIN_NOMATCH;
3929 else if (strcmp(option_data, "text") == 0)
3930 binary_files = BIN_TEXT;
3933 fprintf(stderr, "pcre2grep: unknown value \"%s\" for binary-files\n",
3935 pcre2grep_exit(usage(2));
3939 /* Otherwise, deal with a single string or numeric data value. */
3941 else if (op->type != OP_NUMBER && op->type != OP_U32NUMBER &&
3942 op->type != OP_OP_NUMBER && op->type != OP_SIZE)
3944 *((char **)op->dataptr) = option_data;
3948 unsigned long int n = decode_number(option_data, op, longop);
3949 if (op->type == OP_U32NUMBER) *((uint32_t *)op->dataptr) = n;
3950 else if (op->type == OP_SIZE) *((PCRE2_SIZE *)op->dataptr) = n;
3951 else *((int *)op->dataptr) = n;
3955 /* Options have been decoded. If -C was used, its value is used as a default
3958 if (both_context > 0)
3960 if (after_context == 0) after_context = both_context;
3961 if (before_context == 0) before_context = both_context;
3964 /* Only one of --only-matching, --output, --file-offsets, or --line-offsets is
3965 permitted. They display, each in their own way, only the data that has matched.
3968 only_matching_count = (only_matching != NULL) + (output_text != NULL) +
3969 file_offsets + line_offsets;
3971 if (only_matching_count > 1)
3973 fprintf(stderr, "pcre2grep: Cannot mix --only-matching, --output, "
3974 "--file-offsets and/or --line-offsets\n");
3975 pcre2grep_exit(usage(2));
3978 /* Check the text supplied to --output for errors. */
3980 if (output_text != NULL &&
3981 !syntax_check_output_text((PCRE2_SPTR)output_text, FALSE))
3984 /* Put limits into the match data block. */
3986 if (heap_limit != PCRE2_UNSET) pcre2_set_heap_limit(match_context, heap_limit);
3987 if (match_limit > 0) pcre2_set_match_limit(match_context, match_limit);
3988 if (depth_limit > 0) pcre2_set_depth_limit(match_context, depth_limit);
3990 /* If a locale has not been provided as an option, see if the LC_CTYPE or
3991 LC_ALL environment variable is set, and if so, use it. */
3995 locale = getenv("LC_ALL");
3996 locale_from = "LC_ALL";
4001 locale = getenv("LC_CTYPE");
4002 locale_from = "LC_CTYPE";
4005 /* If a locale is set, use it to generate the tables that PCRE2 needs. Passing
4006 NULL to pcre2_maketables() means that malloc() is used to get the memory. */
4010 if (setlocale(LC_CTYPE, locale) == NULL)
4012 fprintf(stderr, "pcre2grep: Failed to set locale %s (obtained from %s)\n",
4013 locale, locale_from);
4016 character_tables = pcre2_maketables(NULL);
4017 pcre2_set_character_tables(compile_context, character_tables);
4020 /* Sort out colouring */
4022 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
4024 if (strcmp(colour_option, "always") == 0)
4026 do_ansi = !is_stdout_tty(),
4029 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
4032 fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
4038 char *cs = getenv("PCRE2GREP_COLOUR");
4039 if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
4040 if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
4041 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
4042 if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
4043 if (cs == NULL) cs = getenv("GREP_COLOR");
4046 if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
4049 init_colour_output();
4054 /* Sort out a newline setting. */
4056 if (newline_arg != NULL)
4058 for (endlinetype = 1; endlinetype < (int)(sizeof(newlines)/sizeof(char *));
4061 if (strcmpic(newline_arg, newlines[endlinetype]) == 0) break;
4063 if (endlinetype < (int)(sizeof(newlines)/sizeof(char *)))
4064 pcre2_set_newline(compile_context, endlinetype);
4067 fprintf(stderr, "pcre2grep: Invalid newline specifier \"%s\"\n",
4073 /* Find default newline convention */
4077 (void)pcre2_config(PCRE2_CONFIG_NEWLINE, &endlinetype);
4080 /* Interpret the text values for -d and -D */
4082 if (dee_option != NULL)
4084 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
4085 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
4086 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
4089 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -d\n", dee_option);
4094 if (DEE_option != NULL)
4096 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
4097 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
4100 fprintf(stderr, "pcre2grep: Invalid value \"%s\" for -D\n", DEE_option);
4105 /* Set the extra options */
4107 (void)pcre2_set_compile_extra_options(compile_context, extra_options);
4109 /* Check the values for Jeffrey Friedl's debugging options. */
4111 #ifdef JFRIEDL_DEBUG
4114 fprintf(stderr, "pcre2grep: bad value for -S option\n");
4117 if (jfriedl_XT != 0 || jfriedl_XR != 0)
4119 if (jfriedl_XT == 0) jfriedl_XT = 1;
4120 if (jfriedl_XR == 0) jfriedl_XR = 1;
4124 /* If use_jit is set, check whether JIT is available. If not, do not try
4130 (void)pcre2_config(PCRE2_CONFIG_JIT, &answer);
4131 if (!answer) use_jit = FALSE;
4134 /* Get memory for the main buffer. */
4138 fprintf(stderr, "pcre2grep: --buffer-size must be greater than zero\n");
4142 bufsize = 3*bufthird;
4143 main_buffer = (char *)malloc(bufsize);
4145 if (main_buffer == NULL)
4147 fprintf(stderr, "pcre2grep: malloc failed\n");
4151 /* If no patterns were provided by -e, and there are no files provided by -f,
4152 the first argument is the one and only pattern, and it must exist. */
4154 if (patterns == NULL && pattern_files == NULL)
4156 if (i >= argc) return usage(2);
4157 patterns = patterns_last = add_pattern(argv[i], (PCRE2_SIZE)strlen(argv[i]),
4160 if (patterns == NULL) goto EXIT2;
4163 /* Compile the patterns that were provided on the command line, either by
4164 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
4165 after all the command-line options are read so that we know which PCRE options
4166 to use. When -F is used, compile_pattern() may add another block into the
4167 chain, so we must not access the next pointer till after the compile. */
4169 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
4171 if (!compile_pattern(cp, pcre2_options, FALSE, "command-line",
4172 (j == 1 && patterns->next == NULL)? 0 : j))
4176 /* Read and compile the regular expressions that are provided in files. */
4178 for (fn = pattern_files; fn != NULL; fn = fn->next)
4180 if (!read_pattern_file(fn->name, &patterns, &patterns_last)) goto EXIT2;
4183 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
4185 #ifdef SUPPORT_PCRE2GREP_JIT
4188 jit_stack = pcre2_jit_stack_create(32*1024, 1024*1024, NULL);
4189 if (jit_stack != NULL )
4190 pcre2_jit_stack_assign(match_context, NULL, jit_stack);
4194 /* -F, -w, and -x do not apply to include or exclude patterns, so we must
4195 adjust the options. */
4197 pcre2_options &= ~PCRE2_LITERAL;
4198 (void)pcre2_set_compile_extra_options(compile_context, 0);
4200 /* If there are include or exclude patterns read from the command line, compile
4203 for (j = 0; j < 4; j++)
4206 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
4208 if (!compile_pattern(cp, pcre2_options, FALSE, incexname[j],
4209 (k == 1 && cp->next == NULL)? 0 : k))
4214 /* Read and compile include/exclude patterns from files. */
4216 for (fn = include_from; fn != NULL; fn = fn->next)
4218 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last))
4222 for (fn = exclude_from; fn != NULL; fn = fn->next)
4224 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last))
4228 /* If there are no files that contain lists of files to search, and there are
4229 no file arguments, search stdin, and then exit. */
4231 if (file_lists == NULL && i >= argc)
4233 rc = pcre2grep(stdin, FR_PLAIN, stdin_name,
4234 (filenames > FN_DEFAULT)? stdin_name : NULL);
4238 /* If any files that contain lists of files to search have been specified,
4239 read them line by line and search the given files. */
4241 for (fn = file_lists; fn != NULL; fn = fn->next)
4243 char buffer[FNBUFSIZ];
4245 if (strcmp(fn->name, "-") == 0) fl = stdin; else
4247 fl = fopen(fn->name, "rb");
4250 fprintf(stderr, "pcre2grep: Failed to open %s: %s\n", fn->name,
4255 while (fgets(buffer, sizeof(buffer), fl) != NULL)
4258 char *end = buffer + (int)strlen(buffer);
4259 while (end > buffer && isspace(end[-1])) end--;
4263 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
4264 if (frc > 1) rc = frc;
4265 else if (frc == 0 && rc == 1) rc = 0;
4268 if (fl != stdin) fclose(fl);
4271 /* After handling file-list, work through remaining arguments. Pass in the fact
4272 that there is only one argument at top level - this suppresses the file name if
4273 the argument is not a directory and filenames are not otherwise forced. */
4275 only_one_at_top = i == argc - 1 && file_lists == NULL;
4277 for (; i < argc; i++)
4279 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
4281 if (frc > 1) rc = frc;
4282 else if (frc == 0 && rc == 1) rc = 0;
4285 #ifdef SUPPORT_PCRE2GREP_CALLOUT
4286 /* If separating builtin echo callouts by implicit newline, add one more for
4289 if (om_separator != NULL && strcmp(om_separator, STDOUT_NL) == 0)
4290 fprintf(stdout, STDOUT_NL);
4293 /* Show the total number of matches if requested, but not if only one file's
4294 count was printed. */
4296 if (show_total_count && counts_printed != 1 && filenames != FN_NOMATCH_ONLY)
4298 if (counts_printed != 0 && filenames >= FN_DEFAULT)
4299 fprintf(stdout, "TOTAL:");
4300 fprintf(stdout, "%lu" STDOUT_NL, total_count);
4304 #ifdef SUPPORT_PCRE2GREP_JIT
4305 if (jit_stack != NULL) pcre2_jit_stack_free(jit_stack);
4309 free((void *)character_tables);
4311 pcre2_compile_context_free(compile_context);
4312 pcre2_match_context_free(match_context);
4313 pcre2_match_data_free(match_data);
4315 free_pattern_chain(patterns);
4316 free_pattern_chain(include_patterns);
4317 free_pattern_chain(include_dir_patterns);
4318 free_pattern_chain(exclude_patterns);
4319 free_pattern_chain(exclude_dir_patterns);
4321 free_file_chain(exclude_from);
4322 free_file_chain(include_from);
4323 free_file_chain(pattern_files);
4324 free_file_chain(file_lists);
4326 while (only_matching != NULL)
4328 omstr *this = only_matching;
4329 only_matching = this->next;
4340 /* End of pcre2grep */