1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2018 University of Cambridge
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
*/
42 /* This module contains a single function that scans through a compiled pattern
43 until it finds a capturing bracket with the given number, or, if the number is
44 negative, an instance of OP_REVERSE for a lookbehind. The function is called
45 from pcre2_compile.c and also from pcre2_study.c when finding the minimum
matching length. */
53 #include "pcre2_internal.h"
56 /*************************************************
57 * Scan compiled regex for specific bracket *
58 *************************************************/
/* Arguments:
62 code points to start of expression
64 number the required bracket number or negative to find a lookbehind
66 Returns: pointer to the opcode for the bracket, or NULL if not found
*/
/* Scan compiled code starting at "code" and return a pointer to the first
matching item: when "number" is negative, the first OP_REVERSE encountered;
otherwise the first capturing bracket (OP_CBRA, OP_SCBRA, OP_CBRAPOS or
OP_SCBRAPOS) whose stored number equals "number". NULL is returned when OP_END
is reached first.

NOTE(review): the embedded line numbers jump (for example 70 to 74 and 107 to
118) and no braces, loop header or switch header are visible, so parts of this
function appear to be missing from this copy - confirm against the upstream
file before changing any code here. */
70 PRIV(find_bracket)(PCRE2_SPTR code, BOOL utf, int number)
/* Opcode of the item at the current scan position. */
74 PCRE2_UCHAR c = *code;
/* OP_END is reached without a match: report "not found". */
76 if (c == OP_END) return NULL;
78 /* XCLASS is used for classes that cannot be represented just by a bit map.
79 This includes negated single high-valued characters. CALLOUT_STR is used for
80 callouts with string arguments. In both cases the length in the table is
81 zero; the actual length is stored in the compiled code. */
/* The two items differ only in the offset at which GET() reads the stored
length: 1 for XCLASS, 1 + 2*LINK_SIZE for CALLOUT_STR. */
83 if (c == OP_XCLASS) code += GET(code, 1);
84 else if (c == OP_CALLOUT_STR) code += GET(code, 1 + 2*LINK_SIZE);
86 /* Handle lookbehind */
88 else if (c == OP_REVERSE)
/* A negative "number" asks for a lookbehind, so this OP_REVERSE is the
result; the const-ness of "code" is cast away for the return value. */
90 if (number < 0) return (PCRE2_UCHAR *)code;
/* Otherwise step over the item using its length from the table. */
91 code += PRIV(OP_lengths)[c];
94 /* Handle capturing bracket */
96 else if (c == OP_CBRA || c == OP_SCBRA ||
97 c == OP_CBRAPOS || c == OP_SCBRAPOS)
/* The bracket's number is read with GET2() at offset 1+LINK_SIZE from the
opcode and compared with the requested number. */
99 int n = (int)GET2(code, 1+LINK_SIZE);
100 if (n == number) return (PCRE2_UCHAR *)code;
/* Not the wanted bracket: step over its header using the table length. */
101 code += PRIV(OP_lengths)[c];
104 /* Otherwise, we can get the item's length from the table, except that for
105 repeated character types, we have to test for \p and \P, which have an extra
106 two bytes of parameters, and for MARK/PRUNE/SKIP/THEN with an argument, we
107 must add in its length. */
118 case OP_TYPEMINQUERY:
121 case OP_TYPEPOSQUERY:
/* The item right after the opcode is \p or \P: advance by 2 extra for its
parameters, in addition to the table length added further down. */
122 if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2;
/* Same test, but the type item is looked for IMM2_SIZE further on;
presumably a repeat count sits in between - TODO confirm. The statement
controlled by this "if" is not visible in this copy. */
129 if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
142 /* Add in the fixed length from the table */
144 code += PRIV(OP_lengths)[c];
146 /* In UTF-8 and UTF-16 modes, opcodes that are followed by a character may be
147 followed by a multi-byte character. The length in the table is a minimum, so
148 we have to arrange to skip the extra bytes. */
150 #ifdef MAYBE_UTF_MULTI
204 case OP_NOTMINQUERYI:
208 case OP_NOTPOSQUERYI:
/* "code" has already been advanced by the table length above, so code[-1] is
the unit just before the new position; presumably the first unit of the
character that follows the opcode - TODO confirm. */
209 if (HAS_EXTRALEN(code[-1])) code += GET_EXTRALEN(code[-1]);
213 (void)(utf); /* Keep compiler happy by referencing function argument */
214 #endif /* MAYBE_UTF_MULTI */
219 /* End of pcre2_find_bracket.c */