1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
8 Written by Philip Hazel
9 Original API code Copyright (c) 1997-2012 University of Cambridge
10 New API code Copyright (c) 2016-2018 University of Cambridge
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
*/
46 #include "pcre2_internal.h"
49 /*************************************************
50 * Return info about compiled pattern *
51 *************************************************/
/* Arguments:
  code      points to compiled code
  what      what information is required
  where     where to put the information; if NULL, return length

Returns:    0 when data returned
            > 0 when length requested
            < 0 on error or unset value
*/
64 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
65 pcre2_pattern_info(const pcre2_code *code, uint32_t what, void *where)
67 const pcre2_real_code *re = (pcre2_real_code *)code;
69 if (where == NULL) /* Requests field length */
73 case PCRE2_INFO_ALLOPTIONS:
74 case PCRE2_INFO_ARGOPTIONS:
75 case PCRE2_INFO_BACKREFMAX:
77 case PCRE2_INFO_CAPTURECOUNT:
78 case PCRE2_INFO_DEPTHLIMIT:
79 case PCRE2_INFO_EXTRAOPTIONS:
80 case PCRE2_INFO_FIRSTCODETYPE:
81 case PCRE2_INFO_FIRSTCODEUNIT:
82 case PCRE2_INFO_HASBACKSLASHC:
83 case PCRE2_INFO_HASCRORLF:
84 case PCRE2_INFO_HEAPLIMIT:
85 case PCRE2_INFO_JCHANGED:
86 case PCRE2_INFO_LASTCODETYPE:
87 case PCRE2_INFO_LASTCODEUNIT:
88 case PCRE2_INFO_MATCHEMPTY:
89 case PCRE2_INFO_MATCHLIMIT:
90 case PCRE2_INFO_MAXLOOKBEHIND:
91 case PCRE2_INFO_MINLENGTH:
92 case PCRE2_INFO_NAMEENTRYSIZE:
93 case PCRE2_INFO_NAMECOUNT:
94 case PCRE2_INFO_NEWLINE:
95 return sizeof(uint32_t);
97 case PCRE2_INFO_FIRSTBITMAP:
98 return sizeof(const uint8_t *);
100 case PCRE2_INFO_JITSIZE:
101 case PCRE2_INFO_SIZE:
102 case PCRE2_INFO_FRAMESIZE:
103 return sizeof(size_t);
105 case PCRE2_INFO_NAMETABLE:
106 return sizeof(PCRE2_SPTR);
110 if (re == NULL) return PCRE2_ERROR_NULL;
112 /* Check that the first field in the block is the magic number. If it is not,
113 return with PCRE2_ERROR_BADMAGIC. */
115 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
117 /* Check that this pattern was compiled in the correct bit mode */
119 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
123 case PCRE2_INFO_ALLOPTIONS:
124 *((uint32_t *)where) = re->overall_options;
127 case PCRE2_INFO_ARGOPTIONS:
128 *((uint32_t *)where) = re->compile_options;
131 case PCRE2_INFO_BACKREFMAX:
132 *((uint32_t *)where) = re->top_backref;
136 *((uint32_t *)where) = re->bsr_convention;
139 case PCRE2_INFO_CAPTURECOUNT:
140 *((uint32_t *)where) = re->top_bracket;
143 case PCRE2_INFO_DEPTHLIMIT:
144 *((uint32_t *)where) = re->limit_depth;
145 if (re->limit_depth == UINT32_MAX) return PCRE2_ERROR_UNSET;
148 case PCRE2_INFO_EXTRAOPTIONS:
149 *((uint32_t *)where) = re->extra_options;
152 case PCRE2_INFO_FIRSTCODETYPE:
153 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)? 1 :
154 ((re->flags & PCRE2_STARTLINE) != 0)? 2 : 0;
157 case PCRE2_INFO_FIRSTCODEUNIT:
158 *((uint32_t *)where) = ((re->flags & PCRE2_FIRSTSET) != 0)?
159 re->first_codeunit : 0;
162 case PCRE2_INFO_FIRSTBITMAP:
163 *((const uint8_t **)where) = ((re->flags & PCRE2_FIRSTMAPSET) != 0)?
164 &(re->start_bitmap[0]) : NULL;
167 case PCRE2_INFO_FRAMESIZE:
168 *((size_t *)where) = offsetof(heapframe, ovector) +
169 re->top_bracket * 2 * sizeof(PCRE2_SIZE);
172 case PCRE2_INFO_HASBACKSLASHC:
173 *((uint32_t *)where) = (re->flags & PCRE2_HASBKC) != 0;
176 case PCRE2_INFO_HASCRORLF:
177 *((uint32_t *)where) = (re->flags & PCRE2_HASCRORLF) != 0;
180 case PCRE2_INFO_HEAPLIMIT:
181 *((uint32_t *)where) = re->limit_heap;
182 if (re->limit_heap == UINT32_MAX) return PCRE2_ERROR_UNSET;
185 case PCRE2_INFO_JCHANGED:
186 *((uint32_t *)where) = (re->flags & PCRE2_JCHANGED) != 0;
189 case PCRE2_INFO_JITSIZE:
191 *((size_t *)where) = (re->executable_jit != NULL)?
192 PRIV(jit_get_size)(re->executable_jit) : 0;
194 *((size_t *)where) = 0;
198 case PCRE2_INFO_LASTCODETYPE:
199 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)? 1 : 0;
202 case PCRE2_INFO_LASTCODEUNIT:
203 *((uint32_t *)where) = ((re->flags & PCRE2_LASTSET) != 0)?
204 re->last_codeunit : 0;
207 case PCRE2_INFO_MATCHEMPTY:
208 *((uint32_t *)where) = (re->flags & PCRE2_MATCH_EMPTY) != 0;
211 case PCRE2_INFO_MATCHLIMIT:
212 *((uint32_t *)where) = re->limit_match;
213 if (re->limit_match == UINT32_MAX) return PCRE2_ERROR_UNSET;
216 case PCRE2_INFO_MAXLOOKBEHIND:
217 *((uint32_t *)where) = re->max_lookbehind;
220 case PCRE2_INFO_MINLENGTH:
221 *((uint32_t *)where) = re->minlength;
224 case PCRE2_INFO_NAMEENTRYSIZE:
225 *((uint32_t *)where) = re->name_entry_size;
228 case PCRE2_INFO_NAMECOUNT:
229 *((uint32_t *)where) = re->name_count;
232 case PCRE2_INFO_NAMETABLE:
233 *((PCRE2_SPTR *)where) = (PCRE2_SPTR)((char *)re + sizeof(pcre2_real_code));
236 case PCRE2_INFO_NEWLINE:
237 *((uint32_t *)where) = re->newline_convention;
240 case PCRE2_INFO_SIZE:
241 *((size_t *)where) = re->blocksize;
244 default: return PCRE2_ERROR_BADOPTION;
252 /*************************************************
253 * Callout enumerator *
254 *************************************************/
/* Arguments:
  code          points to compiled code
  callback      function called for each callout block
  callout_data  user data passed to the callback

Returns:    0 when successfully completed
            < 0 on local error (NULL code, bad magic number, wrong bit mode)
            != 0 for callback error
*/
/* Enumerate callouts: the visible code fills in a
pcre2_callout_enumerate_block from the compiled code at cc and passes it,
together with callout_data, to the user-supplied callback. A non-zero value
returned by the callback is returned to the caller immediately.

NOTE(review): this view of the function is incomplete. The declarations of
cc, utf and rc, the function braces, the enclosing loop and switch, the
break statements, the #endif lines and most case labels are not visible here,
so the comments below describe only the statements that can be seen. */
267 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
268 pcre2_callout_enumerate(const pcre2_code *code,
269 int (*callback)(pcre2_callout_enumerate_block *, void *), void *callout_data)
271 pcre2_real_code *re = (pcre2_real_code *)code;
272 pcre2_callout_enumerate_block cb;
274 #ifdef SUPPORT_UNICODE
/* A NULL pattern is rejected before any field of re is read. */
278 if (re == NULL) return PCRE2_ERROR_NULL;
280 #ifdef SUPPORT_UNICODE
/* Record whether PCRE2_UTF is set in the pattern's overall options. */
281 utf = (re->overall_options & PCRE2_UTF) != 0;
284 /* Check that the first field in the block is the magic number. If it is not,
285 return with PCRE2_ERROR_BADMAGIC. */
287 if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
289 /* Check that this pattern was compiled in the correct bit mode */
291 if ((re->flags & (PCRE2_CODE_UNIT_WIDTH/8)) == 0) return PCRE2_ERROR_BADMODE;
/* Point cc past the pcre2_real_code header and then past name_count entries
of name_entry_size each. */
294 cc = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code))
295 + re->name_count * re->name_entry_size;
/* NOTE(review): only the last labels of this case group are visible. */
353 case OP_NOTMINQUERYI:
359 case OP_NOTPOSQUERYI:
/* Advance by the table length for this opcode. When utf is set, also advance
by GET_EXTRALEN of the code unit just before the new position if HAS_EXTRALEN
says it has one (presumably a multi-unit character; confirm against the
macro definitions). */
361 cc += PRIV(OP_lengths)[*cc];
362 #ifdef SUPPORT_UNICODE
363 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
/* NOTE(review): only the last labels of this case group are visible. */
372 case OP_TYPEMINQUERY:
378 case OP_TYPEPOSQUERY:
/* Advance by the table length; if the code unit just before the new position
is OP_PROP or OP_NOTPROP, skip two more code units. */
380 cc += PRIV(OP_lengths)[*cc];
381 #ifdef SUPPORT_UNICODE
382 if (cc[-1] == OP_PROP || cc[-1] == OP_NOTPROP) cc += 2;
386 #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
/* NOTE(review): the case labels for this statement are not visible. The item
length here is the table length plus the value held in cc[1]. */
397 cc += PRIV(OP_lengths)[*cc] + cc[1];
/* NOTE(review): case label not visible. This branch reports a callout that
has a number and no string: the string fields are cleared, the callback is
invoked, and cc then advances by the table length for the opcode. */
401 cb.pattern_position = GET(cc, 1);
402 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
403 cb.callout_number = cc[1 + 2*LINK_SIZE];
404 cb.callout_string_offset = 0;
405 cb.callout_string_length = 0;
406 cb.callout_string = NULL;
407 rc = callback(&cb, callout_data);
408 if (rc != 0) return rc;
409 cc += PRIV(OP_lengths)[*cc];
/* NOTE(review): case label not visible. This branch reports a callout that
has a string: callout_number is 0. The value at offset 1 + 2*LINK_SIZE is the
length of the whole item (it is also used to advance cc below); the string
length is that value minus the 1 + 4*LINK_SIZE leading code units and 2
more, and the string pointer is one code unit past those leading units. */
413 cb.pattern_position = GET(cc, 1);
414 cb.next_item_length = GET(cc, 1 + LINK_SIZE);
415 cb.callout_number = 0;
416 cb.callout_string_offset = GET(cc, 1 + 3*LINK_SIZE);
417 cb.callout_string_length =
418 GET(cc, 1 + 2*LINK_SIZE) - (1 + 4*LINK_SIZE) - 2;
419 cb.callout_string = cc + (1 + 4*LINK_SIZE) + 1;
420 rc = callback(&cb, callout_data);
421 if (rc != 0) return rc;
422 cc += GET(cc, 1 + 2*LINK_SIZE);
/* NOTE(review): label not visible (presumably the default case). Advance by
the table length for the opcode. */
426 cc += PRIV(OP_lengths)[*cc];
432 /* End of pcre2_pattern_info.c */