/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2018 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
46 #include "pcre2_internal.h"
50 /*************************************************
51 * Copy named captured string to given buffer *
52 *************************************************/
54 /* This function copies a single captured substring into a given buffer,
55 identifying it by name. If the regex permits duplicate names, the first
56 substring that is set is chosen.
59 match_data points to the match data
60 stringname the name of the required substring
61 buffer where to put the substring
62 sizeptr the size of the buffer, updated to the size of the substring
64 Returns: if successful: zero
65 if not successful, a negative error code:
66 (1) an error from nametable_scan()
67 (2) an error from copy_bynumber()
68 (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
69 (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
72 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
73 pcre2_substring_copy_byname(pcre2_match_data *match_data, PCRE2_SPTR stringname,
74 PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
76 PCRE2_SPTR first, last, entry;
77 int failrc, entrysize;
78 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
79 return PCRE2_ERROR_DFA_UFUNC;
80 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
82 if (entrysize < 0) return entrysize;
83 failrc = PCRE2_ERROR_UNAVAILABLE;
84 for (entry = first; entry <= last; entry += entrysize)
86 uint32_t n = GET2(entry, 0);
87 if (n < match_data->oveccount)
89 if (match_data->ovector[n*2] != PCRE2_UNSET)
90 return pcre2_substring_copy_bynumber(match_data, n, buffer, sizeptr);
91 failrc = PCRE2_ERROR_UNSET;
99 /*************************************************
100 * Copy numbered captured string to given buffer *
101 *************************************************/
103 /* This function copies a single captured substring into a given buffer,
104 identifying it by number.
107 match_data points to the match data
108 stringnumber the number of the required substring
109 buffer where to put the substring
110 sizeptr the size of the buffer, updated to the size of the substring
112 Returns: if successful: 0
113 if not successful, a negative error code:
114 PCRE2_ERROR_NOMEMORY: buffer too small
115 PCRE2_ERROR_NOSUBSTRING: no such substring
116 PCRE2_ERROR_UNAVAILABLE: ovector too small
117 PCRE2_ERROR_UNSET: substring is not set
120 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
121 pcre2_substring_copy_bynumber(pcre2_match_data *match_data,
122 uint32_t stringnumber, PCRE2_UCHAR *buffer, PCRE2_SIZE *sizeptr)
126 rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
127 if (rc < 0) return rc;
128 if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY;
129 memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2],
138 /*************************************************
139 * Extract named captured string *
140 *************************************************/
142 /* This function copies a single captured substring, identified by name, into
143 new memory. If the regex permits duplicate names, the first substring that is
147 match_data pointer to match_data
148 stringname the name of the required substring
149 stringptr where to put the pointer to the new memory
150 sizeptr where to put the length of the substring
152 Returns: if successful: zero
153 if not successful, a negative value:
154 (1) an error from nametable_scan()
155 (2) an error from get_bynumber()
156 (3) PCRE2_ERROR_UNAVAILABLE: no group is in ovector
157 (4) PCRE2_ERROR_UNSET: all named groups in ovector are unset
160 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
161 pcre2_substring_get_byname(pcre2_match_data *match_data,
162 PCRE2_SPTR stringname, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
164 PCRE2_SPTR first, last, entry;
165 int failrc, entrysize;
166 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
167 return PCRE2_ERROR_DFA_UFUNC;
168 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
170 if (entrysize < 0) return entrysize;
171 failrc = PCRE2_ERROR_UNAVAILABLE;
172 for (entry = first; entry <= last; entry += entrysize)
174 uint32_t n = GET2(entry, 0);
175 if (n < match_data->oveccount)
177 if (match_data->ovector[n*2] != PCRE2_UNSET)
178 return pcre2_substring_get_bynumber(match_data, n, stringptr, sizeptr);
179 failrc = PCRE2_ERROR_UNSET;
187 /*************************************************
188 * Extract captured string to new memory *
189 *************************************************/
191 /* This function copies a single captured substring into a piece of new
195 match_data points to match data
196 stringnumber the number of the required substring
197 stringptr where to put a pointer to the new memory
198 sizeptr where to put the size of the substring
200 Returns: if successful: 0
201 if not successful, a negative error code:
202 PCRE2_ERROR_NOMEMORY: failed to get memory
203 PCRE2_ERROR_NOSUBSTRING: no such substring
204 PCRE2_ERROR_UNAVAILABLE: ovector too small
205 PCRE2_ERROR_UNSET: substring is not set
208 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
209 pcre2_substring_get_bynumber(pcre2_match_data *match_data,
210 uint32_t stringnumber, PCRE2_UCHAR **stringptr, PCRE2_SIZE *sizeptr)
215 rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size);
216 if (rc < 0) return rc;
217 yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) +
218 (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data);
219 if (yield == NULL) return PCRE2_ERROR_NOMEMORY;
220 yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl));
221 memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2],
231 /*************************************************
232 * Free memory obtained by get_substring *
233 *************************************************/
236 Argument: the result of a previous pcre2_substring_get_byxxx()
240 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
241 pcre2_substring_free(PCRE2_UCHAR *string)
245 pcre2_memctl *memctl = (pcre2_memctl *)((char *)string - sizeof(pcre2_memctl));
246 memctl->free(memctl, memctl->memory_data);
252 /*************************************************
253 * Get length of a named substring *
254 *************************************************/
256 /* This function returns the length of a named captured substring. If the regex
257 permits duplicate names, the first substring that is set is chosen.
260 match_data pointer to match data
261 stringname the name of the required substring
262 sizeptr where to put the length
264 Returns: 0 if successful, else a negative error number
267 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
268 pcre2_substring_length_byname(pcre2_match_data *match_data,
269 PCRE2_SPTR stringname, PCRE2_SIZE *sizeptr)
271 PCRE2_SPTR first, last, entry;
272 int failrc, entrysize;
273 if (match_data->matchedby == PCRE2_MATCHEDBY_DFA_INTERPRETER)
274 return PCRE2_ERROR_DFA_UFUNC;
275 entrysize = pcre2_substring_nametable_scan(match_data->code, stringname,
277 if (entrysize < 0) return entrysize;
278 failrc = PCRE2_ERROR_UNAVAILABLE;
279 for (entry = first; entry <= last; entry += entrysize)
281 uint32_t n = GET2(entry, 0);
282 if (n < match_data->oveccount)
284 if (match_data->ovector[n*2] != PCRE2_UNSET)
285 return pcre2_substring_length_bynumber(match_data, n, sizeptr);
286 failrc = PCRE2_ERROR_UNSET;
294 /*************************************************
295 * Get length of a numbered substring *
296 *************************************************/
298 /* This function returns the length of a captured substring. If the start is
299 beyond the end (which can happen when \K is used in an assertion), it sets the
303 match_data pointer to match data
304 stringnumber the number of the required substring
305 sizeptr where to put the length, if not NULL
307 Returns: if successful: 0
308 if not successful, a negative error code:
309 PCRE2_ERROR_NOSUBSTRING: no such substring
310 PCRE2_ERROR_UNAVAILABLE: ovector is too small
311 PCRE2_ERROR_UNSET: substring is not set
314 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
315 pcre2_substring_length_bynumber(pcre2_match_data *match_data,
316 uint32_t stringnumber, PCRE2_SIZE *sizeptr)
318 PCRE2_SIZE left, right;
319 int count = match_data->rc;
320 if (count == PCRE2_ERROR_PARTIAL)
322 if (stringnumber > 0) return PCRE2_ERROR_PARTIAL;
325 else if (count < 0) return count; /* Match failed */
327 if (match_data->matchedby != PCRE2_MATCHEDBY_DFA_INTERPRETER)
329 if (stringnumber > match_data->code->top_bracket)
330 return PCRE2_ERROR_NOSUBSTRING;
331 if (stringnumber >= match_data->oveccount)
332 return PCRE2_ERROR_UNAVAILABLE;
333 if (match_data->ovector[stringnumber*2] == PCRE2_UNSET)
334 return PCRE2_ERROR_UNSET;
336 else /* Matched using pcre2_dfa_match() */
338 if (stringnumber >= match_data->oveccount) return PCRE2_ERROR_UNAVAILABLE;
339 if (count != 0 && stringnumber >= (uint32_t)count) return PCRE2_ERROR_UNSET;
342 left = match_data->ovector[stringnumber*2];
343 right = match_data->ovector[stringnumber*2+1];
344 if (sizeptr != NULL) *sizeptr = (left > right)? 0 : right - left;
350 /*************************************************
351 * Extract all captured strings to new memory *
352 *************************************************/
354 /* This function gets one chunk of memory and builds a list of pointers and all
355 the captured substrings in it. A NULL pointer is put on the end of the list.
356 The substrings are zero-terminated, but also, if the final argument is
357 non-NULL, a list of lengths is also returned. This allows binary data to be
361 match_data points to the match data
362 listptr set to point to the list of pointers
363 lengthsptr set to point to the list of lengths (may be NULL)
365 Returns: if successful: 0
366 if not successful, a negative error code:
367 PCRE2_ERROR_NOMEMORY: failed to get memory,
368 or a match failure code
371 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
372 pcre2_substring_list_get(pcre2_match_data *match_data, PCRE2_UCHAR ***listptr,
373 PCRE2_SIZE **lengthsptr)
375 int i, count, count2;
383 if ((count = match_data->rc) < 0) return count; /* Match failed */
384 if (count == 0) count = match_data->oveccount; /* Ovector too small */
387 ovector = match_data->ovector;
388 size = sizeof(pcre2_memctl) + sizeof(PCRE2_UCHAR *); /* For final NULL */
389 if (lengthsptr != NULL) size += sizeof(PCRE2_SIZE) * count; /* For lengths */
391 for (i = 0; i < count2; i += 2)
393 size += sizeof(PCRE2_UCHAR *) + CU2BYTES(1);
394 if (ovector[i+1] > ovector[i]) size += CU2BYTES(ovector[i+1] - ovector[i]);
397 memp = PRIV(memctl_malloc)(size, (pcre2_memctl *)match_data);
398 if (memp == NULL) return PCRE2_ERROR_NOMEMORY;
400 *listptr = listp = (PCRE2_UCHAR **)((char *)memp + sizeof(pcre2_memctl));
401 lensp = (PCRE2_SIZE *)((char *)listp + sizeof(PCRE2_UCHAR *) * (count + 1));
403 if (lengthsptr == NULL)
405 sp = (PCRE2_UCHAR *)lensp;
411 sp = (PCRE2_UCHAR *)((char *)lensp + sizeof(PCRE2_SIZE) * count);
414 for (i = 0; i < count2; i += 2)
416 size = (ovector[i+1] > ovector[i])? (ovector[i+1] - ovector[i]) : 0;
418 /* Size == 0 includes the case when the capture is unset. Avoid adding
419 PCRE2_UNSET to match_data->subject because it overflows, even though with
420 zero size calling memcpy() is harmless. */
422 if (size != 0) memcpy(sp, match_data->subject + ovector[i], CU2BYTES(size));
424 if (lensp != NULL) *lensp++ = size;
435 /*************************************************
436 * Free memory obtained by substring_list_get *
437 *************************************************/
440 Argument: the result of a previous pcre2_substring_list_get()
444 PCRE2_EXP_DEFN void PCRE2_CALL_CONVENTION
445 pcre2_substring_list_free(PCRE2_SPTR *list)
449 pcre2_memctl *memctl = (pcre2_memctl *)((char *)list - sizeof(pcre2_memctl));
450 memctl->free(memctl, memctl->memory_data);
456 /*************************************************
457 * Find (multiple) entries for named string *
458 *************************************************/
460 /* This function scans the nametable for a given name, using binary chop. It
461 returns either two pointers to the entries in the table, or, if no pointers are
462 given, the number of a unique group with the given name. If duplicate names are
463 permitted, and the name is not unique, an error is generated.
466 code the compiled regex
467 stringname the name whose entries required
468 firstptr where to put the pointer to the first entry
469 lastptr where to put the pointer to the last entry
471 Returns: PCRE2_ERROR_NOSUBSTRING if the name is not found
472 otherwise, if firstptr and lastptr are NULL:
473 a group number for a unique substring
474 else PCRE2_ERROR_NOUNIQUESUBSTRING
476 the length of each entry, having set firstptr and lastptr
479 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
480 pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR stringname,
481 PCRE2_SPTR *firstptr, PCRE2_SPTR *lastptr)
484 uint16_t top = code->name_count;
485 uint16_t entrysize = code->name_entry_size;
486 PCRE2_SPTR nametable = (PCRE2_SPTR)((char *)code + sizeof(pcre2_real_code));
490 uint16_t mid = (top + bot) / 2;
491 PCRE2_SPTR entry = nametable + entrysize*mid;
492 int c = PRIV(strcmp)(stringname, entry + IMM2_SIZE);
497 PCRE2_SPTR lastentry;
498 lastentry = nametable + entrysize * (code->name_count - 1);
499 first = last = entry;
500 while (first > nametable)
502 if (PRIV(strcmp)(stringname, (first - entrysize + IMM2_SIZE)) != 0) break;
505 while (last < lastentry)
507 if (PRIV(strcmp)(stringname, (last + entrysize + IMM2_SIZE)) != 0) break;
510 if (firstptr == NULL) return (first == last)?
511 (int)GET2(entry, 0) : PCRE2_ERROR_NOUNIQUESUBSTRING;
516 if (c > 0) bot = mid + 1; else top = mid;
519 return PCRE2_ERROR_NOSUBSTRING;
523 /*************************************************
524 * Find number for named string *
525 *************************************************/
527 /* This function is a convenience wrapper for pcre2_substring_nametable_scan()
528 when it is known that names are unique. If there are duplicate names, it is not
529 defined which number is returned.
532 code the compiled regex
533 stringname the name whose number is required
535 Returns: the number of the named parenthesis, or a negative number
536 PCRE2_ERROR_NOSUBSTRING if not found
537 PCRE2_ERROR_NOUNIQUESUBSTRING if not unique
540 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
541 pcre2_substring_number_from_name(const pcre2_code *code,
542 PCRE2_SPTR stringname)
544 return pcre2_substring_nametable_scan(code, stringname, NULL, NULL);
/* End of pcre2_substring.c */