pcre2_substitute.c

   1 /*************************************************
   2 *      Perl-Compatible Regular Expressions       *
   3 *************************************************/
   4
   5 /* PCRE is a library of functions to support regular expressions whose syntax
   6 and semantics are as close as possible to those of the Perl 5 language.
   7
   8                        Written by Philip Hazel
   9      Original API code Copyright (c) 1997-2012 University of Cambridge
  10           New API code Copyright (c) 2016-2018 University of Cambridge
  11
  12 -----------------------------------------------------------------------------
  13 Redistribution and use in source and binary forms, with or without
  14 modification, are permitted provided that the following conditions are met:
  15
  16     * Redistributions of source code must retain the above copyright notice,
  17       this list of conditions and the following disclaimer.
  18
  19     * Redistributions in binary form must reproduce the above copyright
  20       notice, this list of conditions and the following disclaimer in the
  21       documentation and/or other materials provided with the distribution.
  22
  23     * Neither the name of the University of Cambridge nor the names of its
  24       contributors may be used to endorse or promote products derived from
  25       this software without specific prior written permission.
  26
  27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  37 POSSIBILITY OF SUCH DAMAGE.
  38 -----------------------------------------------------------------------------
  39 */
  40
  41
  42 #ifdef HAVE_CONFIG_H
  43 #include "config.h"
  44 #endif
  45
  46 #include "pcre2_internal.h"
  47
  48 #define PTR_STACK_SIZE 20
  49
  50 #define SUBSTITUTE_OPTIONS \
  51   (PCRE2_SUBSTITUTE_EXTENDED|PCRE2_SUBSTITUTE_GLOBAL| \
  52    PCRE2_SUBSTITUTE_OVERFLOW_LENGTH|PCRE2_SUBSTITUTE_UNKNOWN_UNSET| \
  53    PCRE2_SUBSTITUTE_UNSET_EMPTY)
  54
  55
  56
  57 /*************************************************
  58 *           Find end of substitute text          *
  59 *************************************************/
  60
  61 /* In extended mode, we recognize ${name:+set text:unset text} and similar
  62 constructions. This requires the identification of unescaped : and }
  63 characters. This function scans for such. It must deal with nested ${
  64 constructions. The pointer to the text is updated, either to the required end
  65 character, or to where an error was detected.
  66
  67 Arguments:
  68   code      points to the compiled expression (for options)
  69   ptrptr    points to the pointer to the start of the text (updated)
  70   ptrend    end of the whole string
  71   last      TRUE if the last expected string (only } recognized)
  72
  73 Returns:    0 on success
  74             negative error code on failure
  75 */
  76
  77 static int
  78 find_text_end(const pcre2_code *code, PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend,
  79   BOOL last)
  80 {
  81 int rc = 0;
  82 uint32_t nestlevel = 0;
  83 BOOL literal = FALSE;
  84 PCRE2_SPTR ptr = *ptrptr;
  85
  86 for (; ptr < ptrend; ptr++)
  87   {
  88   if (literal)
  89     {
  90     if (ptr[0] == CHAR_BACKSLASH && ptr < ptrend - 1 && ptr[1] == CHAR_E)
  91       {
  92       literal = FALSE;
  93       ptr += 1;
  94       }
  95     }
  96
  97   else if (*ptr == CHAR_RIGHT_CURLY_BRACKET)
  98     {
  99     if (nestlevel == 0) goto EXIT;
 100     nestlevel--;
 101     }
 102
 103   else if (*ptr == CHAR_COLON && !last && nestlevel == 0) goto EXIT;
 104
 105   else if (*ptr == CHAR_DOLLAR_SIGN)
 106     {
 107     if (ptr < ptrend - 1 && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
 108       {
 109       nestlevel++;
 110       ptr += 1;
 111       }
 112     }
 113
 114   else if (*ptr == CHAR_BACKSLASH)
 115     {
 116     int erc;
 117     int errorcode;
 118     uint32_t ch;
 119
 120     if (ptr < ptrend - 1) switch (ptr[1])
 121       {
 122       case CHAR_L:
 123       case CHAR_l:
 124       case CHAR_U:
 125       case CHAR_u:
 126       ptr += 1;
 127       continue;
 128       }
 129
 130     ptr += 1;  /* Must point after \ */
 131     erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
 132       code->overall_options, FALSE, NULL);
 133     ptr -= 1;  /* Back to last code unit of escape */
 134     if (errorcode != 0)
 135       {
 136       rc = errorcode;
 137       goto EXIT;
 138       }
 139
 140     switch(erc)
 141       {
 142       case 0:      /* Data character */
 143       case ESC_E:  /* Isolated \E is ignored */
 144       break;
 145
 146       case ESC_Q:
 147       literal = TRUE;
 148       break;
 149
 150       default:
 151       rc = PCRE2_ERROR_BADREPESCAPE;
 152       goto EXIT;
 153       }
 154     }
 155   }
 156
 157 rc = PCRE2_ERROR_REPMISSINGBRACE;   /* Terminator not found */
 158
 159 EXIT:
 160 *ptrptr = ptr;
 161 return rc;
 162 }
 163
 164
 165
 166 /*************************************************
 167 *              Match and substitute              *
 168 *************************************************/
 169
 170 /* This function applies a compiled re to a subject string and creates a new
 171 string with substitutions. The first 7 arguments are the same as for
 172 pcre2_match(). Either string length may be PCRE2_ZERO_TERMINATED.
 173
 174 Arguments:
 175   code            points to the compiled expression
 176   subject         points to the subject string
 177   length          length of subject string (may contain binary zeros)
 178   start_offset    where to start in the subject string
 179   options         option bits
 180   match_data      points to a match_data block, or is NULL
  181   mcontext        points to a PCRE2 match context, or is NULL
 182   replacement     points to the replacement string
 183   rlength         length of replacement string
 184   buffer          where to put the substituted string
 185   blength         points to length of buffer; updated to length of string
 186
 187 Returns:          >= 0 number of substitutions made
 188                   < 0 an error code
 189                   PCRE2_ERROR_BADREPLACEMENT means invalid use of $
 190 */
 191
 192 /* This macro checks for space in the buffer before copying into it. On
 193 overflow, either give an error immediately, or keep on, accumulating the
 194 length. */
 195
 196 #define CHECKMEMCPY(from,length) \
 197   if (!overflowed && lengthleft < length) \
 198     { \
 199     if ((suboptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) == 0) goto NOROOM; \
 200     overflowed = TRUE; \
 201     extra_needed = length - lengthleft; \
 202     } \
 203   else if (overflowed) \
 204     { \
 205     extra_needed += length; \
 206     }  \
 207   else \
 208     {  \
 209     memcpy(buffer + buff_offset, from, CU2BYTES(length)); \
 210     buff_offset += length; \
 211     lengthleft -= length; \
 212     }
 213
 214 /* Here's the function */
 215
 216 PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
 217 pcre2_substitute(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
 218   PCRE2_SIZE start_offset, uint32_t options, pcre2_match_data *match_data,
 219   pcre2_match_context *mcontext, PCRE2_SPTR replacement, PCRE2_SIZE rlength,
 220   PCRE2_UCHAR *buffer, PCRE2_SIZE *blength)
 221 {
 222 int rc;
 223 int subs;
 224 int forcecase = 0;
 225 int forcecasereset = 0;
 226 uint32_t ovector_count;
 227 uint32_t goptions = 0;
 228 uint32_t suboptions;
 229 BOOL match_data_created = FALSE;
 230 BOOL literal = FALSE;
 231 BOOL overflowed = FALSE;
 232 #ifdef SUPPORT_UNICODE
 233 BOOL utf = (code->overall_options & PCRE2_UTF) != 0;
 234 #endif
 235 PCRE2_UCHAR temp[6];
 236 PCRE2_SPTR ptr;
 237 PCRE2_SPTR repend;
 238 PCRE2_SIZE extra_needed = 0;
 239 PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength;
 240 PCRE2_SIZE *ovector;
 241 PCRE2_SIZE ovecsave[3];
 242
 243 buff_offset = 0;
 244 lengthleft = buff_length = *blength;
 245 *blength = PCRE2_UNSET;
 246 ovecsave[0] = ovecsave[1] = ovecsave[2] = PCRE2_UNSET;
 247
 248 /* Partial matching is not valid. */
 249
 250 if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0)
 251   return PCRE2_ERROR_BADOPTION;
 252
 253 /* If no match data block is provided, create one. */
 254
 255 if (match_data == NULL)
 256   {
 257   pcre2_general_context *gcontext = (mcontext == NULL)?
 258     (pcre2_general_context *)code :
 259     (pcre2_general_context *)mcontext;
 260   match_data = pcre2_match_data_create_from_pattern(code, gcontext);
 261   if (match_data == NULL) return PCRE2_ERROR_NOMEMORY;
 262   match_data_created = TRUE;
 263   }
 264 ovector = pcre2_get_ovector_pointer(match_data);
 265 ovector_count = pcre2_get_ovector_count(match_data);
 266
 267 /* Find lengths of zero-terminated strings and the end of the replacement. */
 268
 269 if (length == PCRE2_ZERO_TERMINATED) length = PRIV(strlen)(subject);
 270 if (rlength == PCRE2_ZERO_TERMINATED) rlength = PRIV(strlen)(replacement);
 271 repend = replacement + rlength;
 272
 273 /* Check UTF replacement string if necessary. */
 274
 275 #ifdef SUPPORT_UNICODE
 276 if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
 277   {
 278   rc = PRIV(valid_utf)(replacement, rlength, &(match_data->rightchar));
 279   if (rc != 0)
 280     {
 281     match_data->leftchar = 0;
 282     goto EXIT;
 283     }
 284   }
 285 #endif  /* SUPPORT_UNICODE */
 286
 287 /* Save the substitute options and remove them from the match options. */
 288
 289 suboptions = options & SUBSTITUTE_OPTIONS;
 290 options &= ~SUBSTITUTE_OPTIONS;
 291
 292 /* Copy up to the start offset */
 293
 294 if (start_offset > length)
 295   {
 296   match_data->leftchar = 0;
 297   rc = PCRE2_ERROR_BADOFFSET;
 298   goto EXIT;
 299   }
 300 CHECKMEMCPY(subject, start_offset);
 301
 302 /* Loop for global substituting. */
 303
 304 subs = 0;
 305 do
 306   {
 307   PCRE2_SPTR ptrstack[PTR_STACK_SIZE];
 308   uint32_t ptrstackptr = 0;
 309
 310   rc = pcre2_match(code, subject, length, start_offset, options|goptions,
 311     match_data, mcontext);
 312
 313 #ifdef SUPPORT_UNICODE
 314   if (utf) options |= PCRE2_NO_UTF_CHECK;  /* Only need to check once */
 315 #endif
 316
 317   /* Any error other than no match returns the error code. No match when not
 318   doing the special after-empty-match global rematch, or when at the end of the
 319   subject, breaks the global loop. Otherwise, advance the starting point by one
 320   character, copying it to the output, and try again. */
 321
 322   if (rc < 0)
 323     {
 324     PCRE2_SIZE save_start;
 325
 326     if (rc != PCRE2_ERROR_NOMATCH) goto EXIT;
 327     if (goptions == 0 || start_offset >= length) break;
 328
 329     /* Advance by one code point. Then, if CRLF is a valid newline sequence and
 330     we have advanced into the middle of it, advance one more code point. In
 331     other words, do not start in the middle of CRLF, even if CR and LF on their
 332     own are valid newlines. */
 333
 334     save_start = start_offset++;
 335     if (subject[start_offset-1] == CHAR_CR &&
 336         code->newline_convention != PCRE2_NEWLINE_CR &&
 337         code->newline_convention != PCRE2_NEWLINE_LF &&
 338         start_offset < length &&
 339         subject[start_offset] == CHAR_LF)
 340       start_offset++;
 341
 342     /* Otherwise, in UTF mode, advance past any secondary code points. */
 343
 344     else if ((code->overall_options & PCRE2_UTF) != 0)
 345       {
 346 #if PCRE2_CODE_UNIT_WIDTH == 8
 347       while (start_offset < length && (subject[start_offset] & 0xc0) == 0x80)
 348         start_offset++;
 349 #elif PCRE2_CODE_UNIT_WIDTH == 16
 350       while (start_offset < length &&
 351             (subject[start_offset] & 0xfc00) == 0xdc00)
 352         start_offset++;
 353 #endif
 354       }
 355
 356     /* Copy what we have advanced past, reset the special global options, and
 357     continue to the next match. */
 358
 359     fraglength = start_offset - save_start;
 360     CHECKMEMCPY(subject + save_start, fraglength);
 361     goptions = 0;
 362     continue;
 363     }
 364
 365   /* Handle a successful match. Matches that use \K to end before they start
 366   or start before the current point in the subject are not supported. */
 367
 368   if (ovector[1] < ovector[0] || ovector[0] < start_offset)
 369     {
 370     rc = PCRE2_ERROR_BADSUBSPATTERN;
 371     goto EXIT;
 372     }
 373
 374   /* Check for the same match as previous. This is legitimate after matching an
 375   empty string that starts after the initial match offset. We have tried again
 376   at the match point in case the pattern is one like /(?<=\G.)/ which can never
 377   match at its starting point, so running the match achieves the bumpalong. If
 378   we do get the same (null) match at the original match point, it isn't such a
 379   pattern, so we now do the empty string magic. In all other cases, a repeat
 380   match should never occur. */
 381
 382   if (ovecsave[0] == ovector[0] && ovecsave[1] == ovector[1])
 383     {
 384     if (ovector[0] == ovector[1] && ovecsave[2] != start_offset)
 385       {
 386       goptions = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
 387       ovecsave[2] = start_offset;
 388       continue;    /* Back to the top of the loop */
 389       }
 390     rc = PCRE2_ERROR_INTERNAL_DUPMATCH;
 391     goto EXIT;
 392     }
 393
 394   /* Count substitutions with a paranoid check for integer overflow; surely no
 395   real call to this function would ever hit this! */
 396
 397   if (subs == INT_MAX)
 398     {
 399     rc = PCRE2_ERROR_TOOMANYREPLACE;
 400     goto EXIT;
 401     }
 402   subs++;
 403
 404   /* Copy the text leading up to the match. */
 405
 406   if (rc == 0) rc = ovector_count;
 407   fraglength = ovector[0] - start_offset;
 408   CHECKMEMCPY(subject + start_offset, fraglength);
 409
 410   /* Process the replacement string. Literal mode is set by \Q, but only in
 411   extended mode when backslashes are being interpreted. In extended mode we
 412   must handle nested substrings that are to be reprocessed. */
 413
 414   ptr = replacement;
 415   for (;;)
 416     {
 417     uint32_t ch;
 418     unsigned int chlen;
 419
 420     /* If at the end of a nested substring, pop the stack. */
 421
 422     if (ptr >= repend)
 423       {
 424       if (ptrstackptr <= 0) break;       /* End of replacement string */
 425       repend = ptrstack[--ptrstackptr];
 426       ptr = ptrstack[--ptrstackptr];
 427       continue;
 428       }
 429
 430     /* Handle the next character */
 431
 432     if (literal)
 433       {
 434       if (ptr[0] == CHAR_BACKSLASH && ptr < repend - 1 && ptr[1] == CHAR_E)
 435         {
 436         literal = FALSE;
 437         ptr += 2;
 438         continue;
 439         }
 440       goto LOADLITERAL;
 441       }
 442
 443     /* Not in literal mode. */
 444
 445     if (*ptr == CHAR_DOLLAR_SIGN)
 446       {
 447       int group, n;
 448       uint32_t special = 0;
 449       BOOL inparens;
 450       BOOL star;
 451       PCRE2_SIZE sublength;
 452       PCRE2_SPTR text1_start = NULL;
 453       PCRE2_SPTR text1_end = NULL;
 454       PCRE2_SPTR text2_start = NULL;
 455       PCRE2_SPTR text2_end = NULL;
 456       PCRE2_UCHAR next;
 457       PCRE2_UCHAR name[33];
 458
 459       if (++ptr >= repend) goto BAD;
 460       if ((next = *ptr) == CHAR_DOLLAR_SIGN) goto LOADLITERAL;
 461
 462       group = -1;
 463       n = 0;
 464       inparens = FALSE;
 465       star = FALSE;
 466
 467       if (next == CHAR_LEFT_CURLY_BRACKET)
 468         {
 469         if (++ptr >= repend) goto BAD;
 470         next = *ptr;
 471         inparens = TRUE;
 472         }
 473
 474       if (next == CHAR_ASTERISK)
 475         {
 476         if (++ptr >= repend) goto BAD;
 477         next = *ptr;
 478         star = TRUE;
 479         }
 480
 481       if (!star && next >= CHAR_0 && next <= CHAR_9)
 482         {
 483         group = next - CHAR_0;
 484         while (++ptr < repend)
 485           {
 486           next = *ptr;
 487           if (next < CHAR_0 || next > CHAR_9) break;
 488           group = group * 10 + next - CHAR_0;
 489
 490           /* A check for a number greater than the hightest captured group
 491           is sufficient here; no need for a separate overflow check. If unknown
 492           groups are to be treated as unset, just skip over any remaining
 493           digits and carry on. */
 494
 495           if (group > code->top_bracket)
 496             {
 497             if ((suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
 498               {
 499               while (++ptr < repend && *ptr >= CHAR_0 && *ptr <= CHAR_9);
 500               break;
 501               }
 502             else
 503               {
 504               rc = PCRE2_ERROR_NOSUBSTRING;
 505               goto PTREXIT;
 506               }
 507             }
 508           }
 509         }
 510       else
 511         {
 512         const uint8_t *ctypes = code->tables + ctypes_offset;
 513         while (MAX_255(next) && (ctypes[next] & ctype_word) != 0)
 514           {
 515           name[n++] = next;
 516           if (n > 32) goto BAD;
 517           if (++ptr >= repend) break;
 518           next = *ptr;
 519           }
 520         if (n == 0) goto BAD;
 521         name[n] = 0;
 522         }
 523
 524       /* In extended mode we recognize ${name:+set text:unset text} and
 525       ${name:-default text}. */
 526
 527       if (inparens)
 528         {
 529         if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
 530              !star && ptr < repend - 2 && next == CHAR_COLON)
 531           {
 532           special = *(++ptr);
 533           if (special != CHAR_PLUS && special != CHAR_MINUS)
 534             {
 535             rc = PCRE2_ERROR_BADSUBSTITUTION;
 536             goto PTREXIT;
 537             }
 538
 539           text1_start = ++ptr;
 540           rc = find_text_end(code, &ptr, repend, special == CHAR_MINUS);
 541           if (rc != 0) goto PTREXIT;
 542           text1_end = ptr;
 543
 544           if (special == CHAR_PLUS && *ptr == CHAR_COLON)
 545             {
 546             text2_start = ++ptr;
 547             rc = find_text_end(code, &ptr, repend, TRUE);
 548             if (rc != 0) goto PTREXIT;
 549             text2_end = ptr;
 550             }
 551           }
 552
 553         else
 554           {
 555           if (ptr >= repend || *ptr != CHAR_RIGHT_CURLY_BRACKET)
 556             {
 557             rc = PCRE2_ERROR_REPMISSINGBRACE;
 558             goto PTREXIT;
 559             }
 560           }
 561
 562         ptr++;
 563         }
 564
 565       /* Have found a syntactically correct group number or name, or *name.
 566       Only *MARK is currently recognized. */
 567
 568       if (star)
 569         {
 570         if (PRIV(strcmp_c8)(name, STRING_MARK) == 0)
 571           {
 572           PCRE2_SPTR mark = pcre2_get_mark(match_data);
 573           if (mark != NULL)
 574             {
 575             PCRE2_SPTR mark_start = mark;
 576             while (*mark != 0) mark++;
 577             fraglength = mark - mark_start;
 578             CHECKMEMCPY(mark_start, fraglength);
 579             }
 580           }
 581         else goto BAD;
 582         }
 583
 584       /* Substitute the contents of a group. We don't use substring_copy
 585       functions any more, in order to support case forcing. */
 586
 587       else
 588         {
 589         PCRE2_SPTR subptr, subptrend;
 590
 591         /* Find a number for a named group. In case there are duplicate names,
 592         search for the first one that is set. If the name is not found when
 593         PCRE2_SUBSTITUTE_UNKNOWN_EMPTY is set, set the group number to a
 594         non-existent group. */
 595
 596         if (group < 0)
 597           {
 598           PCRE2_SPTR first, last, entry;
 599           rc = pcre2_substring_nametable_scan(code, name, &first, &last);
 600           if (rc == PCRE2_ERROR_NOSUBSTRING &&
 601               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
 602             {
 603             group = code->top_bracket + 1;
 604             }
 605           else
 606             {
 607             if (rc < 0) goto PTREXIT;
 608             for (entry = first; entry <= last; entry += rc)
 609               {
 610               uint32_t ng = GET2(entry, 0);
 611               if (ng < ovector_count)
 612                 {
 613                 if (group < 0) group = ng;          /* First in ovector */
 614                 if (ovector[ng*2] != PCRE2_UNSET)
 615                   {
 616                   group = ng;                       /* First that is set */
 617                   break;
 618                   }
 619                 }
 620               }
 621
 622             /* If group is still negative, it means we did not find a group
 623             that is in the ovector. Just set the first group. */
 624
 625             if (group < 0) group = GET2(first, 0);
 626             }
 627           }
 628
 629         /* We now have a group that is identified by number. Find the length of
 630         the captured string. If a group in a non-special substitution is unset
 631         when PCRE2_SUBSTITUTE_UNSET_EMPTY is set, substitute nothing. */
 632
 633         rc = pcre2_substring_length_bynumber(match_data, group, &sublength);
 634         if (rc < 0)
 635           {
 636           if (rc == PCRE2_ERROR_NOSUBSTRING &&
 637               (suboptions & PCRE2_SUBSTITUTE_UNKNOWN_UNSET) != 0)
 638             {
 639             rc = PCRE2_ERROR_UNSET;
 640             }
 641           if (rc != PCRE2_ERROR_UNSET) goto PTREXIT;  /* Non-unset errors */
 642           if (special == 0)                           /* Plain substitution */
 643             {
 644             if ((suboptions & PCRE2_SUBSTITUTE_UNSET_EMPTY) != 0) continue;
 645             goto PTREXIT;                             /* Else error */
 646             }
 647           }
 648
 649         /* If special is '+' we have a 'set' and possibly an 'unset' text,
 650         both of which are reprocessed when used. If special is '-' we have a
 651         default text for when the group is unset; it must be reprocessed. */
 652
 653         if (special != 0)
 654           {
 655           if (special == CHAR_MINUS)
 656             {
 657             if (rc == 0) goto LITERAL_SUBSTITUTE;
 658             text2_start = text1_start;
 659             text2_end = text1_end;
 660             }
 661
 662           if (ptrstackptr >= PTR_STACK_SIZE) goto BAD;
 663           ptrstack[ptrstackptr++] = ptr;
 664           ptrstack[ptrstackptr++] = repend;
 665
 666           if (rc == 0)
 667             {
 668             ptr = text1_start;
 669             repend = text1_end;
 670             }
 671           else
 672             {
 673             ptr = text2_start;
 674             repend = text2_end;
 675             }
 676           continue;
 677           }
 678
 679         /* Otherwise we have a literal substitution of a group's contents. */
 680
 681         LITERAL_SUBSTITUTE:
 682         subptr = subject + ovector[group*2];
 683         subptrend = subject + ovector[group*2 + 1];
 684
 685         /* Substitute a literal string, possibly forcing alphabetic case. */
 686
 687         while (subptr < subptrend)
 688           {
 689           GETCHARINCTEST(ch, subptr);
 690           if (forcecase != 0)
 691             {
 692 #ifdef SUPPORT_UNICODE
 693             if (utf)
 694               {
 695               uint32_t type = UCD_CHARTYPE(ch);
 696               if (PRIV(ucp_gentype)[type] == ucp_L &&
 697                   type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
 698                 ch = UCD_OTHERCASE(ch);
 699               }
 700             else
 701 #endif
 702               {
 703               if (((code->tables + cbits_offset +
 704                   ((forcecase > 0)? cbit_upper:cbit_lower)
 705                   )[ch/8] & (1 << (ch%8))) == 0)
 706                 ch = (code->tables + fcc_offset)[ch];
 707               }
 708             forcecase = forcecasereset;
 709             }
 710
 711 #ifdef SUPPORT_UNICODE
 712           if (utf) chlen = PRIV(ord2utf)(ch, temp); else
 713 #endif
 714             {
 715             temp[0] = ch;
 716             chlen = 1;
 717             }
 718           CHECKMEMCPY(temp, chlen);
 719           }
 720         }
 721       }
 722
 723     /* Handle an escape sequence in extended mode. We can use check_escape()
 724     to process \Q, \E, \c, \o, \x and \ followed by non-alphanumerics, but
 725     the case-forcing escapes are not supported in pcre2_compile() so must be
 726     recognized here. */
 727
 728     else if ((suboptions & PCRE2_SUBSTITUTE_EXTENDED) != 0 &&
 729               *ptr == CHAR_BACKSLASH)
 730       {
 731       int errorcode;
 732
 733       if (ptr < repend - 1) switch (ptr[1])
 734         {
 735         case CHAR_L:
 736         forcecase = forcecasereset = -1;
 737         ptr += 2;
 738         continue;
 739
 740         case CHAR_l:
 741         forcecase = -1;
 742         forcecasereset = 0;
 743         ptr += 2;
 744         continue;
 745
 746         case CHAR_U:
 747         forcecase = forcecasereset = 1;
 748         ptr += 2;
 749         continue;
 750
 751         case CHAR_u:
 752         forcecase = 1;
 753         forcecasereset = 0;
 754         ptr += 2;
 755         continue;
 756
 757         default:
 758         break;
 759         }
 760
 761       ptr++;  /* Point after \ */
 762       rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
 763         code->overall_options, FALSE, NULL);
 764       if (errorcode != 0) goto BADESCAPE;
 765
 766       switch(rc)
 767         {
 768         case ESC_E:
 769         forcecase = forcecasereset = 0;
 770         continue;
 771
 772         case ESC_Q:
 773         literal = TRUE;
 774         continue;
 775
 776         case 0:      /* Data character */
 777         goto LITERAL;
 778
 779         default:
 780         goto BADESCAPE;
 781         }
 782       }
 783
 784     /* Handle a literal code unit */
 785
 786     else
 787       {
 788       LOADLITERAL:
 789       GETCHARINCTEST(ch, ptr);    /* Get character value, increment pointer */
 790
 791       LITERAL:
 792       if (forcecase != 0)
 793         {
 794 #ifdef SUPPORT_UNICODE
 795         if (utf)
 796           {
 797           uint32_t type = UCD_CHARTYPE(ch);
 798           if (PRIV(ucp_gentype)[type] == ucp_L &&
 799               type != ((forcecase > 0)? ucp_Lu : ucp_Ll))
 800             ch = UCD_OTHERCASE(ch);
 801           }
 802         else
 803 #endif
 804           {
 805           if (((code->tables + cbits_offset +
 806               ((forcecase > 0)? cbit_upper:cbit_lower)
 807               )[ch/8] & (1 << (ch%8))) == 0)
 808             ch = (code->tables + fcc_offset)[ch];
 809           }
 810         forcecase = forcecasereset;
 811         }
 812
 813 #ifdef SUPPORT_UNICODE
 814       if (utf) chlen = PRIV(ord2utf)(ch, temp); else
 815 #endif
 816         {
 817         temp[0] = ch;
 818         chlen = 1;
 819         }
 820       CHECKMEMCPY(temp, chlen);
 821       } /* End handling a literal code unit */
 822     }   /* End of loop for scanning the replacement. */
 823
 824   /* The replacement has been copied to the output. Save the details of this
 825   match. See above for how this data is used. If we matched an empty string, do
 826   the magic for global matches. Finally, update the start offset to point to
 827   the rest of the subject string. */
 828
 829   ovecsave[0] = ovector[0];
 830   ovecsave[1] = ovector[1];
 831   ovecsave[2] = start_offset;
 832
 833   goptions = (ovector[0] != ovector[1] || ovector[0] > start_offset)? 0 :
 834     PCRE2_ANCHORED|PCRE2_NOTEMPTY_ATSTART;
 835   start_offset = ovector[1];
 836   } while ((suboptions & PCRE2_SUBSTITUTE_GLOBAL) != 0);  /* Repeat "do" loop */
 837
 838 /* Copy the rest of the subject. */
 839
 840 fraglength = length - start_offset;
 841 CHECKMEMCPY(subject + start_offset, fraglength);
 842 temp[0] = 0;
 843 CHECKMEMCPY(temp , 1);
 844
 845 /* If overflowed is set it means the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set,
 846 and matching has carried on after a full buffer, in order to compute the length
 847 needed. Otherwise, an overflow generates an immediate error return. */
 848
 849 if (overflowed)
 850   {
 851   rc = PCRE2_ERROR_NOMEMORY;
 852   *blength = buff_length + extra_needed;
 853   }
 854
 855 /* After a successful execution, return the number of substitutions and set the
 856 length of buffer used, excluding the trailing zero. */
 857
 858 else
 859   {
 860   rc = subs;
 861   *blength = buff_offset - 1;
 862   }
 863
 864 EXIT:
 865 if (match_data_created) pcre2_match_data_free(match_data);
 866   else match_data->rc = rc;
 867 return rc;
 868
 869 NOROOM:
 870 rc = PCRE2_ERROR_NOMEMORY;
 871 goto EXIT;
 872
 873 BAD:
 874 rc = PCRE2_ERROR_BADREPLACEMENT;
 875 goto PTREXIT;
 876
 877 BADESCAPE:
 878 rc = PCRE2_ERROR_BADREPESCAPE;
 879
 880 PTREXIT:
 881 *blength = (PCRE2_SIZE)(ptr - replacement);
 882 goto EXIT;
 883 }
 884
 885 /* End of pcre2_substitute.c */