1 # This set of tests is for UTF-8 support and Unicode property support, with
2 # relevance only for the 8-bit library.
4 # The next 4 patterns have UTF-8 errors
7 Failed: error -8 at offset 1: UTF-8 error: byte 2 top bits not 0x80
10 Failed: error -3 at offset 0: UTF-8 error: 1 byte missing at end
13 Failed: error -8 at offset 0: UTF-8 error: byte 2 top bits not 0x80
15 /Ã
\82\82\82\82\82\82\82\82Ã/utf
16 Failed: error -22 at offset 2: UTF-8 error: isolated byte with 0x80 bit set
21 \= Expect UTF-8 errors
23 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
25 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
27 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
29 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
31 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
33 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
35 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
37 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
39 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
41 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
43 Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
45 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
47 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
49 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
51 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
53 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
55 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
57 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
59 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
61 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
63 Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
65 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
67 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
69 Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
71 Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
72 \xfd\x7f\x80\x80\x80\x80
73 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 0
74 \xfd\x80\x7f\x80\x80\x80
75 Failed: error -9: UTF-8 error: byte 3 top bits not 0x80 at offset 0
76 \xfd\x80\x80\x7f\x80\x80
77 Failed: error -10: UTF-8 error: byte 4 top bits not 0x80 at offset 0
78 \xfd\x80\x80\x80\x7f\x80
79 Failed: error -11: UTF-8 error: byte 5 top bits not 0x80 at offset 0
80 \xfd\x80\x80\x80\x80\x7f
81 Failed: error -12: UTF-8 error: byte 6 top bits not 0x80 at offset 0
83 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
85 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 0
87 Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 0
89 Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
91 Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
92 \xfc\x80\x80\x80\x80\x8f
93 Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
95 Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
97 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
99 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
102 \= Expect UTF-8 errors
103 XX\xfb\x80\x80\x80\x80
104 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 2
105 XX\xfd\x80\x80\x80\x80\x80
106 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 2
108 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
111 \= Expect UTF-8 errors
113 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
115 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
117 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
119 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
121 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
123 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
125 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
127 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
129 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
131 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
133 Failed: error -7: UTF-8 error: 5 bytes missing at end at offset 0
135 Failed: error -6: UTF-8 error: 4 bytes missing at end at offset 0
137 Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
139 Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
140 \xfd\x80\x80\x80\x80\=ph
141 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 0
144 \= Expect UTF-8 errors
146 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 1
148 Failed: error -17: UTF-8 error: overlong 2-byte sequence at offset 2
150 Failed: error -18: UTF-8 error: overlong 3-byte sequence at offset 3
152 Failed: error -19: UTF-8 error: overlong 4-byte sequence at offset 0
154 Failed: error -20: UTF-8 error: overlong 5-byte sequence at offset 0
155 \xfc\x83\x80\x80\x80\x80
156 Failed: error -21: UTF-8 error: overlong 6-byte sequence at offset 0
157 \xfe\x80\x80\x80\x80\x80
158 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
159 \xff\x80\x80\x80\x80\x80
160 Failed: error -23: UTF-8 error: illegal byte (0xfe or 0xff) at offset 0
162 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
164 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
165 \xfc\x84\x80\x80\x80\x80
166 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
167 \xfd\x83\x80\x80\x80\x80
168 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
180 \xf8\x88\x80\x80\x80\=no_utf_check
182 \xf9\x87\x80\x80\x80\=no_utf_check
184 \xfc\x84\x80\x80\x80\x80\=no_utf_check
186 \xfd\x83\x80\x80\x80\x80\=no_utf_check
189 # Similar tests with offsets
192 \= Expect UTF-8 errors
194 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
196 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
202 \= Expect UTF-8 errors
204 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
206 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
208 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
209 X\xdfabcd\xdf\=offset=3
210 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 6
216 \= Expect UTF-8 errors
218 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
220 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
222 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
224 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
227 \= Expect UTF-8 errors
229 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
231 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
233 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
235 Failed: error -8: UTF-8 error: byte 2 top bits not 0x80 at offset 1
236 X\xdfabc\xdf\=offset=6
237 Failed: error -3: UTF-8 error: 1 byte missing at end at offset 5
238 X\xdfabc\xdf\=offset=7
239 Failed: error -33: bad offset value
245 ------------------------------------------------------------------
250 ------------------------------------------------------------------
251 Capturing subpattern count = 0
253 First code unit = \xc4
254 Last code unit = \x80
255 Subject length lower bound = 1
258 ------------------------------------------------------------------
263 ------------------------------------------------------------------
264 Capturing subpattern count = 0
266 First code unit = \xe1
267 Last code unit = \x80
268 Subject length lower bound = 1
271 ------------------------------------------------------------------
276 ------------------------------------------------------------------
277 Capturing subpattern count = 0
279 First code unit = \xf0
280 Last code unit = \x80
281 Subject length lower bound = 1
284 ------------------------------------------------------------------
289 ------------------------------------------------------------------
290 Capturing subpattern count = 0
292 First code unit = \xf4
293 Last code unit = \x80
294 Subject length lower bound = 1
297 ------------------------------------------------------------------
302 ------------------------------------------------------------------
303 Capturing subpattern count = 0
305 First code unit = \xf4
306 Last code unit = \xbf
307 Subject length lower bound = 1
310 ------------------------------------------------------------------
315 ------------------------------------------------------------------
316 Capturing subpattern count = 0
318 First code unit = \xc3
319 Last code unit = \xbf
320 Subject length lower bound = 1
323 ------------------------------------------------------------------
328 ------------------------------------------------------------------
329 Capturing subpattern count = 0
331 First code unit = \xc4
332 Last code unit = \x80
333 Subject length lower bound = 1
336 ------------------------------------------------------------------
341 ------------------------------------------------------------------
342 Capturing subpattern count = 0
344 First code unit = \xc2
345 Last code unit = \x80
346 Subject length lower bound = 1
349 ------------------------------------------------------------------
354 ------------------------------------------------------------------
355 Capturing subpattern count = 0
357 First code unit = \xc3
358 Last code unit = \xbf
359 Subject length lower bound = 1
361 /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
362 ------------------------------------------------------------------
364 \x{d55c}\x{ad6d}\x{c5b4}
367 ------------------------------------------------------------------
368 Capturing subpattern count = 0
370 First code unit = \xed
371 Last code unit = \xb4
372 Subject length lower bound = 3
373 \x{D55c}\x{ad6d}\x{C5B4}
374 0: \x{d55c}\x{ad6d}\x{c5b4}
376 /\x{65e5}\x{672c}\x{8a9e}/IB,utf
377 ------------------------------------------------------------------
379 \x{65e5}\x{672c}\x{8a9e}
382 ------------------------------------------------------------------
383 Capturing subpattern count = 0
385 First code unit = \xe6
386 Last code unit = \x9e
387 Subject length lower bound = 3
388 \x{65e5}\x{672c}\x{8a9e}
389 0: \x{65e5}\x{672c}\x{8a9e}
392 ------------------------------------------------------------------
397 ------------------------------------------------------------------
398 Capturing subpattern count = 0
400 First code unit = \xc2
401 Last code unit = \x80
402 Subject length lower bound = 1
405 ------------------------------------------------------------------
410 ------------------------------------------------------------------
411 Capturing subpattern count = 0
413 First code unit = \xc2
414 Last code unit = \x84
415 Subject length lower bound = 1
418 ------------------------------------------------------------------
423 ------------------------------------------------------------------
424 Capturing subpattern count = 0
426 First code unit = \xc4
427 Last code unit = \x84
428 Subject length lower bound = 1
431 ------------------------------------------------------------------
436 ------------------------------------------------------------------
437 Capturing subpattern count = 0
439 First code unit = \xe0
440 Last code unit = \xa1
441 Subject length lower bound = 1
444 ------------------------------------------------------------------
449 ------------------------------------------------------------------
450 Capturing subpattern count = 0
452 First code unit = \xf0
453 Last code unit = \xab
454 Subject length lower bound = 1
456 /[^ab\xC0-\xF0]/IB,utf
457 ------------------------------------------------------------------
459 [\x00-`c-\xbf\xf1-\xff] (neg)
462 ------------------------------------------------------------------
463 Capturing subpattern count = 0
465 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
466 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
467 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
468 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
469 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
470 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
471 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
472 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
473 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
475 Subject length lower bound = 1
491 ------------------------------------------------------------------
497 ------------------------------------------------------------------
498 Capturing subpattern count = 0
500 First code unit = \xc4
501 Last code unit = \x80
502 Subject length lower bound = 3
503 \x{100}\x{100}\x{100}\x{100\x{100}
504 0: \x{100}\x{100}\x{100}
507 ------------------------------------------------------------------
516 ------------------------------------------------------------------
517 Capturing subpattern count = 1
519 Starting code units: x \xc4
520 Subject length lower bound = 1
522 /(\x{100}*a|x)/IB,utf
523 ------------------------------------------------------------------
533 ------------------------------------------------------------------
534 Capturing subpattern count = 1
536 Starting code units: a x \xc4
537 Subject length lower bound = 1
539 /(\x{100}{0,2}a|x)/IB,utf
540 ------------------------------------------------------------------
550 ------------------------------------------------------------------
551 Capturing subpattern count = 1
553 Starting code units: a x \xc4
554 Subject length lower bound = 1
556 /(\x{100}{1,2}a|x)/IB,utf
557 ------------------------------------------------------------------
568 ------------------------------------------------------------------
569 Capturing subpattern count = 1
571 Starting code units: x \xc4
572 Subject length lower bound = 1
575 ------------------------------------------------------------------
580 ------------------------------------------------------------------
581 Capturing subpattern count = 0
583 First code unit = \xc4
584 Last code unit = \x80
585 Subject length lower bound = 1
587 /a\x{100}\x{101}*/IB,utf
588 ------------------------------------------------------------------
594 ------------------------------------------------------------------
595 Capturing subpattern count = 0
597 First code unit = 'a'
598 Last code unit = \x80
599 Subject length lower bound = 2
601 /a\x{100}\x{101}+/IB,utf
602 ------------------------------------------------------------------
608 ------------------------------------------------------------------
609 Capturing subpattern count = 0
611 First code unit = 'a'
612 Last code unit = \x81
613 Subject length lower bound = 3
616 ------------------------------------------------------------------
621 ------------------------------------------------------------------
622 Capturing subpattern count = 0
623 Subject length lower bound = 1
626 ------------------------------------------------------------------
631 ------------------------------------------------------------------
632 Capturing subpattern count = 0
634 First code unit = \xc4
635 Last code unit = \x80
636 Subject length lower bound = 1
645 ------------------------------------------------------------------
650 ------------------------------------------------------------------
651 Capturing subpattern count = 0
653 First code unit = \xc3
654 Last code unit = \xbf
655 Subject length lower bound = 1
660 ------------------------------------------------------------------
665 ------------------------------------------------------------------
666 Capturing subpattern count = 0
668 Subject length lower bound = 1
670 /\x{100}abc(xyz(?1))/IB,utf
671 ------------------------------------------------------------------
680 ------------------------------------------------------------------
681 Capturing subpattern count = 1
683 First code unit = \xc4
685 Subject length lower bound = 7
688 Capturing subpattern count = 0
690 First code unit = \xc7
691 Last code unit = \xbf
692 Subject length lower bound = 1
698 /\x{100}+\x{200}/IB,utf
699 ------------------------------------------------------------------
705 ------------------------------------------------------------------
706 Capturing subpattern count = 0
708 First code unit = \xc4
709 Last code unit = \x80
710 Subject length lower bound = 2
713 ------------------------------------------------------------------
719 ------------------------------------------------------------------
720 Capturing subpattern count = 0
722 First code unit = \xc4
724 Subject length lower bound = 2
727 Failed: error 106 at offset 15: missing terminating ] for character class
729 # This tests the stricter UTF-8 check according to RFC 3629.
732 \= Expect UTF-8 errors
734 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
736 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
738 Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 0
740 Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 0
742 Failed: error -13: UTF-8 error: 5-byte character is not allowed (RFC 3629) at offset 0
744 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
746 \x{d800}\=no_utf_check
748 \x{da00}\=no_utf_check
750 \x{dfff}\=no_utf_check
752 \x{110000}\=no_utf_check
754 \x{2000000}\=no_utf_check
756 \x{7fffffff}\=no_utf_check
763 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
764 Capturing subpattern count = 0
765 Compile options: <none>
767 \R matches any Unicode newline
768 Forced newline is CRLF
769 First code unit = 'a'
771 Subject length lower bound = 3
774 Capturing subpattern count = 0
776 Starting code units: \x09 \x20 \xc2 \xe1 \xe2 \xe3
777 Subject length lower bound = 1
798 Capturing subpattern count = 0
800 Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
801 Subject length lower bound = 1
816 Capturing subpattern count = 0
818 Starting code units: \x09 \x20 A \xc2 \xe1 \xe2 \xe3
820 Subject length lower bound = 1
825 Capturing subpattern count = 0
827 Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
829 Subject length lower bound = 2
832 Capturing subpattern count = 0
834 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
836 Subject length lower bound = 4
838 /\sxxx\s/I,utf,tables=2
839 Capturing subpattern count = 0
841 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc2
843 Subject length lower bound = 5
849 /\S \S/I,utf,tables=2
850 Capturing subpattern count = 0
852 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
853 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
854 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
855 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
856 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
857 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
858 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
859 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
860 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
862 Subject length lower bound = 3
875 \= Expect bad offset value
877 Failed: error -33: bad offset value
878 \= Expect bad UTF-8 offset
880 Error -36 (bad UTF-8 offset)
886 Capturing subpattern count = 0
887 Options: caseless utf
888 Starting code units: \xe1
889 Subject length lower bound = 1
892 Capturing subpattern count = 0
893 Options: caseless utf
894 Starting code units: \xe1
895 Subject length lower bound = 1
898 Capturing subpattern count = 0
899 Options: caseless utf
900 Starting code units: \xe1
901 Subject length lower bound = 1
904 Capturing subpattern count = 0
905 Options: caseless utf
906 Starting code units: \xe1
907 Subject length lower bound = 2
910 ------------------------------------------------------------------
915 ------------------------------------------------------------------
916 Capturing subpattern count = 0
918 Subject length lower bound = 1
921 ------------------------------------------------------------------
927 ------------------------------------------------------------------
928 Capturing subpattern count = 0
930 First code unit = 'X'
931 Last code unit = \x80
932 Subject length lower bound = 2
935 Capturing subpattern count = 0
937 Starting code units: \x0a \x0b \x0c \x0d \xc2 \xe2
938 Subject length lower bound = 1
941 ------------------------------------------------------------------
946 ------------------------------------------------------------------
947 Capturing subpattern count = 0
949 First code unit = \xc7
950 Last code unit = \xbf
951 Subject length lower bound = 1
954 ------------------------------------------------------------------
960 ------------------------------------------------------------------
964 /\w+\x{C4}/B,utf,tables=2
965 ------------------------------------------------------------------
971 ------------------------------------------------------------------
976 ------------------------------------------------------------------
982 ------------------------------------------------------------------
986 /\W+\x{C4}/B,utf,tables=2
987 ------------------------------------------------------------------
993 ------------------------------------------------------------------
998 ------------------------------------------------------------------
1004 ------------------------------------------------------------------
1008 /\W+\x{A1}/B,utf,tables=2
1009 ------------------------------------------------------------------
1015 ------------------------------------------------------------------
1020 ------------------------------------------------------------------
1027 ------------------------------------------------------------------
1031 /X\s+\x{A0}/B,utf,tables=2
1032 ------------------------------------------------------------------
1039 ------------------------------------------------------------------
1044 ------------------------------------------------------------------
1050 ------------------------------------------------------------------
1054 /\S+\x{A0}/B,utf,tables=2
1055 ------------------------------------------------------------------
1061 ------------------------------------------------------------------
1066 ------------------------------------------------------------------
1073 ------------------------------------------------------------------
1077 /\x{a0}+\s!/B,utf,tables=2
1078 ------------------------------------------------------------------
1085 ------------------------------------------------------------------
1091 ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8
1093 Failed: error -14: UTF-8 error: 6-byte character is not allowed (RFC 3629) at offset 0
1095 /(*UTF8)abc/never_utf
1096 Failed: error 174 at offset 7: using UTF is disabled by the application
1099 Failed: error 174 at offset 0: using UTF is disabled by the application
1101 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
1102 ------------------------------------------------------------------
1104 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
1107 ------------------------------------------------------------------
1108 Capturing subpattern count = 0
1109 Options: caseless utf
1110 First code unit = 'A' (caseless)
1111 Subject length lower bound = 5
1113 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
1114 ------------------------------------------------------------------
1116 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
1119 ------------------------------------------------------------------
1120 Capturing subpattern count = 0
1122 First code unit = 'A'
1123 Last code unit = \xb0
1124 Subject length lower bound = 5
1127 ------------------------------------------------------------------
1132 ------------------------------------------------------------------
1133 Capturing subpattern count = 0
1135 First code unit = 'A'
1136 Last code unit = \xb0
1137 Subject length lower bound = 3
1140 ------------------------------------------------------------------
1145 ------------------------------------------------------------------
1146 Capturing subpattern count = 0
1147 Options: caseless utf
1148 First code unit = 'A' (caseless)
1149 Last code unit = 'B' (caseless)
1150 Subject length lower bound = 3
1152 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
1153 Capturing subpattern count = 0
1154 Options: caseless utf
1155 Starting code units: \xd0 \xd1
1156 Subject length lower bound = 17
1157 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
1158 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
1159 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
1160 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
1163 ------------------------------------------------------------------
1168 ------------------------------------------------------------------
1171 ------------------------------------------------------------------
1176 ------------------------------------------------------------------
1179 Capturing subpattern count = 0
1180 Starting code units: \x09 \x20 \xa0
1181 Subject length lower bound = 1
1184 Capturing subpattern count = 0
1185 Starting code units: \x0a \x0b \x0c \x0d \x85
1186 Subject length lower bound = 1
1189 Capturing subpattern count = 0
1190 Starting code units: \x0a \x0b \x0c \x0d \x85
1191 Subject length lower bound = 1
1194 ------------------------------------------------------------------
1199 ------------------------------------------------------------------
1202 Capturing subpattern count = 0
1203 Options: caseless utf
1204 Starting code units: K k \xe2
1205 Subject length lower bound = 1
1210 Capturing subpattern count = 0
1211 Options: caseless utf
1212 Starting code units: S s \xc5
1213 Subject length lower bound = 1
1218 ------------------------------------------------------------------
1224 ------------------------------------------------------------------
1225 Capturing subpattern count = 0
1227 Starting code units: A \xc4
1228 Last code unit = 'A'
1229 Subject length lower bound = 1
1233 /\x{100}*\d(?R)/IB,utf
1234 ------------------------------------------------------------------
1241 ------------------------------------------------------------------
1242 Capturing subpattern count = 0
1244 Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
1245 Subject length lower bound = 1
1248 ------------------------------------------------------------------
1253 ------------------------------------------------------------------
1254 Capturing subpattern count = 0
1256 Starting code units: Z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
1257 \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
1258 \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
1259 \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
1260 \xfb \xfc \xfd \xfe \xff
1261 Subject length lower bound = 1
1270 ------------------------------------------------------------------
1275 ------------------------------------------------------------------
1276 Capturing subpattern count = 0
1278 Starting code units: z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9
1279 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8
1280 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7
1281 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6
1282 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1283 Subject length lower bound = 1
1285 /[z\Qa-d]Ā\E]/IB,utf
1286 ------------------------------------------------------------------
1291 ------------------------------------------------------------------
1292 Capturing subpattern count = 0
1294 Starting code units: - ] a d z \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc
1295 \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb
1296 \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea
1297 \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9
1298 \xfa \xfb \xfc \xfd \xfe \xff
1299 Subject length lower bound = 1
1305 /[ab\x{100}]abc(xyz(?1))/IB,utf
1306 ------------------------------------------------------------------
1316 ------------------------------------------------------------------
1317 Capturing subpattern count = 1
1319 Starting code units: a b \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
1320 \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
1321 \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
1322 \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
1323 \xfb \xfc \xfd \xfe \xff
1324 Last code unit = 'z'
1325 Subject length lower bound = 7
1328 ------------------------------------------------------------------
1334 ------------------------------------------------------------------
1335 Capturing subpattern count = 0
1337 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xc4
1338 Subject length lower bound = 1
1341 ------------------------------------------------------------------
1347 ------------------------------------------------------------------
1348 Capturing subpattern count = 0
1350 Starting code units: 0 1 2 3 4 5 6 7 8 9 \xc4
1351 Subject length lower bound = 1
1354 ------------------------------------------------------------------
1360 ------------------------------------------------------------------
1361 Capturing subpattern count = 0
1363 Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1364 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1366 Subject length lower bound = 1
1369 ------------------------------------------------------------------
1375 ------------------------------------------------------------------
1376 Capturing subpattern count = 0
1378 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1379 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1380 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1381 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1382 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2
1383 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1384 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1385 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1386 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1388 Subject length lower bound = 1
1391 ------------------------------------------------------------------
1397 ------------------------------------------------------------------
1398 Capturing subpattern count = 0
1400 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1401 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1402 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1403 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1404 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4
1405 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3
1406 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2
1407 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1
1408 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1409 Subject length lower bound = 1
1412 ------------------------------------------------------------------
1418 ------------------------------------------------------------------
1419 Capturing subpattern count = 0
1421 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1422 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1423 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1424 ? @ [ \ ] ^ ` { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9
1425 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8
1426 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7
1427 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6
1428 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1429 Subject length lower bound = 1
1431 /[\x{105}-\x{109}]/IBi,utf
1432 ------------------------------------------------------------------
1437 ------------------------------------------------------------------
1438 Capturing subpattern count = 0
1439 Options: caseless utf
1440 Starting code units: \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce
1441 \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd
1442 \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec
1443 \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
1445 Subject length lower bound = 1
1458 /[z-\x{100}]/IBi,utf
1459 ------------------------------------------------------------------
1461 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1464 ------------------------------------------------------------------
1465 Capturing subpattern count = 0
1466 Options: caseless utf
1467 Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8
1468 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7
1469 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6
1470 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5
1471 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1472 Subject length lower bound = 1
1499 /[z-\x{100}]/IBi,utf
1500 ------------------------------------------------------------------
1502 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1505 ------------------------------------------------------------------
1506 Capturing subpattern count = 0
1507 Options: caseless utf
1508 Starting code units: Z z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8
1509 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7
1510 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6
1511 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5
1512 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1513 Subject length lower bound = 1
1516 ------------------------------------------------------------------
1518 clist 03a3 03c2 03c3
1522 ------------------------------------------------------------------
1523 Capturing subpattern count = 0
1524 Options: caseless utf
1525 Starting code units: \xce \xcf
1526 Last code unit = 'B' (caseless)
1527 Subject length lower bound = 2
1531 Failed: error -3: UTF-8 error: 1 byte missing at end
1533 /(?<=(a)(?-1))x/I,utf
1534 Capturing subpattern count = 1
1537 First code unit = 'x'
1538 Subject length lower bound = 1
1540 Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 1
1543 ------------------------------------------------------------------
1545 [\x00-/:-@[-^`{-\xff\p{Any}]
1548 ------------------------------------------------------------------
1555 ------------------------------------------------------------------
1557 [\x00-/:-@[-^`{-\xff\p{L}]
1560 ------------------------------------------------------------------
1567 /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
1568 Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)
1570 /[\s[:^ascii:]]/B,ucp
1571 ------------------------------------------------------------------
1576 ------------------------------------------------------------------
1578 # A special extra option allows excaped surrogate code points in 8-bit mode,
1579 # but subjects containing them must not be UTF-checked.
1581 /\x{d800}/I,utf,allow_surrogate_escapes
1582 Capturing subpattern count = 0
1584 Extra options: allow_surrogate_escapes
1585 First code unit = \xed
1586 Last code unit = \x80
1587 Subject length lower bound = 1
1588 \x{d800}\=no_utf_check
1591 /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
1592 \x{dfff}\x{df01}\=no_utf_check
1595 # This has different starting code units in 8-bit mode.
1598 ------------------------------------------------------------------
1601 [\x00-`c-\xff] (neg)
1604 ------------------------------------------------------------------
1605 Capturing subpattern count = 0
1606 Compile options: utf
1607 Overall options: anchored utf
1608 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1609 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1610 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1611 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
1612 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
1613 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
1614 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
1615 \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
1616 \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
1618 Subject length lower bound = 1
1629 # End of testinput10