1 # This set of tests is for UTF-16 and UTF-32 support, including Unicode
2 # properties. It is relevant only to the 16-bit and 32-bit libraries. The
3 # output is different for each library, so there are separate output files.
5 /ÃÃÃxxx/IB,utf,no_utf_check
6 ** Failed: invalid UTF-8 string cannot be converted to 16-bit string
10 ** Failed: invalid UTF-8 string cannot be used as input in UTF mode
12 # Check maximum character size
15 ------------------------------------------------------------------
20 ------------------------------------------------------------------
21 Capturing subpattern count = 0
23 First code unit = \x{ffff}
24 Subject length lower bound = 1
27 ------------------------------------------------------------------
32 ------------------------------------------------------------------
33 Capturing subpattern count = 0
35 First code unit = \x{d800}
36 Last code unit = \x{dc00}
37 Subject length lower bound = 1
40 ------------------------------------------------------------------
45 ------------------------------------------------------------------
46 Capturing subpattern count = 0
48 First code unit = \x{100}
49 Subject length lower bound = 1
52 ------------------------------------------------------------------
57 ------------------------------------------------------------------
58 Capturing subpattern count = 0
60 First code unit = \x{1000}
61 Subject length lower bound = 1
64 ------------------------------------------------------------------
69 ------------------------------------------------------------------
70 Capturing subpattern count = 0
72 First code unit = \x{d800}
73 Last code unit = \x{dc00}
74 Subject length lower bound = 1
77 ------------------------------------------------------------------
82 ------------------------------------------------------------------
83 Capturing subpattern count = 0
85 First code unit = \x{dbc0}
86 Last code unit = \x{dc00}
87 Subject length lower bound = 1
90 ------------------------------------------------------------------
95 ------------------------------------------------------------------
96 Capturing subpattern count = 0
98 First code unit = \x{dbff}
99 Last code unit = \x{dfff}
100 Subject length lower bound = 1
103 ------------------------------------------------------------------
108 ------------------------------------------------------------------
109 Capturing subpattern count = 0
111 First code unit = \xff
112 Subject length lower bound = 1
115 ------------------------------------------------------------------
120 ------------------------------------------------------------------
121 Capturing subpattern count = 0
123 First code unit = \x{100}
124 Subject length lower bound = 1
127 ------------------------------------------------------------------
132 ------------------------------------------------------------------
133 Capturing subpattern count = 0
135 First code unit = \x80
136 Subject length lower bound = 1
139 ------------------------------------------------------------------
144 ------------------------------------------------------------------
145 Capturing subpattern count = 0
147 First code unit = \xff
148 Subject length lower bound = 1
150 /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
151 ------------------------------------------------------------------
153 \x{d55c}\x{ad6d}\x{c5b4}
156 ------------------------------------------------------------------
157 Capturing subpattern count = 0
159 First code unit = \x{d55c}
160 Last code unit = \x{c5b4}
161 Subject length lower bound = 3
162 \x{D55c}\x{ad6d}\x{C5B4}
163 0: \x{d55c}\x{ad6d}\x{c5b4}
165 /\x{65e5}\x{672c}\x{8a9e}/IB,utf
166 ------------------------------------------------------------------
168 \x{65e5}\x{672c}\x{8a9e}
171 ------------------------------------------------------------------
172 Capturing subpattern count = 0
174 First code unit = \x{65e5}
175 Last code unit = \x{8a9e}
176 Subject length lower bound = 3
177 \x{65e5}\x{672c}\x{8a9e}
178 0: \x{65e5}\x{672c}\x{8a9e}
181 ------------------------------------------------------------------
186 ------------------------------------------------------------------
187 Capturing subpattern count = 0
189 First code unit = \x80
190 Subject length lower bound = 1
193 ------------------------------------------------------------------
198 ------------------------------------------------------------------
199 Capturing subpattern count = 0
201 First code unit = \x84
202 Subject length lower bound = 1
205 ------------------------------------------------------------------
210 ------------------------------------------------------------------
211 Capturing subpattern count = 0
213 First code unit = \x{104}
214 Subject length lower bound = 1
217 ------------------------------------------------------------------
222 ------------------------------------------------------------------
223 Capturing subpattern count = 0
225 First code unit = \x{861}
226 Subject length lower bound = 1
229 ------------------------------------------------------------------
234 ------------------------------------------------------------------
235 Capturing subpattern count = 0
237 First code unit = \x{d844}
238 Last code unit = \x{deab}
239 Subject length lower bound = 1
241 /[^ab\xC0-\xF0]/IB,utf
242 ------------------------------------------------------------------
244 [\x00-`c-\xbf\xf1-\xff] (neg)
247 ------------------------------------------------------------------
248 Capturing subpattern count = 0
250 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
251 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
252 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
253 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
254 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
255 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
256 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
257 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
258 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
259 \xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
261 Subject length lower bound = 1
277 ------------------------------------------------------------------
283 ------------------------------------------------------------------
284 Capturing subpattern count = 0
286 First code unit = \x{100}
287 Last code unit = \x{100}
288 Subject length lower bound = 3
289 \x{100}\x{100}\x{100}\x{100\x{100}
290 0: \x{100}\x{100}\x{100}
293 ------------------------------------------------------------------
302 ------------------------------------------------------------------
303 Capturing subpattern count = 1
305 Starting code units: x \xff
306 Subject length lower bound = 1
308 /(\x{100}*a|x)/IB,utf
309 ------------------------------------------------------------------
319 ------------------------------------------------------------------
320 Capturing subpattern count = 1
322 Starting code units: a x \xff
323 Subject length lower bound = 1
325 /(\x{100}{0,2}a|x)/IB,utf
326 ------------------------------------------------------------------
336 ------------------------------------------------------------------
337 Capturing subpattern count = 1
339 Starting code units: a x \xff
340 Subject length lower bound = 1
342 /(\x{100}{1,2}a|x)/IB,utf
343 ------------------------------------------------------------------
354 ------------------------------------------------------------------
355 Capturing subpattern count = 1
357 Starting code units: x \xff
358 Subject length lower bound = 1
361 ------------------------------------------------------------------
366 ------------------------------------------------------------------
367 Capturing subpattern count = 0
369 First code unit = \x{100}
370 Subject length lower bound = 1
372 /a\x{100}\x{101}*/IB,utf
373 ------------------------------------------------------------------
379 ------------------------------------------------------------------
380 Capturing subpattern count = 0
382 First code unit = 'a'
383 Last code unit = \x{100}
384 Subject length lower bound = 2
386 /a\x{100}\x{101}+/IB,utf
387 ------------------------------------------------------------------
393 ------------------------------------------------------------------
394 Capturing subpattern count = 0
396 First code unit = 'a'
397 Last code unit = \x{101}
398 Subject length lower bound = 3
401 ------------------------------------------------------------------
406 ------------------------------------------------------------------
407 Capturing subpattern count = 0
408 Subject length lower bound = 1
411 ------------------------------------------------------------------
416 ------------------------------------------------------------------
417 Capturing subpattern count = 0
419 First code unit = \x{100}
420 Subject length lower bound = 1
429 ------------------------------------------------------------------
434 ------------------------------------------------------------------
435 Capturing subpattern count = 0
437 First code unit = \xff
438 Subject length lower bound = 1
443 ------------------------------------------------------------------
448 ------------------------------------------------------------------
449 Capturing subpattern count = 0
451 Subject length lower bound = 1
453 /\x{100}abc(xyz(?1))/IB,utf
454 ------------------------------------------------------------------
463 ------------------------------------------------------------------
464 Capturing subpattern count = 1
466 First code unit = \x{100}
468 Subject length lower bound = 7
471 Capturing subpattern count = 0
473 First code unit = \x{1ff}
474 Subject length lower bound = 1
480 /\x{100}+\x{200}/IB,utf
481 ------------------------------------------------------------------
487 ------------------------------------------------------------------
488 Capturing subpattern count = 0
490 First code unit = \x{100}
491 Last code unit = \x{200}
492 Subject length lower bound = 2
495 ------------------------------------------------------------------
501 ------------------------------------------------------------------
502 Capturing subpattern count = 0
504 First code unit = \x{100}
506 Subject length lower bound = 2
509 Failed: error 106 at offset 13: missing terminating ] for character class
512 XX\x{d800}\=no_utf_check
514 XX\x{da00}\=no_utf_check
516 XX\x{dc00}\=no_utf_check
518 XX\x{de00}\=no_utf_check
520 XX\x{dfff}\=no_utf_check
524 Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
526 Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
528 Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
530 Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
532 Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
534 ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
536 Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
543 Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
550 Capturing subpattern count = 0
551 Compile options: <none>
553 First code unit = \x{d804}
554 Last code unit = \x{de34}
555 Subject length lower bound = 1
560 Failed: error 160 at offset 5: (*VERB) not recognized or malformed
564 Failed: error 160 at offset 5: (*VERB) not recognized or malformed
567 /(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
568 Capturing subpattern count = 0
569 Compile options: <none>
571 \R matches any Unicode newline
572 Forced newline is CRLF
573 First code unit = 'a'
575 Subject length lower bound = 3
577 /(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
578 Failed: error 160 at offset 12: (*VERB) not recognized or malformed
581 Capturing subpattern count = 0
583 Starting code units: \x09 \x20 \xa0 \xff
584 Subject length lower bound = 1
605 Capturing subpattern count = 0
607 Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
608 Subject length lower bound = 1
623 Capturing subpattern count = 0
625 Starting code units: \x09 \x20 A \xa0 \xff
627 Subject length lower bound = 1
633 /\R*A/I,bsr=unicode,utf
634 Capturing subpattern count = 0
636 \R matches any Unicode newline
637 Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
639 Subject length lower bound = 1
646 Capturing subpattern count = 0
648 Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
650 Subject length lower bound = 2
653 Capturing subpattern count = 0
655 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
657 Subject length lower bound = 4
659 /\sxxx\s/I,utf,tables=2
660 Capturing subpattern count = 0
662 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
664 Subject length lower bound = 5
670 /\S \S/I,utf,tables=2
671 Capturing subpattern count = 0
673 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
674 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
675 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
676 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
677 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
678 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
679 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
680 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
681 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
682 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
683 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
684 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
685 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
688 Subject length lower bound = 3
704 \= Expect bad offset error
706 Failed: error -33: bad offset value
708 Failed: error -33: bad offset value
711 Capturing subpattern count = 0
712 Options: caseless utf
713 First code unit = \x{1234}
714 Subject length lower bound = 1
717 Capturing subpattern count = 0
718 Options: caseless utf
719 First code unit = \x{1234}
720 Subject length lower bound = 1
723 Capturing subpattern count = 0
724 Options: caseless utf
725 First code unit = \x{1234}
726 Subject length lower bound = 1
729 Capturing subpattern count = 0
730 Options: caseless utf
731 First code unit = \x{1234}
732 Last code unit = \x{1234}
733 Subject length lower bound = 2
736 ------------------------------------------------------------------
741 ------------------------------------------------------------------
742 Capturing subpattern count = 0
744 Subject length lower bound = 1
747 ------------------------------------------------------------------
753 ------------------------------------------------------------------
754 Capturing subpattern count = 0
756 First code unit = 'X'
757 Last code unit = \x{200}
758 Subject length lower bound = 2
761 Capturing subpattern count = 0
763 Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
764 Subject length lower bound = 1
769 \= Expect bad UTF-16 offset, or no match in 32-bit
771 Error -36 (bad UTF-16 offset)
772 \x{10000}ab\=offset=1
773 Error -36 (bad UTF-16 offset)
774 \= Expect 16-bit match, 32-bit no match
775 \x{10000}ab\=offset=2
778 \x{10000}ab\=offset=3
780 \= Expect no match in 16-bit, bad offset in 32-bit
781 \x{10000}ab\=offset=4
784 \x{10000}ab\=offset=5
785 Failed: error -33: bad offset value
788 Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate
791 ------------------------------------------------------------------
797 ------------------------------------------------------------------
801 /\w+\x{C4}/B,utf,tables=2
802 ------------------------------------------------------------------
808 ------------------------------------------------------------------
813 ------------------------------------------------------------------
819 ------------------------------------------------------------------
823 /\W+\x{C4}/B,utf,tables=2
824 ------------------------------------------------------------------
830 ------------------------------------------------------------------
835 ------------------------------------------------------------------
841 ------------------------------------------------------------------
845 /\W+\x{A1}/B,utf,tables=2
846 ------------------------------------------------------------------
852 ------------------------------------------------------------------
857 ------------------------------------------------------------------
864 ------------------------------------------------------------------
868 /X\s+\x{A0}/B,utf,tables=2
869 ------------------------------------------------------------------
876 ------------------------------------------------------------------
881 ------------------------------------------------------------------
887 ------------------------------------------------------------------
891 /\S+\x{A0}/B,utf,tables=2
892 ------------------------------------------------------------------
898 ------------------------------------------------------------------
903 ------------------------------------------------------------------
910 ------------------------------------------------------------------
914 /\x{a0}+\s!/B,utf,tables=2
915 ------------------------------------------------------------------
922 ------------------------------------------------------------------
927 Failed: error 174 at offset 6: using UTF is disabled by the application
930 Failed: error 174 at offset 0: using UTF is disabled by the application
932 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
933 ------------------------------------------------------------------
935 /i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
938 ------------------------------------------------------------------
939 Capturing subpattern count = 0
940 Options: caseless utf
941 First code unit = 'A' (caseless)
942 Last code unit = \x{1fb0} (caseless)
943 Subject length lower bound = 5
945 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
946 ------------------------------------------------------------------
948 A\x{391}\x{10427}\x{ff3a}\x{1fb0}
951 ------------------------------------------------------------------
952 Capturing subpattern count = 0
954 First code unit = 'A'
955 Last code unit = \x{1fb0}
956 Subject length lower bound = 5
959 ------------------------------------------------------------------
964 ------------------------------------------------------------------
965 Capturing subpattern count = 0
967 First code unit = 'A'
968 Last code unit = \x{1fb0}
969 Subject length lower bound = 3
972 ------------------------------------------------------------------
977 ------------------------------------------------------------------
978 Capturing subpattern count = 0
979 Options: caseless utf
980 First code unit = 'A' (caseless)
981 Last code unit = \x{1fb0} (caseless)
982 Subject length lower bound = 3
984 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
985 Capturing subpattern count = 0
986 Options: caseless utf
987 First code unit = \x{401} (caseless)
988 Last code unit = \x{42f} (caseless)
989 Subject length lower bound = 17
990 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
991 0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
992 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
993 0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
996 ------------------------------------------------------------------
1001 ------------------------------------------------------------------
1004 ------------------------------------------------------------------
1009 ------------------------------------------------------------------
1012 ------------------------------------------------------------------
1014 [\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
1017 ------------------------------------------------------------------
1020 Capturing subpattern count = 0
1021 Options: caseless utf
1022 Starting code units: K k \xff
1023 Subject length lower bound = 1
1028 Capturing subpattern count = 0
1029 Options: caseless utf
1030 Starting code units: S s \xff
1031 Subject length lower bound = 1
1035 # Non-UTF characters should give errors in both 16-bit and 32-bit modes.
1038 Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
1041 Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
1044 ------------------------------------------------------------------
1050 ------------------------------------------------------------------
1051 Capturing subpattern count = 0
1053 Starting code units: A \xff
1054 Last code unit = 'A'
1055 Subject length lower bound = 1
1059 /\x{100}*\d(?R)/IB,utf
1060 ------------------------------------------------------------------
1067 ------------------------------------------------------------------
1068 Capturing subpattern count = 0
1070 Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1071 Subject length lower bound = 1
1074 ------------------------------------------------------------------
1079 ------------------------------------------------------------------
1080 Capturing subpattern count = 0
1082 Starting code units: Z \xff
1083 Subject length lower bound = 1
1092 ------------------------------------------------------------------
1097 ------------------------------------------------------------------
1098 Capturing subpattern count = 0
1100 Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
1101 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
1102 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5
1103 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4
1104 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3
1105 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
1106 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
1107 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
1108 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1109 Subject length lower bound = 1
1111 /[z\Qa-d]Ā\E]/IB,utf
1112 ------------------------------------------------------------------
1117 ------------------------------------------------------------------
1118 Capturing subpattern count = 0
1120 Starting code units: - ] a d z \xff
1121 Subject length lower bound = 1
1127 /[ab\x{100}]abc(xyz(?1))/IB,utf
1128 ------------------------------------------------------------------
1138 ------------------------------------------------------------------
1139 Capturing subpattern count = 1
1141 Starting code units: a b \xff
1142 Last code unit = 'z'
1143 Subject length lower bound = 7
1146 ------------------------------------------------------------------
1152 ------------------------------------------------------------------
1153 Capturing subpattern count = 0
1155 Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
1156 Subject length lower bound = 1
1159 ------------------------------------------------------------------
1165 ------------------------------------------------------------------
1166 Capturing subpattern count = 0
1168 Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
1169 Subject length lower bound = 1
1172 ------------------------------------------------------------------
1178 ------------------------------------------------------------------
1179 Capturing subpattern count = 0
1181 Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
1182 Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
1184 Subject length lower bound = 1
1187 ------------------------------------------------------------------
1193 ------------------------------------------------------------------
1194 Capturing subpattern count = 0
1196 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1197 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1198 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1199 ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
1200 d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
1201 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
1202 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
1203 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
1204 \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
1205 \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
1206 \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
1207 \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
1208 \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
1209 \xfb \xfc \xfd \xfe \xff
1210 Subject length lower bound = 1
1213 ------------------------------------------------------------------
1219 ------------------------------------------------------------------
1220 Capturing subpattern count = 0
1222 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
1223 \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
1224 \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
1225 D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
1226 i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
1227 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
1228 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
1229 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
1230 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
1231 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
1232 \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
1233 \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
1234 \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
1236 Subject length lower bound = 1
1239 ------------------------------------------------------------------
1245 ------------------------------------------------------------------
1246 Capturing subpattern count = 0
1248 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1249 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1250 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
1251 ? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89
1252 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98
1253 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7
1254 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6
1255 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5
1256 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4
1257 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3
1258 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2
1259 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1260 Subject length lower bound = 1
1262 /[\x{105}-\x{109}]/IBi,utf
1263 ------------------------------------------------------------------
1268 ------------------------------------------------------------------
1269 Capturing subpattern count = 0
1270 Options: caseless utf
1271 Starting code units: \xff
1272 Subject length lower bound = 1
1285 /[z-\x{100}]/IBi,utf
1286 ------------------------------------------------------------------
1288 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1291 ------------------------------------------------------------------
1292 Capturing subpattern count = 0
1293 Options: caseless utf
1294 Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1295 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1296 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1297 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1298 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1299 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1300 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1301 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1302 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1304 Subject length lower bound = 1
1331 /[z-\x{100}]/IBi,utf
1332 ------------------------------------------------------------------
1334 [Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
1337 ------------------------------------------------------------------
1338 Capturing subpattern count = 0
1339 Options: caseless utf
1340 Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
1341 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
1342 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
1343 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
1344 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
1345 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
1346 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
1347 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
1348 \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
1350 Subject length lower bound = 1
1353 ------------------------------------------------------------------
1355 clist 03a3 03c2 03c3
1359 ------------------------------------------------------------------
1360 Capturing subpattern count = 0
1361 Options: caseless utf
1362 Starting code units: \xff
1363 Last code unit = 'B' (caseless)
1364 Subject length lower bound = 2
1368 ** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
1371 ------------------------------------------------------------------
1373 ab\x{fd}\x{bf}\x{bf}\x{bf}\x{bf}\x{bf}z
1376 ------------------------------------------------------------------
1379 ** Failed: character value greater than 0x10ffff cannot be converted to UTF
1382 ------------------------------------------------------------------
1384 [\x00-/:-@[-^`{-\xff\p{Any}\x{100}-\x{ffff}]
1387 ------------------------------------------------------------------
1394 ------------------------------------------------------------------
1396 [\x00-/:-@[-^`{-\xff\p{L}\x{100}-\x{ffff}]
1399 ------------------------------------------------------------------
1410 /[\s[:^ascii:]]/B,ucp
1411 ------------------------------------------------------------------
1413 [\x80-\xff\p{Xsp}\x{100}-\x{ffff}]
1416 ------------------------------------------------------------------
1420 ** Character \x{7fffffff} is greater than 0xffff and UTF-16 mode is not enabled.
1421 ** Truncation will probably give the wrong result.
1424 # A special extra option allows excaped surrogate code points in 32-bit mode,
1425 # but subjects containing them must not be UTF-checked. These patterns give
1426 # errors in 16-bit mode.
1428 /\x{d800}/I,utf,allow_surrogate_escapes
1429 Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
1430 \x{d800}\=no_utf_check
1432 /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
1433 Failed: error 191 at offset 0: PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode
1434 \x{dfff}\x{df01}\=no_utf_check
1436 # This has different starting code units in 8-bit mode.
1439 ------------------------------------------------------------------
1442 [\x00-`c-\xff] (neg)
1445 ------------------------------------------------------------------
1446 Capturing subpattern count = 0
1447 Compile options: utf
1448 Overall options: anchored utf
1449 Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
1450 \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
1451 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
1452 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
1453 Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
1454 \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
1455 \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
1456 \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
1457 \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
1458 \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca
1459 \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9
1460 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8
1461 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7
1462 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
1463 Subject length lower bound = 1
1474 # End of testinput12