1 # This set of tests is for UTF-8 support and Unicode property support, with
2 # relevance only for the 8-bit library.
4 # The next 4 patterns have UTF-8 errors
12 /Ã
\82\82\82\82\82\82\82\82Ã/utf
17 \= Expect UTF-8 errors
43 \xfd\x7f\x80\x80\x80\x80
44 \xfd\x80\x7f\x80\x80\x80
45 \xfd\x80\x80\x7f\x80\x80
46 \xfd\x80\x80\x80\x7f\x80
47 \xfd\x80\x80\x80\x80\x7f
53 \xfc\x80\x80\x80\x80\x8f
59 \= Expect UTF-8 errors
60 XX\xfb\x80\x80\x80\x80
61 XX\xfd\x80\x80\x80\x80\x80
65 \= Expect UTF-8 errors
80 \xfd\x80\x80\x80\x80\=ph
83 \= Expect UTF-8 errors
89 \xfc\x83\x80\x80\x80\x80
90 \xfe\x80\x80\x80\x80\x80
91 \xff\x80\x80\x80\x80\x80
94 \xfc\x84\x80\x80\x80\x80
95 \xfd\x83\x80\x80\x80\x80
102 \xf8\x88\x80\x80\x80\=no_utf_check
103 \xf9\x87\x80\x80\x80\=no_utf_check
104 \xfc\x84\x80\x80\x80\x80\=no_utf_check
105 \xfd\x83\x80\x80\x80\x80\=no_utf_check
107 # Similar tests with offsets
110 \= Expect UTF-8 errors
117 \= Expect UTF-8 errors
121 X\xdfabcd\xdf\=offset=3
126 \= Expect UTF-8 errors
133 \= Expect UTF-8 errors
138 X\xdfabc\xdf\=offset=6
139 X\xdfabc\xdf\=offset=7
161 /\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
162 \x{D55c}\x{ad6d}\x{C5B4}
164 /\x{65e5}\x{672c}\x{8a9e}/IB,utf
165 \x{65e5}\x{672c}\x{8a9e}
177 /[^ab\xC0-\xF0]/IB,utf
187 \x{100}\x{100}\x{100}\x{100\x{100}
191 /(\x{100}*a|x)/IB,utf
193 /(\x{100}{0,2}a|x)/IB,utf
195 /(\x{100}{1,2}a|x)/IB,utf
199 /a\x{100}\x{101}*/IB,utf
201 /a\x{100}\x{101}+/IB,utf
215 /\x{100}abc(xyz(?1))/IB,utf
221 /\x{100}+\x{200}/IB,utf
227 # This tests the stricter UTF-8 check according to RFC 3629.
230 \= Expect UTF-8 errors
238 \x{d800}\=no_utf_check
239 \x{da00}\=no_utf_check
240 \x{dfff}\=no_utf_check
241 \x{110000}\=no_utf_check
242 \x{2000000}\=no_utf_check
243 \x{7fffffff}\=no_utf_check
248 /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I
276 /\sxxx\s/I,utf,tables=2
280 /\S \S/I,utf,tables=2
288 \= Expect bad offset value
290 \= Expect bad UTF-8 offset
314 /\w+\x{C4}/B,utf,tables=2
320 /\W+\x{C4}/B,utf,tables=2
326 /\W+\x{A1}/B,utf,tables=2
332 /X\s+\x{A0}/B,utf,tables=2
338 /\S+\x{A0}/B,utf,tables=2
344 /\x{a0}+\s!/B,utf,tables=2
351 /(*UTF8)abc/never_utf
355 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
357 /A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
363 /\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
364 \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
365 \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
388 /\x{100}*\d(?R)/IB,utf
401 /[ab\x{100}]abc(xyz(?1))/IB,utf
415 /[\x{105}-\x{109}]/IBi,utf
445 /(?<=(a)(?-1))x/I,utf
457 /(*:*++++++++++++''''''''''''''''''''+''+++'+++x+++++++++++++++++++++++++++++++++++(++++++++++++++++++++:++++++%++:''''''''''''''''''''''''+++++++++++++++++++++++++++++++++++++++++++++++++++++-++++++++k+++++++''''+++'+++++++++++++++++++++++''''++++++++++++':ƿ)/utf
459 /[\s[:^ascii:]]/B,ucp
461 # A special extra option allows escaped surrogate code points in 8-bit mode,
462 # but subjects containing them must not be UTF-checked.
464 /\x{d800}/I,utf,allow_surrogate_escapes
465 \x{d800}\=no_utf_check
467 /\udfff\o{157401}/utf,alt_bsux,allow_surrogate_escapes
468 \x{dfff}\x{df01}\=no_utf_check
470 # This has different starting code units in 8-bit mode.