X-Git-Url: http://ftp.carnet.hr/carnet-debian/scm?p=ossec-hids.git;a=blobdiff_plain;f=src%2Fexternal%2Fpcre2-10.32%2Ftestdata%2Ftestinput5;fp=src%2Fexternal%2Fpcre2-10.32%2Ftestdata%2Ftestinput5;h=687de321fa82f340a4f8d4486d3a9fc05a928bf4;hp=0000000000000000000000000000000000000000;hb=3f728675941dc69d4e544d3a880a56240a6e394a;hpb=927951d1c1ad45ba9e7325f07d996154a91c911b diff --git a/src/external/pcre2-10.32/testdata/testinput5 b/src/external/pcre2-10.32/testdata/testinput5 new file mode 100644 index 0000000..687de32 --- /dev/null +++ b/src/external/pcre2-10.32/testdata/testinput5 @@ -0,0 +1,2110 @@ +# This set of tests checks the API, internals, and non-Perl stuff for UTF +# support, including Unicode properties. However, tests that give different +# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and +# 12). + +#newline_default lf any anycrlf + +# PCRE2 and Perl disagree about the characteristics of certain Unicode +# characters. For example, 061C was considered by Perl to be Arabic, though +# it was not listed as such in the Unicode Scripts.txt file for Unicode 8. +# However, it *is* in that file for Unicode 10, but when I came to re-check, +# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic. + +# 2066-2069 are graphic and printable according to Perl, though they are +# actually "isolate" control characters. That is why the following tests are +# here rather than in test 4. + +/^[\p{Arabic}]/utf + \x{061c} + +/^[[:graph:]]+$/utf,ucp +\= Expect no match + \x{61c} + \x{2066} + \x{2067} + \x{2068} + \x{2069} + +/^[[:print:]]+$/utf,ucp +\= Expect no match + \x{61c} + \x{2066} + \x{2067} + \x{2068} + \x{2069} + +/^[[:^graph:]]+$/utf,ucp + \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680} + \x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069} + +/^[[:^print:]]+$/utf,ucp + \x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067} + \x{2068}\x{2069} + +# Perl does not consider U+180e to be a space character. It is true that it +# does not appear in the Unicode PropList.txt file as such, but in many other +# sources it is listed as a space, and has been treated as such in PCRE for +# a long time. + +/^>[[:blank:]]*/utf,ucp + >\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028} + +/^A\s+Z/utf,ucp + A\x{85}\x{180e}\x{2005}Z + +/^A[\s]+Z/utf,ucp + A\x{2005}Z + A\x{85}\x{2005}Z + +/^[[:graph:]]+$/utf,ucp +\= Expect no match + \x{180e} + +/^[[:print:]]+$/utf,ucp + \x{180e} + +/^[[:^graph:]]+$/utf,ucp + \x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e} + +/^[[:^print:]]+$/utf,ucp +\= Expect no match + \x{180e} + +# End of U+180E tests. + +# --------------------------------------------------------------------- + +/\x{110000}/IB,utf + +/\o{4200000}/IB,utf + +/\x{ffffffff}/utf + +/\o{37777777777}/utf + +/\x{100000000}/utf + +/\o{77777777777}/utf + +/\x{d800}/utf + +/\o{154000}/utf + +/\x{dfff}/utf + +/\o{157777}/utf + +/\x{d7ff}/utf + +/\o{153777}/utf + +/\x{e000}/utf + +/\o{170000}/utf + +/^\x{100}a\x{1234}/utf + \x{100}a\x{1234}bcd + +/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf + \x{0041}\x{2262}\x{0391}\x{002e} + +/.{3,5}X/IB,utf + \x{212ab}\x{212ab}\x{212ab}\x{861}X + +/.{3,5}?/IB,utf + \x{212ab}\x{212ab}\x{212ab}\x{861} + +/^[ab]/IB,utf + bar +\= Expect no match + c + \x{ff} + \x{100} + +/\x{100}*(\d+|"(?1)")/utf + 1234 + "1234" + \x{100}1234 + "\x{100}1234" + \x{100}\x{100}12ab + \x{100}\x{100}"12" +\= Expect no match + \x{100}\x{100}abcd + +/\x{100}*/IB,utf + +/a\x{100}*/IB,utf + +/ab\x{100}*/IB,utf + +/[\x{200}-\x{100}]/utf + +/[Ā-Ą]/utf + \x{100} + \x{104} +\= Expect no match + \x{105} + \x{ff} + +/[\xFF]/IB + >\xff< + +/[^\xFF]/IB + +/[Ä-Ü]/utf + Ö # Matches without Study + \x{d6} + +/[Ä-Ü]/utf + Ö <-- Same with Study + \x{d6} + +/[\x{c4}-\x{dc}]/utf + Ö # Matches without Study + \x{d6} + +/[\x{c4}-\x{dc}]/utf + Ö <-- Same with Study + \x{d6} + +/[^\x{100}]abc(xyz(?1))/IB,utf + +/(\x{100}(b(?2)c))?/IB,utf + +/(\x{100}(b(?2)c)){0,2}/IB,utf + +/(\x{100}(b(?1)c))?/IB,utf + +/(\x{100}(b(?1)c)){0,2}/IB,utf + +/\W/utf + A.B + A\x{100}B + +/\w/utf + \x{100}X + +# Use no_start_optimize because the first code unit is different in 8-bit from +# the wider modes. + +/^\ሴ/IB,utf,no_start_optimize + +/()()()()()()()()()() + ()()()()()()()()()() + ()()()()()()()()()() + ()()()()()()()()()() + A (x) (?41) B/x,utf + AxxB + +/^[\x{100}\E-\Q\E\x{150}]/B,utf + +/^[\QĀ\E-\QŐ\E]/B,utf + +/^abc./gmx,newline=any,utf + abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK + +/abc.$/gmx,newline=any,utf + abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9 + +/^a\Rb/bsr=unicode,utf + a\nb + a\rb + a\r\nb + a\x0bb + a\x0cb + a\x{85}b + a\x{2028}b + a\x{2029}b +\= Expect no match + a\n\rb + +/^a\R*b/bsr=unicode,utf + ab + a\nb + a\rb + a\r\nb + a\x0bb + a\x0c\x{2028}\x{2029}b + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb + +/^a\R+b/bsr=unicode,utf + a\nb + a\rb + a\r\nb + a\x0bb + a\x0c\x{2028}\x{2029}b + a\x{85}b + a\n\rb + a\n\r\x{85}\x0cb +\= Expect no match + ab + +/^a\R{1,3}b/bsr=unicode,utf + a\nb + a\n\rb + a\n\r\x{85}b + a\r\n\r\nb + a\r\n\r\n\r\nb + a\n\r\n\rb + a\n\n\r\nb +\= Expect no match + a\n\n\n\rb + a\r + +/\H\h\V\v/utf + X X\x0a + X\x09X\x0b +\= Expect no match + \x{a0} X\x0a + +/\H*\h+\V?\v{3,4}/utf + \x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a + \x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a + \x09\x20\x{a0}\x0a\x0b\x0c +\= Expect no match + \x09\x20\x{a0}\x0a\x0b + +/\H\h\V\v/utf + \x{3001}\x{3000}\x{2030}\x{2028} + X\x{180e}X\x{85} +\= Expect no match + \x{2009} X\x0a + +/\H*\h+\V?\v{3,4}/utf + \x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a + \x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a + \x09\x20\x{202f}\x0a\x0b\x0c +\= Expect no match + \x09\x{200a}\x{a0}\x{2028}\x0b + +/[\h]/B,utf + >\x{1680} + +/[\h]{3,}/B,utf + >\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}< + +/[\v]/B,utf + +/[\H]/B,utf + +/[\V]/B,utf + +/.*$/newline=any,utf + \x{1ec5} + +/a\Rb/I,bsr=anycrlf,utf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x{85}b + a\x0bb + +/a\Rb/I,bsr=unicode,utf + a\rb + a\nb + a\r\nb + a\x{85}b + a\x0bb + +/a\R?b/I,bsr=anycrlf,utf + a\rb + a\nb + a\r\nb +\= Expect no match + a\x{85}b + a\x0bb + +/a\R?b/I,bsr=unicode,utf + a\rb + a\nb + a\r\nb + a\x{85}b + a\x0bb + +/.*a.*=.b.*/utf,newline=any + QQQ\x{2029}ABCaXYZ=!bPQR +\= Expect no match + a\x{2029}b + \x61\xe2\x80\xa9\x62 + +/[[:a\x{100}b:]]/utf + +/a[^]b/utf,alt_bsux,allow_empty_class,match_unset_backref + a\x{1234}b + a\nb +\= Expect no match + ab + +/a[^]+b/utf,alt_bsux,allow_empty_class,match_unset_backref + aXb + a\nX\nX\x{1234}b +\= Expect no match + ab + +/(\x{de})\1/ + \x{de}\x{de} + +/X/newline=any,utf,firstline + A\x{1ec5}ABCXYZ + +/Xa{2,4}b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}?b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/Xa{2,4}+b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\x{123}{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\x{123}{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\x{123}{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\x{123}{2,4}b/utf +\= Expect no match + Xx\=ps + X\x{123}x\=ps + X\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}\x{123}x\=ps + +/X\x{123}{2,4}?b/utf +\= Expect no match + Xx\=ps + X\x{123}x\=ps + X\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}\x{123}x\=ps + +/X\x{123}{2,4}+b/utf +\= Expect no match + Xx\=ps + X\x{123}x\=ps + X\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}x\=ps + X\x{123}\x{123}\x{123}\x{123}x\=ps + +/X\d{2,4}b/utf + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\d{2,4}?b/utf + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\d{2,4}+b/utf + X\=ps + X3\=ps + X33\=ps + X333\=ps + X3333\=ps + +/X\D{2,4}b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}?b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}+b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X\D{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\D{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X\D{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[abc]{2,4}b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}?b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc]{2,4}+b/utf + X\=ps + Xa\=ps + Xaa\=ps + Xaaa\=ps + Xaaaa\=ps + +/X[abc\x{123}]{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[abc\x{123}]{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[abc\x{123}]{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[^a]{2,4}b/utf + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}?b/utf + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}+b/utf + X\=ps + Xz\=ps + Xzz\=ps + Xzzz\=ps + Xzzzz\=ps + +/X[^a]{2,4}b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[^a]{2,4}?b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/X[^a]{2,4}+b/utf + X\=ps + X\x{123}\=ps + X\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\=ps + X\x{123}\x{123}\x{123}\x{123}\=ps + +/(Y)X\1{2,4}b/utf + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}?b/utf + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(Y)X\1{2,4}+b/utf + YX\=ps + YXY\=ps + YXYY\=ps + YXYYY\=ps + YXYYYY\=ps + +/(\x{123})X\1{2,4}b/utf + \x{123}X\=ps + \x{123}X\x{123}\=ps + \x{123}X\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps + +/(\x{123})X\1{2,4}?b/utf + \x{123}X\=ps + \x{123}X\x{123}\=ps + \x{123}X\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps + +/(\x{123})X\1{2,4}+b/utf + \x{123}X\=ps + \x{123}X\x{123}\=ps + \x{123}X\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\=ps + \x{123}X\x{123}\x{123}\x{123}\x{123}\=ps + +/\bthe cat\b/utf + the cat\=ps + the cat\=ph + +/abcd*/utf + xxxxabcd\=ps + xxxxabcd\=ph + +/abcd*/i,utf + xxxxabcd\=ps + xxxxabcd\=ph + XXXXABCD\=ps + XXXXABCD\=ph + +/abc\d*/utf + xxxxabc1\=ps + xxxxabc1\=ph + +/(a)bc\1*/utf + xxxxabca\=ps + xxxxabca\=ph + +/abc[de]*/utf + xxxxabcde\=ps + xxxxabcde\=ph + +/X\W{3}X/utf + X\=ps + +/\sxxx\s/utf,tables=2 + AB\x{85}xxx\x{a0}XYZ + AB\x{a0}xxx\x{85}XYZ + +/\S \S/utf,tables=2 + \x{a2} \x{84} + +'A#хц'Bx,newline=any,utf + +'A#хц + PQ'Bx,newline=any,utf + +/a+#хaa + z#XX?/Bx,newline=any,utf + +/a+#хaa + z#х?/Bx,newline=any,utf + +/\g{A}xxx#bXX(?'A'123) (?'A'456)/Bx,newline=any,utf + +/\g{A}xxx#bх(?'A'123) (?'A'456)/Bx,newline=any,utf + +/^\cģ/utf + +/(\R*)(.)/s,utf + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/(\R)*(.)/s,utf + \r\n + \r\r\n\n\r + \r\r\n\n\r\n + +/[^\x{1234}]+/Ii,utf + +/[^\x{1234}]+?/Ii,utf + +/[^\x{1234}]++/Ii,utf + +/[^\x{1234}]{2}/Ii,utf + +/f.*/ + for\=ph + +/f.*/s + for\=ph + +/f.*/utf + for\=ph + +/f.*/s,utf + for\=ph + +/\x{d7ff}\x{e000}/utf + +/\x{d800}/utf + +/\x{dfff}/utf + +/\h+/utf + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + +/[\h\x{e000}]+/B,utf + \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} + \x{3001}\x{2fff}\x{200a}\x{a0}\x{2000} + +/\H+/utf + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + +/[\H\x{d7ff}]+/B,utf + \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} + \x{2000}\x{200a}\x{1fff}\x{200b} + \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} + \x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001} + +/\v+/utf + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + +/[\v\x{e000}]+/B,utf + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + +/\V+/utf + \x{2028}\x{2029}\x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + +/[\V\x{d7ff}]+/B,utf + \x{2028}\x{2029}\x{2027}\x{2030} + \x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86} + +/\R+/bsr=unicode,utf + \x{2027}\x{2030}\x{2028}\x{2029} + \x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d + +/(..)\1/utf + ab\=ps + aba\=ps + abab\=ps + +/(..)\1/i,utf + ab\=ps + abA\=ps + aBAb\=ps + +/(..)\1{2,}/utf + ab\=ps + aba\=ps + abab\=ps + ababa\=ps + ababab\=ps + ababab\=ph + abababa\=ps + abababa\=ph + +/(..)\1{2,}/i,utf + ab\=ps + aBa\=ps + aBAb\=ps + AbaBA\=ps + abABAb\=ps + aBAbaB\=ph + abABabA\=ps + abaBABa\=ph + +/(..)\1{2,}?x/i,utf + ab\=ps + abA\=ps + aBAb\=ps + abaBA\=ps + abAbaB\=ps + abaBabA\=ps + abAbABaBx\=ps + +/./utf,newline=crlf + \r\=ps + \r\=ph + +/.{2,3}/utf,newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/.{2,3}?/utf,newline=crlf + \r\=ps + \r\=ph + \r\r\=ps + \r\r\=ph + \r\r\r\=ps + \r\r\r\=ph + +/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf + +/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf + +/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf + +/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf + +/(?<=\x{1234}\x{1234})\bxy/I,utf + +/(?\p{Xsp}/utf + >\x{1680}\x{2028}\x{0b} + >\x{a0} +\= Expect no match + \x{0b} + +/^>\p{Xsp}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}+?/utf + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xsp}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xsp}]/utf + >\x{2028}\x{0b} + +/^>[\p{Xsp}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}/utf + >\x{1680}\x{2028}\x{0b} + >\x{a0} +\= Expect no match + \x{0b} + +/^>\p{Xps}+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}+?/utf + >\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}*/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>\p{Xps}{2,9}?/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^>[\p{Xps}]/utf + >\x{2028}\x{0b} + +/^>[\p{Xps}]+/utf + > \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b} + +/^\p{Xwd}/utf + ABCD + 1234 + \x{6ca} + \x{a6c} + \x{10a7} + _ABC +\= Expect no match + [] + +/^\p{Xwd}+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}+?/utf + \x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}*/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +/^\p{Xwd}{2,9}/utf + A_B12\x{6ca}\x{a6c}\x{10a7} + +/^\p{Xwd}{2,9}?/utf + \x{6ca}\x{a6c}\x{10a7}_ + +/^[\p{Xwd}]/utf + ABCD1234_ + 1234abcd_ + \x{6ca} + \x{a6c} + \x{10a7} + _ABC +\= Expect no match + [] + +/^[\p{Xwd}]+/utf + ABCD1234\x{6ca}\x{a6c}\x{10a7}_ + +# A check not in UTF-8 mode + +/^[\p{Xwd}]+/ + ABCD1234_ + +# Some negative checks + +/^[\P{Xwd}]+/utf + !.+\x{019}\x{35a}AB + +/^[\p{^Xwd}]+/utf + !.+\x{019}\x{35a}AB + +/[\D]/B,utf,ucp + 1\x{3c8}2 + +/[\d]/B,utf,ucp + >\x{6f4}< + +/[\S]/B,utf,ucp + \x{1680}\x{6f4}\x{1680} + +/[\s]/B,utf,ucp + >\x{1680}< + +/[\W]/B,utf,ucp + A\x{1712}B + +/[\w]/B,utf,ucp + >\x{1723}< + +/\D/B,utf,ucp + 1\x{3c8}2 + +/\d/B,utf,ucp + >\x{6f4}< + +/\S/B,utf,ucp + \x{1680}\x{6f4}\x{1680} + +/\s/B,utf,ucp + >\x{1680}> + +/\W/B,utf,ucp + A\x{1712}B + +/\w/B,utf,ucp + >\x{1723}< + +/[[:alpha:]]/B,ucp + +/[[:lower:]]/B,ucp + +/[[:upper:]]/B,ucp + +/[[:alnum:]]/B,ucp + +/[[:ascii:]]/B,ucp + +/[[:cntrl:]]/B,ucp + +/[[:digit:]]/B,ucp + +/[[:graph:]]/B,ucp + +/[[:print:]]/B,ucp + +/[[:punct:]]/B,ucp + +/[[:space:]]/B,ucp + +/[[:word:]]/B,ucp + +/[[:xdigit:]]/B,ucp + +# Unicode properties for \b abd \B + +/\b...\B/utf,ucp + abc_ + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# Without PCRE_UCP, non-ASCII always fail, even if < 256 + +/\b...\B/utf + abc_ +\= Expect no match + \x{37e}abc\x{376} + \x{37e}\x{376}\x{371}\x{393}\x{394} + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties + +/\b...\B/ucp + abc_ + !\x{c0}++\x{c1}\x{c2} + !\x{c0}+++++ + +# Some of these are silly, but they check various combinations + +/[[:^alpha:][:^cntrl:]]+/B,utf,ucp + 123 + abc + +/[[:^cntrl:][:^alpha:]]+/B,utf,ucp + 123 + abc + +/[[:alpha:]]+/B,utf,ucp + abc + +/[[:^alpha:]\S]+/B,utf,ucp + 123 + abc + +/[^\d]+/B,utf,ucp + abc123 + abc\x{123} + \x{660}abc + +/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B + +/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B + +/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B + +/\p{Han}+X\p{Greek}+\x{370}/B,utf + +/\p{Xan}+!\p{Xan}+A/B + +/\p{Xsp}+!\p{Xsp}\t/B + +/\p{Xps}+!\p{Xps}\t/B + +/\p{Xwd}+!\p{Xwd}_/B + +/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp + +# These behaved oddly in Perl, so they are kept in this test + +/(\x{23a}\x{23a}\x{23a})?\1/i,utf +\= Expect no match + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/i,utf +\= Expect no match + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})?\1/i,utf + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)?\1/i,utf + ȺȺȺⱥⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/i,utf +\= Expect no match + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/i,utf +\= Expect no match + ȺȺȺⱥⱥ + +/(\x{23a}\x{23a}\x{23a})\1/i,utf + \x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65} + +/(ȺȺȺ)\1/i,utf + ȺȺȺⱥⱥⱥ + +/(\x{2c65}\x{2c65})\1/i,utf + \x{2c65}\x{2c65}\x{23a}\x{23a} + +/(ⱥⱥ)\1/i,utf + ⱥⱥȺȺ + +/(\x{23a}\x{23a}\x{23a})\1Y/i,utf + X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ + +/(\x{2c65}\x{2c65})\1Y/i,utf + X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ + +# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE + +/^[\p{Batak}]/utf + \x{1bc0} + \x{1bff} +\= Expect no match + \x{1bf4} + +/^[\p{Brahmi}]/utf + \x{11000} + \x{1106f} +\= Expect no match + \x{1104e} + +/^[\p{Mandaic}]/utf + \x{840} + \x{85e} +\= Expect no match + \x{85c} + \x{85d} + +/(\X*)(.)/s,utf + A\x{300} + +/^S(\X*)e(\X*)$/utf + Stéréo + +/^\X/utf + ́réo + +/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames + aX41z +\= Expect no match + aAz + +/\X/ + a\=ps + a\=ph + +/\Xa/ + aa\=ps + aa\=ph + +/\X{2}/ + aa\=ps + aa\=ph + +/\X+a/ + a\=ps + aa\=ps + aa\=ph + +/\X+?a/ + a\=ps + ab\=ps + aa\=ps + aa\=ph + aba\=ps + +# These Unicode 6.1.0 scripts are not known to Perl. + +/\p{Chakma}\d/utf,ucp + \x{11100}\x{1113c} + +/\p{Takri}\d/utf,ucp + \x{11680}\x{116c0} + +/^\X/utf + A\=ps + A\=ph + A\x{300}\x{301}\=ps + A\x{300}\x{301}\=ph + A\x{301}\=ps + A\x{301}\=ph + +/^\X{2,3}/utf + A\=ps + A\=ph + AA\=ps + AA\=ph + A\x{300}\x{301}\=ps + A\x{300}\x{301}\=ph + A\x{300}\x{301}A\x{300}\x{301}\=ps + A\x{300}\x{301}A\x{300}\x{301}\=ph + +/^\X{2}/utf + AA\=ps + AA\=ph + A\x{300}\x{301}A\x{300}\x{301}\=ps + A\x{300}\x{301}A\x{300}\x{301}\=ph + +/^\X+/utf + AA\=ps + AA\=ph + +/^\X+?Z/utf + AA\=ps + AA\=ph + +/A\x{3a3}B/IBi,utf + +/[\x{3a3}]/Bi,utf + +/[^\x{3a3}]/Bi,utf + +/[\x{3a3}]+/Bi,utf + +/[^\x{3a3}]+/Bi,utf + +/a*\x{3a3}/Bi,utf + +/\x{3a3}+a/Bi,utf + +/\x{3a3}*\x{3c2}/Bi,utf + +/\x{3a3}{3}/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}{2,4}/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}{2,4}?/i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}+./i,utf,aftertext + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}++./i,utf,aftertext +\= Expect no match + \x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2} + +/\x{3a3}*\x{3c2}/Bi,utf + +/[^\x{3a3}]*\x{3c2}/Bi,utf + +/[^a]*\x{3c2}/Bi,utf + +/ist/Bi,utf +\= Expect no match + ikt + +/is+t/i,utf + iSs\x{17f}t +\= Expect no match + ikt + +/is+?t/i,utf +\= Expect no match + ikt + +/is?t/i,utf +\= Expect no match + ikt + +/is{2}t/i,utf +\= Expect no match + iskt + +# This property is a PCRE special + +/^\p{Xuc}/utf + $abc + @abc + `abc + \x{1234}abc +\= Expect no match + abc + +/^\p{Xuc}+/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}+?/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}+?\*/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}++/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}{3,5}/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\p{Xuc}{3,5}?/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^[\p{Xuc}]/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^[\p{Xuc}]+/utf + $@`\x{a0}\x{1234}\x{e000}** +\= Expect no match + \x{9f} + +/^\P{Xuc}/utf + abc +\= Expect no match + $abc + @abc + `abc + \x{1234}abc + +/^[\P{Xuc}]/utf + abc +\= Expect no match + $abc + @abc + `abc + \x{1234}abc + +# Some auto-possessification tests + +/\pN+\z/B + +/\PN+\z/B + +/\pN+/B + +/\PN+/B + +/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp + +/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp + +/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp + +/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp + +/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp + +/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp + +/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp + +/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp + +/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp + +/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp + +/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp + +/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp + +/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp + +/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp + +/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp + +/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp + +/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp + +/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp + +/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp + +/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp + +# End auto-possessification tests + +/\w+/B,utf,ucp,auto_callout + abcd + +/[\p{N}]?+/B,no_auto_possess + +/[\p{L}ab]{2,3}+/B,no_auto_possess + +/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx + +/.+\X/Bsx + +/\X+$/Bmx + +/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx + +/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp + +/[RST]+/Bi,utf,ucp + +/[R-T]+/Bi,utf,ucp + +/[Q-U]+/Bi,utf,ucp + +/^s?c/Iim,utf + scat + +/\X?abc/utf,no_start_optimize + \xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06 + +/\x{100}\x{200}\K\x{300}/utf,startchar + \x{100}\x{200}\x{300} + +# Test UTF characters in a substitution + +/ábc/utf,replace=XሴZ + 123ábc123 + +/(?<=abc)(|def)/g,utf,replace=<$0> + 123abcáyzabcdef789abcሴqr + +/[A-`]/iB,utf + abcdefghijklmno + +/(?<=\K\x{17f})/g,utf,aftertext + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + +/(?<=\K\x{17f})/altglobal,utf,aftertext + \x{17f}\x{17f}\x{17f}\x{17f}\x{17f} + +"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?())(?))(?\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5" + +/$(&.+[\p{Me}].\s\xdcC*?(?())(?)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/ + +"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'" + +/[\pS#moq]/ + = + +/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark + cxxxz + +/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended + abcd + +/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended + a\x{e0}\x{101}\x{c0}\x{102} + +/((?\d)|(?\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}> + ab12cde + +/(*UCP)(*UTF)[[:>:]]X/B + +/abc/utf,replace=xyz + abc\=zero_terminate + +/a[[:punct:]b]/ucp,bincode + +/a[[:punct:]b]/utf,ucp,bincode + +/a[b[:punct:]]/utf,ucp,bincode + +/[[:^ascii:]]/utf,ucp,bincode + +/[[:^ascii:]\w]/utf,ucp,bincode + +/[\w[:^ascii:]]/utf,ucp,bincode + +/[^[:ascii:]\W]/utf,ucp,bincode + \x{de} + \x{200} +\= Expect no match + \x{300} + \x{37e} + +/[[:^ascii:]a]/utf,ucp,bincode + +/L(?#(|++