1 # This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF)
2 # features that are not compatible with the 8-bit library, or which give
3 # different output in 16-bit or 32-bit mode. The output for the two widths is
4 # different, so they have separate output files.
7 #newline_default LF ANY ANYCRLF
14 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
15 \) )* # optional leading comment
17 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
18 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
20 " (?: # opening quote...
21 [^\\\x80-\xff\n\015"] # Anything except backslash and quote
23 \\ [^\x80-\xff] # Escaped something (something != CR)
27 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
28 \) )* \. (?: [\040\t] | \(
29 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
31 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
32 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
34 " (?: # opening quote...
35 [^\\\x80-\xff\n\015"] # Anything except backslash and quote
37 \\ [^\x80-\xff] # Escaped something (something != CR)
39 ) )* # further okay, if led by a period
41 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
42 \) )* @ (?: [\040\t] | \(
43 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
45 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
46 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
48 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
53 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
54 \) )* \. # if led by a period...
56 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
58 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
59 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
61 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
68 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
69 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
71 " (?: # opening quote...
72 [^\\\x80-\xff\n\015"] # Anything except backslash and quote
74 \\ [^\x80-\xff] # Escaped something (something != CR)
76 ) # one word, optionally followed by....
78 [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or...
80 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
81 \) | # comments, or...
83 " (?: # opening quote...
84 [^\\\x80-\xff\n\015"] # Anything except backslash and quote
86 \\ [^\x80-\xff] # Escaped something (something != CR)
91 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
93 (?: @ (?: [\040\t] | \(
94 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
96 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
97 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
99 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
101 ) # initial subdomain
104 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
105 \) )* \. # if led by a period...
107 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
109 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
110 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
112 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
117 (?: (?: [\040\t] | \(
118 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
119 \) )* , (?: [\040\t] | \(
120 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
121 \) )* @ (?: [\040\t] | \(
122 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
124 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
125 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
127 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
129 ) # initial subdomain
132 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
133 \) )* \. # if led by a period...
135 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
137 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
138 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
140 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
144 )* # further okay, if led by comma
147 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
148 \) )* )? # optional route
150 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
151 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
153 " (?: # opening quote...
154 [^\\\x80-\xff\n\015"] # Anything except backslash and quote
156 \\ [^\x80-\xff] # Escaped something (something != CR)
159 (?: (?: [\040\t] | \(
160 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
161 \) )* \. (?: [\040\t] | \(
162 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
164 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
165 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
167 " (?: # opening quote...
168 [^\\\x80-\xff\n\015"] # Anything except backslash and quote
170 \\ [^\x80-\xff] # Escaped something (something != CR)
172 ) )* # further okay, if led by a period
174 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
175 \) )* @ (?: [\040\t] | \(
176 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
178 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
179 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
181 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
183 ) # initial subdomain
186 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
187 \) )* \. # if led by a period...
189 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
191 [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters...
192 (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
194 (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff
200 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
204 (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )*
205 \) )* # optional trailing comment
219 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
220 \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
223 \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
224 \x{3001}\x{2fff}\x{200a}\xa0\x{2000}
227 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
228 \x{2000}\x{200a}\x{1fff}\x{200b}
229 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
230 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
233 \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
234 \x{2000}\x{200a}\x{1fff}\x{200b}
235 \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
236 \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001}
239 \x{2027}\x{2030}\x{2028}\x{2029}
240 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
243 \x{2027}\x{2030}\x{2028}\x{2029}
244 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
247 \x{2028}\x{2029}\x{2027}\x{2030}
248 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
251 \x{2028}\x{2029}\x{2027}\x{2030}
252 \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86
255 \x{2027}\x{2030}\x{2028}\x{2029}
256 \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d
258 /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I
259 \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}
261 /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B
263 /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi
265 /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B
267 /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi
269 /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark
272 /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark
275 /\u0100/B,alt_bsux,allow_empty_class,match_unset_backref
277 /[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref
279 /\ud800/B,alt_bsux,allow_empty_class,match_unset_backref
291 \x{ffff}\x{ffff}\x{ffff}
296 /[^\x00-a]{12,}[^b-\xff]*/B
298 /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B
300 /a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B
302 /^[\x{1234}\x{4321}]{2,4}?/
303 \x{1234}\x{1234}\x{1234}
305 # Check maximum non-UTF character size for the 16-bit library.
314 # Check maximum character size for the 32-bit library. These will all give
315 # errors in the 16-bit library.
335 /\x{7fffffff}\x{7fffffff}/I
337 /\x{80000000}\x{80000000}/I
339 /\x{ffffffff}\x{ffffffff}/I
344 \x{400000}\x{400001}\x{400002}\x{400003}
346 /\x{400000}\x{800000}/IBi
348 # Check character ranges
354 /(*THEN:\[A]{65501})/expand
356 # We can use pcre2test's utf8_input modifier to create wide pattern characters,
357 # even though this test is run when UTF is not supported.
359 /abý¿¿¿¿¿z/utf8_input
363 /abÿý¿¿¿¿¿z/utf8_input