3 # Script to turn PCRE2 man pages into HTML
6 # Subroutine to handle font changes and other escapes
11 $s =~ s/</</g; # Deal with < and >
13 $s =~ s"\\fI(.*?)\\f[RP]"<i>$1</i>"g;
14 $s =~ s"\\fB(.*?)\\f[RP]"<b>$1</b>"g;
16 $s =~ s/(?<=Copyright )\(c\)/©/g;
20 # Subroutine to ensure not in a paragraph
25 print TEMP "</PRE>\n" if ($inpre);
32 # Subroutine to start a new paragraph
50 while ($#ARGV >= 0 && $ARGV[0] =~ /^-/)
52 $toc = 1 if $ARGV[0] eq "-toc";
56 # Initial output to STDOUT
61 <title>$ARGV[0] specification</title>
63 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
64 <h1>$ARGV[0] man page</h1>
66 Return to the <a href="index.html">PCRE2 index page</a>.
69 This page is part of the PCRE2 HTML documentation. It was generated
70 automatically from the original man page. If there is any nonsense in it,
71 please consult the man page, in case the conversion went wrong.
75 print "<ul>\n" if ($toc);
77 open(TEMP, ">/tmp/$$") || die "Can't open /tmp/$$ for output\n";
81 # Handle lines beginning with a dot
85 # Some of the PCRE2 man pages used to contain instances of .br. However,
86 # they should have all been removed because they cause trouble in some
87 # (other) automated systems that translate man pages to HTML. Complain if
88 # we find .br or .in (another macro that is deprecated).
90 if (/^\.br/ || /^\.in/)
92 print STDERR "\n*** Deprecated macro encountered - rewrite needed\n";
93 print STDERR "*** $_\n";
94 die "*** Processing abandoned\n";
97 # Instead of .br, relevant "literal" sections are enclosed in .nf/.fi.
109 # Handling .sp is subtle. If it is inside a literal section, do nothing if
110 # the next line is a non literal text line; similarly, if not inside a
111 # literal section, do nothing if a literal follows, unless we are inside
112 # a .nf/.fi section or about to enter one. The point being that the <pre>
113 # and </pre> that delimit literal sections will do the spacing. Always skip
114 # if no previous output.
123 print TEMP "\n" if (/^[\s.]/);
127 print TEMP "<br>\n<br>\n" if ($innf || /^\.nf/ || !/^[\s.]/);
129 redo; # Now process the lookahead line we just read
132 elsif (/^\.TP/ || /^\.PP/ || /^\.P/)
136 elsif (/^\.SH\s*("?)(.*)\1/)
138 # Ignore the NAME section
146 my($title) = &do_line($2);
149 printf("<li><a name=\"TOC%d\" href=\"#SEC%d\">$title</a>\n",
151 printf TEMP ("<br><a name=\"SEC%d\" href=\"#TOC1\">$title</a><br>\n",
157 print TEMP "<br><b>\n$title\n</b><br>\n";
160 elsif (/^\.SS\s*("?)(.*)\1/)
163 my($title) = &do_line($2);
164 print TEMP "<br><b>\n$title\n</b><br>\n";
166 elsif (/^\.B\s*(.*)/)
168 &new_para() if (!$inpara);
171 print TEMP "<b>$_</b>\n";
174 elsif (/^\.I\s*(.*)/)
176 &new_para() if (!$inpara);
179 print TEMP "<i>$_</i>\n";
183 # A comment that starts "HREF" takes the next line as a name that
184 # is turned into a hyperlink, using the text given, which might be
185 # in a special font. If it ends in () or (digits) or punctuation, they
186 # aren't part of the link.
188 elsif (/^\.\\"\s*HREF/)
194 $_ =~ /^(?:<.>)?([^<(]+)(?:\(\))?(?:<\/.>)?(?:\(\d+\))?[.,;:]?$/;
195 print TEMP "<a href=\"$1.html\">$_</a>\n";
198 # A comment that starts "HTML" inserts literal HTML
200 elsif (/^\.\\"\s*HTML\s*(.*)/)
205 # A comment that starts < inserts that HTML at the end of the
206 # *next* input line - so as not to get a newline between them.
208 elsif (/^\.\\"\s*(<.*>)/)
215 print TEMP "$_$markup\n";
218 # A comment that starts JOIN joins the next two lines together, with one
219 # space between them. Then that line is processed. This is used in some
220 # displays where two lines are needed for the "man" version. JOINSH works
221 # the same, except that it assumes this is a shell command, so removes
222 # continuation backslashes.
224 elsif (/^\.\\"\s*JOIN(SH)?/)
229 $one =~ s/\s*\\e\s*$// if (defined($1));
233 redo; # Process the joined lines
236 # .EX/.EE are used in the pcre2demo page to bracket the entire program,
237 # which is unmodified except for turning backslash into "\e".
241 print TEMP "<PRE>\n";
253 # Ignore anything not recognized
258 # Line does not begin with a dot. Replace blank lines with new paragraphs
262 &end_para() if ($wrotetext);
266 # Convert font changes and output an ordinary line. Ensure that indented
267 # lines are marked as literal.
270 &new_para() if (!$inpara);
276 print TEMP "<pre>\n";
282 print TEMP "</pre>\n";
286 # Add <br> to the end of a non-literal line if we are within .nf/.fi
288 $_ .= "<br>\n" if (!$inpre && $innf);
294 # The TOC, if present, will have been written - terminate it
296 print "</ul>\n" if ($toc);
298 # Copy the remainder to the standard output
301 open(TEMP, "/tmp/$$") || die "Can't open /tmp/$$ for input\n";
303 print while (<TEMP>);
307 Return to the <a href="index.html">PCRE2 index page</a>.