3 # Convert the HTML pages of the Administrator's Reference into POD man pages.
4 # This script was written by Chas Williams and Russ Allbery, based on work by
5 # Alf Wachsmann and Elizabeth Cassell. It just does a first pass; it's
6 # expected that the results will require further hand-editing.
11 my @ignore_tags = qw(meta head comment html body);
12 my @ignore_elements = qw(script style);
14 my %INLINES = ('b' => 'B<',
27 my %CDATA = ('dd' => 1,
36 # Global state of the conversion.
48 # Output some data. Accumulate this into $result so that we can do some
49 # post-filtering at the end.
51 my ($format, @args) = @_;
52 $result .= sprintf($format, @args);
55 # Handle a single element.
58 $buffer =~ s/^\s+\n/\n/m;
59 $buffer =~ s/\n+$/\n/g;
61 if ($lasttag eq "h2") {
64 $command =~ s/\(1\)>$//;
65 } elsif ($lasttag eq "strong") {
66 if ($buffer eq 'Cautions') {
68 } elsif ($buffer eq 'Related Information') {
73 if ($buffer eq 'PURPOSE') {
74 output "=head1 NAME\n\n%s - ", $command;
76 output "=head1 %s\n\n", $buffer;
78 } elsif ($lasttag eq "h5") {
79 output "=head2 %s\n\n", $buffer;
80 } elsif ($lasttag eq "h6") {
81 output "=head3 %s\n\n", $buffer;
82 } elsif ($lasttag eq "p") {
84 output "%s\n\n", $buffer if $buffer ne "";
85 } elsif ($lasttag eq "pre") {
87 output "%s\n\n", $buffer if $buffer ne "";
88 } elsif ($lasttag eq "ul" || $lasttag eq "dl") {
90 } elsif ($lasttag eq "li") {
91 output "=item *\n\n%s\n\n", $buffer;
92 } elsif ($lasttag eq "dt") {
93 output "=item %s\n\n", $buffer;
94 } elsif ($lasttag eq "dd") {
95 output "%s\n", $buffer;
96 } elsif ($lasttag eq "/ul" || $lasttag eq "/dl") {
100 printf ">>>%s:%s<<<", $lasttag, $buffer;
107 # Handle a single tag.
114 $output = 1 if ($tag eq "h2");
115 $output = 0 if ($tag eq "hr");
117 if (defined $INLINES{$tag}) {
118 if (defined $open && $open eq $tag) {
119 printf STDERR "duplicate tag <%s>\n", $tag;
128 &text(sprintf "%s", $INLINES{$tag});
133 $cdata = 1 if defined $CDATA{$tag};
139 # Do text conversion, mostly undoing SGML escapes.
149 s/\n$//g if defined $open; # in inline seq, remove \n
150 s/L<(\S+) (\S+\(1\))>/L<${1}_${2}>/g;
151 $buffer = $buffer . $_;
155 my $file = shift @ARGV;
157 my $p = HTML::Parser->new(api_version => 3,
158 start_h => [\&tag, "self, text, tag, attr"],
159 end_h => [\&tag, "self, text, tag, attr"],
160 process_h => ["", ""],
161 comment_h => ["", ""],
162 declaration_h => ["", ""],
163 default_h => [\&text, "text"],
165 ignore_tags => \@ignore_tags,
166 ignore_elements => \@ignore_elements,
169 $p->parse_file($file) || die "Can't open file: $!\n";
171 # Fix up a few last things.
172 $result =~ s/L<(\S+) (\S+\(1\))>/L<${1}_${2}>/g;
173 $result =~ s/^(L<\S+>)\n\n(?=L<)/$1,\n/mg;
174 $result =~ s/^(\S+[^\n]+)\n +/$1\n/mg;
175 $result =~ s/^(\s+.*)B<([^>]+)>/$1$2/mg;
177 # Append a stock copyright statement.
181 IBM Corporation 2000. <http://www.ibm.com/> All Rights Reserved.
183 This documentation is covered by the IBM Public License Version 1.0. It was
184 converted from HTML to POD by software written by Chas Williams and Russ
185 Allbery, based on work by Alf Wachsmann and Elizabeth Cassell.
188 # Output the results.