-#!/usr/bin/perl
+#!/usr/bin/perl -CDS
+use utf8;
# char-level modes
my $poetry = 0;
my $verbatim = 0;
close F;
my $header =1;
# Main loop: processes the input one line at a time; each line is held in $_.
# The LINE label lets the handlers below jump straight to the next input line.
+LINE:
while (<>) {
# reset the per-line environment marker before inspecting the line
$environ = undef;
# the line contains \begin{name} or \end{name}; the handler body is not
# visible in this excerpt
if (/\\(begin|end){(\w+)}/) {
}
}
# nothing below runs for a line while $header is still set
next LINE if $header;
-if (/^$/ && $environ && $buffer) {
+if ((/^$/ || $environ) && $buffer) {
#flush the buffered text on an empty line or on an environment line:
#it is emitted as a stanza in poetry mode and as a p otherwise
add_to_section(tag($buffer,$poetry?"stanza":"p"));
$buffer="";
}
# an environment line gets no further processing
next LINE if $environ;
# Section headings: \part, \chapter, \section, \subsection, \subsubsection,
# optionally starred (e.g. \section*{...}).
# $1 is the command name; $2 is whatever the greedy (.*) captures between
# the first { after the command and the last } on the line.
-if (/\\(part|chapter|section|subsection|subsubsection){(.*)}/) {
+if (/\\(part|chapter|section|subsection|subsubsection)\*?{(.*)}/) {
# flush text that is still buffered before the new section is pushed
# NOTE(review): presumably so that it stays in the section being left -
# confirm against pushsection/add_to_section
+ if ($buffer) {
+ add_to_section(tag($buffer,$poetry?"stanza":"p"));
+ $buffer="";
+ }
pushsection($1,$2);
+ next LINE;
+}
# any \vspace{...} is turned into an <empty-line /> element; the argument
# of \vspace is not looked at
+if (/\\vspace{/) {
+ add_to_section("<empty-line />");
+ next LINE;
}
#normal mode: TeX markup is cleaned up only when not inside verbatim text
if (!$verbatim) {
#strip TeX comments
# a % that follows any character other than a backslash starts a comment;
# the preceding character is kept and the trailing newline stays in place
s/([^\\])%.*$/$1/;
# a % in the very first column has no preceding character, so the
# substitution above cannot match it; handle it separately
+s/^%.*$//;
+# strip \sloppy
# three forms: followed by whitespace (removed together with it), written
# as \sloppy{}, or followed by a non-word character (which is kept)
+s/\\sloppy\s+//g;
+s/\\sloppy{}//g;
+s/\\sloppy([^\w])/$1/g;
#replace TeX ligatures ~ --- << >> \% with appropriate unicode symbols
# ~ becomes U+00A0 (no-break space)
s/~/\xA0/g;
# three hyphens collapse into a single hyphen
s/---/-/g;
s/<</«/g;
s/>>/»/g;
# \% is unescaped only now, after comment stripping, so the resulting
# bare % is not mistaken for the start of a comment
+s/\\%/%/g;
# \dots becomes U+2026 (horizontal ellipsis)
+s/\\dots/\x{2026}/g;
}
#replace special characters with entities
# & is escaped first, so that the & of any entity emitted afterwards is not
# escaped a second time; replacing & with itself would leave raw ampersands
# in the generated XML
s/&/&amp;/g;
}
# flush whatever text is still buffered; here it is always emitted as a p
# element, $poetry is not consulted
if ($buffer) {
add_to_section(tag($buffer,"p"));
+ $buffer="";
}
while (@sections) {
# tag($content, $name)
# Wraps $content in an element called $name.
#   $content - text to wrap; inserted as is, no escaping is done here
#   $name    - element name used for both the opening and the closing tag
# Returns "<$name>$content</$name>", or the empty string when $content is
# nothing but a single newline, so that no empty element is produced.
sub tag {
my ($content,$name) = @_;
+ return "" if ($content eq "\n");
return "<$name>$content</$name>";
}