X-Git-Url: http://wagner.pp.ru/gitweb/?a=blobdiff_plain;f=Tex2fb2;h=693149fcdc401644ad80bda7cf08af16a61a47eb;hb=90f2749e4d60b2700566294a2dc75370dcf356d2;hp=2fe05809eb85e44e5eadcf554beede9f87138bb3;hpb=37b1eefaa8690036f7e07615a62556746c4e91f4;p=fiction%2FKate-the-Empress.git diff --git a/Tex2fb2 b/Tex2fb2 old mode 100644 new mode 100755 index 2fe0580..693149f --- a/Tex2fb2 +++ b/Tex2fb2 @@ -1,4 +1,5 @@ -#!/usr/bin/perl +#!/usr/bin/perl -CDS +use utf8; # char-level modes my $poetry = 0; my $verbatim = 0; @@ -21,10 +22,11 @@ while () { close F; my $header =1; +LINE: while (<>) { $environ = undef; if (/\\(begin|end){(\w+)}/) { - $environ = $2; + $environ=$2; $begin=$1 eq "begin"; if ($environ eq 'verbatim') { $verbatim=$begin; @@ -32,33 +34,56 @@ if (/\\(begin|end){(\w+)}/) { if ($begin) { pushsection("poem",undef); } else { - flushsection('poem'); + add_to_section(tag($buffer,'stanza')."\n") if $buffer; + $buffer=""; + flushsection('poem'); } $poetry = $begin; - } elsif($environ = 'document' && $begin) { + } elsif($environ eq 'document' && $begin) { $header=0; } } next LINE if $header; -if (/^$/ && $environ && $buffer) { +if ((/^$/ || $environ) && $buffer) { #output on empty line (p or stanza) depending on poetry mode - add_to_section(tag($buffer,$poetry?"stanza":"p")); + add_to_section(tag($buffer,$poetry?"stanza":"p")."\n"); $buffer=""; } next LINE if $environ; # Section headings -if (/\\(part|chapter|section|subsection|subsubsection){(.*)}/) { +if (/\\(part|chapter|section|subsection|subsubsection)\*?{(.*)}/) { + if ($buffer) { + add_to_section(tag($buffer,$poetry?"stanza":"p")); + $buffer=""; + } pushsection($1,$2); + next LINE; +} +if (/\\vspace{/) { + add_to_section(""); + next LINE; } +next LINE if /\\pagebreak\b/; #normal mode: if (!$verbatim) { #strip TeX comments s/([^\\])%.*$/$1/; +s/^%.*$//; +# strip \sloppy +s/\\sloppy\s+//g; +s/\\sloppy{}//g; +s/\\sloppy([^\w])/$1/g; +# strip extra space +s/^\s+//; +s/\s+$//; +s/(\s)\s+/$1/g; #replace TeX ligatures ~ --- << >> \% with appropriate unicode symbols s/~/\xA0/g; s/---/-/g; s/<>/»/g; +s/\\%/%/g; +s/\\dots/\x{2026}/g; } #replace ' and " with entities s/&/&/g; @@ -69,15 +94,17 @@ s/>/>/g; if ($poetry) { chomp; - $buffer.=tag($_,'v'); + s/\s*\\\\$//; + $buffer.=tag($_,'v')."\n"; } elsif ($verbatim) { add_to_section(tag(tag($_,"code"),"p")); } else { - $buffer.=$_; + $buffer.=" ".$_; } } if ($buffer) { add_to_section(tag($buffer,"p")); + $buffer=""; } while (@sections) { @@ -102,7 +129,7 @@ sub flushsection { } $content .= $str->{data}; if ($#sections >=0) { - add_to_section(tag($content,$tag)); + add_to_section(tag($content,$tag)."\n"); } else { print tag($content,$tag); } @@ -128,5 +155,6 @@ sub pushsection { sub tag { my ($content,$name) = @_; + return "" if $content =~ /^\s*$/s; return "<$name>$content"; }