Index: lib/Locale/Po4a/Po.pm =================================================================== RCS file: /cvsroot/po4a/po4a/lib/Locale/Po4a/Po.pm,v retrieving revision 1.31 diff -a -u -p -r1.31 Po.pm --- lib/Locale/Po4a/Po.pm 5 Dec 2004 19:24:29 -0000 1.31 +++ lib/Locale/Po4a/Po.pm 11 Dec 2004 00:50:21 -0000 @@ -972,9 +972,28 @@ sub unescape_text { print STDERR "\nunescape [$text]====" if $debug{'escape'}; $text = join("",split(/\n/,$text)); $text =~ s/\\"/"/g; - $text =~ s/([^\\])\\n/$1\n/g; - $text =~ s/^\\n/\n/mg; - $text =~ s/([^\\])\\t/$1\t/g; + # unescape newlines + # NOTE on \G: + # The following regular expression introduces newlines. + # Thus, ^ doesn't match all beginnings of lines. + # \G is a zero-width assertion that matches the position + # where the previous match of s///g ended. As every + # substitution ends with a newline, it always matches a + # position just after a newline. + $text =~ s/( # $1: + (\G|[^\\]) # beginning of the line or any char + # different from '\' + (\\\\)* # followed by any even number of '\' + )\\n # and followed by an escaped newline + /$1\n/sgx; # single string, match globally, allow comments + # unescape tabulations + $text =~ s/( # $1: + (^|[^\\]) # beginning of the line or any char + # different from '\' + (\\\\)* # followed by any even number of '\' + )\\t # and followed by an escaped tabulation + /$1\t/mgx; # multiline string, match globally, allow comments + # and unescape the escape character $text =~ s/\\\\/\\/g; print STDERR ">$text<\n" if $debug{'escape'}; @@ -1004,8 +1023,14 @@ sub quote_text { return '""' unless defined($string) && length($string); print STDERR "\nquote [$string]====" if $debug{'quote'}; - $string =~ s/([^\\])\\n/$1!!DUMMYPOPM!!/gm; - $string =~ s|!!DUMMYPOPM!!|\\n\n|gm; + # break lines on newlines, if any + # see unescape_text for an explanation on \G. NOTE(review): unlike the removed lines, which kept the escaped newline and appended a real one, this replacement matches the escape outside $1 and so drops it; confirm that is intended + $string =~ s/( # $1: + (\G|[^\\]) # beginning of the line or any char + # different from '\' + (\\\\)* # followed by any even number of '\' + 
)\\n # and followed by an escaped newline + /$1\n/sgx; # single string, match globally, allow comments $string = wrap($string); my @string = split(/\n/,$string); $string = join ("\"\n\"",@string); @@ -1025,6 +1050,8 @@ sub unquote_text { $string =~ s/^""\\n//s; $string =~ s/^"(.*)"$/$1/s; $string =~ s/"\n"//gm; + # Note: an even number of '\' could precede \\n, but I could not build a + # document to test this $string =~ s/([^\\])\\n\n/$1!!DUMMYPOPM!!/gm; $string =~ s|!!DUMMYPOPM!!|\\n|gm; print STDERR ">$string<\n" if $debug{'quote'}; @@ -1032,15 +1059,20 @@ sub unquote_text { } # canonize the string: write it on only one line, changing consecutive whitespace to -# only on space. +# only one space. # Warning, it changes the string and should only be called if the string is plain text sub canonize { my $text=shift; print STDERR "\ncanonize [$text]====" if $debug{'canonize'}; $text =~ s/^ *//s; - $text =~ s/([^\\])\n/$1 /gm; - $text =~ s/ \n/ /gm; - $text =~ s/([^\\])\n/$1 /gm; + # What about lines starting with a newline? + # FIXME: are the three substitutions below still needed here? +# $text =~ s/([^\\])\n/$1 /gm; +# $text =~ s/ \n/ /gm; +# $text =~ s/([^\\])\n/$1 /gm; + # FIXME: I would rather keep only this: + # when $text eq "\n", substituting messed up the first string (header), hence the guard + $text =~ s/\n/ /gm if ($text ne "\n"); $text =~ s/([.)]) +/$1 /gm; $text =~ s/([^.)]) */$1 /gm; $text =~ s/ *$//s;