diff -urN po4a_orig/lib/Locale/Po4a/Html.pm po4a/lib/Locale/Po4a/Html.pm
--- po4a_orig/lib/Locale/Po4a/Html.pm 2004-08-27 11:31:53.000000000 +0100
+++ po4a/lib/Locale/Po4a/Html.pm 2004-11-28 15:21:01.000000000 +0000
@@ -80,11 +80,16 @@
my ($self,$filename)=@_;
my $stream = HTML::TokeParser->new($filename)
|| die "Couldn't read HTML file $filename : $!";
+
+ $stream->unbroken_text( [1] );
my @type=();
NEXT : while (my $token = $stream->get_token) {
if($token->[0] eq 'T') {
- my $text = trim($token->[1]);
+ my $text = $token->[1];
+ my ($pre_spaces) = ($text =~ /^(\s*)/);
+ my ($post_spaces) = ($text =~ /(\s*)$/);
+ $text = trim($text);
if (notranslation($text) == 1) {
$self->pushline( get_tag( $token ) );
next NEXT;
@@ -97,14 +102,38 @@
# $encoded = HTML::Entities::encode($a);
# $decoded = HTML::Entities::decode($a);
#print STDERR $token->[0];
- $self->pushline( " ".$self->translate($text,
+ $self->pushline( $pre_spaces . $self->translate($text,
"FIXME:0",
(scalar @type ? $type[scalar @type-1]: "NOTYPE")
- )." " );
+ ) . $post_spaces,
+ 'wrap' => 1
+ );
next NEXT;
} elsif ($token->[0] eq 'S') {
push @type,$token->[1];
- $self->pushline( get_tag( $token ) );
+ my $text = get_tag( $token );
+ if ( $token->[1] eq 'img' ) {
+ my %attr = %{$token->[2]};
+ for my $a (qw/title alt/) {
+ my $content = $attr{$a};
+ if (defined $content) {
+ $content = trim($content);
+ my $translated = $self->translate(
+ $content,
+ "FIXME:0",
+ "img_$a"
+ );
+ $attr{$a} = $translated;
+ }
+ }
+ my ($closing) = ( $text =~ /(\s*\/?>)/ );
+ # reconstruct the tag from scratch
+ delete $attr{'/'}; # Parser thinks closing / in XHTML is an attribute
+ $text = "pushline( $text );
} elsif ($token->[0] eq 'E') {
pop @type;
$self->pushline( get_tag( $token ) );
@@ -136,11 +165,12 @@
sub trim {
my $s=shift;
- $s =~ s/\n//g; # remove \n in text
- $s =~ s/\r//g; # remove \r in text
- $s =~ s/\t//g; # remove tabulations
- $s =~ s/^\s+//; # remove leading spaces
- $s =~ s/\s+$//; # remove trailing spaces
+ $s =~ s/\n/ /g; # replace each \n with a space so adjacent words stay separated
+ $s =~ s/\r/ /g; # replace each \r with a space
+ $s =~ s/\t/ /g; # replace each tabulation with a space
+ $s =~ s/\s+/ /g; # collapse every run of whitespace into a single space
+ $s =~ s/^\s*//g; # remove leading spaces
+ $s =~ s/\s*$//g; # remove trailing spaces
return $s;
}
@@ -163,6 +193,11 @@
# don't translate entries composed of one entity
return 1 if ($s =~ /^&[^;]*;$/);
+# don't translate entries with no letters
+# (happens with e.g. Hello, world )
+# ^^
+# ", " doesn't need translation
+ return 1 unless $s =~ /\w/;
return 0;
}
diff -urN po4a_orig/t/22-html.t po4a/t/22-html.t
--- po4a_orig/t/22-html.t 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/22-html.t 2004-11-28 01:43:34.000000000 +0000
@@ -0,0 +1,65 @@
+#! /usr/bin/perl
+# HTML module tester.
+# Each entry of @tests is executed from t/tmp: 'run' must exit 0, then 'test' must exit 0.
+#########################
+
+use strict;
+use warnings;
+
+my @tests;
+
+mkdir "t/tmp" unless -e "t/tmp";
+
+my $diff_po_flags = " -I '^# SOME' -I '^# Test' ".
+    "-I '^\"POT-Creation-Date: ' -I '^\"Content-Transfer-Encoding:'";
+
+push @tests, {
+    'run' => 'perl ../../po4a-gettextize -f html -m ../data-22/html.html -p html.po',
+    'test'=> "diff -u $diff_po_flags ../data-22/html.po html.po",
+    'doc' => 'General',
+}, {
+    'run' => 'perl ../../po4a-normalize -f html ../data-22/spaces.html',
+    'test'=> "diff -u $diff_po_flags ../data-22/spaces.po po4a-normalize.po".
+        "&& diff -u $diff_po_flags ../data-22/spaces_out.html po4a-normalize.output",
+    'doc' => 'Spaces',
+}, {
+    'run' => 'perl ../../po4a-gettextize -f html -m ../data-22/attribute.html -p attribute.po;'.
+        'sed "s/msgstr \"\"/msgstr \"baz\"/" attribute.po > attribute2.po;'.
+        'perl ../../po4a-translate -f html -m ../data-22/attribute.html -p attribute2.po -l attribute.html'
+    ,
+    'test'=> "diff -u $diff_po_flags ../data-22/attribute_out.html attribute.html",
+    'doc' => 'Attribute replacement'
+};
+
+use Test::More tests => 6;
+# 'or' rather than '||': '||' binds to the path string, so die could never run.
+chdir "t/tmp" or die "Can't chdir to my test directory: $!";
+
+foreach my $test ( @tests ) {
+    my ($val,$name);
+
+    my $cmd=$test->{'run'};
+    $val=system($cmd);
+
+    $name=$test->{'doc'}.' runs';
+    ok($val == 0,$name);
+    diag($test->{'run'}) unless ($val == 0);
+
+    SKIP: {
+        skip ("Command didn't run, can't test the validity of its return",1)
+            if $val;
+        $val=system($test->{'test'});
+        $name=$test->{'doc'}.' returns what is expected';
+        ok($val == 0,$name);
+        unless ($val == 0) {
+            diag ("Failed (retval=$val) on:");
+            diag ($test->{'test'});
+            diag ("Was created with:");
+            diag ($test->{'run'});
+        }
+    }
+}
+
+chdir "../.." or die "Can't chdir back to my root: $!";
+
+0;
diff -urN po4a_orig/t/data-22/attribute.html po4a/t/data-22/attribute.html
--- po4a_orig/t/data-22/attribute.html 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/data-22/attribute.html 2004-11-28 01:38:48.000000000 +0000
@@ -0,0 +1,4 @@
+
+
+
+
diff -urN po4a_orig/t/data-22/attribute_out.html po4a/t/data-22/attribute_out.html
--- po4a_orig/t/data-22/attribute_out.html 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/data-22/attribute_out.html 2004-11-28 01:38:38.000000000 +0000
@@ -0,0 +1,4 @@
+
+
+
+
diff -urN po4a_orig/t/data-22/html.html po4a/t/data-22/html.html
--- po4a_orig/t/data-22/html.html 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/data-22/html.html 2004-11-27 02:21:11.000000000 +0000
@@ -0,0 +1,26 @@
+
+
+
+
+
+
+ Title string
+
+
+
+
+
Header
+
+
+
Strongnot strong
+
+ My link,
+ link on next line, line on same line.
+
+
+
+
+
+
diff -urN po4a_orig/t/data-22/html.po po4a/t/data-22/html.po
--- po4a_orig/t/data-22/html.po 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/data-22/html.po 2004-11-28 15:24:26.000000000 +0000
@@ -0,0 +1,75 @@
+# SOME DESCRIPTIVE TITLE
+# Copyright (C) YEAR Free Software Foundation, Inc.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2004-11-28 15:24+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: ENCODING"
+
+# type: title
+#: FIXME:0
+#, no-wrap
+msgid "Title string"
+msgstr ""
+
+# type: h1
+#: FIXME:0
+#, no-wrap
+msgid "Header"
+msgstr ""
+
+# type: img_title
+#: FIXME:0
+#, no-wrap
+msgid "My picture"
+msgstr ""
+
+# type: img_alt
+#: FIXME:0
+#, no-wrap
+msgid "Some text"
+msgstr ""
+
+# type: strong
+#: FIXME:0
+#, no-wrap
+msgid "Strong"
+msgstr ""
+
+# type: p
+#: FIXME:0
+#, no-wrap
+msgid "not strong"
+msgstr ""
+
+# type: a
+#: FIXME:0
+#, no-wrap
+msgid "My link"
+msgstr ""
+
+# type: a
+#: FIXME:0
+#, no-wrap
+msgid "link on next line"
+msgstr ""
+
+# type: a
+#: FIXME:0
+#, no-wrap
+msgid "line on same line"
+msgstr ""
+
+# type: img_alt
+#: FIXME:0
+#, no-wrap
+msgid "picture"
+msgstr ""
diff -urN po4a_orig/t/data-22/spaces.html po4a/t/data-22/spaces.html
--- po4a_orig/t/data-22/spaces.html 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/data-22/spaces.html 2004-11-28 00:38:55.000000000 +0000
@@ -0,0 +1,13 @@
+Title string
+
+ Header1
+ Header2
+ Strong1not strong 1
+ Strong2 not strong 2
+ Strong3not strong 2
+first line
+second lineglued spaced
+link on next line, line on same line.
+
+
+
diff -urN po4a_orig/t/data-22/spaces.po po4a/t/data-22/spaces.po
--- po4a_orig/t/data-22/spaces.po 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/data-22/spaces.po 2004-11-28 15:25:37.000000000 +0000
@@ -0,0 +1,99 @@
+# SOME DESCRIPTIVE TITLE
+# Copyright (C) YEAR Free Software Foundation, Inc.
+# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: PACKAGE VERSION\n"
+"POT-Creation-Date: 2004-11-28 15:25+0000\n"
+"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
+"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
+"Language-Team: LANGUAGE <LL@li.org>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=CHARSET\n"
+"Content-Transfer-Encoding: ENCODING"
+
+# type: title
+#: FIXME:0
+#, no-wrap
+msgid "Title string"
+msgstr ""
+
+# type: h1
+#: FIXME:0
+#, no-wrap
+msgid "Header1"
+msgstr ""
+
+# type: h1
+#: FIXME:0
+#, no-wrap
+msgid "Header2"
+msgstr ""
+
+# type: strong
+#: FIXME:0
+#, no-wrap
+msgid "Strong1"
+msgstr ""
+
+# type: p
+#: FIXME:0
+#, no-wrap
+msgid "not strong 1"
+msgstr ""
+
+# type: strong
+#: FIXME:0
+#, no-wrap
+msgid "Strong2"
+msgstr ""
+
+# type: p
+#: FIXME:0 FIXME:0
+#, no-wrap
+msgid "not strong 2"
+msgstr ""
+
+# type: strong
+#: FIXME:0
+#, no-wrap
+msgid "Strong3"
+msgstr ""
+
+# type: b
+#: FIXME:0
+#, no-wrap
+msgid "first line"
+msgstr ""
+
+# type: b
+#: FIXME:0
+#, no-wrap
+msgid "second line"
+msgstr ""
+
+# type: b
+#: FIXME:0
+#, no-wrap
+msgid "glued"
+msgstr ""
+
+# type: b
+#: FIXME:0
+#, no-wrap
+msgid "spaced"
+msgstr ""
+
+# type: a
+#: FIXME:0
+#, no-wrap
+msgid "link on next line"
+msgstr ""
+
+# type: a
+#: FIXME:0
+#, no-wrap
+msgid "line on same line"
+msgstr ""
diff -urN po4a_orig/t/data-22/spaces_out.html po4a/t/data-22/spaces_out.html
--- po4a_orig/t/data-22/spaces_out.html 1970-01-01 01:00:00.000000000 +0100
+++ po4a/t/data-22/spaces_out.html 2004-11-28 15:25:37.000000000 +0000
@@ -0,0 +1,13 @@
+Title string
+
+ Header1
+ Header2
+ Strong1not strong 1
+ Strong2 not strong 2
+ Strong3not strong 2
+first line
+second lineglued spaced
+link on next line, line on same line.
+
+
+