diff -urN po4a_orig/lib/Locale/Po4a/Html.pm po4a/lib/Locale/Po4a/Html.pm --- po4a_orig/lib/Locale/Po4a/Html.pm 2004-08-27 11:31:53.000000000 +0100 +++ po4a/lib/Locale/Po4a/Html.pm 2004-11-28 15:21:01.000000000 +0000 @@ -80,11 +80,16 @@ my ($self,$filename)=@_; my $stream = HTML::TokeParser->new($filename) || die "Couldn't read HTML file $filename : $!"; + + $stream->unbroken_text( [1] ); my @type=(); NEXT : while (my $token = $stream->get_token) { if($token->[0] eq 'T') { - my $text = trim($token->[1]); + my $text = $token->[1]; + my ($pre_spaces) = ($text =~ /^(\s*)/); + my ($post_spaces) = ($text =~ /(\s*)$/); + $text = trim($text); if (notranslation($text) == 1) { $self->pushline( get_tag( $token ) ); next NEXT; @@ -97,14 +102,38 @@ # $encoded = HTML::Entities::encode($a); # $decoded = HTML::Entities::decode($a); #print STDERR $token->[0]; - $self->pushline( " ".$self->translate($text, + $self->pushline( $pre_spaces . $self->translate($text, "FIXME:0", (scalar @type ? $type[scalar @type-1]: "NOTYPE") - )." " ); + ) . $post_spaces, + 'wrap' => 1 + ); next NEXT; } elsif ($token->[0] eq 'S') { push @type,$token->[1]; - $self->pushline( get_tag( $token ) ); + my $text = get_tag( $token ); + if ( $token->[1] eq 'img' ) { + my %attr = %{$token->[2]}; + for my $a (qw/title alt/) { + my $content = $attr{$a}; + if (defined $content) { + $content = trim($content); + my $translated = $self->translate( + $content, + "FIXME:0", + "img_$a" + ); + $attr{$a} = $translated; + } + } + my ($closing) = ( $text =~ /(\s*\/?>)/ ); + # reconstruct the tag from scratch + delete $attr{'/'}; # Parser thinks closing / in XHTML is an attribute + $text = "pushline( $text ); } elsif ($token->[0] eq 'E') { pop @type; $self->pushline( get_tag( $token ) ); @@ -136,11 +165,12 @@ sub trim { my $s=shift; - $s =~ s/\n//g; # remove \n in text - $s =~ s/\r//g; # remove \r in text - $s =~ s/\t//g; # remove tabulations - $s =~ s/^\s+//; # remove leading spaces - $s =~ s/\s+$//; # remove trailing spaces + $s =~ s/\n/ /g; # remove \n in text + $s =~ s/\r/ /g; # remove \r in text + $s =~ s/\t/ /g; # remove tabulations + $s =~ s/\s+/ /g; # remove multiple spaces + $s =~ s/^\s*//g; # remove leading spaces + $s =~ s/\s*$//g; # remove trailing spaces return $s; } @@ -163,6 +193,11 @@ # don't translate entries composed of one entity return 1 if ($s =~ /^&[^;]*;$/); +# don't translate entries with no letters +# (happens with e.g. Hello, world ) +# ^^ +# ", " doesn't need translation + return 1 unless $s =~ /\w/; return 0; } diff -urN po4a_orig/t/22-html.t po4a/t/22-html.t --- po4a_orig/t/22-html.t 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/22-html.t 2004-11-28 01:43:34.000000000 +0000 @@ -0,0 +1,65 @@ +#! /usr/bin/perl +# HTML module tester. + +######################### + +use strict; +use warnings; + +my @tests; + +mkdir "t/tmp" unless -e "t/tmp"; + +my $diff_po_flags = " -I '^# SOME' -I '^# Test' ". + "-I '^\"POT-Creation-Date: ' -I '^\"Content-Transfer-Encoding:'"; + +push @tests, { + 'run' => 'perl ../../po4a-gettextize -f html -m ../data-22/html.html -p html.po', + 'test'=> "diff -u $diff_po_flags ../data-22/html.po html.po", + 'doc' => 'General', +}, { + 'run' => 'perl ../../po4a-normalize -f html ../data-22/spaces.html', + 'test'=> "diff -u $diff_po_flags ../data-22/spaces.po po4a-normalize.po". + "&& diff -u $diff_po_flags ../data-22/spaces_out.html po4a-normalize.output", + 'doc' => 'Spaces', +}, { + 'run' => 'perl ../../po4a-gettextize -f html -m ../data-22/attribute.html -p attribute.po;'. + 'sed "s/msgstr \"\"/msgstr \"baz\"/" attribute.po > attribute2.po;'. + 'perl ../../po4a-translate -f html -m ../data-22/attribute.html -p attribute2.po -l attribute.html' + , + 'test'=> "diff -u $diff_po_flags ../data-22/attribute_out.html attribute.html", + 'doc' => 'Attribute replacement' +}; + +use Test::More tests => 6; + +chdir "t/tmp" || die "Can't chdir to my test directory"; + +foreach my $test ( @tests ) { + my ($val,$name); + + my $cmd=$test->{'run'}; + $val=system($cmd); + + $name=$test->{'doc'}.' runs'; + ok($val == 0,$name); + diag($test->{'run'}) unless ($val == 0); + + SKIP: { + skip ("Command didn't run, can't test the validity of its return",1) + if $val; + $val=system($test->{'test'}); + $name=$test->{'doc'}.' returns what is expected'; + ok($val == 0,$name); + unless ($val == 0) { + diag ("Failed (retval=$val) on:"); + diag ($test->{'test'}); + diag ("Was created with:"); + diag ($test->{'run'}); + } + } +} + +chdir "../.." || die "Can't chdir back to my root"; + +0; diff -urN po4a_orig/t/data-22/attribute.html po4a/t/data-22/attribute.html --- po4a_orig/t/data-22/attribute.html 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/data-22/attribute.html 2004-11-28 01:38:48.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -urN po4a_orig/t/data-22/attribute_out.html po4a/t/data-22/attribute_out.html --- po4a_orig/t/data-22/attribute_out.html 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/data-22/attribute_out.html 2004-11-28 01:38:38.000000000 +0000 @@ -0,0 +1,4 @@ + + + + diff -urN po4a_orig/t/data-22/html.html po4a/t/data-22/html.html --- po4a_orig/t/data-22/html.html 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/data-22/html.html 2004-11-27 02:21:11.000000000 +0000 @@ -0,0 +1,26 @@ + + + + + + + Title string + + + +
+

Header

+ Some text + +

Strongnot strong

+

+ My link, + link on next line, line on same line. +

+ + picture + + + diff -urN po4a_orig/t/data-22/html.po po4a/t/data-22/html.po --- po4a_orig/t/data-22/html.po 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/data-22/html.po 2004-11-28 15:24:26.000000000 +0000 @@ -0,0 +1,75 @@ +# SOME DESCRIPTIVE TITLE +# Copyright (C) YEAR Free Software Foundation, Inc. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2004-11-28 15:24+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: ENCODING" + +# type: title +#: FIXME:0 +#, no-wrap +msgid "Title string" +msgstr "" + +# type: h1 +#: FIXME:0 +#, no-wrap +msgid "Header" +msgstr "" + +# type: img_title +#: FIXME:0 +#, no-wrap +msgid "My picture" +msgstr "" + +# type: img_alt +#: FIXME:0 +#, no-wrap +msgid "Some text" +msgstr "" + +# type: strong +#: FIXME:0 +#, no-wrap +msgid "Strong" +msgstr "" + +# type: p +#: FIXME:0 +#, no-wrap +msgid "not strong" +msgstr "" + +# type: a +#: FIXME:0 +#, no-wrap +msgid "My link" +msgstr "" + +# type: a +#: FIXME:0 +#, no-wrap +msgid "link on next line" +msgstr "" + +# type: a +#: FIXME:0 +#, no-wrap +msgid "line on same line" +msgstr "" + +# type: img_alt +#: FIXME:0 +#, no-wrap +msgid "picture" +msgstr "" diff -urN po4a_orig/t/data-22/spaces.html po4a/t/data-22/spaces.html --- po4a_orig/t/data-22/spaces.html 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/data-22/spaces.html 2004-11-28 00:38:55.000000000 +0000 @@ -0,0 +1,13 @@ +Title string + +

Header1

+

Header2

+

Strong1not strong 1

+

Strong2 not strong 2

+

Strong3not strong 2

+first line +second lineglued spaced +link on next line, line on same line. + + + diff -urN po4a_orig/t/data-22/spaces.po po4a/t/data-22/spaces.po --- po4a_orig/t/data-22/spaces.po 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/data-22/spaces.po 2004-11-28 15:25:37.000000000 +0000 @@ -0,0 +1,99 @@ +# SOME DESCRIPTIVE TITLE +# Copyright (C) YEAR Free Software Foundation, Inc. +# FIRST AUTHOR , YEAR. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"POT-Creation-Date: 2004-11-28 15:25+0000\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=CHARSET\n" +"Content-Transfer-Encoding: ENCODING" + +# type: title +#: FIXME:0 +#, no-wrap +msgid "Title string" +msgstr "" + +# type: h1 +#: FIXME:0 +#, no-wrap +msgid "Header1" +msgstr "" + +# type: h1 +#: FIXME:0 +#, no-wrap +msgid "Header2" +msgstr "" + +# type: strong +#: FIXME:0 +#, no-wrap +msgid "Strong1" +msgstr "" + +# type: p +#: FIXME:0 +#, no-wrap +msgid "not strong 1" +msgstr "" + +# type: strong +#: FIXME:0 +#, no-wrap +msgid "Strong2" +msgstr "" + +# type: p +#: FIXME:0 FIXME:0 +#, no-wrap +msgid "not strong 2" +msgstr "" + +# type: strong +#: FIXME:0 +#, no-wrap +msgid "Strong3" +msgstr "" + +# type: b +#: FIXME:0 +#, no-wrap +msgid "first line" +msgstr "" + +# type: b +#: FIXME:0 +#, no-wrap +msgid "second line" +msgstr "" + +# type: b +#: FIXME:0 +#, no-wrap +msgid "glued" +msgstr "" + +# type: b +#: FIXME:0 +#, no-wrap +msgid "spaced" +msgstr "" + +# type: a +#: FIXME:0 +#, no-wrap +msgid "link on next line" +msgstr "" + +# type: a +#: FIXME:0 +#, no-wrap +msgid "line on same line" +msgstr "" diff -urN po4a_orig/t/data-22/spaces_out.html po4a/t/data-22/spaces_out.html --- po4a_orig/t/data-22/spaces_out.html 1970-01-01 01:00:00.000000000 +0100 +++ po4a/t/data-22/spaces_out.html 2004-11-28 15:25:37.000000000 +0000 @@ -0,0 +1,13 @@ +Title string + +

Header1

+

Header2

+

Strong1not strong 1

+

Strong2 not strong 2

+

Strong3not strong 2

+first line +second lineglued spaced +link on next line, line on same line. + + +