[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: Bug#845297: converting translation metadata



On Wed, May 16, 2018 at 12:05:17AM +0100, Steve McIntyre wrote:
>I'm writing a script switch_to_git_translations.pl to walk through all
>the wml files and switch from cvs revision numbers to git revision
>numbers. I'm doing consistency checks as I slowly develop the
>script, for the sake of paranoia :-).
>
>I've found that there are some files that appear to have broken
>translation-check metadata. I've mentioned some in IRC, but for
>completeness we have a few more listed here.
>
>Laura and Thomas have already fixed some of these (as tagged
>here). I'm about to fix the rest myself and push the fixes.

These are all fixed now, thanks Thomas!

OK, so one more thing. In the wiki page about this area:

  "Note that there can be multiple levels of translation-check headers
   chaining through different files, for example:"

Thinking about this, that's actually irrelevant to this work. I *can*
track through a chain of dependencies here (tedious, but possible -
may potentially involve checking out different versions of files from
git), but I really don't think we need to at all. Feel free to try and
convince me otherwise!

Initial script attached.

Checking the git diff output after running the script without
--dry-run (i.e. making changes), I can see a few more files which I
think look bogus for their translation-check metadata. They've all got
multiple translation-check lines in the header, with (maybe?)
conflicting data:

tack:~/debian/www/test_webwml_cvs2git$ git diff --stat | grep -v 2
 french/consultants/xpile.wml                          |   4 ++--
 japanese/international/Vietnamese.wml                 |   4 ++--
 russian/consultants/xpile.wml                         |   4 ++--
 russian/international/Croatian/index.wml              |   4 ++--
 russian/legal/anssi.wml                               |   4 ++--
 43566 files changed, 43578 insertions(+), 43578 deletions(-)

For the sake of 5 files, I'm tempted to (again) just fix up the
metadata in CVS to remove any ambiguity here.

-- 
Steve McIntyre, Cambridge, UK.                                steve@einval.com
Who needs computer imagery when you've got Brian Blessed?
#!/usr/bin/perl

# This script walks the webwml tree to look for translated files. It
# looks for the wml::debian::translation-check header to see if a file
# is a translation of an original, then checks for the revision
# status of the master document.
#
# Part of the effort to switch from CVS to Git
#
# Originally written 2018 by Steve McIntyre <93sam@debian.org>
# © Copyright 2018 Software in the public interest, Inc.
# This program is released under the GNU General Public License, v2.

use strict;
use warnings;

use Getopt::Long;
use Data::Dumper;
use File::Spec::Functions;
use File::Find;
use lib ($0 =~ m|(.*)/|, $1 or ".") ."/Perl";
use Webwml::TransCheck;

my $help = 0;
my $verbose = 0;
my $dry_run = 0;
my $revs_file = "";
my %rev_map;

# Print the command-line help text to the currently selected output
# handle and terminate the program with exit status 0.
sub usage {
    my $help_text = <<'EOT';
Usage: switch_to_git_translations.pl [options]
Options:
  --help         display this message
  --verbose      run verbosely
  --dry-run      do not modify translation-check headers
  --revisions=REVISIONS  location of the cvs2git revisions map file

Find all wml files under the current directory, updating revisions for
translations.
EOT
    print $help_text;
    exit 0;
}

# Emit a "very verbose" message: the first argument is printed to STDOUT,
# followed by a newline, but only when the file-level $verbose setting
# is 2 or higher.
sub vvlog {
    print STDOUT $_[0] . "\n" if ($verbose >= 2);
}

# Emit a "verbose" message: the first argument is printed to STDOUT,
# followed by a newline, but only when the file-level $verbose setting
# is 1 or higher.
sub vlog {
    print STDOUT $_[0] . "\n" if ($verbose >= 1);
}

# Parse the revisions file for use, building a hash of the git and cvs
# versions for each file.
#
# Argument: the path of the revisions map file. Every line must consist
# of three single-space-separated fields:
#     <file> <cvs version (digits and dots)> <git hash (hex digits)>
# Each line is recorded in the file-level hash as
#     $rev_map{<file>}{<cvs version>}{"git_hash"} = <git hash>
#
# Dies if the file cannot be opened or if any line does not match the
# expected format.
sub parse_revisions
{
    my $revs_file = shift;
    # Lexical handle + three-argument open; the message now really
    # interpolates the file name (the "$" used to be escaped by mistake).
    open(my $in, "<", $revs_file)
        or die "Can't open revisions file \"$revs_file\" for reading: $!\n";
    while (my $line = <$in>) {
        chomp $line;
        # A failed match yields an empty list, so the list assignment
        # evaluates to 0 in scalar context and we bail out.
        my ($file, $cvs_ver, $git_hash) =
            $line =~ m,^(\S+) ([.\d]+) ([[:xdigit:]]+)$,
            or die "Failed to parse revisions file at line $.\n";
        $rev_map{$file}{$cvs_ver}{"git_hash"} = $git_hash;
#       $rev_map{$file}{$git_hash}{"cvs_ver"} = $cvs_ver;
        vvlog("Found file $file with CVS version $cvs_ver in git hash $git_hash");
    }
    close $in;
    vlog("Parsed revisions file \"$revs_file\", found revisions for " . scalar(keys %rev_map) . " files");
}

# Return a list of the plain files found (recursively) under a directory
# whose names end in ".<ext>".
#
# Arguments:
#   $dir - directory to search (required, dies if missing/false)
#   $ext - file extension without the leading dot (required, dies if
#          missing/false); it is matched literally
#
# Returned names are the paths reported by File::Find, with a leading
# "./" removed so that a search rooted at "." yields paths relative to
# the current directory.
sub find_files_ext
{
    my $dir = shift or die('Internal error: No dir specified');
    my $ext = shift or die('Internal error: No ext specified');

    my @files;
    find(
        sub {
            # \Q..\E: treat the extension as literal text, not a pattern.
            return unless -f $_ and m/\.\Q$ext\E$/;
            my $filename = $File::Find::name;
            # Only strip a *leading* "./"; an unanchored substitution
            # would mangle paths such as "dir/foo./bar.wml".
            $filename =~ s,^\./,,;
            push @files, $filename;
        },
        $dir
    );
    return @files;
}

# Update the translation-check metadata header in a wml file.
#
# Arguments:
#   $file     - path of the wml file to rewrite in place
#   $revision - the revision string currently in the translation= field
#   $hash     - the value to put in its place
#
# On every line starting with "#use wml::debian::translation-check", the
# first translation=<revision> (with or without surrounding double
# quotes) has its revision replaced by $hash. All other content is
# copied unchanged and the file is then written back.
#
# Returns 1 on success; dies if the file cannot be read or written.
sub update_wml_file_metadata
{
    my $file = shift;
    my $revision = shift;
    my $hash = shift;
    my $text = "";

    # Three-argument open so odd characters in the name are never taken
    # as an open mode.
    open(my $in, "<", $file) or die "Can't open $file for reading: $!\n";
    while (my $line = <$in>) {
        if ($line =~ m/^#use wml::debian::translation-check/) {
            # \Q..\E makes the dots in the revision literal, and the
            # lookahead stops "1.2" from matching the front of "1.23".
            $line =~ s/(translation="?)\Q$revision\E(?![.\d])/$1$hash/;
        }
        $text .= $line;
    }
    close($in);

    open(my $out, ">", $file) or die "Can't open $file for writing: $!\n";
    print {$out} $text;
    # Buffered write errors only show up at close time.
    close($out) or die "Can't write $file: $!\n";
    return 1;
}

# Parse a wml file, and see if there's a translation-check header. If
# so, use the rev_map data to switch the translation information from
# the cvs version to the git hash *if available*.
#
# Argument: path of the wml file; its first path component is replaced
# by the "original" header value (default "english") to name the file
# the translation depends on.
#
# Returns:
#   0 - nothing to do (no translation-check data), or the file was
#       handled successfully (updated, or would be in --dry-run mode)
#   1 - the header has no revision, or no cvs->git mapping exists for
#       that file/revision (both are only logged via vlog)
# Dies if the Webwml::TransCheck object cannot be created.
sub parse_wml_file
{
    my $file = shift;
    my $tc = Webwml::TransCheck->new("$file") or die "Failed transcheck: $!\n";
    vlog("Looking at wml file $file");

    # Collect whatever translation header fields are present; $info
    # counts them so we can tell whether there is any header info at all.
    my %field;
    my $info = 0;
    for my $name (qw(maintainer revision original mindelta maxdelta)) {
        my $value = $tc->$name();
        next unless defined($value);
        $field{$name} = $value;
        vvlog("  " . ucfirst($name) . ": $value");
        $info += 1;
    }
    return 0 if $info == 0;

    my $target_lang = defined($field{"original"}) ? $field{"original"} : "english";
    my $targetfile = $file;
    $targetfile =~ s,^[^/]+,$target_lang,;
    vvlog("  Depends on $targetfile");

    my $revision = $field{"revision"};
    if (!defined($revision)) {
        vlog("  But no revision data!");
        return 1;
    }

    # Do we have a cvs->git map for that file and revision? Test with
    # exists() first so the lookup does not autovivify %rev_map entries
    # for files/revisions that were never in the revisions file.
    my $hash;
    if (exists $rev_map{$targetfile} and exists $rev_map{$targetfile}{$revision}) {
        $hash = $rev_map{$targetfile}{$revision}{"git_hash"};
    }
    if (!defined $hash) {
        vlog("  Looking up $targetfile with cvs rev $revision, no mapping found");
        return 1;
    }
    vlog("  Depends on $targetfile with cvs rev $revision, git hash $hash");

    if (!$dry_run) {
        vlog ("  Updating the file data");
        update_wml_file_metadata($file, $revision, $hash);
    }
    # Explicit success value, so it can't be confused with the failure
    # code by leaking the result of the last statement.
    return 0;
}

#    open(IN, "<", "$file") or die "Can't open file \$wml_file\" for reading: $!#\n";
#    while (my $line = <IN>) {
#	chomp $line;
#	if ($line =~ m/^#use wml::debian::translation-check/) {
#	    my $original="english"; # default
#	}
#    }
#}

# "main"

# Parse the command line. "verbose:+" takes an optional integer: a bare
# --verbose increments the level (repeat it for more detail), while
# --verbose=N still sets it to N as before.
if (not GetOptions ("help"      => \$help,
                    "verbose:+" => \$verbose,
                    "dry-run"   => \$dry_run,
                    "revisions=s" => \$revs_file))
{
        warn "Try `$0 --help' for more information.\n";
        exit(1);
}

if ($help) {
    usage();
}

# The cvs2git revisions map is mandatory: without it no revision can be
# translated to a git hash.
if (! -f $revs_file) {
    die "Can't open revisions file, abort!\n";
}
parse_revisions($revs_file);

# Process every .wml file below the current directory.
my @wmlfiles = find_files_ext(".", 'wml');
vlog("Found " . scalar(@wmlfiles) . " files to work on\n");
for my $wml_file (@wmlfiles) {
    parse_wml_file($wml_file);
}

Reply to: