--- Begin Message ---
Package: www.debian.org
Severity: wishlist
Tags: patch
The bug about converting the website to UTF-8
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=567781
is fixed, and no language uses any other encoding any more,
so here is a patch to remove the now-unused recoding routines.
--
victory
http://userscripts.org/scripts/show/102724
Index: copypage.pl
===================================================================
RCS file: /cvs/webwml/webwml/copypage.pl,v
retrieving revision 1.40
diff -u -r1.40 copypage.pl
--- copypage.pl 4 Jun 2011 14:09:23 -0000 1.40
+++ copypage.pl 12 Oct 2012 19:31:07 -0000
@@ -25,14 +25,6 @@
use File::Temp qw/tempfile/;
use Getopt::Std;
-
-# Declare variables only used in references to avoid warnings
-use vars qw(@iso_8859_2_compat @iso_8859_3_compat @iso_8859_4_compat
- @iso_8859_5_compat @iso_8859_6_compat @iso_8859_7_compat
- @iso_8859_8_compat @iso_8859_9_compat @iso_8859_10_compat
- @iso_8859_13_compat @iso_8859_14_compat @iso_8859_15_compat
- @iso_8859_16_compat);
-
# Get configuration
# Read first two valid lines from language.conf
if (open CONF, "<language.conf")
@@ -106,70 +98,13 @@
#warn "Maintainer name not defined in DWWW_MAINT or language.conf\n"
# if not defined $maintainer;
-
-# Table of entities used when copying to non-latin1 encodings
-@entities = (
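- '&nbsp;', '&iexcl;', '&cent;', '&pound;', '&curren;', '&yen;',
- '&brvbar;', '&sect;', '&uml;', '&copy;', '&ordf;', '&laquo;', '&not;',
- '&shy;', '&reg;', '&macr;', '&deg;', '&plusmn;', '&sup2;', '&sup3;',
- '&acute;', '&micro;', '&para;', '&middot;', '&cedil;', '&sup1;',
- '&ordm;', '&raquo;', '&frac14;', '&frac12;', '&frac34;', '&iquest;',
- '&Agrave;', '&Aacute;', '&Acirc;', '&Atilde;', '&Auml;', '&Aring;',
- '&AElig;', '&Ccedil;', '&Egrave;', '&Eacute;', '&Ecirc;', '&Euml;',
- '&Igrave;', '&Iacute;', '&Icirc;', '&Iuml;', '&ETH;', '&Ntilde;',
- '&Ograve;', '&Oacute;', '&Ocirc;', '&Otilde;', '&Ouml;', '&times;',
- '&Oslash;', '&Ugrave;', '&Uacute;', '&Ucirc;', '&Uuml;', '&Yacute;',
- '&THORN;', '&szlig;', '&agrave;', '&aacute;', '&acirc;', '&atilde;',
- '&auml;', '&aring;', '&aelig;', '&ccedil;', '&egrave;', '&eacute;',
- '&ecirc;', '&euml;', '&igrave;', '&iacute;', '&icirc;', '&iuml;',
- '&eth;', '&ntilde;', '&ograve;', '&oacute;', '&ocirc;', '&otilde;',
- '&ouml;', '&divide;', '&oslash;', '&ugrave;', '&uacute;', '&ucirc;',
- '&uuml;', '&yacute;', '&thorn;', '&yuml;'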
-);
-
-# Compatibility tables for the iso-8859 series; 1 indicates that the
-# codepoint is the same as in iso-8859-1. Used to perform partial remaps
-# for these.
-@iso_8859_2_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0);
-@iso_8859_3_compat = (1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0);
-@iso_8859_4_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0);
-@iso_8859_5_compat = (1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_6_compat = (1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_7_compat = (1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_8_compat = (1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_9_compat = (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1);
-@iso_8859_10_compat =(1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0);
-@iso_8859_13_compat =(1,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0);
-@iso_8859_14_compat =(1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1);
-@iso_8859_15_compat =(1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
-@iso_8859_16_compat =(1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1);
-
-# Check destination character encoding
-my $recode = 0;
-if (open WMLRC, "$language/.wmlrc")
-{
- while (<WMLRC>)
- {
- if (s/^-D CHARSET=//)
- {
- $recode = 1 unless /^utf-8$/i;
- if ($recode && /^iso-8859-([0-9]+)$/)
- {
- my $compattablename = 'iso_8859_' . $1 . '_compat';
- $compat = \@{$compattablename} if defined @{$compattablename};
- }
- last;
- }
- }
-}
-
# Loop over command line
foreach $page (@ARGV)
{
# Check if valid source
if ($page =~ /wml$/ || $page =~ /src$/)
{
- &copy($page, $recode, $compat);
+ &copy($page);
}
else
{
@@ -181,8 +116,6 @@
sub copy
{
my $page = shift;
- my $recodelatin1 = shift;
- my $compattable = shift;
print "Processing $page...\n";
# Remove english/ from path
@@ -294,30 +227,6 @@
}
else
{
- # Transform the string into a string that is fit for the encoding
- # of the output language. We do that by first converting any
- # SGML entities in the input stream into 8-bit ISO 8859-1
- # encoding, and then convert extended characters (back) into
- # entities if necessary for the target encoding.
-
- # Decode
- s/(&[^#;]+;)/&decodeentity($1)/ge;
- s/&#(1[6-9][0-9]|2[0-4][0-9]|25[0-5]);/chr($1)/ge;
-
- # Encode
- if (defined $compattable)
- {
- # Output encoding is in part compatible with ISO 8859-1, only
- # convert incompatible characters into entities.
- s/([\xA0-\xFF])/$$compattable[ord($1)-160]?$1:$entities[ord($1)-160]/ge;
- }
- elsif ($recodelatin1)
- {
- # Output encoding is incompatible with ISO 8859-1, convert all
- # 8-bit characters into entities.
- s/([\xA0-\xFF])/$entities[ord($1)-160]/ge;
- }
-
print DST $_;
}
}
@@ -339,18 +248,6 @@
if defined $dsttitle;
}
-# Return the ISO-8859-1 character that corresponds to the given entity
-sub decodeentity
-{
- my $ent = shift;
- # Start at one to avoid decoding &nbsp;
- for (my $i = 1; $i < $#entities; ++ $i)
- {
- return chr($i + 160) if $entities[$i] eq $ent;
- }
- return $ent;
-}
-
# Find for old translations in the CVS Attic
sub find_files_attic
{
Index: stattrans.pl
===================================================================
RCS file: /cvs/webwml/webwml/stattrans.pl,v
retrieving revision 1.110
diff -u -r1.110 stattrans.pl
--- stattrans.pl 18 Jun 2012 09:30:04 -0000 1.110
+++ stattrans.pl 12 Oct 2012 19:40:52 -0000
@@ -394,16 +394,6 @@
@processed_langs = ("zh-cn", "zh-tw") if $langs{$lang} eq "zh";
foreach $l (@processed_langs) {
print "$l.wml " if ($config{'verbose'});
-
- $charset{$lang};
- open (wmlrc,"$opt_w/$lang/.wmlrc") ;
- while (<wmlrc>) {
- if ( /^-D CHARSET=(.*)$/ ) {
- $charset{$lang} = $1;
- }
- }
- close wmlrc ;
-
$t_body = $u_body = $ui_body = $un_body = $uu_body = $o_body = "";
$translated{$lang} = $outdated{$lang} = $untranslated{$lang} = 0;
--- End Message ---