[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Bug#690331: marked as done (copypage and stattrans cleaning)



Your message dated Sun, 2 Dec 2012 19:01:15 +0900
with message-id <20121202190115.cf5083f0c1e3199e83521394@gmail.com>
and subject line Bug#690331: copypage and stattrans cleaning
has caused the Debian Bug report #690331,
regarding copypage and stattrans cleaning
to be marked as done.

This means that you claim that the problem has been dealt with.
If this is not the case it is now your responsibility to reopen the
Bug report if necessary, and/or fix the problem forthwith.

(NB: If you are a system administrator and have no idea what this
message is talking about, this may indicate a serious mail system
misconfiguration somewhere. Please contact owner@bugs.debian.org
immediately.)


-- 
690331: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=690331
Debian Bug Tracking System
Contact owner@bugs.debian.org with problems
--- Begin Message ---
Package: www.debian.org
Severity: wishlist
Tags: patch

The conversion of the website to UTF-8
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=567781
is done, and no language uses any other encoding anymore.

So this is a patch to remove the now-unneeded charset-handling routines from copypage.pl and stattrans.pl.

-- 
victory
http://userscripts.org/scripts/show/102724
Index: copypage.pl
===================================================================
RCS file: /cvs/webwml/webwml/copypage.pl,v
retrieving revision 1.40
diff -u -r1.40 copypage.pl
--- copypage.pl	4 Jun 2011 14:09:23 -0000	1.40
+++ copypage.pl	12 Oct 2012 19:31:07 -0000
@@ -25,14 +25,6 @@
 use File::Temp qw/tempfile/;
 use Getopt::Std;
 
-
-# Declare variables only used in references to avoid warnings
-use vars qw(@iso_8859_2_compat  @iso_8859_3_compat  @iso_8859_4_compat
-            @iso_8859_5_compat  @iso_8859_6_compat  @iso_8859_7_compat
-            @iso_8859_8_compat  @iso_8859_9_compat  @iso_8859_10_compat
-            @iso_8859_13_compat @iso_8859_14_compat @iso_8859_15_compat
-            @iso_8859_16_compat);
-
 # Get configuration
 # Read first two valid lines from language.conf
 if (open CONF, "<language.conf")
@@ -106,70 +98,13 @@
 #warn "Maintainer name not defined in DWWW_MAINT or language.conf\n"
 #	if not defined $maintainer;
 
-
-# Table of entities used when copying to non-latin1 encodings
-@entities = (
-	'&nbsp;', '&iexcl;', '&cent;', '&pound;', '&curren;', '&yen;',
-	'&brvbar;', '&sect;', '&uml;', '&copy;', '&ordf;', '&laquo;', '&not;',
-	'&shy;', '&reg;', '&macr;', '&deg;', '&plusmn;', '&sup2;', '&sup3;',
-	'&acute;', '&micro;', '&para;', '&middot;', '&cedil;', '&sup1;',
-	'&ordm;', '&raquo;', '&frac14;', '&frac12;', '&frac34;', '&iquest;',
-	'&Agrave;', '&Aacute;', '&Acirc;', '&Atilde;', '&Auml;', '&Aring;',
-	'&AElig;', '&Ccedil;', '&Egrave;', '&Eacute;', '&Ecirc;', '&Euml;',
-	'&Igrave;', '&Iacute;', '&Icirc;', '&Iuml;', '&ETH;', '&Ntilde;',
-	'&Ograve;', '&Oacute;', '&Ocirc;', '&Otilde;', '&Ouml;', '&times;',
-	'&Oslash;', '&Ugrave;', '&Uacute;', '&Ucirc;', '&Uuml;', '&Yacute;',
-	'&THORN;', '&szlig;', '&agrave;', '&aacute;', '&acirc;', '&atilde;',
-	'&auml;', '&aring;', '&aelig;', '&ccedil;', '&egrave;', '&eacute;',
-	'&ecirc;', '&euml;', '&igrave;', '&iacute;', '&icirc;', '&iuml;',
-	'&eth;', '&ntilde;', '&ograve;', '&oacute;', '&ocirc;', '&otilde;',
-	'&ouml;', '&divide;', '&oslash;', '&ugrave;', '&uacute;', '&ucirc;',
-	'&uuml;', '&yacute;', '&thorn;', '&yuml;'
-);
-
-# Compatibility tables for the iso-8859 series; 1 indicates that the
-# codepoint is the same as in iso-8859-1. Used to perform partial remaps
-# for these.
-@iso_8859_2_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,1,0,1,1,0,1,0,0,1,0,1,0,1,0,1,1,0,0,0,0,1,1,0,1,1,0,0,1,0,1,1,0,0);
-@iso_8859_3_compat = (1,0,0,1,1,0,0,1,1,0,0,0,0,1,0,0,1,0,1,1,1,1,0,1,1,0,0,0,0,1,0,0,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,0,1,1,1,1,0,0,0);
-@iso_8859_4_compat = (1,0,0,0,1,0,0,1,1,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,0,0,0,0,0,1,1,1,1,1,0,1,1,1,0,0,0);
-@iso_8859_5_compat = (1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_6_compat = (1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_7_compat = (1,0,0,1,0,0,1,1,1,1,0,1,1,1,0,0,1,1,1,1,0,0,0,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_8_compat = (1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0);
-@iso_8859_9_compat = (1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1);
-@iso_8859_10_compat =(1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,0,1,0,1,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,0);
-@iso_8859_13_compat =(1,0,1,1,1,0,1,1,0,1,0,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,0,1,1,1,1,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,1,0,0,0,0,1,0,0,0);
-@iso_8859_14_compat =(1,0,0,1,0,0,0,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1);
-@iso_8859_15_compat =(1,1,1,1,0,1,0,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1);
-@iso_8859_16_compat =(1,0,0,0,0,0,0,1,0,1,0,1,0,1,0,0,1,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,0,1,0,0,1,1,1,1,0,0,1);
-
-# Check destination character encoding
-my $recode = 0;
-if (open WMLRC, "$language/.wmlrc")
-{
-	while (<WMLRC>)
-	{
-		if (s/^-D CHARSET=//)
-		{
-			$recode = 1 unless /^utf-8$/i;
-			if ($recode && /^iso-8859-([0-9]+)$/)
-			{
-				my $compattablename = 'iso_8859_' . $1 . '_compat';
-				$compat = \@{$compattablename} if defined @{$compattablename};
-			}
-			last;
-		}
-	}
-}
-
 # Loop over command line
 foreach $page (@ARGV)
 {
 	# Check if valid source
 	if ($page =~ /wml$/ || $page =~ /src$/)
 	{
-		&copy($page, $recode, $compat);
+		&copy($page);
 	}
 	else
 	{
@@ -181,8 +116,6 @@
 sub copy
 {
 	my $page = shift;
-	my $recodelatin1 = shift;
-	my $compattable = shift;
 	print "Processing $page...\n";
 
 	# Remove english/ from path
@@ -294,30 +227,6 @@
 		}
 		else
 		{
-			# Transform the string into a string that is fit for the encoding
-			# of the output language. We do that by first converting any
-			# SGML entities in the input stream into 8-bit ISO 8859-1
-			# encoding, and then convert extended characters (back) into
-			# entities if necessary for the target encoding.
-
-			# Decode
-			s/(&[^#;]+;)/&decodeentity($1)/ge;
-			s/&#(1[6-9][0-9]|2[0-4][0-9]|25[0-5]);/chr($1)/ge;
-
-			# Encode
-			if (defined $compattable)
-			{
-				# Output encoding is in part compatible with ISO 8859-1, only
-				# convert incompatible characters into entities.
-				s/([\xA0-\xFF])/$$compattable[ord($1)-160]?$1:$entities[ord($1)-160]/ge;
-			}
-			elsif ($recodelatin1)
-			{
-				# Output encoding is incompatible with ISO 8859-1, convert all
-				# 8-bit characters into entities.
-				s/([\xA0-\xFF])/$entities[ord($1)-160]/ge;
-			}
-
 			print DST $_;
 		}
 	}
@@ -339,18 +248,6 @@
 		if defined $dsttitle;
 }
 
-# Return the ISO-8859-1 character that corresponds to the given entity
-sub decodeentity
-{
-	my $ent = shift;
-	# Start at one to avoid decoding &nbsp;
-	for (my $i = 1; $i < $#entities; ++ $i)
-	{
-		return chr($i + 160) if $entities[$i] eq $ent;
-	}
-	return $ent;
-}
-
 # Find for old translations in the CVS Attic 
 sub find_files_attic
 {
Index: stattrans.pl
===================================================================
RCS file: /cvs/webwml/webwml/stattrans.pl,v
retrieving revision 1.110
diff -u -r1.110 stattrans.pl
--- stattrans.pl	18 Jun 2012 09:30:04 -0000	1.110
+++ stattrans.pl	12 Oct 2012 19:40:52 -0000
@@ -394,16 +394,6 @@
     @processed_langs = ("zh-cn", "zh-tw") if $langs{$lang} eq "zh";
     foreach $l (@processed_langs) {
         print "$l.wml " if ($config{'verbose'});
-
-		$charset{$lang};
-		open (wmlrc,"$opt_w/$lang/.wmlrc") ;
-		while (<wmlrc>) {
-			if ( /^-D CHARSET=(.*)$/ ) { 
-				$charset{$lang} = $1;
-			}
-		}		
-		close wmlrc ;
-
         $t_body = $u_body = $ui_body = $un_body = $uu_body = $o_body = "";
         $translated{$lang} = $outdated{$lang} = $untranslated{$lang} = 0;
 


--- End Message ---
--- Begin Message ---
fixed by:
copypage.pl r1.42
stattrans.pl r1.111

-- 
victory
no need to CC me :-)
http://userscripts.org/scripts/show/102724
0.0.1.3

--- End Message ---

Reply to: