[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: amd64 situation in Mirrors.masterlist



Joey Hess wrote:
Jo Shields wrote:
Would it be useful to refine the rather rushed tools used to create the diff, removing any manual steps from Mirrors.masterlist to .diff?

Yes.

Okay then. Attached is the new, improved Masterlister program. It should just compile with "mcs Masterlister.cs", using the MCS compiler from Mono. Usage is "Masterlister.exe path/to/source/masterlist" - a diff is produced on STDOUT (with program info on STDERR, a bit like OggEnc).

This program goes through the source file, and for each Site: entry with an Archive-http or Archive-ftp (HTTP preferred) will download a directory listing for $MIRRORLOCATION/dists/sid/main/ to a temporary location. Mirrors are tried only once, and given a 30 second timeout.

The listing file is then parsed using a simple regular expression to build a hashtable of architectures held on a given mirror. The source file is re-read, written verbatim to the temporary location, with Archive-architecture: lines replaced where a replacement is stored in the hashtable. Finally, diff is called to compare the source and new files.


Also attached is another diff. Due to a pair of stupid 1-character typos in my previous effort, FTP-only mirrors were being counted as bad, as were HTTP mirrors whose HREFs didn't end with "/". New statistics are: 330 sites in Mirrors.masterlist, of which 289 are archive mirrors. 254 of the archive mirrors are polled automatically without issue. Of those 254, only 2 returned bad directory listings (HTTP but not FTP on ftp.is.co.za appears broken; ftp.mcc.ac.uk seems to have hiccuped, as it's fine via a web browser).
//
// Masterlister.cs: Reads Mirrors.masterlist file, attempts to contact
//                  the mirrors, and updates the Archive-architecture
//                  lines appropriately
//
// Author:
//   Jo Shields (directhex@apebox.org)
//
// Licensed under Do What The Fuck You Want To Public License (WTFPL)
// Version 2.
// 
using System;
using System.Collections;
using System.Diagnostics;
using System.IO;
using System.Text.RegularExpressions;

namespace Masterlister
{
	/// <summary>
	/// Command-line tool that reads a Mirrors.masterlist file, downloads the
	/// dists/sid/main/ directory listing of every archive mirror with wget,
	/// rewrites the Archive-architecture: lines from what the listings show,
	/// and runs "diff -u" between the original list and the rewritten copy.
	/// The diff goes to STDOUT; progress messages go to STDERR.
	/// </summary>
	class MainClass
	{
		// Name of the rewritten list inside the temporary working folder.
		private const string NewListName = "Mirrors.masterlist";

		// Path (relative to a mirror's archive root) whose listing is fetched.
		private const string ListingSubPath = "dists/sid/main/";

		// Field patterns. They are applied to trimmed lines and anchored at the
		// start so that a differently named field which merely ends with the same
		// text (e.g. an "X-Archive-ftp:" line) is not mistaken for the real field.
		private static readonly Regex SiteField = new Regex( "^Site: (.*)" );
		private static readonly Regex HttpField = new Regex( "^Archive-http: (.*)" );
		private static readonly Regex FtpField = new Regex( "^Archive-ftp: (.*)" );
		private static readonly Regex ArchField = new Regex( "^Archive-architecture: (.*)" );

		// Matches "binary-<arch>" directory names inside a downloaded listing.
		private static readonly Regex ListedArch = new Regex( "binary-([a-zA-Z0-9-]+)" );

		/// <summary>
		/// Program entry point.
		/// </summary>
		/// <param name="args">Exactly one argument: the path of the source masterlist.</param>
		public static void Main(string[] args)
		{
			Console.Error.WriteLine( "Masterlister v0.01 by Jo Shields\n================================\n" );
			if( args.Length != 1 )
				ShowUsageThenExit( );
			FileInfo SourceMasterlist = new FileInfo( args[0] );
			if( !SourceMasterlist.Exists )
				ShowUsageThenExit( );

			// GetTempFileName creates an empty file and returns its full path;
			// that file is swapped for a directory of the same (unique) name.
			string tempFolder = Path.GetTempFileName( );
			File.Delete( tempFolder );
			Directory.CreateDirectory( tempFolder );
			try
			{
				GetMirrorListings( tempFolder, SourceMasterlist.FullName );
				Hashtable HostedArches = GetHostedArches( tempFolder );
				CreateNewMasterlist( tempFolder, SourceMasterlist.FullName, HostedArches );

				using( Process diffProcess = new Process( ) )
				{
					diffProcess.StartInfo.FileName = "diff";
					diffProcess.StartInfo.Arguments = "-u " + Quote( SourceMasterlist.FullName ) + " " + Quote( Path.Combine( tempFolder, NewListName ) );
					// Run diff directly (as wget is) so it writes to this process's STDOUT.
					diffProcess.StartInfo.UseShellExecute = false;
					diffProcess.Start( );
					diffProcess.WaitForExit( );
				}
			}
			finally
			{
				// Remove the working folder even when a step above throws.
				Directory.Delete( tempFolder, true );
			}
		}

		/// <summary>
		/// Prints the usage text to STDERR and terminates the process with a
		/// non-zero exit status, since it is only called for invalid invocations.
		/// </summary>
		private static void ShowUsageThenExit( )
		{
			Console.Error.WriteLine( "Usage: masterlister.exe original-list\n\nDiffed list is written to STDOUT,\n program messages to STDERR" );
			Environment.Exit( 1 );
		}

		/// <summary>
		/// Wraps a command-line argument in double quotes so that paths or URLs
		/// containing spaces reach the child process as a single argument.
		/// Embedded double quotes are not escaped.
		/// </summary>
		private static string Quote( string argument )
		{
			return "\"" + argument + "\"";
		}

		/// <summary>
		/// Copies <paramref name="sourceFile"/> line by line to
		/// "Mirrors.masterlist" inside <paramref name="tempFolder"/>, replacing the
		/// Archive-architecture: line of every site that has an entry in
		/// <paramref name="HostedArches"/>. All other lines are written verbatim.
		/// </summary>
		/// <param name="tempFolder">Folder that receives the rewritten list.</param>
		/// <param name="sourceFile">Path of the original masterlist.</param>
		/// <param name="HostedArches">Maps site name to a string[] of architectures.</param>
		private static void CreateNewMasterlist( string tempFolder, string sourceFile, Hashtable HostedArches )
		{
			using( StreamReader SourceReader = File.OpenText( sourceFile ) )
			using( StreamWriter TargetWriter = File.CreateText( Path.Combine( tempFolder, NewListName ) ) )
			{
				string CurrentHost = "";
				string currentLine;
				while( ( currentLine = SourceReader.ReadLine( ) ) != null )
				{
					string trimmed = currentLine.Trim( );

					// A blank line ends the current stanza.
					if( trimmed == "" )
						CurrentHost = "";

					Match site = SiteField.Match( trimmed );
					if( site.Success )
						CurrentHost = site.Groups[1].Value.Trim( );

					if( ArchField.IsMatch( trimmed ) && CurrentHost != "" && HostedArches.ContainsKey( CurrentHost ) )
					{
						TargetWriter.Write( "Archive-architecture:" );
						foreach( string Arch in (string[])(HostedArches[CurrentHost]) )
							TargetWriter.Write( " {0}", Arch );
						TargetWriter.WriteLine( );
						// Only the first architecture line of a stanza is replaced.
						CurrentHost = "";
					}
					else
						TargetWriter.WriteLine( currentLine );
				}
			}
		}

		/// <summary>
		/// Parses every file in <paramref name="tempFolder"/> as a downloaded
		/// directory listing (the file name is the site name) and collects the
		/// "binary-*" architectures found in it, one per matching line. Each
		/// listing file is deleted after it has been read.
		/// </summary>
		/// <param name="tempFolder">Folder holding one listing file per site.</param>
		/// <returns>
		/// Hashtable mapping site name to a string[] of architectures; sites whose
		/// listing contains no architecture are reported on STDERR and left out.
		/// </returns>
		private static Hashtable GetHostedArches( string tempFolder )
		{
			Hashtable Arches = new Hashtable( );
			foreach( FileInfo Mirror in (new DirectoryInfo( tempFolder )).GetFiles( ) )
			{
				ArrayList MirrorArches = new ArrayList( );
				using( StreamReader MirrorReader = File.OpenText( Mirror.FullName ) )
				{
					string HtmlLine;
					while( ( HtmlLine = MirrorReader.ReadLine( ) ) != null )
					{
						// Only the first match on a line is used, so a link whose
						// href and text both name the directory counts once.
						Match Found = ListedArch.Match( HtmlLine );
						if( Found.Success )
							MirrorArches.Add( Found.Groups[1].Value );
					}
				}

				if( MirrorArches.Count > 0 )
				{
					Console.Error.WriteLine( "{0} hosts {1} architectures", Mirror.Name.PadLeft( 32 ), MirrorArches.Count.ToString( ).PadLeft( 2 ) );
					Arches.Add( Mirror.Name, (string[])MirrorArches.ToArray( typeof( string ) ) );
				}
				else
					Console.Error.WriteLine( "{0} has *NO* architectures", Mirror.Name.PadLeft( 32 ) );

				Mirror.Delete( );
			}
			return Arches;
		}

		/// <summary>
		/// Walks the masterlist stanza by stanza and downloads a directory listing
		/// for each site into <paramref name="tempFolder"/> (see FetchListing).
		/// A stanza ends at a blank line or at the end of the file.
		/// </summary>
		/// <param name="tempFolder">Folder that receives one listing file per site.</param>
		/// <param name="mirrorList">Path of the masterlist to read.</param>
		private static void GetMirrorListings( string tempFolder, string mirrorList )
		{
			string Host = "";
			string HttpDir = "";
			string FtpDir = "";

			using( StreamReader SourceReader = File.OpenText( mirrorList ) )
			{
				string CurrentLine;
				while( ( CurrentLine = SourceReader.ReadLine( ) ) != null )
				{
					string trimmed = CurrentLine.Trim( );
					Match field;
					if( ( field = SiteField.Match( trimmed ) ).Success )
						Host = field.Groups[1].Value.Trim( );
					else if( ( field = HttpField.Match( trimmed ) ).Success )
						HttpDir = field.Groups[1].Value.Trim( );
					else if( ( field = FtpField.Match( trimmed ) ).Success )
						FtpDir = field.Groups[1].Value.Trim( );
					else if( trimmed == "" )
					{
						FetchListing( tempFolder, Host, HttpDir, FtpDir );
						Host = "";
						HttpDir = "";
						FtpDir = "";
					}
				}
			}

			// The last stanza is not necessarily followed by a blank line.
			FetchListing( tempFolder, Host, HttpDir, FtpDir );
		}

		/// <summary>
		/// Downloads the dists/sid/main/ listing of one site with wget into a file
		/// named after the site. HTTP is preferred over FTP; a site with neither
		/// is reported as not being an archive mirror. A missing or empty result
		/// is removed and reported as a bad mirror. Does nothing when
		/// <paramref name="host"/> is empty.
		/// </summary>
		private static void FetchListing( string tempFolder, string host, string httpDir, string ftpDir )
		{
			if( host == "" )
				return;

			Console.Error.Write( "{0} : ", host.PadRight( 32 ) );
			string CompleteUrl;
			if( httpDir != "" )
			{
				CompleteUrl = "http://" + host + httpDir + ListingSubPath;
				Console.Error.WriteLine( "HTTP Archive" );
			}
			else if( ftpDir != "" )
			{
				CompleteUrl = "ftp://" + host + ftpDir + ListingSubPath;
				Console.Error.WriteLine( "FTP Archive" );
			}
			else
			{
				Console.Error.WriteLine( "-- Not an archive mirror --" );
				return;
			}

			string listingPath = Path.Combine( tempFolder, host );
			using( Process wget = new Process( ) )
			{
				wget.StartInfo.FileName = "wget";
				wget.StartInfo.UseShellExecute = false;
				wget.StartInfo.RedirectStandardError = true;
				wget.StartInfo.Arguments = "--timeout=30 --no-remove-listing -t 1 -O " + Quote( listingPath ) + " " + Quote( CompleteUrl );
				wget.Start( );
				// The redirected stream must be drained before waiting: otherwise
				// wget blocks once the pipe buffer is full and never exits.
				wget.StandardError.ReadToEnd( );
				wget.WaitForExit( );
			}

			// wget may not have created the file at all; FileInfo.Length would throw then.
			FileInfo downloadedListing = new FileInfo( listingPath );
			if( !downloadedListing.Exists || downloadedListing.Length == 0 )
			{
				File.Delete( listingPath );
				Console.Error.WriteLine( "\t^^ Bad mirror, excluding from update" );
			}
		}
	}
}
--- /home/directhex/Projects/Masterlister/bin/Debug/Mirrors.masterlist	2006-08-09 22:20:29.000000000 +0100
+++ /tmp/tmp20536772.tmp/Mirrors.masterlist	2006-08-10 01:19:05.000000000 +0100
@@ -3,7 +3,7 @@
 Includes: saens.debian.org ftp.egr.msu.edu raff.debian.org mirrors.kernel.org archive.progeny.com debian.osuosl.org
 Type: Push-Primary
 Archive-http: /debian/
-Archive-architecture: !m68k !s390
+Archive-architecture: amd64 i386
 Archive-ftp: /debian/
 Archive-rsync: debian/
 Country: US United States
@@ -206,7 +206,7 @@
 Alias: ftp.demon.net
 Type: leaf
 Archive-ftp: /pub/mirrors/linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Maintainer: uploads@demon.net, Malcolm Muir <malcolm@demon.net>
 Country: GB Great Britain
 Sponsor: Demon Internet Ltd http://www.demon.net/
@@ -214,7 +214,7 @@
 Site: ftp.uwa.edu.au
 Type: leaf
 Archive-ftp: /mirrors/linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sh sparc
 NonUS-ftp: /mirrors/linux/debian-non-US/
 Mirrors-from: ftp.au.debian.org
 Maintainer: David Luyer <luyer@ucs.uwa.edu.au>
@@ -412,7 +412,7 @@
 Country: DE Germany
 Location: Clausthal-Zellerfeld, Niedersachsen
 Sponsor: Rechenzentrum der TU-Clausthal http://www.rz.tu-clausthal.de/
-Archive-architecture: alpha arm i386 ia64 m68k mips mipsel powerpc sparc
+Archive-architecture: amd64 arm i386 ia64 m68k mips powerpc sparc
 Comment: s390, sh, hppa and hurd excluded from main archive;
  hppa, mips, mipsel and s390 excluded from CDs, (jigdo available);
  s390 and hppa excluded from non-US. Bandwidth limitation: 1 Mbit per
@@ -795,7 +795,7 @@
 Site: ftp.arnes.si
 Type: leaf
 Archive-ftp: /packages/debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 hurd-i386 i386
 NonUS-ftp: /packages/debian-non-US/
 Mirrors-from: ftp.de.debian.org
 Country: SI Slovenia
@@ -838,7 +838,7 @@
 Aliases: ftp.proxad.fr
 Type: leaf
 Archive-ftp: /mirrors/ftp.debian.org/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 NonUS-ftp: /mirrors/nonus.debian.org/
 CDImage-ftp: /mirrors/cdimage.debian.org/debian-cd/
 WWW-ftp: /mirrors/web.debian.org/
@@ -984,7 +984,7 @@
 Country: CA Canada
 Location: Vancouver
 Sponsor: ID Internet Direct Ltd. http://www.direct.ca/
-Archive-architecture: amd64 i386 ia64 sparc
+Archive-architecture: amd64 i386
 
 Site: ftp.si.debian.org
 Alias: ftp.camtp.uni-mb.si
@@ -1077,7 +1077,7 @@
 Type: leaf
 Alias: ftp.tuwien.ac.at
 Archive-ftp: /opsys/linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Archive-http: /opsys/linux/debian/
 Archive-rsync: opsys/linux/debian/
 NonUS-ftp: /opsys/linux/debian-non-US/
@@ -1126,7 +1126,7 @@
 Aliases: ftp.uninett.no
 Type: Push-Secondary
 Archive-ftp: /debian/
-Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 mips mipsel powerpc s390 sparc
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 powerpc s390 sparc
 Archive-http: /debian/
 Archive-rsync: debian/
 NonUS-ftp: /debian-non-US/
@@ -1661,7 +1661,7 @@
 Country: US United States
 Location: Minneapolis, Minnesota
 Sponsor: Real-Time Enterprises http://www.real-time.com/
-Archive-architecture: i386
+Archive-architecture: alpha amd64 i386 powerpc sparc
 
 Site: mirrors.kernel.org
 Aliases: rsync.kernel.org
@@ -1731,7 +1731,7 @@
 Site: ftp.eutelia.it
 Type: leaf
 Archive-ftp: /pub/Debian_Mirror/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Mirrors-from: ftp.de.debian.org
 Maintainer: Max Gargani <ftpadmin@eutelia.com>
 Country: IT Italy
@@ -1929,7 +1929,7 @@
 Alias: dl.xs4all.nl
 Type: leaf
 Archive-ftp: /pub/mirror/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Maintainer: unixbeheer@xs4all.nl, info@xs4all.nl
 Country: NL Netherlands
 Location: AMS-IX, Amsterdam
@@ -2080,7 +2080,7 @@
 Alias: slagroom.snt.utwente.nl
 Type: Push-Primary
 Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha
 Archive-http: /debian/
 Archive-rsync: debian/
 NonUS-ftp: /debian-non-US/
@@ -2105,7 +2105,7 @@
 Site: ftp.iinet.net.au
 Type: leaf
 Archive-ftp: /debian/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Archive-http: /debian/debian/
 CDImage-ftp: /debian/debian-cd/
 CDImage-http: /debian/debian-cd/
@@ -2208,7 +2208,7 @@
 Alias: Hefe.ZEDAT.FU-Berlin.DE
 Type: leaf
 Archive-ftp: /pub/unix/linux/mirrors/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 NonUS-ftp: /pub/unix/linux/mirrors/debian-non-US/
 Mirrors-from: ftp.de.debian.org
 Maintainer: ftp-adm@FU-Berlin.DE
@@ -2303,7 +2303,7 @@
 Aliases: download.nectec.or.th
 Type: leaf
 Archive-ftp: /pub/linux-distributions/Debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 NonUS-ftp: /pub/linux-distributions/Debian-non-US/
 Mirrors-from: ftp.jp.debian.org
 Maintainer: PubNet team of NECTEC <pubnet@nectec.or.th>
@@ -2442,7 +2442,7 @@
 Site: ftp.mpi-sb.mpg.de
 Type: leaf
 Archive-ftp: /pub/linux/distributions/debian/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 NonUS-ftp: /pub/linux/distributions/debian/non-us/debian-non-US/
 CDImage-ftp: /pub/linux/distributions/debian/debian-cd/
 Mirrors-from: ftp.debian.org
@@ -2544,7 +2544,7 @@
 Country: FI Finland
 Location: Jyv&auml;skyl&auml;
 Sponsor: University of Jyv&auml;skyl&auml; http://www.jyu.fi/
-Archive-architecture: !arm !m68k !mips !mipsel !s390
+Archive-architecture: alpha amd64 hppa hurd-i386 i386 ia64 powerpc sparc
 
 Site: ftp.bittivuoto.net
 Type: leaf
@@ -3057,7 +3057,7 @@
 Location: Sofia
 Sponsor: Lirex Net http://www.lirex.net/
 Sponsor: Ludost.net http://www.ludost.net/
-Archive-architecture: amd64 i386
+Archive-architecture: i386
 
 Site: debian.blueyonder.co.uk
 Alias: mirror2.blueyonder.co.uk
@@ -3173,7 +3173,7 @@
 Site: nisamox.fciencias.unam.mx
 Type: leaf
 Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Archive-http: /debian/
 NonUS-ftp: /debian-non-US/
 NonUS-http: /debian-non-US/
@@ -3422,7 +3422,7 @@
 Site: ftp.linux.org.tr
 Type: leaf
 Archive-ftp: /pub/mirrors/debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 i386
 CDImage-ftp: /pub/mirrors/debian-cd/
 Maintainer: ftp@linux.org.tr
 Country: TR Turkey
@@ -3569,7 +3569,7 @@
 Site: debian.midco.net
 Type: leaf
 X-Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Archive-http: /debian/
 Archive-rsync: /debian/
 X-CDImage-ftp: /pub/iso/
@@ -3759,7 +3759,7 @@
 Country: NZ New Zealand
 Location: Auckland
 Sponsor: Ihug Ltd http://www.ihug.co.nz/
-Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 mips mipsel powerpc s390 sh sparc
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 mips mipsel powerpc s390 sparc
 
 Site: debian.spark.net.gr
 Type: leaf
@@ -3948,7 +3948,7 @@
 Country: RU Russia
 Location: Moscow
 Sponsor: Corbina telecom http://www.corbina.ru/
-Archive-architecture: i386 ia64
+Archive-architecture: amd64 i386 ia64
 
 Site: debian.indika.net.id
 Type: leaf
@@ -3981,7 +3981,7 @@
 Alias: ftp.mines.inpl-nancy.fr
 Type: leaf
 Archive-ftp: /debian/
-Archive-architecture: !alpha !arm !m68k
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 Archive-http: /debian/
 CDImage-ftp: /debian-cd/
 CDImage-http: /debian-cd/
@@ -4060,7 +4060,7 @@
 Alias: horacio.cica.es
 Type: leaf
 Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 hurd-i386 i386
 CDImage-ftp: /debian-cd/
 NonUS-ftp: /debian-non-US/
 Mirrors-from: ftp.es.debian.org
@@ -4174,7 +4174,7 @@
 Site: ftp.informatik.hu-berlin.de
 Type: leaf
 Archive-ftp: /pub/Mirrors/ftp.de.debian.org/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 CDImage-ftp: /pub/Mirrors/ftp.de.debian.org/debian-cd/
 NonUS-ftp: /pub/Mirrors/ftp.de.debian.org/debian-non-US/
 Mirrors-from: ftp.fu-berlin.de
@@ -4187,7 +4187,7 @@
 Alias: ftp.ecc.u-tokyo.ac.jp
 Type: leaf
 Archive-ftp: /DEBIAN/debian/
-Archive-architecture: !amd64
+Archive-architecture: amd64 hurd-i386 i386 powerpc
 NonUS-ftp: /DEBIAN/debian-non-US/
 Mirrors-from: ftp.jp.debian.org
 Maintainer: NOGAMI Daisuke <dnogami@niwa.c.u-tokyo.ac.jp>
@@ -4263,7 +4263,7 @@
 Site: ftp.debian.ikoula.com
 Type: leaf
 Archive-ftp: /debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 NonUS-ftp: /debian-non-US/
 Mirrors-from: ftp2.fr.debian.org
 Maintainer: Florence LIU <tech@ikoula.com>
@@ -4352,7 +4352,7 @@
 Aliases: rubycon.man.szczecin.pl
 Type: leaf
 Archive-ftp: /pub/Linux/debian/
-Archive-architecture: !amd64
+Archive-architecture: alpha amd64 arm hppa hurd-i386 i386 ia64 m68k mips mipsel powerpc s390 sparc
 NonUS-ftp: /pub/Linux/debian-non-US/
 Mirrors-from: ftp.de.debian.org
 Maintainer: Tomasz Grabowski <mirroring@rubycon.man.szczecin.pl>
@@ -4372,7 +4372,7 @@
 Country: BR Brazil
 Location: Campinas - SP
 Sponsor: LAS-IC-UNICAMP - Systems Administration and Security Laboratory http://www.las.ic.unicamp.br/
-Archive-architecture: amd64 i386
+Archive-architecture: amd64 hurd-i386 i386 powerpc sparc
 
 Site: ftp.gwdg.de
 Type: leaf

Reply to: