[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

RFC: Yet Another Mirror Script



Hi,

 I am adding yet another useless (partial, pool) mirror script to the world:

o Can be used on different archives
o Selecting mirror / dist / section / type via arguments
o Syncing of essential non-deb material
o Syncing non-deb material/updates for the special dist/type

 I have had success (read: apt et al. still seemed to work for the
different dists afterwards) using it via this wrapper script (which is
also a good usage example):

---
#!/bin/bash
#
# Example wrapper: runs the mirror script once per archive. Each call passes
# <dir> <host> <archive> <dists> <sections> <types>.

mirror_script=/home/absurd/share/bin/absurd_debian-sync-pool2

# Values shared by several of the runs below.
dest_dir="/home/ftp/pub"
all_dists="potato woody sid"
types="source i386"

# Security updates (potato only).
"$mirror_script" "$dest_dir" "security.debian.org" "debian-security" \
    "potato" "updates/main updates/contrib updates/non-free" "$types"

# The non-US archive.
"$mirror_script" "$dest_dir" "ftp.de.debian.org" "debian-non-US" \
    "$all_dists" "non-US/main non-US/contrib non-US/non-free" "$types"

# The main archive.
"$mirror_script" "$dest_dir" "ftp.de.debian.org" "debian" \
    "$all_dists" "main contrib non-free" "$types"
---

 If you would like to try the script yourself, I suggest you add some
harmless user and set it as SYNC_RUNUSER in the script. Running it without
args will print a usage message.

 As I said, it kind of worked for me, but there are still some things
I am not really sure about (especially the cleanup). The "philosophy"
of the script is to first download the selected Packages files, and then
to sync only the filenames listed therein. Other data corresponding to your
selections is synced the "standard way".
 I really think there must be a cleaner, simpler way, but this might
work for some time.
 
MfG,

Stephan
---
#!/bin/bash
#
# YAPCARDMS (Yet Another Pool Compatible Anon Rsync Debian Mirror Script)
# STBOA, anyway.
#
# (c) 1999, 2000 Stephan A Suerken <absurd@debian.org>, GPL.
# Copyright 1999, 2000 by Joey Hess <joeyh@debian.org> (gathering filelist)
# Originated from ancient script "anonftpsync" somewhere from www.debian.org/mirror/
#
# Special dependencies: procmail.deb (lockfile), rsync, symlinks, ...?

# Abort on the first unhandled command failure. This is set here rather than
# on the shebang line so that it also applies when the script is started as
# "bash <script>", in which case the shebang options are ignored.
set -e

#
# Configuration
#

# The script refuses to run as any other user.
SYNC_RUNUSER=absurd
# Extra rsync options used when syncing the disks-<type> directories.
DISK_IMAGES_EXCLUDES="--exclude **images-1.20/ --exclude **images-1.88/"

# Sync won't start if this file is present. Will be removed (trapped) on script
# exit, but that is not necessarily fool proof.
SYNC_LOCKFILE=".sync_LOCK"
# If the sync succeeded, a file named by this prefix plus the mirror host name
# is written (it contains the UTC date of the run).
SYNC_TRACEFILE=".sync_trace_of_"

#
# there should be no need to touch anything below
#

#
# Positional arguments (a typical value is shown next to each one)
#
SYNC_DIR="$1"         # "/home/ftp/pub"
SYNC_HOST="$2"        # "ftp.de.debian.org"
SYNC_ARCHIVE="$3"     # "debian" "debian-non-US"
SYNC_DISTS="$4"       # "slink potato woody"
SYNC_SECTIONS="$5"    # "main contrib non-free"
SYNC_TYPES="$6"       # "source i386 m68k"

# Local directory of this archive: <dir>/<archive>
SYNC_DEST="${SYNC_DIR}/${SYNC_ARCHIVE}"


#
# rsync base command
#
# Options that may be handy while testing:
#   --partial      keep partially transferred files so a later run can
#                  continue them
#   --timeout=n    give up after n seconds without traffic
#   --verbose      increase verbosity level
#   --dry-run      test run
#   --delete       should normally be given; dangerous in case the rsyncd
#                  file list is bogus (?!)
#
# The base command is a "mirroring" configuration (sort of). Everything
# else, especially recursion and symlink handling, is chosen explicitly at
# each call site, which is why --links is not part of it:
#RSYNC="rsync --perms --times --links --partial --verbose"
RSYNC="rsync --perms --times --partial --verbose"


#
# Part ???: Closeup, misc
#

# Print a section header: an empty line, the text, and an underline whose
# character depends on the level (0 = "*", 1 = "x", 2 = "=", 3 = "-").
# Any other level prints no underline. It is meant as orientation in the
# (long) output; ugly to some, ascii art to others.
function echo_header () # 1: level  2: text
{
  local underline=""

  case $1 in
       0) underline="**********************************************************************" ;;
       1) underline="xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" ;;
       2) underline="======================================================================" ;;
       3) underline="----------------------------------------------------------------------" ;;
  esac

  echo
  echo "$2"
  if [ -n "${underline}" ] ; then
    echo "${underline}"
  fi
}

# Remove the lock file ${SYNC_LOCKFILE} from the destination directory
# ${SYNC_DEST} and report it on stdout. A missing lock file is not an error.
function remlock
{
    # Quoted so that a destination containing whitespace still addresses the
    # right file; "--" guards against a path starting with a dash.
    rm -f -- "${SYNC_DEST}/${SYNC_LOCKFILE}"
    echo "Lockfile removed: ${SYNC_LOCKFILE}"
}

# Final status line printed by sync_closeup. It stays at this pessimistic
# default unless the end of the script replaces it after a successful run.
SYNC_CLOSEUP_MESSAGE="Mirror Run Interrupted Or Bogus."

# Closing routine, installed as EXIT trap by the main script: removes the
# lock, deletes dangling symlinks below ${SYNC_DEST}, prints disk usage of
# dists/ and pool/, the current time, and finally ${SYNC_CLOSEUP_MESSAGE}.
function sync_closeup
{
    echo_header 1 "Mirror Closeup"
    remlock

    # The housekeeping/reporting commands below are best effort: with errexit
    # active, a failure (tool missing, empty pool/) would otherwise end the
    # closeup before the final status line is printed.
    echo_header 2 "Detecting and removing dangling symlinks:"
    symlinks -r -d "${SYNC_DEST}" || true

    echo_header 2 "Disk usage statistic"
    du --summarize --human-readable "${SYNC_DEST}"/dists/* "${SYNC_DEST}"/pool/* || true
    echo
    echo "Time is now: $(date)"
    echo
    echo_header 0 "$SYNC_CLOSEUP_MESSAGE"
}


# Fetch one package index, plus the Release file next to it, from the
# mirror host into the same relative path below the current directory.
#
# Arguments: 1: path  e.g. "dists/potato/main"
#            2: type  "source" or an architecture such as "i386"
#            (further arguments are accepted and ignored)
# Globals:   reads RSYNC, SYNC_HOST, SYNC_ARCHIVE; appends the index path to
#            SOURCE_PACKAGES_FILES (type "source") or BINARY_PACKAGES_FILES.
# Returns:   non-zero if the index itself cannot be fetched; a missing
#            Release file is tolerated.
function sync_packages_file ()
{
   local path=$1
   local type=$2
   local subdir=""
   local packages_file=""
   local release_file=""

   if [ "${type}" = "source" ] ; then
      subdir="${type}"
      packages_file="${path}/${subdir}/Sources.gz"
      SOURCE_PACKAGES_FILES="${SOURCE_PACKAGES_FILES} ${packages_file}"
   else
      subdir="binary-${type}"
      packages_file="${path}/${subdir}/Packages.gz"
      BINARY_PACKAGES_FILES="${BINARY_PACKAGES_FILES} ${packages_file}"
   fi
   release_file="${path}/${subdir}/Release"

   # Best effort: if the directory cannot be created, the rsync below fails
   # and reports the problem.
   mkdir -p "$(dirname "${packages_file}")" || true

   # ${RSYNC} is a command line kept in a string and must stay unquoted so it
   # splits into words; everything else is quoted.
   ${RSYNC} --copy-links "${SYNC_HOST}::${SYNC_ARCHIVE}/${packages_file}" "${packages_file}"
   ${RSYNC} --copy-links "${SYNC_HOST}::${SYNC_ARCHIVE}/${release_file}" "${release_file}" || true
}

#############################################################
#                     Start Script                          #
#############################################################

#
# Check: Usage
#
# All six arguments are needed: with fewer, the archive/dists/sections/types
# values would be empty and the sync would run against bogus paths. The usage
# text goes to stderr, like the other error messages of the pre-flight checks.
if [ $# -lt 6 ] ; then
    cat >&2 <<EOF
Usage 0: $(basename "$0") <dir> <host> <archive> <dists> <sections> <types>
         <dir>     = Dest directory                    "/home/ftp/pub"
         <host>    = Rsync host                        "ftp.de.debian.org"
         <archive> = Rsync archive, subdir in <dir>    "debian", "debian-non-US"
         <dists>   = List. Distribution codenames.     "potato woody"
                     !The real, non-symlink codenames!
         <sections>= List. Sections.                   "main contrib non-free"
                     There are 6 possible.             "non-US/main non-US/contrib non-US/non-free"
         <types>   = List. Source+Architectures.       "source i386 m68k"
EOF
    exit 1
fi

#
# Check: User ID
#
# Only the user configured as SYNC_RUNUSER may run the sync; anybody else
# gets an error on stderr and exit status 1.
case "$(id --name --user)" in
    "${SYNC_RUNUSER}")
        ;;
    *)
        echo "Error: Must be run as user ${SYNC_RUNUSER}!" >&2
        exit 1
        ;;
esac

#
# Check: Lockfile
#
# The lock lives inside the destination directory, so that directory has to
# exist first; otherwise the very first run for a new archive fails here
# with a misleading "Lockfile exists!".
mkdir -p "${SYNC_DEST}"

# lockfile(1) from procmail: "-r 0" means no retries, "-l 43200" is the lock
# timeout in seconds (12 hours) after which an existing lock is considered
# stale -- see lockfile(1) for the exact semantics.
if ! lockfile -l 43200 -r 0 "${SYNC_DEST}/${SYNC_LOCKFILE}" ; then
    echo "Error mirroring ${SYNC_DEST}: Lockfile exists!" >&2
    exit 1
fi

#
# Everything's fine so far. Let's try it
#

# We need this minimal skeleton. The lock is already held at this point but
# the EXIT trap is not installed yet, so release the lock by hand if the
# preparation fails.
mkdir -p "${SYNC_DEST}/dists" "${SYNC_DEST}/pool" || { remlock; exit 1; }

# Now we do everything inside destination dir
cd "${SYNC_DEST}" || { remlock; exit 1; }

# Files created from here on are not writable by "other".
umask 002

# trap closeup _now_. It's the (textual) counterpart of "Starting New Mirror"
trap "sync_closeup" EXIT

echo_header 0 "Starting New Mirror Run"
echo "Start time : $(date)"
echo "Mirror host: $SYNC_HOST"
echo "Archive    : $SYNC_ARCHIVE"
echo "Dists      : $SYNC_DISTS"
echo "Sections   : $SYNC_SECTIONS"
echo "Types      : $SYNC_TYPES"
echo "Dest       : $SYNC_DEST"
echo


echo_header 1 "Phase I: Debian Global Files"
############################################

# Non-recursive: only the entries directly below dists/ (symlinks kept as
# symlinks); the contents of the dists are handled in phase II.
echo_header 2 "/dists/*. Ensures all dists dirs/symlinks are up-to date."
${RSYNC} --links "${SYNC_HOST}::${SYNC_ARCHIVE}/dists/*" dists/

# Non-recursive as well, therefore without --delete: current rsync refuses
# --delete unless --recursive or --dirs is given, and with "/*" sources it
# never removed anything here anyway.
echo_header 2 "Top level READMES etc"
${RSYNC} --links \
         --exclude "/ls-lR*" \
         "${SYNC_HOST}::${SYNC_ARCHIVE}/*" .

# doc/ and tools/ are optional: a failure (e.g. the archive has no such
# directory) must not stop the run.
echo_header 2 "Top level doc dir"
${RSYNC} --recursive --links --delete "${SYNC_HOST}::${SYNC_ARCHIVE}/doc/*" doc/ || true

echo_header 2 "Top level tools dir"
${RSYNC} --recursive --links --delete "${SYNC_HOST}::${SYNC_ARCHIVE}/tools/*" tools/ || true


echo_header 1 "Phase II: Selected Global Files"
###############################################

# these two are assembled here (by sync_packages_file), used in phase III
BINARY_PACKAGES_FILES=""
SOURCE_PACKAGES_FILES=""

# The three lists are whitespace separated and split on purpose.
for dist in ${SYNC_DISTS} ; do                           # potato woody ...
    for type in ${SYNC_TYPES} ; do                       # alpha i386 source ...

        echo_header 2 "Global Files For: ${dist}/${type}"

        # Files directly below dists/<dist>/; of the Contents-*.gz files only
        # the one for the current type is fetched. Non-recursive, therefore
        # without --delete: current rsync refuses --delete unless --recursive
        # or --dirs is given.
        echo_header 3 "Contents, Release, Changelog, etc"
        ${RSYNC} --links \
                 --include "Contents-${type}.gz" \
                 --exclude "Contents-*.gz" \
                 --include "*" \
                 "${SYNC_HOST}::${SYNC_ARCHIVE}/dists/${dist}/*" "dists/${dist}/"

        # Everything in this if/else is optional material, hence "|| true".
        if [ "${type}" = "source" ] ; then
            echo_header 3 "Proposed Updates"
            ${RSYNC} --recursive --links --delete \
                     --include "*.dsc" \
                     --include "*.diff.gz" \
                     --include "*.orig.tar.gz" \
                     --include "Sources.gz" \
                     --exclude "*" \
                     "${SYNC_HOST}::${SYNC_ARCHIVE}/dists/${dist}-proposed-updates/*" \
                     "dists/${dist}-proposed-updates/" || true
        else
            echo_header 3 "Upgrade Directory"
            main_path="dists/${dist}/main"
            mkdir -p "${main_path}" || true
            ${RSYNC} --recursive --links --delete \
                     "${SYNC_HOST}::${SYNC_ARCHIVE}/${main_path}/upgrade-${type}/*" \
                     "${main_path}/upgrade-${type}/" || true

            # ${DISK_IMAGES_EXCLUDES} holds several options in one string and
            # is split into words on purpose.
            echo_header 3 "Disk Images"
            ${RSYNC} --recursive --links --delete \
                     ${DISK_IMAGES_EXCLUDES} \
                     "${SYNC_HOST}::${SYNC_ARCHIVE}/${main_path}/disks-${type}/*" \
                     "${main_path}/disks-${type}/" || true

            echo_header 3 "Proposed Updates"
            ${RSYNC} --recursive --links --delete \
                     --include "*_all.deb" \
                     --include "*_${type}.deb" \
                     --include "Packages.gz" \
                     --exclude "*" \
                     "${SYNC_HOST}::${SYNC_ARCHIVE}/dists/${dist}-proposed-updates/*" \
                     "dists/${dist}-proposed-updates/" || true
        fi

        # The index files are mandatory: a failure here ends the run.
        echo_header 3 "Packages Files"
        for section in ${SYNC_SECTIONS} ; do              # main contrib non-free ...
            sync_packages_file "dists/${dist}/${section}" "${type}"
        done
    done
done


echo_header 1 "Phase III: Updating Packages"
############################################

echo "Using these packages files:"
echo "Source: ${SOURCE_PACKAGES_FILES}"
echo "Binary: ${BINARY_PACKAGES_FILES}"

# debug time
date

echo "Generating filelist..."

# ${pf} receives one archive-relative file name per line: first the
# "Filename:" entries of all binary indices, then <Directory>/<file name> for
# every entry in the "Files:" section of all source indices. The list is
# written by two pipelines instead of a shell loop per file name.
pf=.sync_pf
# Make sure the file is there && empty
rm -f "${pf}"
touch "${pf}"
{
    # The two *_PACKAGES_FILES lists are whitespace separated; they are left
    # unquoted on purpose so that xargs sees one file name per word.
    echo ${BINARY_PACKAGES_FILES} \
        | xargs -r zgrep -i '^Filename:' \
        | cut -d ' ' -f 2
    echo ${SOURCE_PACKAGES_FILES} \
        | xargs -r zcat \
        | awk '/^Directory:/ { D = $2 }
               /Files:/,/^$/ { if ($1 != "Files:" && $0 != "") print D "/" $3 }'
} | sed '/^$/d' >> "${pf}"

# TODO: fear rsync really needs these dirs to exist.
# TODO: What if the dir is actually a symlink, and does not exist yet?
# Every distinct parent directory is created once, in one batch, instead of
# running dirname + mkdir for each single file.
sed -n 's|/[^/]*$||p' "${pf}" | sort -u | xargs -r mkdir -p || true

# debug time
date

# sync only the files listed in ${pf}
# TODO: Using "--links": If filename is actually a symlink its not guaranteed that
# TODO: the actual file is really retrieved.
# TODO: Using "--copy-links": If filename is actually a symlink, you might download
# TODO: the same file several times (but it will work).
# TODO: afaik, there should not be any symlinks in Packages list files. In the future
# TODO: at least.
# NOTE(review): the rsync manual says the parent directories of an included
# path must be included as well, or the exclude "*" stops the recursion
# before the files are reached -- verify this call against the rsync version
# in use.
${RSYNC} --recursive --links \
         --include-from "${pf}" \
         --exclude "*" \
         "${SYNC_HOST}::${SYNC_ARCHIVE}/" .


echo_header 1 "Phase IV: Clean Up"

# TODO: This cleanup really is too complicated ;(. It only cleans up obsolete
#       Packages files.
# TODO: Can "rsync --recursive --delete ..." be used somehow only to DELETE
#       files not in the rsync hosts complete filelist?

# The deletion at the end of this phase is active, so say so.
echo "NOTE: Package files not listed in any local Packages/Sources file will be deleted."
echo

# debug time
date

# List 1: every file referenced by any Packages.gz/Sources.gz found below the
# current directory, as "./<path>", one per line.
echo "Building filelist from all existing packages files..."
pf_packages=.sync_pf_packages
# Make sure the file is there && empty
rm -f "${pf_packages}"
touch "${pf_packages}"
{
    find . -type f -name Packages.gz \
        | xargs -r zgrep -i '^Filename:' \
        | cut -d ' ' -f 2
    find . -type f -name Sources.gz \
        | xargs -r zcat \
        | awk '/^Directory:/ { D = $2 }
               /Files:/,/^$/ { if ($1 != "Files:" && $0 != "") print D "/" $3 }'
} | sed -n '/./s|^|./|p' >> "${pf_packages}"
echo "Sorting..."
# sort and comm must agree on the collation order, so both run in the C
# locale. "sort -o" may name its own input file, no temporary file is needed.
LC_ALL=C sort -o "${pf_packages}" "${pf_packages}"

# debug time
date

# List 2: every package-like file actually present.
echo "Building filelist of all present packages files..."
pf_installed=.sync_pf_installed
# Make sure the file is there && empty
rm -f "${pf_installed}"
touch "${pf_installed}"
# An incomplete listing only means fewer deletions, so a find error is not
# fatal.
find . \( -name "*.deb" -o -name "*.dsc" -o -name "*.diff.gz" -o -name "*.orig.tar.gz" \) \
    >> "${pf_installed}" || true
echo "Sorting..."
LC_ALL=C sort -o "${pf_installed}" "${pf_installed}"

# debug time
date

# List 3: present but not referenced = obsolete.
echo "Building filelist of obsolete packages files..."
pf_obsolete=.sync_pf_obsolete
# comm  |<Column
#       1: Only in file1
#       2: Only in file2
#       3: In file1 AND in file2
# Arg -n: suppresses column n.
LC_ALL=C comm -2 -3 "${pf_installed}" "${pf_packages}" > "${pf_obsolete}"

# debug time
date

#echo
#echo "These files seem to be obsolete (not deleting yet):"
#echo
#cat ${pf_obsolete}

# Use these lines if you are brave
echo
echo "Removing obsolete files:"
echo
if [ -s "${pf_packages}" ] ; then
    xargs -r rm -f -v < "${pf_obsolete}"
else
    # An empty reference list would declare every present package obsolete;
    # that looks like a broken run rather than a reason to empty the mirror.
    echo "Warning: no referenced files found, skipping removal." >&2
fi

#
# Clean Up Of Clean Up
#
# do not delete as long as we are testing
#rm -f ${pf} ${pf_packages} ${pf_installed} ${pf_obsolete}

#
# Well done.
#
# Replace the pessimistic default so the EXIT trap reports success, and leave
# a trace file containing the UTC date of this run.
SYNC_CLOSEUP_MESSAGE="Mirror Run Successfully Finished."
date -u > "${SYNC_TRACEFILE}${SYNC_HOST}"
exit 0
---

-- 
s-Stephan A Suerken <absurd@olurdix.de>
s-WWW http://www.fh-worms.de/~inf222
s-Debian-related mail: <absurd@debian.org>



Reply to: