* Joachim Breitner <nomeata@debian.org> [2015-06-28 10:29:23+0200] > Hi, > > Am Sonntag, den 28.06.2015, 05:26 +0300 schrieb Dmitry Bogatov: > > * Joachim Breitner <nomeata@debian.org> [2015-06-15 20:38:25+0200] > > > C. One big repo, haskell-foo’s debian/ files in haskell > > > -foo/debian/ > > > > > > Having one repo has various advantages: > > > [..] > > > But there are also disadvantages: > > > [..] > > > > Seems that most of us are prefering this variant. I am going to play > > with it during next 48 hours. Is it something to base on around or > > someone who already started hacking on it? > > I haven’t, if you want to give it a shot, that’d be great. I assume you > are talking about converting the darcs repos to a git repository. Take a look in /var/lib/gforge/chroot/home/users/kaction-guest/bare-united What do you think? It was generated by attached script (takes several hours and a lot of heat) -- Accept: text/plain, text/x-diff Accept-Language: eo,en,ru X-Keep-In-CC: yes
#!/bin/bash -eu
# Script converts all darcs repositories, used for individual packages by
# debian haskell group into single git repository. It has following
# implementation steps:
#
# * Download list of repositories. See `list_darcs_repos' function and
# `DARCS_REPOS_LISTFILE' variable.
# * Clone every darcs repository. If it is already cloned, bring it up to date.
# See `clone_or_update_darcs_repos' function and `DARCS_LOCAL_REPOS_DIR'
# variable.
# * Convert every darcs repository into a git one with the `darcs-to-git'
# script. See the `convert_darcs_to_git_repo' and `convert_darcs_to_git_repos'
# functions and the `GIT_LOCAL_REPOS_DIR' variable.
# * Convert every git repository into a set of patches, renaming the patches
# so that they sort by date. Every patch carries extra information
# about its tag and repository.
# * Unite them with `git-am`. See `assemble_united_git_repository' function.
#
# The script makes many separate calls to the `ssh' command. The following lines
# in your `~/.ssh/config' will improve performance.
#
# Host *
# ControlPath ~/.ssh/master-%l-%r@%h:%p
# ControlMaster auto
# ControlPersist 100
#
# All in all, I would suggest to run this script nightly. There should not be any
# surprise, but I can't prove it.
#
# If global variable is exported, it means it is used in some function
# called by GNU Parallel.
## Name of the file holding the list of darcs repositories that are going
## to be merged into the big git repository. The list is produced by the
## `list_darcs_repos' function. This is the only setting not exported.
DARCS_REPOS_LISTFILE='repolist.darcs'

## Host serving the darcs repositories. Passwordless ssh access to this
## host is assumed.
DARCS_REPOS_HOST='darcs.debian.org'

## Directory on `DARCS_REPOS_HOST' that contains all the darcs repositories
## of interest.
DARCS_REPOS_DIR='/darcs/pkg-haskell'

## Local directory holding the cloned darcs repositories. Keeping the clones
## around greatly improves performance.
DARCS_LOCAL_REPOS_DIR='darcs'

## Local directory receiving the converted git repositories.
GIT_LOCAL_REPOS_DIR='git'

## Local directory receiving the patches that represent the git repositories.
## It has one subdirectory per repository, so that converting git
## repositories into patches can be parallelised.
PATCHES_DIR='patches'

## Git repository that absorbs all the patches.
UNITED_GIT_REPO='united-git-repository'

## Put the settings above (except the list file) into the environment.
export DARCS_REPOS_HOST DARCS_REPOS_DIR DARCS_LOCAL_REPOS_DIR \
    GIT_LOCAL_REPOS_DIR PATCHES_DIR UNITED_GIT_REPO
### list_darcs_repos
## Print the names of the darcs repositories found in the `DARCS_REPOS_DIR'
## directory on the `DARCS_REPOS_HOST' host to stdout, one per line.
## An entry is considered a darcs repository if it contains a
## `_darcs/inventories' subdirectory; names containing `DELETE' are
## filtered out.
##
## The listing script is streamed to the remote shell on its stdin
## (`bash -s'), so no temporary file is created locally or on the remote
## host and a single ssh connection is enough.
##
## Returns the exit status of the final `grep', i.e. non-zero when no
## repository name was printed.
list_darcs_repos () {
    # Unquoted here-document: $DARCS_REPOS_DIR is expanded locally, while
    # the backslash-escaped expansions are evaluated by the remote shell.
    # `dotglob' keeps hidden entries in the listing, `nullglob' makes an
    # empty directory produce no output instead of a literal pattern.
    ssh "$DARCS_REPOS_HOST" /bin/bash -s << EOF | grep -v DELETE
shopt -s nullglob dotglob
for dir in "$DARCS_REPOS_DIR"/* ; do
    if [[ -d "\$dir/_darcs/inventories" ]] ; then
        echo "\${dir##*/}"
    fi
done
EOF
}
### clone_or_update_darcs_repo NAME
## Clone repository NAME from directory `DARCS_REPOS_DIR' on host
## `DARCS_REPOS_HOST' into `DARCS_LOCAL_REPOS_DIR'. If a directory with
## that name is already there, pull into it instead.
##
## Afterwards every file outside `_darcs' is deleted and `darcs revert -a'
## is run in the repository.
##
## The work is done in a subshell, so the caller's working directory is
## never changed. Returns non-zero if a directory cannot be entered or if
## the pull/clone fails; in those cases nothing is deleted.
clone_or_update_darcs_repo () {
    local repo=$1
    (
        cd "$DARCS_LOCAL_REPOS_DIR" || exit 1
        if [[ -d "$repo" ]] ; then
            darcs pull -q --all --repodir="$repo" || exit 1
        else
            darcs clone -q "$DARCS_REPOS_HOST:$DARCS_REPOS_DIR/$repo" --complete || exit 1
        fi
        ## This is a strange hack to work around mysterious files appearing
        ## in some repositories.
        ## FIXME: Remove this hack and try again.
        ##
        ## The `cd' must succeed before the destructive `find' runs:
        ## otherwise it would delete the working files of every other
        ## repository in the current directory.
        cd "$repo" || exit 1
        find . ! -regex ".*/_darcs.*" -delete
        darcs revert -a > /dev/null
    )
}
## This way we can pass it to GNU Parallel.
export -f clone_or_update_darcs_repo
### clone_or_update_darcs_repos
## Make sure `DARCS_LOCAL_REPOS_DIR' exists, then let GNU Parallel run
## `clone_or_update_darcs_repo' once for every repository name read from
## the file `DARCS_REPOS_LISTFILE'.
##
## Expect this to take a long time.
##
## Sequential equivalent, kept for reference:
##   for dir in $(cat "$DARCS_REPOS_LISTFILE") ; do
##       clone_or_update_darcs_repo "$dir"
##   done
clone_or_update_darcs_repos () {
    mkdir -p "$DARCS_LOCAL_REPOS_DIR"
    < "$DARCS_REPOS_LISTFILE" parallel clone_or_update_darcs_repo
}
### check_for_required_tools
## Check for the presence of all required tools: darcs (version 2.10 or
## newer), ruby, git, parallel and formail. Progress is printed to stdout.
## Returns 1 if any of them is missing or if darcs is too old.
##
## As an exception, when `darcs-to-git' is the only missing dependency it
## is cloned into the current directory. `$PWD/darcs-to-git' is prepended
## to PATH in either case. Returns 1 if that download fails.
check_for_required_tools () {
    local retval=0
    local tool version major minor

    printf 'Checking for darcs >= 2.10... '
    if command -v darcs > /dev/null ; then
        version=$(darcs --version 2> /dev/null | head -n 1)
        # Compare major/minor numerically: matching the substring "2.10"
        # would reject every newer release (2.12, 2.14, ...) as too old.
        if [[ "$version" =~ ([0-9]+)\.([0-9]+) ]] ; then
            # Force base 10 so that a leading zero is not read as octal.
            major=$((10#${BASH_REMATCH[1]}))
            minor=$((10#${BASH_REMATCH[2]}))
            if (( major > 2 || (major == 2 && minor >= 10) )) ; then
                echo ok
            else
                echo too old
                retval=1
            fi
        else
            echo unrecognized version
            retval=1
        fi
    else
        echo missing
        retval=1
    fi

    for tool in ruby git parallel formail ; do
        printf '%s' "Checking for $tool..."
        if command -v "$tool" > /dev/null ; then
            echo ok
        else
            echo missing
            retval=1
        fi
    done

    if [[ $retval = 1 ]] ; then
        echo "ERROR: some dependencies are missing" >&2
        return 1
    fi

    printf '%s' "Checking for darcs-to-git..."
    export PATH="$PWD/darcs-to-git:$PATH"
    if command -v darcs-to-git > /dev/null ; then
        echo ok
    # https:// is used because GitHub no longer serves the unauthenticated
    # git:// protocol.
    elif git clone https://github.com/kaction/darcs-to-git ; then
        echo 'ok (downloaded)'
    else
        echo "ERROR: failed to download darcs-to-git" >&2
        return 1
    fi
}
### convert_darcs_to_git_repo NAME
## Convert the darcs repository NAME, already present in
## `DARCS_LOCAL_REPOS_DIR', into a git repository with the same name in
## `GIT_LOCAL_REPOS_DIR', using `darcs-to-git --clean-commit-messages'.
##
## The output of `darcs-to-git' is captured in a temporary file, which is
## printed only when the conversion fails and is removed in every case.
##
## On failure an error is printed to stderr and the shell exits with
## status 2. The function changes the working directory.
convert_darcs_to_git_repo () {
    local name=$1
    local darcs_abs_path git_path tmpfile

    # Resolve the darcs path before changing directory: the configured
    # directories may be relative.
    darcs_abs_path=$(readlink -f "$DARCS_LOCAL_REPOS_DIR/$name")
    git_path="$GIT_LOCAL_REPOS_DIR/$name"
    mkdir -p "$git_path"
    # Never run the conversion in the wrong directory.
    if ! cd "$git_path" ; then
        echo >&2 "ERROR: converting $name"
        exit 2
    fi

    tmpfile=$(mktemp)
    if ! darcs-to-git --clean-commit-messages "$darcs_abs_path" &> "$tmpfile" ; then
        echo >&2 "ERROR: converting $name"
        cat "$tmpfile"
        rm -f "$tmpfile"
        exit 2
    fi
    rm -f "$tmpfile"
}
## This function will only be called by GNU Parallel, so
## it does not matter how it changes the working directory.
export -f convert_darcs_to_git_repo
### convert_darcs_to_git_repos
## Make sure `GIT_LOCAL_REPOS_DIR' exists, then let GNU Parallel run
## `convert_darcs_to_git_repo' once for every repository name read from
## `DARCS_REPOS_LISTFILE'. The darcs repositories are assumed to be present
## and up to date in `DARCS_LOCAL_REPOS_DIR'.
convert_darcs_to_git_repos () {
    mkdir -p "$GIT_LOCAL_REPOS_DIR"
    < "$DARCS_REPOS_LISTFILE" parallel convert_darcs_to_git_repo
}
### git_commit_hash [FILENAME]
## Print the second space-separated field of the first line of FILENAME,
## or of stdin when FILENAME is missing or empty. For a patch produced by
## `git-format-patch(1)' that field is the commit hash.
git_commit_hash () {
    local patch_source
    if [[ -n "${1:-}" ]] ; then
        patch_source=$1
    else
        patch_source=/dev/stdin
    fi
    head -n 1 "$patch_source" | cut -f 2 -d ' '
}
export -f git_commit_hash
### convert_git_repo_into_patches NAME
## Convert the git repository NAME in `GIT_LOCAL_REPOS_DIR' into a set of
## patches stored in the `PATCHES_DIR'/NAME directory. Every patch gets an
## extra `X-Repo' header and, if a tag points at its commit, an `X-Tag'
## header; the Subject is rewritten to mention NAME after the first `]'.
##
## Returns 1 if the git repository cannot be entered. The function changes
## the working directory. Temporary files are removed before returning.
convert_git_repo_into_patches () {
    local name=$1
    local patchdir tagsdir tmpfile tag patch commit_hash subject

    patchdir="$PATCHES_DIR/$name"
    mkdir -p "$patchdir"
    # Make the path absolute: it is used after the `cd' below.
    patchdir=$(readlink -f "$patchdir")
    # Never run `git format-patch' in whatever directory we happen to be in.
    cd "$GIT_LOCAL_REPOS_DIR/$name" || return 1

    tagsdir=$(mktemp -d)
    tmpfile=$(mktemp)
    git format-patch --root --output-directory "$patchdir" > /dev/null

    ## Now we must attach tag information to the generated patches.
    ## First, create one file per tagged commit in `tagsdir': the file
    ## name is the commit hash, the content is the tag name.
    for tag in $(git tag) ; do
        commit_hash=$(git format-patch -1 --stdout "$tag" | git_commit_hash)
        echo "$tag" > "$tagsdir/$commit_hash"
    done

    ## Now, for each patch in `patchdir', add the X-Tag header if a tag
    ## file matches its commit hash, then add X-Repo and rewrite Subject.
    for patch in "$patchdir"/* ; do
        # With no patches the glob stays literal; skip it instead of
        # creating a file named `*'.
        [[ -f "$patch" ]] || continue
        commit_hash=$(git_commit_hash "$patch")
        if [[ -f "$tagsdir/$commit_hash" ]] ; then
            tag=$(cat "$tagsdir/$commit_hash")
            formail -I"X-Tag: ${tag}" < "$patch" > "$tmpfile"
            mv "$tmpfile" "$patch"
        fi
        subject=$(formail -x Subject < "$patch" | sed "s#\]#] $name:#")
        formail -I"X-Repo: $name" -I"Subject: $subject" < "$patch" > "$tmpfile"
        mv -f "$tmpfile" "$patch"
    done
    rm -fr "$tagsdir" "$tmpfile"
}
export -f convert_git_repo_into_patches
### convert_git_repos_into_patches
## Let GNU Parallel run `convert_git_repo_into_patches' once for every
## repository name read from `DARCS_REPOS_LISTFILE'.
convert_git_repos_into_patches () {
    < "$DARCS_REPOS_LISTFILE" parallel convert_git_repo_into_patches
}
### order_patches
## Copy the patches from the per-repository subdirectories of `PATCHES_DIR'
## into `PATCHES_DIR' itself, naming each copy EPOCH.SUFFIX.patch so that
## a version sort of the names gives the order in which to apply them.
## Existing `PATCHES_DIR'/*.patch files are removed first.
##
## For every repository listed in `DARCS_REPOS_LISTFILE' the patches are
## walked from the last one to the first one:
##   * EPOCH is normally the patch's Date header as seconds since epoch.
##   * Once a patch is found to be dated later than the patch walked just
##     before it ("misorder"), it and all remaining patches of that
##     repository are named after the last well-ordered epoch minus a
##     growing offset.
##   * SUFFIX starts at 999 and is decremented until the name is free.
## Each copy also gets an `X-File' header naming the original patch file.
##
## Returns 1 if the Date header of a patch is missing or cannot be parsed.
order_patches () {
    local repo patch patch_date patch_epoch prev_date prev_epoch
    local misorder misorder_offset suffix newname i
    local -a patches

    rm -f "$PATCHES_DIR"/*.patch
    # The list is read on fd 3 so that nothing in the loop body can
    # consume it through stdin.
    while IFS= read -r repo <&3 || [[ -n "$repo" ]] ; do
        [[ -n "$repo" ]] || continue
        prev_date=
        prev_epoch=
        misorder=no
        misorder_offset=1
        patches=("$PATCHES_DIR/$repo"/*.patch)
        # Walk the sorted glob result backwards: last patch first.
        for (( i = ${#patches[@]} - 1; i >= 0; i-- )) ; do
            patch=${patches[i]}
            # An unmatched glob stays literal; skip it.
            [[ -f "$patch" ]] || continue
            patch_date=$(formail -x Date < "$patch") || patch_date=
            # An empty date string would be silently read as "today".
            if [[ -z "$patch_date" ]] \
                || ! patch_epoch=$(date -d "$patch_date" +%s) ; then
                echo >&2 "ERROR: cannot parse date '$patch_date' of $patch"
                return 1
            fi
            if [[ "$misorder" = no && -n "$prev_epoch" && "$patch_epoch" -gt "$prev_epoch" ]] ; then
                misorder=yes
                echo "Misorder in $repo: $patch_date > $prev_date"
            elif [[ $misorder = no ]] ; then
                prev_epoch="$patch_epoch"
                prev_date="$patch_date"
            fi
            ## We hope that no more than 999 patches have the same commit
            ## time. Otherwise, just increase this number.
            suffix=999
            if [[ $misorder = yes ]] ; then
                newname="$((prev_epoch - misorder_offset))"
                misorder_offset=$((misorder_offset + 1))
            else
                newname="$patch_epoch"
            fi
            while [[ -f "$PATCHES_DIR/$newname.$suffix.patch" ]] ; do
                suffix=$((suffix - 1))
            done
            ## Extra precautions for patches containing header-like lines
            ## in the commit message. Extra headers confuse git, so they
            ## are massaged a bit by replacing the colon with %.
            formail -I"X-File: $patch" < "$patch" \
                | sed -r \
                    -e 's/^(From|To):([^@<?>]*)$/\1%\2/' \
                    -e 's/^(Date):([^+]*)$/\1%\2/' \
                    -e 's/^(Subject):([^]]*)$/\1%\2/' \
                > "$PATCHES_DIR/$newname.$suffix.patch"
        done
    done 3< "$DARCS_REPOS_LISTFILE"
}
### trim
## Filter stdin to stdout, dropping the run of space characters at the
## start and at the end of every line. Only the space character is
## matched; other whitespace such as tabs is left alone.
trim () {
    sed -r 's/^ +//; s/ +$//'
}
### _git_am DEBIAN_DIR REPO
## Wrapper around git-am with all required options set. The patch is read
## from stdin and applied below DEBIAN_DIR, using the author date as the
## committer date. Returns the exit status of `git am'.
##
## Whitespace handling is the painful part: with `--whitespace=fix' some
## repositories no longer apply, while others do not apply without it.
## Since whitespace errors are a HUGE problem, `--whitespace=fix' is the
## default, and the repositories found by trial and error to break with
## it are listed below and applied without it.
##
## For sure `c2hs' breaks with fix, and `yi' breaks without.
_git_am () {
    local target_dir=$1
    local package=$2
    local -a am_opts=(--directory "$target_dir" --committer-date-is-author-date)

    case "$package" in
        c2hs | \
        haskell-regex-base | \
        haskell-regex-posix | \
        haskell-http | \
        haskell-regex-compat | \
        uuagc | \
        haskell-yesod-core)
            # Known to break with --whitespace=fix.
            ;;
        *)
            am_opts+=(--whitespace=fix)
            ;;
    esac
    git am "${am_opts[@]}"
}
### assemble_united_git_repository
## Assemble the git repository in `UNITED_GIT_REPO' from the *.patch files
## in `PATCHES_DIR', applied in version-sort order of their names.
##
## Each patch is applied with `_git_am' below p/REPO/debian, where REPO is
## taken from the patch's `X-Repo' header. If the patch has an `X-Tag'
## header, the resulting commit is tagged REPO_TAG. Progress is reported
## on stderr. Commits are made under a fixed committer identity.
##
## Returns 1 if `UNITED_GIT_REPO' cannot be entered. If a patch fails to
## apply, the patch and the git-am output are printed to stderr and the
## shell exits with status 4. The temporary error file is removed on both
## the success and the failure path.
assemble_united_git_repository () {
    local errfile patches_dir patch repo tag file debian_dir
    local patch_ix=0
    local patches_count=0
    local -a patches=()

    # Absolute path: the patches are read after the `cd' below.
    patches_dir=$(readlink -f "$PATCHES_DIR")
    # Collect the patch list once, one path per line, so that names are
    # never word-split.
    while IFS= read -r patch ; do
        patches[patches_count]=$patch
        patches_count=$((patches_count + 1))
    done < <(
        for patch in "$patches_dir"/*.patch ; do
            if [[ -f "$patch" ]] ; then
                printf '%s\n' "$patch"
            fi
        done | sort -V
    )

    mkdir -p "$UNITED_GIT_REPO"
    cd "$UNITED_GIT_REPO" || return 1
    git init
    git config user.name "Joachim Breitner"
    git config user.email "mail@joachim-breitner.de"

    errfile=$(mktemp)
    while (( patch_ix < patches_count )) ; do
        patch=${patches[patch_ix]}
        patch_ix=$((patch_ix + 1))
        repo=$(formail -xX-Repo < "$patch" | trim)
        tag=$(formail -xX-Tag < "$patch" | trim)
        file=$(formail -xX-File < "$patch" | trim)
        debian_dir="p/$repo/debian"
        mkdir -p "$debian_dir"
        echo >&2 "($patch_ix/$patches_count) $repo: $file"
        if ! _git_am "$debian_dir" "$repo" < "$patch" &> "$errfile" ; then
            echo >&2 "ERROR: Failed to apply patch $patch"
            cat "$patch" >&2
            echo >&2 "GIT_AM ERROR:"
            cat "$errfile" >&2
            rm -f "$errfile"
            exit 4
        fi
        if [[ -n "$tag" ]] ; then
            git tag "${repo}_${tag}"
        fi
    done
    rm -f "$errfile"
    cd -
}
## Actual actions and function calls start here.
check_for_required_tools

## The list of darcs repositories is generated only once. If a new
## repository is added, force regeneration by removing the
## `DARCS_REPOS_LISTFILE' file.
printf '%s\n' "Fetching list of darcs repositories... $(date)"
if [[ ! -f "$DARCS_REPOS_LISTFILE" ]] ; then
    list_darcs_repos > "$DARCS_REPOS_LISTFILE"
fi

## NOTE: the four steps below are announced but their calls are commented
## out in this copy of the script; only the final assembly step runs.
printf '%s\n' "Cloning/updating darcs repositories... $(date)"
# clone_or_update_darcs_repos

printf '%s\n' "Converting darcs repositories into git... $(date)"
# convert_darcs_to_git_repos

printf '%s\n' "Convering git repositories into patches... $(date)"
# convert_git_repos_into_patches

printf '%s\n' "Ordering patches... $(date)"
# order_patches

## The united repository is always rebuilt from scratch.
printf '%s\n' "Assembling git repository... $(date)"
rm -fr "$UNITED_GIT_REPO"
assemble_united_git_repository

printf '%s\n' "Done. $(date)"
Attachment:
pgpifP3Fy0ien.pgp
Description: PGP signature