* Joachim Breitner <nomeata@debian.org> [2015-06-28 10:29:23+0200]
> Hi,
>
> Am Sonntag, den 28.06.2015, 05:26 +0300 schrieb Dmitry Bogatov:
> > * Joachim Breitner <nomeata@debian.org> [2015-06-15 20:38:25+0200]
> > > C. One big repo, haskell-foo’s debian/ files in haskell
> > > -foo/debian/
> > >
> > > Having one repo has various advantages:
> > > [..]
> > > But there are also disadvantages:
> > > [..]
> >
> > Seems that most of us are preferring this variant. I am going to play
> > with it during the next 48 hours. Is it something to base on around or
> > someone who already started hacking on it?
>
> I haven’t, if you want to give it a shot, that’d be great. I assume you
> are talking about converting the darcs repos to a git repository.

Take a look in /var/lib/gforge/chroot/home/users/kaction-guest/bare-united
What do you think?

It was generated by the attached script (takes several hours and a lot of heat)

--
Accept: text/plain, text/x-diff
Accept-Language: eo,en,ru
X-Keep-In-CC: yes
#!/bin/bash
# Strict mode is switched on with `set' rather than with shebang options, so
# it is also in effect when the script is started as `bash script.sh'.
set -eu

# Script converts all darcs repositories, used for individual packages by
# debian haskell group into single git repository. It has following
# implementation steps:
#
#  * Download list of repositories. See `list_darcs_repos' function and
#    `DARCS_REPOS_LISTFILE' variable.
#  * Clone every darcs repository. If it is already cloned, bring it up to
#    date. See `clone_or_update_darcs_repos' function and
#    `DARCS_LOCAL_REPOS_DIR' variable.
#  * With `darcs-to-git' script convert every darcs repository into git one.
#    See `convert_darcs_to_git_repo' and `convert_darcs_to_git_repos'
#    functions and `GIT_LOCAL_REPOS_DIR' variable.
#  * Convert every git repository into set of patches, renaming patches to
#    make them be sorted by date. Every patch contains extra information
#    about its tag and repository.
#  * Unite them with `git-am'. See `assemble_united_git_repository' function.
#
# Script performs many separate calls to `ssh' command. The following lines
# in your `~/.ssh/config' will improve performance.
#
#    Host *
#        ControlPath ~/.ssh/master-%l-%r@%h:%p
#        ControlMaster auto
#        ControlPersist 100
#
# All in all, I would suggest to run this script nightly. There should not be
# any surprise, but I can't prove it.
#
# If global variable is exported, it means it is used in some function
# called by GNU Parallel.

## This file contains list of darcs repositories we are going to
## incorporate into big git repository. This list is generated by
## `list_darcs_repos' function.
DARCS_REPOS_LISTFILE="repolist.darcs"

## Host where darcs repositories are stored. You are assumed to
## have passwordless ssh access to this host.
export DARCS_REPOS_HOST="darcs.debian.org"

## Directory on `DARCS_REPOS_HOST', containing all darcs repositories
## we are interested in.
export DARCS_REPOS_DIR="/darcs/pkg-haskell"

## Directory where all cloned darcs repositories are stored. It greatly
## improves performance.
export DARCS_LOCAL_REPOS_DIR="darcs"

## Directory where all converted git repositories will be stored.
export GIT_LOCAL_REPOS_DIR="git"

## Directory, where all patches, representing git repositories will be stored.
## It contains subdirectories for every repository, to make converting from
## git repository to patches parallelizable.
export PATCHES_DIR="patches"

## Git repository, that will absorb all patches.
export UNITED_GIT_REPO="united-git-repository"

### list_darcs_repos
## Print darcs repositories in `DARCS_REPOS_DIR' directory on
## `DARCS_REPOS_HOST' host to the stdout, one name per line. Directory is
## assumed to be valid darcs repository, if it contains `_darcs/inventories'
## subdirectory and does not contain `DELETE' in its name.
##
## Returns non-zero if `ssh' fails or if no repository is found.
list_darcs_repos () {
    local listing

    ## The helper script is fed to the remote bash on stdin, so no temporary
    ## file has to be created here or copied to the remote host.
    ## `$DARCS_REPOS_DIR' is expanded locally; everything escaped with a
    ## backslash is expanded by the remote shell. `dotglob' keeps hidden
    ## directories in the listing.
    listing=$(ssh "$DARCS_REPOS_HOST" /bin/bash -s << EOF
shopt -s dotglob nullglob
for dir in "$DARCS_REPOS_DIR"/* ; do
    if [[ -d "\$dir/_darcs/inventories" ]] ; then
        echo "\${dir##*/}"
    fi
done
EOF
    ) || return

    [[ -n "$listing" ]] || return 1
    grep -v DELETE <<< "$listing"
}

### clone_or_update_darcs_repo NAME
## Clone repository with NAME from host `DARCS_REPOS_HOST'
## in directory `DARCS_REPOS_DIR' into `DARCS_LOCAL_REPOS_DIR'.
## If it is already there, update it.
##
## All work is done in a subshell, so working directory of the caller is
## left untouched. Returns non-zero if repository can not be pulled, cloned
## or entered.
clone_or_update_darcs_repo () {
    local repo=$1

    (
        cd "$DARCS_LOCAL_REPOS_DIR" || exit
        if [[ -d "$repo" ]] ; then
            darcs pull -q --all --repodir="$repo" || exit
        else
            darcs clone -q "$DARCS_REPOS_HOST:$DARCS_REPOS_DIR/$repo" --complete || exit
        fi

        ## This is strange hack to workaround mysterious files, appearing in
        ## some repositories.
        ## FIXME: Remove this hack and try again.
        ##
        ## The `cd' MUST be checked: if it fails, `find -delete' below would
        ## wipe working files of every other repository instead.
        cd "$repo" || exit
        find . ! -regex ".*/_darcs.*" -delete
        ## Exit status of `darcs revert' is deliberately ignored, as before.
        darcs revert -a > /dev/null || true
    )
}

## This way we can pass it to GNU Parallel.
export -f clone_or_update_darcs_repo

### clone_or_update_darcs_repos
## Call `clone_or_update_darcs_repo' function for every repository
## name in file `DARCS_REPOS_LISTFILE'
##
## It will take a lot of time.
clone_or_update_darcs_repos () {
    mkdir -p "$DARCS_LOCAL_REPOS_DIR"
    parallel clone_or_update_darcs_repo < "$DARCS_REPOS_LISTFILE"
    # for dir in $(cat "$DARCS_REPOS_LISTFILE") ; do
    #     clone_or_update_darcs_repo "$dir"
    # done
}

### check_for_required_tools
## Check for presence of all tools required. As exception, if it
## is the only missing dependency, download `darcs-to-git' script
## and add it to path.
##
## Returns 1 if darcs is missing or older than 2.10, or if any of
## `ruby', `git', `parallel', `formail' is missing. Prepends
## `$PWD/darcs-to-git' to exported `PATH'.
check_for_required_tools () {
    local retval=0
    local tool version_output major minor
    local darcs_ok=no
    local version_re='([0-9]+)\.([0-9]+)'

    echo -n "Checking for darcs >= 2.10... "
    if command -v darcs > /dev/null ; then
        version_output=$(darcs --version) || version_output=
        ## Compare major and minor numbers numerically: matching the text
        ## "2.10" would reject every newer release. `10#' forces base ten,
        ## so numbers with leading zero are not taken as octal.
        if [[ "$version_output" =~ $version_re ]] ; then
            major=$((10#${BASH_REMATCH[1]}))
            minor=$((10#${BASH_REMATCH[2]}))
            if (( major > 2 || (major == 2 && minor >= 10) )) ; then
                darcs_ok=yes
            fi
        fi
        if [[ "$darcs_ok" = yes ]] ; then
            echo ok
        else
            echo too old
            retval=1
        fi
    else
        echo missing
        retval=1
    fi

    for tool in ruby git parallel formail ; do
        echo -n "Checking for $tool..."
        if command -v "$tool" > /dev/null ; then
            echo ok
        else
            echo missing
            retval=1
        fi
    done

    if [[ $retval = 1 ]] ; then
        echo "ERROR: some dependencies are missing" >&2
        return 1
    fi

    echo -n "Checking for darcs-to-git..."
    export PATH="$PWD/darcs-to-git:$PATH"
    if command -v darcs-to-git > /dev/null ; then
        echo ok
    else
        ## GitHub no longer serves unauthenticated git:// protocol.
        git clone https://github.com/kaction/darcs-to-git
        echo 'ok (downloaded)'
    fi
}

### convert_darcs_to_git_repo NAME
## Convert darcs repository with NAME, already present in `DARCS_LOCAL_REPOS_DIR'
## into git repository with same name in `GIT_LOCAL_REPOS_DIR'.
##
## Changes working directory. On failure prints output of `darcs-to-git'
## and exits the shell with status 2.
convert_darcs_to_git_repo () {
    local name=$1
    local git_path="$GIT_LOCAL_REPOS_DIR/$name"
    local darcs_abs_path logfile

    ## Path must be absolute, since working directory is changed below.
    darcs_abs_path=$(readlink -f "$DARCS_LOCAL_REPOS_DIR/$name")

    mkdir -p "$git_path"
    ## Without this check failed `cd' would make `darcs-to-git' convert
    ## repository right into current directory.
    if ! cd "$git_path" ; then
        echo >&2 "ERROR: converting $name"
        exit 2
    fi

    logfile=$(mktemp)
    if ! darcs-to-git --clean-commit-messages "$darcs_abs_path" &> "$logfile" ; then
        echo >&2 "ERROR: converting $name"
        cat "$logfile"
        rm -f "$logfile"
        exit 2
    fi
    ## Log is of interest only on failure; do not leave it behind.
    rm -f "$logfile"
}

## This function will only be called by GNU Parallel, so
## it does not matter how it changes working directory.
export -f convert_darcs_to_git_repo

### convert_darcs_to_git_repos
## Convert every darcs repository listed in `DARCS_REPOS_LISTFILE', which
## are assumed to be already present and up-to-date in `DARCS_LOCAL_REPOS_DIR'
## into git repositories in `GIT_LOCAL_REPOS_DIR'.
convert_darcs_to_git_repos () {
    mkdir -p "$GIT_LOCAL_REPOS_DIR"
    parallel convert_darcs_to_git_repo < "$DARCS_REPOS_LISTFILE"
}

### git_commit_hash [FILENAME]
## Parse git patch, generated by `git-format-patch(1)' from FILENAME or stdin,
## if no FILENAME is specified and output commit hash on stdout. Hash is
## taken as second space-separated field of the first line.
git_commit_hash () {
    local filename=${1:-/dev/stdin}
    head -n1 "$filename" | cut -d' ' -f 2
}
export -f git_commit_hash

### convert_git_repo_into_patches NAME
## Convert git repository with NAME into set of patches,
## storing them in `PATCHES_DIR'/NAME directory, with
## extra headers X-Repo and, possibly, X-Tag.
##
## Changes working directory. Returns 1 if git repository can not be entered.
convert_git_repo_into_patches () {
    local name=$1
    local patchdir="$PATCHES_DIR/$name"
    local tagsdir tmpfile tag patch commit_hash subject

    mkdir -p "$patchdir"
    ## Path must be absolute, since working directory is changed below.
    patchdir=$(readlink -f "$patchdir")
    tagsdir=$(mktemp -d)
    tmpfile=$(mktemp)

    if ! cd "$GIT_LOCAL_REPOS_DIR/$name" ; then
        rm -fr "$tagsdir" "$tmpfile"
        return 1
    fi
    git format-patch --root --output-directory "$patchdir" > /dev/null

    ## Now we must attach tags information to generated patches.
    ## First, we create file with names as hashes and contents as
    ## tag names in `tagsdir'.
    while IFS= read -r tag ; do
        commit_hash=$(git format-patch -1 --stdout "$tag" | git_commit_hash)
        [[ -n "$commit_hash" ]] || continue
        echo "$tag" > "$tagsdir/$commit_hash"
    done < <(git tag)

    ## Now, for each patch in `patchdir' we rewrite Subject: field to mention
    ## repository name and add X-Tag header, if it has corresponding tag.
    for patch in "$patchdir"/* ; do
        ## Glob stays unexpanded, if there are no patches at all.
        [[ -f "$patch" ]] || continue
        commit_hash=$(git_commit_hash "$patch")
        if [[ -f "$tagsdir/$commit_hash" ]] ; then
            tag=$(< "$tagsdir/$commit_hash")
            formail -I"X-Tag: ${tag}" < "$patch" > "$tmpfile"
            mv "$tmpfile" "$patch"
        fi
        ## Rewrite Subject and add X-Repo header
        subject=$(formail -x Subject < "$patch" | sed "s#\]#] $name:#")
        formail -I"X-Repo: $name" -I"Subject: $subject" < "$patch" > "$tmpfile"
        mv -f "$tmpfile" "$patch"
    done

    rm -fr "$tagsdir" "$tmpfile"
}
export -f convert_git_repo_into_patches

### convert_git_repos_into_patches
## Call `convert_git_repo_into_patches' for every repository, listed
## in `DARCS_REPOS_LISTFILE'.
convert_git_repos_into_patches () {
    parallel convert_git_repo_into_patches < "$DARCS_REPOS_LISTFILE"
}

### order_patches
## Copy patches from subdirectories of `PATCHES_DIR' into `PATCHES_DIR',
## renaming according to date and their apply order. New name is
## EPOCH.SUFFIX.patch.
##
## Patches of every repository are visited from last to first. As long as
## dates do not grow, patch is named after its own date. Once patch, newer
## than the one visited before, is met ("misorder"), it and all remaining
## patches of this repository get names just below last in-order date.
order_patches () {
    local repo patch patch_date patch_epoch newname suffix ix
    local prev_date prev_epoch misorder misorder_offset
    local -a repo_patches

    rm -f "$PATCHES_DIR"/*.patch

    while IFS= read -r repo ; do
        [[ -n "$repo" ]] || continue
        prev_date=
        prev_epoch=
        misorder=no
        misorder_offset=1

        ## Glob result is sorted; walk it backwards instead of parsing `ls -r'.
        repo_patches=("$PATCHES_DIR/$repo"/*.patch)
        for (( ix = ${#repo_patches[@]} - 1 ; ix >= 0 ; ix-- )) ; do
            patch=${repo_patches[ix]}
            ## Glob stays unexpanded, if repository has no patches.
            [[ -f "$patch" ]] || continue

            patch_date=$(formail -x Date < "$patch")
            patch_epoch=$(date -d "$patch_date" +%s)
            if [[ "$misorder" = no && -n "$prev_epoch" && "$patch_epoch" -gt "$prev_epoch" ]] ; then
                misorder=yes
                echo "Misorder in $repo: $patch_date > $prev_date"
            elif [[ $misorder = no ]] ; then
                prev_epoch="$patch_epoch"
                prev_date="$patch_date"
            fi

            ## We hope, that no more than 999 patches have same commit time.
            ## Otherwise, just increase this number
            suffix=999
            if [[ $misorder = yes ]] ; then
                newname="$((prev_epoch - misorder_offset))"
                misorder_offset=$((misorder_offset + 1))
            else
                newname="$patch_epoch"
            fi
            ## Plain assignment instead of `((suffix--))': the latter returns
            ## failure, when old value is 0, which would trip `set -e'.
            while [[ -f "$PATCHES_DIR/$newname.$suffix.patch" ]] ; do
                suffix=$((suffix - 1))
            done

            ## Extra precautions for patches, containing email in commit message.
            ## Extra headers confuses git, so I massage it a bit, replacing colon
            ## with %.
            formail -I"X-File: $patch" < "$patch" \
                | sed -r \
                      -e 's/^(From|To):([^@<?>]*)$/\1%\2/' \
                      -e 's/^(Date):([^+]*)$/\1%\2/' \
                      -e 's/^(Subject):([^]]*)$/\1%\2/' \
                      > "$PATCHES_DIR/$newname.$suffix.patch"
        done
    done < "$DARCS_REPOS_LISTFILE"
}

### trim
## Copy stdin to stdout, removing spaces on start and end of lines.
trim () {
    sed -r -e 's/^[ ]+//' -e 's/[ ]+$//'
}

### _git_am DEBIAN_DIR REPO
## Wrapper around git-am, with all required options set. Patch is read
## from stdin and applied under DEBIAN_DIR.
_git_am () {
    local debian_dir=$1
    local repo=$2
    local -a options=(--directory "$debian_dir" --committer-date-is-author-date)

    ## This is hell. With option `--whitespace=fix' some repositories
    ## breaks (does not apply), and some does not work without.
    ## Since whitespace errors is HUGE problem, I set `--whitespace=fix'
    ## by default and with try-and-error method exclude repositories, which
    ## breaks by it.
    ##
    ## For sure `c2hs' breaks with fix, and `yi' breaks without.
    case "$repo" in
        c2hs \
        | haskell-regex-base \
        | haskell-regex-posix \
        | haskell-http \
        | haskell-regex-compat \
        | uuagc \
        | haskell-yesod-core)
            ;;
        *)
            options+=(--whitespace=fix)
            ;;
    esac
    git am "${options[@]}"
}

### assemble_united_git_repository
## Assemble git repository in `UNITED_GIT_REPO' from patches in `PATCHES_DIR'.
## Patches are applied in version-sort order of their file names under
## `p/REPO/debian'; patch with X-Tag header results in tag `REPO_TAG'.
## If patch fails to apply, diagnostics are printed to stderr and script
## exits with status 4.
assemble_united_git_repository () {
    local errfile patches_dir patch repo tag file debian_dir
    local patch_ix=0
    local patches_count=0
    local -a patches=()

    errfile=$(mktemp)
    patches_dir=$(readlink -f "$PATCHES_DIR")

    ## Collect patches with glob instead of parsing `ls'. Glob stays
    ## unexpanded, if nothing matches; treat that as empty list.
    patches=("$patches_dir"/*.patch)
    if [[ -e "${patches[0]}" ]] ; then
        mapfile -t patches < <(printf '%s\n' "${patches[@]}" | sort -V)
        patches_count=${#patches[@]}
    else
        patches=()
    fi

    mkdir -p "$UNITED_GIT_REPO"
    cd "$UNITED_GIT_REPO"
    git init
    git config user.name "Joachim Breitner"
    git config user.email "mail@joachim-breitner.de"

    ## `${patches[@]+...}' keeps `set -u' happy with empty array on old bash.
    for patch in ${patches[@]+"${patches[@]}"} ; do
        repo=$(formail -xX-Repo < "$patch" | trim)
        tag=$(formail -xX-Tag < "$patch" | trim)
        file=$(formail -xX-File < "$patch" | trim)
        debian_dir="p/$repo/debian"

        mkdir -p "$debian_dir"
        patch_ix=$((patch_ix + 1))
        echo >&2 "($patch_ix/$patches_count) $repo: $file"
        if ! _git_am "$debian_dir" "$repo" < "$patch" &> "$errfile" ; then
            echo >&2 "ERROR: Failed to apply patch $patch"
            cat "$patch" >&2
            echo >&2 "GIT_AM ERROR:"
            cat "$errfile" >&2
            rm -f "$errfile"
            exit 4
        fi
        if [[ -n "$tag" ]] ; then
            git tag "${repo}_${tag}"
        fi
    done

    rm -f "$errfile"
    cd -
}

## Here starts actual actions and function calls.
##
## NOTE(review): calls of all stages between fetching the list and final
## assembling are commented out here, so only their progress messages are
## printed. Uncomment them to run the whole pipeline.

check_for_required_tools

## List of darcs repositories is generated only once. If new repository is added, you
## should force regeneration by just removing `DARCS_REPOS_LISTFILE'.
echo "Fetching list of darcs repositories... $(date)"
[[ -f "$DARCS_REPOS_LISTFILE" ]] || list_darcs_repos > "$DARCS_REPOS_LISTFILE"

echo "Cloning/updating darcs repositories... $(date)"
# clone_or_update_darcs_repos

echo "Converting darcs repositories into git... $(date)"
# convert_darcs_to_git_repos

echo "Convering git repositories into patches... $(date)"
# convert_git_repos_into_patches

echo "Ordering patches... $(date)"
# order_patches

echo "Assembling git repository... $(date)"
rm -fr "$UNITED_GIT_REPO"
assemble_united_git_repository
echo "Done. $(date)"
Attachment:
pgpifP3Fy0ien.pgp
Description: PGP signature