This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: onwards to git


> On Wed, Apr 29, 2009 at 11:27:57AM -0700, Ulrich Drepper wrote:
>> Immediately after the release of 2.10 on 5/9 we'll switch over the master 
>> and *only* repository for glibc to git.  There is already a git mirror in 
>> place so it's not unheard of.  We'll recreate the git archive once more 
>> from scratch.
> 
>   Excellent news! Will you also move to the practice of atomic commits
> (i.e. keeping it one changelog entry - one commit) so that the history
> becomes bisectable, among other things?

I would like to have atomic commits for prior versions too.  I attach a
script that does the conversion for the existing history as well; it would
be great to have it applied before the switch.  It is based on
git-filter-branch, but quite heavily modified to do just what it needs to
do (for example, it expects no merges, and it does not need to read and
write tree objects, since those can be reused completely).

Commits are split when the author changes, or at ChangeLog checkins.
This does *not* include checkins of other changelogs such as
nptl/ChangeLog.  This is the only known limitation, basically.

It is as easy as running "sh ~/glibc-reconstruct-commits <BRANCH1>
<BRANCH2>..." where the branch names are the git branch names that you
will want to publish.  It can be aborted and restarted, but the
processing will restart from scratch (it took ~1 hour to do the
conversion of the master branch on my not-so-new machine, so you can say
it's not so slow).

Paolo
#!/bin/sh

# Print all arguments, joined by single spaces, as one line on stderr.
# printf is used instead of echo so that a message such as "-n", or one
# containing backslashes, is emitted verbatim whatever shell runs the script.
warn () {
        printf '%s\n' "$*" >&2
}

# Translate an original commit id into its rewritten id.
# Arguments: $1 - original commit id
# Outputs:   the contents of map/$1 (relative to the current directory)
#            when that file is readable, otherwise $1 itself
map()
{
	if test ! -r "map/$1"
	then
		# no readable entry: the id is passed through untouched
		echo "$1"
		return
	fi
	cat "map/$1"
}

# override die(): this version puts in an extra line break, so that
# the progress is still visible
#
# Prints an empty line followed by all arguments (joined by single spaces)
# on stderr, then exits the shell with status 1.  printf is used instead of
# echo so that the message is emitted verbatim even when it looks like an
# echo option (e.g. "-n") or contains backslashes.
# NOTE(review): git-sh-setup is sourced further down in this script and may
# define its own die(), replacing this one - confirm which definition wins.

die()
{
	printf '\n%s\n' "$*" >&2
	exit 1
}

# When piped a commit, output a script to set the ident of either
# "author" or "committer".
#
# Arguments: $1 - which ident to extract; the callers in this script pass
#                 AUTHOR or COMMITTER (the argument is case-folded both ways)
# Input:     the raw commit object text on stdin
# Outputs:   shell code on stdout, meant to be eval'ed by the caller, that
#            assigns and exports GIT_<ID>_NAME, GIT_<ID>_EMAIL and
#            GIT_<ID>_DATE

set_ident () {
	# lid: lower-case form, matched against the header keyword in the commit
	# uid: upper-case form, used to build the GIT_<uid>_* variable names
	lid="$(echo "$1" | tr "[A-Z]" "[a-z]")"
	uid="$(echo "$1" | tr "[a-z]" "[A-Z]")"
	# sed program, run with -n below: on the first line starting with
	# "<lid> " it prints three "VAR='value'; export VAR" statements - the
	# name is the text before "<", the email the text between "<" and ">",
	# the date everything after "> " - and then quits.  The original line
	# is kept in the hold space (h) and restored (g) before each extraction.
	# NOTE(review): the s/'/.../ substitutions rewrite single quotes in the
	# value, presumably so it survives being wrapped in single quotes in the
	# emitted assignment - confirm the exact escaping before touching it.
	pick_id_script='
		/^'$lid' /{
			s/'\''/'\''\\'\'\''/g
			h
			s/^'$lid' \([^<]*\) <[^>]*> .*$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/GIT_'$uid'_NAME='\''&'\''; export GIT_'$uid'_NAME/p

			g
			s/^'$lid' [^<]* <\([^>]*\)> .*$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/GIT_'$uid'_EMAIL='\''&'\''; export GIT_'$uid'_EMAIL/p

			g
			s/^'$lid' [^<]* <[^>]*> \(.*\)$/\1/
			s/'\''/'\''\'\'\''/g
			s/.*/GIT_'$uid'_DATE='\''&'\''; export GIT_'$uid'_DATE/p

			q
		}
	'

	# sed reads the commit from stdin; the C locale is forced for this call
	LANG=C LC_ALL=C sed -ne "$pick_id_script"
	# Ensure non-empty id name.
	# The emitted case statement falls back to the part of the email before
	# the first "@" when the extracted name is empty.
	echo "case \"\$GIT_${uid}_NAME\" in \"\") GIT_${uid}_NAME=\"\${GIT_${uid}_EMAIL%%@*}\" && export GIT_${uid}_NAME;; esac"
}

# Tunables: each keeps a value that is already set (even an empty one) and
# only falls back to the default below when the variable is unset.
# skip_up_to is the commit excluded from the rev-list walk further down and
# compared against the parent id in filter_commit.
# NOTE(review): suspended_tree is not referenced anywhere else in the
# visible script - confirm whether it is still needed.
skip_up_to=${skip_up_to-1872320d7784f0ad6c01fdaf05b063284b474fbe}
suspended_tree=${suspended_tree-}
suspended_commit=${suspended_commit-}

# Create a new commit object that reuses the tree of an existing commit.
# The commit message is whatever the caller pipes in on stdin.
# Arguments: $1 - original commit whose tree is reused; also names the
#                 map entry that receives the result
#            $2 - (optional) parent of the new commit; when omitted or
#                 empty the new commit is created without a parent
# Outputs:   the id printed by "git commit-tree" is written to map/$1
# Returns:   the exit status of "git commit-tree"
do_commit ()
{
  # "git commit-tree" rather than the dashed "git-commit-tree": the other
  # git calls in this script use the same form, and dashed commands are not
  # on PATH in current git installations.
  # ${2:+...} drops "-p" entirely for an empty parent instead of passing a
  # dangling "-p" with no argument.
  git commit-tree "$1^{tree}" ${2:+-p "$2"} > "map/$1"
}

# Record that commit $1 gets no commit of its own: its map entry is set to
# $2, so a later "map $1" prints $2.
# Arguments: $1 - original commit id
#            $2 - id to store in map/$1
skip_commit()
{
	echo "$2" >"map/$1"
}

# Rewrite one commit of a linear history, folding runs of commits into a
# single commit per ChangeLog check-in.
#
# A commit whose ChangeLog blob equals its parent's is "suspended": it is
# mapped to the parent and remembered in suspended_commit.  A commit that
# changes ChangeLog is committed with a message made of its original
# subject/body followed by the lines added to ChangeLog.  When the author
# changes while a commit is suspended, the suspended commit is first
# committed under the previous author with the message ".".
#
# Globals:   skip_up_to (read); suspended_commit (read/written);
#            GIT_AUTHOR_NAME, GIT_AUTHOR_EMAIL, GIT_AUTHOR_DATE (read);
#            PREV_AUTHOR_NAME, PREV_AUTHOR_EMAIL, PREV_AUTHOR_DATE
#            (read/written); commit, parent, new_changelog, old_changelog
#            (written, not local)
# Arguments: $1 - original commit id
#            $2 - id of the (already rewritten) parent
filter_commit ()
{
  commit="$1"
  parent="$2"
  if test "$parent" = "$skip_up_to"; then
    # The parent is the excluded boundary commit: commit without a parent.
    echo 'initial import' | do_commit "$commit"
  else
    # Two separate tests instead of "[ ... -a ... ]", which is ambiguous
    # when an operand looks like an operator.
    if [ "$GIT_AUTHOR_NAME" != "$PREV_AUTHOR_NAME" ] && [ -n "$suspended_commit" ]; then
##### warn committing suspended "$suspended_commit" "($PREV_AUTHOR_NAME)"
      echo . | \
        GIT_AUTHOR_NAME="$PREV_AUTHOR_NAME" \
        GIT_AUTHOR_EMAIL="$PREV_AUTHOR_EMAIL" \
        GIT_AUTHOR_DATE="$PREV_AUTHOR_DATE" \
        do_commit "$suspended_commit" "$parent"
      parent=$(map "$suspended_commit")
    fi
    # Third column of "git ls-tree" output is the object id of ChangeLog.
    new_changelog=$(git ls-tree "$commit^{tree}" ChangeLog | awk '{print $3}')
    old_changelog=$(git ls-tree "$parent" ChangeLog | awk '{print $3}')

    if test "$old_changelog" = "$new_changelog"; then
##### warn skipping "$commit" "($GIT_AUTHOR_NAME)"
      suspended_commit="$commit"
      skip_commit "$commit" "$parent"
    else
##### warn consolidating up to "$commit" "($GIT_AUTHOR_NAME)"
      suspended_commit=
      # %n is git's own newline placeholder; the previous $'...' quoting is
      # a bash extension that a plain /bin/sh passes through literally.
      { git log -1 --pretty=format:%s%n%b "$commit"
      # From the ChangeLog diff, drop everything up to the first hunk
      # header and print the added lines with their leading "+" removed.
      git diff "$old_changelog" "$new_changelog" | sed -n \
        -e '1,/^+/ s/^ //p' \
        -e '1,/^@@/d' \
        -e 's/^+//p' ; } | do_commit "$commit" "$parent"
    fi
  fi
  # Remember this commit's author for the comparison on the next call.
  PREV_AUTHOR_NAME="$GIT_AUTHOR_NAME"
  PREV_AUTHOR_EMAIL="$GIT_AUTHOR_EMAIL"
  PREV_AUTHOR_DATE="$GIT_AUTHOR_DATE"
}

# Usage text; git-sh-setup's usage() is expected to print it.
USAGE="[--original <namespace>] [-d <directory>] [-f | --force] \
[<rev-list options>...]"

OPTIONS_SPEC=
# git-sh-setup lives in git's exec path, which is normally not part of
# $PATH, so ask git where it is instead of relying on "which".
. "$(git --exec-path)/git-sh-setup"

# Refuse to run when either the work tree or the index differs from HEAD.
git diff-files --quiet &&
	git diff-index --cached --quiet HEAD -- ||
	die "Cannot rewrite branch(es) with a dirty working directory."

# Defaults, overridable from the command line.
tempdir=.git-rewrite
orig_namespace=refs/original/
force=

# Consume leading options; stop at "--" or at the first word that does not
# start with "-".  Whatever remains in "$@" is used later as revisions.
while true
do
	case "$1" in
	--)
		shift
		break
		;;
	-f|--force)
		force=t
		shift
		continue
		;;
	-*)
		# a switch that carries a value; handled below
		;;
	*)
		break
		;;
	esac

	# all switches take one argument
	ARG="$1"
	if test "$#" = 1
	then
		usage
	fi
	shift
	OPTARG="$1"
	shift

	if test "$ARG" = "-d"
	then
		tempdir="$OPTARG"
	elif test "$ARG" = "--original"
	then
		# strip any trailing slashes, then append exactly one
		orig_namespace=$(expr "$OPTARG/" : '\(.*[^/]\)/*$')/
	else
		usage
	fi
done

# An empty directory name would make the "cd" below a no-op, after which
# $tempdir would name the starting directory and the exit trap would
# delete it.
test -n "$tempdir" || die "Temporary directory name must not be empty"

# With --force a leftover scratch directory is discarded; without it an
# existing one aborts the run.
case "$force" in
t)
	rm -rf "$tempdir"
;;
'')
	test -d "$tempdir" &&
		die "$tempdir already exists, please remove it"
esac
# Creating and removing a scratch subdirectory leaves $tempdir itself in
# place.
mkdir -p "$tempdir/t" || die ""
rmdir "$tempdir/t" || die ""
# Must not continue if this fails: $(pwd) would then be the starting
# directory, which the trap below removes recursively.
cd "$tempdir" || die "Could not change to $tempdir"
tempdir=$(pwd)

# Remove tempdir on exit
trap 'cd ..; rm -rf "$tempdir"' 0

# Make sure refs/original is empty
# Every existing ref is listed into backup-refs; any ref below
# $orig_namespace aborts the run, or is deleted when --force was given.
git for-each-ref > "$tempdir"/backup-refs
while read sha1 type name
do
	case "$name" in
	$orig_namespace*)
		if test -z "$force"
		then
			die "Namespace $orig_namespace not empty"
		elif test "$force" = t
		then
			git update-ref -d "$name" $sha1
		fi
	;;
	esac
done < "$tempdir"/backup-refs

# Remember the git environment as it is now, so it can be restored at the
# end of the script, then export the values used while rewriting.
ORIG_GIT_DIR="$GIT_DIR"
ORIG_GIT_WORK_TREE="$GIT_WORK_TREE"
ORIG_GIT_INDEX_FILE="$GIT_INDEX_FILE"
GIT_WORK_TREE=.
export GIT_DIR GIT_WORK_TREE

# The refs should be updated if their heads were rewritten
# With no revision arguments left, HEAD is rewritten.
test "$#" != 0 || set HEAD
# Resolve the arguments to full ref names; lines starting with "^"
# (exclusions) are dropped.
git rev-parse --no-flags --revs-only --symbolic-full-name "$@" |
sed -e '/^^/d' >"$tempdir"/heads

if test ! -s "$tempdir"/heads
then
	die "Which ref do you want to rewrite?"
fi

# Exit status reported at the very end of the script.
ret=0

# map old->new commit ids for rewriting parents
if ! mkdir map
then
	die "Could not create map/ directory"
fi

# One line per commit, oldest first, each followed by its parent id(s);
# everything reachable from $skip_up_to is left out.
if ! git rev-list --reverse --topo-order --parents "$@" ^$skip_up_to > revs
then
	die "Could not get the commits"
fi
commits=$(wc -l <revs | tr -d " ")

if test $commits -eq 0
then
	die "Found nothing to rewrite"
fi

# Rewrite the commits
# Each line of revs holds a commit id followed by its parent id(s); a third
# field means the commit is a merge, which this script does not handle.

i=0
while read -r commit parent blah; do
	test -n "$blah" && die unexpected merge
	i=$(($i+1))
	# Data goes through %s so it is never interpreted as a format string.
	printf 'Rewrite %s (%s/%s)\n' "$commit" "$i" "$commits"

	git cat-file commit "$commit" >commit ||
		die "Cannot read commit $commit"

	# Load GIT_AUTHOR_* and GIT_COMMITTER_* from the original commit.
	eval "$(set_ident AUTHOR <commit)" ||
		die "setting author failed for commit $commit"
	eval "$(set_ident COMMITTER <commit)" ||
		die "setting committer failed for commit $commit"

	# A commit without a parent has nothing to look up in map/.
	if test -n "$parent"; then
		new_parent=$(map "$parent")
	else
		new_parent=
	fi
	filter_commit "$commit" "$new_parent"
done <revs

# Commits at the end of the history that did not touch ChangeLog are still
# suspended here: they were only mapped to their parent, so without this
# step the rewritten branch tip would lack their changes.  Commit the last
# suspended commit on top of that parent, with the same "." message that
# filter_commit uses for suspended commits.  GIT_AUTHOR_* still holds the
# identity of the last commit read, which is the suspended one.
if test -n "$suspended_commit"; then
	echo . | do_commit "$suspended_commit" "$(map "$suspended_commit")" ||
		die "Could not commit suspended commit $suspended_commit"
	suspended_commit=
fi

# Finally update the refs

# Glob pattern matching exactly 40 lower-case hexadecimal digits:
# five digits, repeated eight times.
_x40='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
_x40="${_x40}${_x40}${_x40}${_x40}${_x40}${_x40}${_x40}${_x40}"
echo
while read ref
do
	# avoid rewriting a ref twice
	# NOTE(review): this tests a path relative to the current directory,
	# not inside the git directory - confirm it can ever exist.
	if test -f "$orig_namespace$ref"
	then
		continue
	fi

	sha1=$(git rev-parse "$ref"^0)
	rewritten=$(map $sha1)

	if test $sha1 = "$rewritten"
	then
		warn "WARNING: Ref '$ref' is unchanged" &&
		continue
	fi

	if test -z "$rewritten"
	then
		# an empty map entry means the commit has no replacement
		echo "Ref '$ref' was deleted"
		git update-ref -m "filter-branch: delete" -d "$ref" $sha1 ||
			die "Could not delete $ref"
	else
		case "$rewritten" in
		$_x40)
			# exactly one new commit id
			echo "Ref '$ref' was rewritten"
			git update-ref -m "filter-branch: rewrite" \
					"$ref" $rewritten $sha1 ||
				die "Could not rewrite $ref"
		;;
		*)
			# NEEDSWORK: possibly add -Werror, making this an error
			warn "WARNING: '$ref' was rewritten into multiple commits:"
			warn "$rewritten"
			warn "WARNING: Ref '$ref' points to the first one now."
			rewritten=$(echo "$rewritten" | head -n 1)
			git update-ref -m "filter-branch: rewrite to first" \
					"$ref" $rewritten $sha1 ||
				die "Could not rewrite $ref"
		;;
		esac
	fi
	# keep the old value reachable under the backup namespace
	git update-ref -m "filter-branch: backup" "$orig_namespace$ref" $sha1
done < "$tempdir"/heads

# Leave and remove the scratch directory, then cancel the exit trap that
# would otherwise do the same again.
# NOTE(review): "cd .." assumes the scratch directory sits directly inside
# the work tree; with "-d <dir>" pointing elsewhere the read-tree below
# would run in a different directory - confirm.
cd ..
rm -rf "$tempdir"

trap - 0

# Put the git environment back the way it was before the rewrite: the
# variables are unset and only re-exported if they had a non-empty value.
unset GIT_DIR GIT_WORK_TREE GIT_INDEX_FILE
if test -n "$ORIG_GIT_DIR"
then
	GIT_DIR="$ORIG_GIT_DIR" && export GIT_DIR
fi
if test -n "$ORIG_GIT_WORK_TREE"
then
	GIT_WORK_TREE="$ORIG_GIT_WORK_TREE" &&
	export GIT_WORK_TREE
fi
if test -n "$ORIG_GIT_INDEX_FILE"
then
	GIT_INDEX_FILE="$ORIG_GIT_INDEX_FILE" &&
	export GIT_INDEX_FILE
fi
# Bring index and work tree in line with the rewritten HEAD.  A failure
# here is reported through the exit status instead of being ignored.
git read-tree -u -m HEAD || ret=$?

exit $ret

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]