From: bdowning@lavos•net (Brian Downing)
To: Felipe Contreras <felipe.contreras@gmail•com>
Cc: git@vger•kernel.org, monotone-devel@nongnu•org
Subject: Re: [RFC] mtn to git conversion script
Date: Mon, 25 Aug 2008 11:35:31 -0500 [thread overview]
Message-ID: <20080825163530.GJ31114@lavos.net> (raw)
In-Reply-To: <94a0d4530808240218j4bedbe3di99303da9addc93a4@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1740 bytes --]
On Sun, Aug 24, 2008 at 12:18:50PM +0300, Felipe Contreras wrote:
> I developed a script that converts a monotone repository into a git
> one (exact clone), I want to contribute it so everybody can use it.
>
> This is the gist of the script:
>
> mtn update --revision #{@id} --reallyquiet
> git ls-files --modified --others --exclude-standard -z | git
> update-index --add --remove -z --stdin
> git write-tree
> git write-raw < /tmp/commit.txt
> git update-ref refs/mtn/#{@id} #{@git_id}
>
> branches.each do |e|
> git update-ref refs/heads/#{e} #{@git_id}
> end
You definitely want to use fast-import, but you probably want to do
something a lot closer to fast-export for monotone (read: use its
automate stdio interface and avoid expensive calls).
Here's a simple monotone to git converter I wrote. You'll need the
Monotone::AutomateStdio Perl module to use it (which I think I got
from monotone's net.venge.monotone.contrib.lib.automate-stdio branch).
It is very fast; it can convert the OpenEmbedded repo in something like
5-10 minutes on my machine.
Note that for monotone export to go fast you absolutely /must/ avoid the
get_manifest operation. In my converter I use the revision information
directly. Getting the renames right with this is a little tricky; IIRC,
the ordering that works is:
* Rename all renamed files, innermost files first, to temporary names.
* Delete all deleted files, innermost first.
* Rename all temporary names to permanent names, outermost first.
* Add all new/modified files.
Conveniently, all of the above can be done by directly emitting
fast-import commands, so you don't have to keep track of trees directly.
(With one exception, which I'll elaborate on in a different email.)
-bcd
[-- Attachment #2: mtn-to-git.pl --]
[-- Type: text/x-perl, Size: 4759 bytes --]
#!/usr/bin/perl
# Copyright (C) 2007-2008 Brian Downing
# This program is licensed under version 2 of the GNU GPL.
use strict;
use Monotone::AutomateStdio;
use Date::Parse;
# Debug switch: when true, lprint/lprintf and the blob emitter also echo
# what they send to git-fast-import.
my $D = 0;

# Monotone automate handle, constructed from the first command-line
# argument (presumably the database path -- confirm against the module).
my $m = Monotone::AutomateStdio->new($ARGV[0]);

# Ask monotone for its revision graph and keep just the revision ids.
my $revlist = [];
$m->graph($revlist);
my $sorted = [ map { $_->{revision_id} } @$revlist ];

# Leaf revisions, fetched up front; they become branch heads at the end.
my $leaves = [];
$m->leaves($leaves);

# Reorder the ids topologically; the result is written back into $sorted.
$m->toposort($sorted, @$sorted);

# $marks maps monotone ids (file and revision) to fast-import mark numbers,
# $mark is the next unused mark, and $c counts revisions processed so far.
my ($marks, $mark, $c) = ({}, 1, 0);
# Return a path in a form git-fast-import accepts as a <path> argument.
#
# fast-import reads an unquoted path as literal bytes, but requires a
# C-style quoted string when the path starts with a double quote, contains
# a newline, or (as the source of a rename/copy) contains a space.  Paths
# that need none of this are returned unchanged; any other path is wrapped
# in double quotes with backslash, newline and double quote escaped.
#
# Takes the path as its only argument and returns the (possibly quoted)
# string.  Works on a lexical copy so the caller's $_ is left alone.
sub quote_file {
    my ($path) = @_;

    # Common case: nothing in the path forces quoting.
    return $path unless $path =~ /^"/ || $path =~ /[ \n]/;

    # Backslashes must be doubled first so the escapes added below are not
    # themselves re-escaped.
    $path =~ s/\\/\\\\/g;
    $path =~ s/\n/\\n/g;
    $path =~ s/"/\\"/g;
    return qq("$path");
}
# Print a list to the filehandle given as the first argument.  When the
# file-level debug flag $D is true, the same list is first printed to the
# currently selected output handle (STDOUT unless changed) as well.
sub lprint {
    my ($out, @items) = @_;
    if ($D) {
        print @items;
    }
    print {$out} @items;
}
# printf counterpart of lprint: the first argument is the target filehandle,
# the remaining arguments are a printf format followed by its values.  When
# the file-level debug flag $D is true, the formatted text is first printed
# to the currently selected output handle as well.
sub lprintf {
    my ($out, @format_and_args) = @_;
    if ($D) {
        printf @format_and_args;
    }
    printf {$out} @format_and_args;
}
# Fixed tag embedded in the temporary names used while shuffling renames
# (presumably a UUID chosen so those names never clash with real paths).
my $tmptag = "624d893e-ae1a-42d8-90a9-926a6ceffae8";

# Everything below is streamed to a git-fast-import child process.  Fail
# loudly if it cannot be started instead of printing into a dead handle.
open my $fi, '|-', 'git-fast-import --export-marks=file'
    or die "cannot start git-fast-import: $!\n";

# Emit one fast-import commit per monotone revision, in topological order.
for my $rev (@$sorted) {
    # Placeholders used when a revision lacks the corresponding cert.
    my ($time, $author, $msg) = ("0", "__UNKNOWN__", "__UNKNOWN__");
    my @certs;
    my @branches;
    $m->certs(\@certs, $rev);
    for my $cert (@certs) {
        my ($n, $v) = ($cert->{name}, $cert->{value});
        $author = $v if ($n eq 'author');
        $time = $v if ($n eq 'date');
        $msg = $v if ($n eq 'changelog');
        push(@branches, $v) if ($n eq 'branch');
    }
    my $email = $author;

    # Record the monotone identity of the commit as trailer lines.
    $msg .= "\nmtn-revision: $rev\n";
    for my $b (sort @branches) {
        $msg .= "mtn-branch: $b\n";
    }

    # Convert the date cert to epoch seconds.  str2time yields undef for
    # input it cannot parse; fall back to 0 so the committer line stays
    # well-formed.
    $time = str2time($time, 'UTC');
    $time = 0 unless defined $time;

    my $mfest = [];
    $m->get_revision($mfest, $rev);

    my $orcount = 0;            # number of old_revision entries seen so far
    my $add_files = {};         # path => monotone file id (added or patched)
    my $add_dirs = {};          # path => 1 for newly added directories
    my $delete_files = {};      # path => 1 for deleted paths
    my $from_tmpnames = {};     # rename source path => temporary name
    my $to_tmpnames = {};       # rename target path => same temporary name
    my $curtmp = 0;             # counter making temporary names distinct
    my @parents;
    for my $e (@$mfest) {
        if ($e->{type} eq 'old_revision') {
            push(@parents, $e->{revision_id});
            ++$orcount;
        }
        # Once a second old_revision has been seen, only further parents
        # are recorded; the change entries that follow are skipped, so the
        # commit's file changes are those listed under the first parent.
        next if $orcount > 1;
        if ($e->{type} eq 'add_file' || $e->{type} eq 'patch') {
            my $id = $e->{file_id} || $e->{to_file_id};
            $add_files->{$e->{name}} = $id;
            # Send each distinct file content to fast-import only once and
            # remember the mark it was given.
            unless ($marks->{$id}) {
                my $data;
                $m->get_file(\$data, $id);
                print "new file $id\n" if $D;
                print $fi "blob\n";
                # NOTE(review): length() counts characters; this assumes
                # get_file returns raw bytes -- confirm.
                my $len = length($data);
                print $fi "mark :$mark\n";
                $marks->{$id} = $mark++;
                print $fi "data $len\n$data\n";
            }
        } elsif ($e->{type} eq 'add_dir') {
            $add_dirs->{$e->{name}} = 1;
        } elsif ($e->{type} eq 'delete') {
            $delete_files->{$e->{name}} = 1;
        } elsif ($e->{type} eq 'rename') {
            # Source and target share one temporary name so the rename can
            # be carried out in two steps further down.
            $curtmp++;
            $from_tmpnames->{$e->{from_name}} = "__tmp_${tmptag}_$curtmp";
            $to_tmpnames->{$e->{to_name}} = "__tmp_${tmptag}_$curtmp";
        }
    }

    # Progress report; '%%' prints a literal percent sign.
    printf("rev $rev (%d/%d, %.2f%%)\n",
           ++$c, scalar(@$sorted), 100*$c/scalar(@$sorted));

    # A revision without parents starts a fresh history on refs/import.
    print $fi "reset refs/import\n" unless @parents;
    lprint $fi, "commit refs/import\n";
    print $fi "mark :$mark\n";
    $marks->{$rev} = $mark++;

    # Split "Name <email>" author certs; otherwise the whole cert value is
    # used as the email and its part before '@' as the name.
    if ($author =~ m(\s*(.*?\S)\s*<(.*)>\s*)) {
        $author = $1;
        $email = $2;
    }
    # Angle brackets would break the committer line syntax.
    $author =~ s/[<>]/_/g;
    $email =~ s/[<>]/_/g;
    $author =~ s/@.*//;
    print $fi "committer $author <$email> $time +0000\n";
    # NOTE(review): as with blobs, length() is only the byte count if $msg
    # holds raw bytes -- confirm.
    my $len = length($msg);
    print $fi "data $len\n$msg\n";

    # The first parent is the "from" line, every further one a "merge".
    my $from = "from";
    for my $p (@parents) {
        lprint $fi, "$from :$marks->{$p}\n";
        $from = "merge";
    }

    # Step 1: move rename sources to temporary names, longest path first.
    for my $f (sort { length($b) <=> length ($a) } keys %$from_tmpnames) {
        lprintf($fi, "R %s %s\n",
                quote_file($f), quote_file($from_tmpnames->{$f}));
    }
    # Step 2: delete removed paths, longest path first.
    for my $f (sort { length($b) <=> length ($a) } keys %$delete_files) {
        lprintf($fi, "D %s\n", quote_file($f));
    }
    # Step 3: move temporary names to their final names, shortest first.
    for my $f (sort { length($a) <=> length ($b) } keys %$to_tmpnames) {
        lprintf($fi, "R %s %s\n",
                quote_file($to_tmpnames->{$f}), quote_file($f));
    }
    # Step 4: add new and modified files, referring to their blob marks.
    for my $f (keys %$add_files) {
        lprintf($fi, "M 0644 :%s %s\n",
                $marks->{$add_files->{$f}}, quote_file($f));
    }
    # Each added directory gets an empty .gitignore file inside it.
    for my $f (keys %$add_dirs) {
        $f .= "/" if $f;
        lprintf($fi, "M 0644 inline %s\n", quote_file("$f.gitignore"));
        lprint($fi, "data 0\n\n");
    }
    print $fi "\n";
}
# Create a branch head for every leaf revision that was imported.
#
# $branches holds a per-branch counter that starts undefined and is
# decremented for each leaf seen.  Its value before the decrement is appended
# to the ref name, so the first head of branch "b" becomes refs/heads/b and
# later heads of the same branch become refs/heads/b-1, refs/heads/b-2, ...
my $branches = {};
for my $rev (@$leaves) {
    my $branch;
    my @certs;
    $m->certs(\@certs, $rev);
    for my $cert (@certs) {
        my ($n, $v) = ($cert->{name}, $cert->{value});
        # With several branch certs the last one listed wins.
        $branch = $v if ($n eq 'branch');
    }

    # Without a branch cert there is no usable ref name; "refs/heads/"
    # alone is not a valid ref, so skip the leaf instead of emitting it.
    unless (defined $branch) {
        warn "leaf $rev has no branch cert; not creating a head for it\n";
        next;
    }

    my $r = $branches->{$branch};
    $r = '' unless defined $r;      # first head of a branch gets no suffix
    $branches->{$branch}--;
    if ($marks->{$rev}) {
        print $fi "reset refs/heads/$branch$r\n";
        print $fi "from :$marks->{$rev}\n\n";
    }
}

# Closing the pipe waits for git-fast-import to exit; a false return means
# either a write error ($! set) or a non-zero exit status ($? set).
close $fi
    or die "git-fast-import failed: "
         . ($! ? "$!" : "exit status " . ($? >> 8)) . "\n";
next prev parent reply other threads:[~2008-08-25 16:37 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-08-24 9:18 [RFC] mtn to git conversion script Felipe Contreras
2008-08-24 13:14 ` Miklos Vajna
2008-08-24 18:19 ` Johannes Schindelin
2008-08-24 19:37 ` Miklos Vajna
2008-08-24 18:33 ` Felipe Contreras
2008-08-24 22:46 ` Shawn O. Pearce
2008-08-25 0:45 ` Felipe Contreras
2008-08-28 5:57 ` Anand Kumria
2008-08-28 9:03 ` [Monotone-devel] " Felipe Contreras
2008-09-04 9:43 ` Felipe Contreras
2008-09-04 10:31 ` Jakub Narebski
2008-09-04 13:21 ` Felipe Contreras
2008-09-04 10:50 ` Thomas Moschny
2008-09-04 13:29 ` Felipe Contreras
2008-08-25 16:35 ` Brian Downing [this message]
2008-08-25 16:41 ` Brian Downing
2008-08-25 20:47 ` Felipe Contreras
2008-08-25 21:09 ` Brian Downing
2008-08-28 7:11 ` Anand Kumria
2008-11-11 16:30 ` Juan Jose Comellas
2008-11-11 16:40 ` Juan Jose Comellas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080825163530.GJ31114@lavos.net \
--to=bdowning@lavos$(echo .)net \
--cc=felipe.contreras@gmail$(echo .)com \
--cc=git@vger$(echo .)kernel.org \
--cc=monotone-devel@nongnu$(echo .)org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox