174 lines
4.0 KiB
Perl
Executable File
174 lines
4.0 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
# Copyright 2005, Ryan Anderson <ryan@michonline.com>
|
|
# Distribution permitted under the GPL v2, as distributed
|
|
# by the Free Software Foundation.
|
|
# Later versions of the GPL at the discretion of Linus Torvalds
|
|
#
|
|
# Scan two git object-trees, and hardlink any common objects between them.
|
|
|
|
use 5.006;
|
|
use strict;
|
|
use warnings;
|
|
use Getopt::Long;
|
|
|
|
sub get_canonical_form($);
|
|
sub do_scan_directory($$$);
|
|
sub compare_two_files($$);
|
|
sub usage();
|
|
sub link_two_files($$);
|
|
|
|
# stats
|
|
my $total_linked = 0;
|
|
my $total_already = 0;
|
|
my ($linked,$already);
|
|
|
|
my $fail_on_different_sizes = 0;
|
|
my $help = 0;
|
|
GetOptions("safe" => \$fail_on_different_sizes,
|
|
"help" => \$help);
|
|
|
|
usage() if $help;
|
|
|
|
my (@dirs) = @ARGV;
|
|
|
|
usage() if (!defined $dirs[0] || !defined $dirs[1]);
|
|
|
|
$_ = get_canonical_form($_) foreach (@dirs);
|
|
|
|
my $master_dir = pop @dirs;
|
|
|
|
opendir(D,$master_dir . "objects/")
|
|
or die "Failed to open $master_dir/objects/ : $!";
|
|
|
|
my @hashdirs = grep !/^\.{1,2}$/, readdir(D);
|
|
|
|
foreach my $repo (@dirs) {
|
|
$linked = 0;
|
|
$already = 0;
|
|
printf("Searching '%s' and '%s' for common objects and hardlinking them...\n",
|
|
$master_dir,$repo);
|
|
|
|
foreach my $hashdir (@hashdirs) {
|
|
do_scan_directory($master_dir, $hashdir, $repo);
|
|
}
|
|
|
|
printf("Linked %d files, %d were already linked.\n",$linked, $already);
|
|
|
|
$total_linked += $linked;
|
|
$total_already += $already;
|
|
}
|
|
|
|
printf("Totals: Linked %d files, %d were already linked.\n",
|
|
$total_linked, $total_already);
|
|
|
|
|
|
sub do_scan_directory($$$) {
|
|
my ($srcdir, $subdir, $dstdir) = @_;
|
|
|
|
my $sfulldir = sprintf("%sobjects/%s/",$srcdir,$subdir);
|
|
my $dfulldir = sprintf("%sobjects/%s/",$dstdir,$subdir);
|
|
|
|
opendir(S,$sfulldir)
|
|
or die "Failed to opendir $sfulldir: $!";
|
|
|
|
foreach my $file (grep(!/\.{1,2}$/, readdir(S))) {
|
|
my $sfilename = $sfulldir . $file;
|
|
my $dfilename = $dfulldir . $file;
|
|
|
|
compare_two_files($sfilename,$dfilename);
|
|
|
|
}
|
|
closedir(S);
|
|
}
|
|
|
|
sub compare_two_files($$) {
|
|
my ($sfilename, $dfilename) = @_;
|
|
|
|
# Perl's stat returns relevant information as follows:
|
|
# 0 = dev number
|
|
# 1 = inode number
|
|
# 7 = size
|
|
my @sstatinfo = stat($sfilename);
|
|
my @dstatinfo = stat($dfilename);
|
|
|
|
if (@sstatinfo == 0 && @dstatinfo == 0) {
|
|
die sprintf("Stat of both %s and %s failed: %s\n",$sfilename, $dfilename, $!);
|
|
|
|
} elsif (@dstatinfo == 0) {
|
|
return;
|
|
}
|
|
|
|
if ( ($sstatinfo[0] == $dstatinfo[0]) &&
|
|
($sstatinfo[1] != $dstatinfo[1])) {
|
|
if ($sstatinfo[7] == $dstatinfo[7]) {
|
|
link_two_files($sfilename, $dfilename);
|
|
|
|
} else {
|
|
my $err = sprintf("ERROR: File sizes are not the same, cannot relink %s to %s.\n",
|
|
$sfilename, $dfilename);
|
|
if ($fail_on_different_sizes) {
|
|
die $err;
|
|
} else {
|
|
warn $err;
|
|
}
|
|
}
|
|
|
|
} elsif ( ($sstatinfo[0] == $dstatinfo[0]) &&
|
|
($sstatinfo[1] == $dstatinfo[1])) {
|
|
$already++;
|
|
}
|
|
}
|
|
|
|
sub get_canonical_form($) {
|
|
my $dir = shift;
|
|
my $original = $dir;
|
|
|
|
die "$dir is not a directory." unless -d $dir;
|
|
|
|
$dir .= "/" unless $dir =~ m#/$#;
|
|
$dir .= ".git/" unless $dir =~ m#\.git/$#;
|
|
|
|
die "$original does not have a .git/ subdirectory.\n" unless -d $dir;
|
|
|
|
return $dir;
|
|
}
|
|
|
|
sub link_two_files($$) {
|
|
my ($sfilename, $dfilename) = @_;
|
|
my $tmpdname = sprintf("%s.old",$dfilename);
|
|
rename($dfilename,$tmpdname)
|
|
or die sprintf("Failure renaming %s to %s: %s",
|
|
$dfilename, $tmpdname, $!);
|
|
|
|
if (! link($sfilename,$dfilename)) {
|
|
my $failtxt = "";
|
|
unless (rename($tmpdname,$dfilename)) {
|
|
$failtxt = sprintf(
|
|
"Git Repository containing %s is probably corrupted, " .
|
|
"please copy '%s' to '%s' to fix.\n",
|
|
$tmpdname, $dfilename);
|
|
}
|
|
|
|
die sprintf("Failed to link %s to %s: %s\n%s" .
|
|
$sfilename, $dfilename,
|
|
$!, $dfilename, $failtxt);
|
|
}
|
|
|
|
unlink($tmpdname)
|
|
or die sprintf("Unlink of %s failed: %s\n",
|
|
$dfilename, $!);
|
|
|
|
$linked++;
|
|
}
|
|
|
|
|
|
sub usage() {
|
|
print("Usage: $0 [--safe] <dir> [<dir> ...] <master_dir> \n");
|
|
print("All directories should contain a .git/objects/ subdirectory.\n");
|
|
print("Options\n");
|
|
print("\t--safe\t" .
|
|
"Stops if two objects with the same hash exist but " .
|
|
"have different sizes. Default is to warn and continue.\n");
|
|
exit(1);
|
|
}
|