#!/usr/bin/perl -w

# axp - a GNU Arch command tool, Copyright (C) 2004 Mikhael Goikhman
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use 5.005;
use strict;

use Getopt::Long;
use IO::Handle;
use FindBin '$Bin';

use lib
	"$Bin/archzoom-data/perllib",
	"$Bin/../archzoom-data/perllib", 
	"$Bin/../../archzoom-data/perllib",
	"$Bin/../perllib";

sub wrong_usage (;$) {
	my $text = q{
		axp - advanced arch command tool (minimal archzoom variant)
		Usage:
			# keep the size of revision library at 50Mb at most 
			axp revlib prune --size 50MB

			# keep at most 100 recent revisions in revision library
			axp revlib prune --number 100 --quiet

			# don't actually remove, list revisions to be pruned
			axp revlib prune --number 40 --test --verbose

			# a fancy way to report the number of library revisions
			axp revlib prune --test --all --sort random

			# remove all incomplete temporary dirs ,,new-revision.*
			axp revlib cleanup

		More examples:
			axp revlib prune --size 2GB --sort name --reverse
			axp revlib prune -s 2GB -o time -v -v
			axp revlib cleanup --test --verbose

		Short option abbreviations are supported too.

		Suggested to run hourly or daily from cronjob, like:
			14 0/3 * * * /path/to/axp revlib prune -q -n 2000
			14 1/3 * * * /path/to/axp revlib prune -q -s 1GB
			14 2/3 * * * /path/to/axp revlib cleanup -q

		The non pared axp variant may be fetched using:
			tla get migo@homemail.com--Perl-GPL/axp--devel axp

	};
	$text =~ s/^\n//;
	$text =~ s/^\t\t//mg;
	$text =~ s/^\t$//m;
	my $not_error = shift;
	my $stream = $not_error? \*STDOUT: \*STDERR;
	print $stream $text;
	exit($not_error? 0: 1);
}

my $subcommand1 = shift || wrong_usage();
$subcommand1 eq "revlib" || wrong_usage();
my $subcommand2 = shift || wrong_usage();

if ($subcommand2 eq "prune") {
	&axp_revlib_prune_execute();
} elsif ($subcommand2 eq "cleanup") {
	&axp_revlib_cleanup_execute();
} else {
	wrong_usage();
}

exit 0;

# ----------------------------------------------------------------------------

sub axp_revlib_prune_options () {
	@ARGV || wrong_usage();

	my %options = (
		number  => 0,
		size    => 0,
		all     => 0,
		test    => 0,
		verbose => 0,
		quiet   => 0,
		sort    => undef,
		reverse => 0,
	);

	GetOptions(
		"n|number=i" => \$options{number},
		"s|size=s"   => \$options{size},
		"a|all"      => \$options{all},
		"t|test"     => \$options{test},
		"v|verbose+" => \$options{verbose},
		"q|quiet"    => \$options{quiet},
		"o|sort=s"   => \$options{sort},
		"r|reverse"  => \$options{reverse},
		"help"       => sub { wrong_usage(1) },
	) || wrong_usage();

	if ($options{size}) {
		$options{size} =~ /^(\d+)(|kb|mb|gb)$/i || wrong_usage();
		my $factor = {"" => 1, kb => 1024, mb => 1 << 20, gb => 1 << 30}
			->{lc($2)};
		$options{size} = $1 * $factor;
	}
	return %options;
}

# optimized, never evaluate something twice, or something that is not requested
sub axp_revlib_prune_sort ($$) {
	my ($index, $options) = @_;

	my $sort_name = $options->{sort} || "default";
	my %used_attributes = ();
	my $weight_sub = undef;
	my $cmp_sub = sub { $_[1] <=> $_[0] };

	if ($options->{size}) {
		$used_attributes{dir} = 1;
		$used_attributes{size} = 1;
	}
	if ($sort_name =~ /^d|default|q|quasi|quasi-time$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{mtime} = 1;
		$weight_sub = sub { $_[1]->{mtime} + rand(4 * 24 * 60 * 60) };
	}
	elsif ($sort_name =~ /^t|time|m|mtime$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{mtime} = 1;
		$weight_sub = sub { $_[1]->{mtime} };
	}
	elsif ($sort_name =~ /^c|ctime$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{ctime} = 1;
		$weight_sub = sub { $_[1]->{ctime} };
	}
	elsif ($sort_name =~ /^a|atime$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{atime} = 1;
		$weight_sub = sub { $_[1]->{atime} };
	}
	elsif ($sort_name =~ /^b|btime$/i) {
		$weight_sub = sub { $_[1]->{btime} };
	}
	elsif ($sort_name =~ /^s|size$/i) {
		$used_attributes{dir} = 1;
		$used_attributes{size} = 1;
		$weight_sub = sub { $_[1]->{size} };
		$cmp_sub = sub { $_[0] <=> $_[1] };
	}
	elsif ($sort_name =~ /^n|name$/i) {
		$weight_sub = sub { $_[0] };
		$cmp_sub = sub { $_[0] cmp $_[1] };
	}
	elsif ($sort_name =~ /^r|random$/i) {
		$weight_sub = sub { rand(100000) };
	}
	else {
		die "Unrecognized sort name ($sort_name), supported:\n\t" .
			"default/quasi-time, time/mtime, ctime, atime, btime, name, size, random\n";
	}
	my $update_sub = sub {
		my $revision = shift;
		my $attributes = shift;
		if ($used_attributes{dir} && !$attributes->{dir}) {
			($attributes->{dir}) = run_tla("library-find", $revision);
			die "Internal error: no reported library revision $revision\n"
				unless $attributes->{dir};
		}
		if ($used_attributes{size} && !$attributes->{size}) {
			my $du_output = run_cmd("du", "-sk", $attributes->{dir});
			die "Unexpected output ($du_output) of [du -sk $attributes->{dir}]\n"
				unless $du_output =~ /^(\d+)/;
			$attributes->{size} = $1;
		}
		if ($used_attributes{atime}) {
			$attributes->{atime} = $^T - 24 * 60 * 60 * (-A $attributes->{dir});
		}
		if ($used_attributes{mtime} && !$attributes->{mtime}) {
			$attributes->{mtime} = $^T - 24 * 60 * 60 * (-M $attributes->{dir});
		}
		if ($used_attributes{ctime} && !$attributes->{ctime}) {
			$attributes->{ctime} = $^T - 24 * 60 * 60 * (-C $attributes->{dir});
		}
		return $attributes;
	};
	$index->update(
		$update_sub,
		sub {
			foreach (keys %used_attributes) {
				return 1 unless defined $_[1]->{$_} && $_ ne "atime";
			}
			return 0;
		}
	);

	my $revision_attributes = $index->hash;
	my %weights = ();
	while (my ($revision, $attributes) = each %$revision_attributes) {
		$weights{$revision} = $weight_sub->($revision, $attributes);
	}

	my @revisions = sort { &$cmp_sub($weights{$a}, $weights{$b}) } keys %$revision_attributes;
	@revisions = reverse @revisions if $options->{reverse};
	return \@revisions;
}

use Arch::Util qw(run_tla run_cmd setup_config_dir);
use Arch::Library;
use Arch::SharedIndex;

sub axp_revlib_prune_execute (%) {
	my %options = axp_revlib_prune_options();
	unless ($options{number} || $options{size} || $options{all}) {
		die "$0: none of the --number, --size, --all options given\n";
	}

	my $dir = setup_config_dir(undef, 'axp');
	my $cache_file = "$dir/revlib-cache";
	my $index = Arch::SharedIndex->new(
		file => $cache_file,
		perl_data => 1,
	);

	my $library = Arch::Library->new;
	my $all_revisions = $library->expanded_revisions;
	my %all_revisions = map { $_ => 1 } @$all_revisions;
	$index->filter(sub { !$all_revisions{$_[0]} });
	$index->fetch_store(
		{ btime => time() }, $all_revisions
	);
	$all_revisions = axp_revlib_prune_sort($index, \%options);

	my $limit;
	if ($options{number}) {
		$limit = $options{number};
	}
	if ($options{size}) {
		my $revision_attributes = $index->hash;
		my $ndx = 0;
		my $used_size = 0;
		while ($ndx < @$all_revisions) {
			my $revision = $all_revisions->[$ndx];
			my $rev_size = $revision_attributes->{$revision}->{size} * 1024;
			last unless $used_size + $rev_size <= $options{size};
			$used_size += $rev_size;
			$ndx++;
		}
		$limit = $ndx;
	}
	if ($options{all}) {
		$limit = 0;
	}
	my $revisions_to_prune = [ @$all_revisions[ $limit .. @$all_revisions - 1 ] ];

	print "* test, no real revisions are removed\n"
		if $options{test} && !$options{quiet};
	print "* going to prune ", scalar @$revisions_to_prune, " out of ",
		scalar @$all_revisions, " revisions\n" unless $options{quiet};
	if ($options{verbose} >= 2) {
		print "** keeping $_\n" foreach @$all_revisions[0 .. $limit - 1];
	}
	foreach my $revision (@$revisions_to_prune) {
		print "** removing $revision\n" if $options{verbose};
		run_tla("library-remove", $revision) unless $options{test};
	}
	$index->delete($revisions_to_prune) unless $options{test};
	print "* done, ", @$all_revisions - @$revisions_to_prune,
		" revisions are left in the library\n" unless $options{quiet};
	return @$revisions_to_prune;
}

# ----------------------------------------------------------------------------

sub axp_revlib_cleanup_options () {
	my %options = (
		test    => 0,
		verbose => 0,
		quiet   => 0,
	);

	GetOptions(
		"test"    => \$options{test},
		"verbose" => \$options{verbose},
		"quiet"   => \$options{quiet},
		"help"    => sub { wrong_usage(1) },
	) || wrong_usage();

	return %options;
}

use Arch::Util qw(load_file);

sub axp_revlib_cleanup_execute () {
	my %options = axp_revlib_cleanup_options();

	my @revlib_location_files = glob("$ENV{HOME}/.arch-params/=revision-library*");
	print "** loading config files:", (map { "\n\t$_" } @revlib_location_files), "\n"
		if $options{verbose};

	my $lines = [];
	my @revlib_locations = map { load_file($_, $lines); @$lines; } @revlib_location_files;
	print "** searching in revlib locations:", (map { "\n\t$_" } @revlib_locations), "\n"
		if $options{verbose};

	my @dirs = map { glob("$_/*/*/*/*/,,new-revision.*") } @revlib_locations;
	@dirs = grep { -d $_ && (-M $_) * 24 * 60 > 15
		or $options{verbose} && print("** skipping recent $_\n") && 0 } @dirs;

	print "* test, no real directories are removed\n"
		if $options{test} && !$options{quiet};
	if (!@dirs) {
		print "* no incomplete temporary directories to cleanup found\n"
			unless $options{quiet};
		exit;
	} else {
		print "* going to cleanup ", scalar(@dirs), " incomplete temporary directories\n"
			unless $options{quiet};
		foreach (@dirs) {
			print "** removing $_\n" if $options{verbose};
			system("/bin/rm", "-rf", $_) unless $options{test};
		}
		print "* done\n"
			unless $options{quiet};
	}
	return @dirs;
}
