#!/usr/bin/perl
    eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
        if $running_under_some_shell;
#!/usr/bin/perl

use strict;
use LWP::UserAgent;
use utf8;
use File::Temp;

# name of the main repository; load_data() looks for it directly in the top directory
my $main_repo = 'core';

# the other repositories; load_data() looks for them in <topdir>/clone/<piece>
my @pieces = qw(binfilter dictionaries help translations);

# bug tracker prefix => URL prefix; get_bug_name() appends the bug number to it
my %bugzillas = (
    'fdo'  => 'https://bugs.freedesktop.org/show_bug.cgi?id=',
    'bnc'  => 'https://bugzilla.novell.com/show_bug.cgi?id=',
    'rhbz' => 'https://bugzilla.redhat.com/show_bug.cgi?id=',
    'i'    => 'https://issues.apache.org/ooo/show_bug.cgi?id=',
    'fate' => 'https://features.opensuse.org/',
);

# Finds all bug references in one line of a commit message, records them
# and strips them from the line.
#
# Recognized forms:
#   <tracker>#<number>   e.g. fdo#12345, rhbz#12345, i#12345 (>= 4 digits)
#   #<number>            recorded as i#<number> (>= 4 digits)
#   #i<number>#          recorded as i#<number>
# Some tracker prefixes are normalized: n# -> bnc#, debian# -> deb#,
# easyhack# -> fdo#, fd0# -> fdo#.
#
# Parameters:
#   $pdata     - reference to the data hash; every bug found becomes a key
#                of $pdata->{$piece}{$commit_id}{'bugs'}
#   $piece     - name of the repository the commit belongs to
#   $commit_id - id of the commit the line belongs to
#   $line      - one line of the commit message
#
# Returns the line without the bug references.
sub search_bugs($$$$)
{
    my ($pdata, $piece, $commit_id, $line) = @_;

    # every iteration removes one bug reference from the line
    while (1) {
        my $bug;
        my $bug_orig;

        # match fdo#123, rhbz#123, i#123
        # but match only bug number with >= 4 digits
        if ( $line =~ m/(\w+\#+\d{4,})/ ) {
            $bug_orig = $1;
            $bug = $1;
        # default to issuezilla for the #123 variant
        # but match only bug number with >= 4 digits
        } elsif ( $line =~ m/(\#)(\d{4,})/ ) {
            $bug_orig = $1 . $2;
            $bug = "i#$2";
        # match #i123#
        } elsif ( $line =~ m/(\#i)(\d+)(\#)/ ) {
            $bug_orig = $1 . $2 . $3;
            $bug = "i#$2";
        } else {
            # no bug reference is left
            last;
        }

        # remove bug number from the comment; it will be added later a standardized way;
        # the reference is matched literally, whatever characters it contains
        my $bug_re = quotemeta($bug_orig);
        $line =~ s/[Rr]esolve[ds]:?\s*$bug_re\s*//;
        $line =~ s/\s*-\s*$bug_re\s*//;
        $line =~ s/\(?$bug_re\)?\s*[:,-]?\s*//;

        # bnc# is preferred over n# for novell bugs
        $bug =~ s/^n\#/bnc#/;
        # deb# is preferred over debian# for debian bugs
        $bug =~ s/^debian\#/deb#/;
        # easyhack# is sometimes used for fdo# - based easy hacks
        $bug =~ s/^easyhack\#/fdo#/;
        # someone mistyped fdo as fd0
        $bug =~ s/^fd0\#/fdo#/;

        # save the bug number; the nested hashes are created on demand
        # (the former "defined %hash" test does not even compile with perl >= 5.22)
        $pdata->{$piece}{$commit_id}{'bugs'}{$bug} = 1;
    }

    return $line;
}

# Normalizes one summary line: strips the surrounding white space and
# lowers the first character when it is followed only by lowercase letters
# up to the end of the first word ("Fix foo" -> "fix foo"; "FIX foo" and
# "LibreOffice" stay as they are).
#
# Returns the normalized line.
sub standardize_summary($)
{
    my ($text) = @_;

    # strip leading and trailing white space
    $text =~ s/^\s*//;
    $text =~ s/\s*$//;

    # FIXME: remove do at the end of line
    #        remove bug numbers

    # lower the first letter only if the rest of the first word is lowercase
    return lcfirst($text) if ( $text =~ m/^.[a-z]+\b/ );

    return $text;
}

# Runs "git cherry $git_args" in the given repository and saves the ids of
# the commits marked by "+" into a temporary file, one id per line. Lines
# marked by "-" (cherry-picked commits) and any other output are ignored.
#
# Parameters:
#   $pdata       - not used here
#   $repo_dir    - directory with the git repository
#   $piece       - name of the repository; used only in the progress message
#   $branch_name - not used here
#   $git_args    - arguments passed to "git cherry"
#
# Returns the name of the temporary file; it is not removed automatically,
# the caller has to unlink it.
# Dies when the command cannot be started or fails, or when the temporary
# file cannot be written.
sub generate_git_cherry_ids_log($$$$$)
{
    my ($pdata, $repo_dir, $piece, $branch_name, $git_args) = @_;

    my $commit_ids_log_fh = File::Temp->new(TEMPLATE => 'lo-commit-stat-ids-XXXXXX',
                                            DIR => '/tmp',
                                            UNLINK => 0);
    my $commit_ids_log = $commit_ids_log_fh->filename;

    print STDERR "Filtering cherry-picked commits in the git repo: $piece...\n";

    # "&&" makes sure that git is not started in a wrong directory when cd fails
    my $cmd = "cd $repo_dir && git cherry $git_args";
    open (my $git, '-|', "$cmd 2>&1") || die "Can't run $cmd: $!";

    while (my $line = <$git>) {
        # keep only the commits that are not cherry-picked; strip the "+ " mark
        if ( $line =~ s/^\+ // ) {
            print $commit_ids_log_fh $line;
        }
    }

    # closing the pipe fails when the command finished with a non-zero exit status
    my $git_ok = close ($git);
    my $git_status = $? >> 8;
    my $log_ok = close ($commit_ids_log_fh);

    unless ($git_ok && $log_ok) {
        my $reason = $git_ok ? "can't write \"$commit_ids_log\": $!"
                             : "\"$cmd\" failed with the exit status $git_status";
        # do not leave the useless temporary file behind
        unlink $commit_ids_log;
        die "Error: $reason\n";
    }

    return $commit_ids_log;
}

# Parses the output of "git log" (or of another command with the same
# output format) from one repository and stores the result in the data hash:
#
#   $pdata->{$piece}{$commit_id}{'author'}{'name'}
#   $pdata->{$piece}{$commit_id}{'author'}{'email'}
#   $pdata->{$piece}{$commit_id}{'summary'}  - first non-empty message line
#   $pdata->{$piece}{$commit_id}{'bugs'}     - filled by search_bugs()
#
# The commit id is cut to the first 20 characters.
#
# Parameters:
#   $pdata       - reference to the data hash; $pdata->{$piece} is reset
#   $repo_dir    - directory with the git repository
#   $piece       - name of the repository
#   $branch_name - branch that must be checked out in the repository
#   $git_command - command producing the log, e.g. "git log"
#   $git_cherry  - when true, only the commits reported by
#                  generate_git_cherry_ids_log() are analyzed, one by one
#   $git_args    - extra arguments for the git command
#
# Dies when the repository is on another branch, when the command cannot be
# started, or when a commit has more than one author line.
sub load_git_log($$$$$$$)
{
    my ($pdata, $repo_dir, $piece, $branch_name, $git_command, $git_cherry, $git_args) = @_;

    # check the branch before any temporary file is created
    my $repo_branch_name = get_branch_name($repo_dir);
    if ( $branch_name ne $repo_branch_name ) {
        die "Error: mismatch of branches:\n" .
            "       main repo is on the branch: $branch_name\n" .
            "       $piece repo is on the branch: $repo_branch_name\n";
    }

    # "&&" makes sure that git is not started in a wrong directory when cd fails
    my $cmd = "cd $repo_dir &&";
    my $commit_ids_log;

    if ($git_cherry) {
        $commit_ids_log = generate_git_cherry_ids_log($pdata, $repo_dir, $piece, $branch_name, $git_args);
        $cmd .= " cat $commit_ids_log | xargs -n 1 $git_command -1";
    } else {
        $cmd .= " $git_command $git_args";
    }

    print STDERR "Analyzing log from the git repo: $piece...\n";

    %{$pdata->{$piece}} = ();

    # nothing to analyze; GNU xargs would start the command once even for
    # an empty input and "git log -1" would report the HEAD commit
    if ($git_cherry && -z $commit_ids_log) {
        unlink $commit_ids_log;
        return;
    }

    open (my $git, '-|', "$cmd 2>&1") || die "Can't run $cmd: $!";

    my $commit_id;

    while (my $line = <$git>) {
        chomp $line;

        if ( $line =~ m/^commit ([0-9a-z]{20})/ ) {
            $commit_id = "$1";
            %{$pdata->{$piece}{$commit_id}} = ();
            next;
        }

        if ( $line =~ /^\s*$/ ) {
            # ignore empty line
            next;
        }

        unless (defined $commit_id) {
            # stderr is merged into the output, so this is most likely an error message
            print STDERR "Warning: ignoring unexpected output before the first commit: $line\n";
            next;
        }

        if ( $line =~ /^Author:\s*([^\<]*)\<([^\>]*)>/ ) {
            my $name = "$1";
            my $email = "$2";
            # get rid of extra empty spaces;
            $name =~ s/\s+$//;
            die "Error: Author already defined for the commit {$commit_id}\n" if defined ($pdata->{$piece}{$commit_id}{'author'});
            %{$pdata->{$piece}{$commit_id}{'author'}} = ();
            $pdata->{$piece}{$commit_id}{'author'}{'name'} = $name;
            $pdata->{$piece}{$commit_id}{'author'}{'email'} = $email;
            next;
        }

        if ( $line =~ /^(?:Date|Merge):\s+/ ) {
            # ignore the other header lines; the lines of the commit message
            # are indented in this output format, so they can't match
            next;
        }

        $line = search_bugs($pdata, $piece, $commit_id, $line);
        # FIXME: need to be implemented
#        search_keywords($pdata, $line);

        # the first message line is the summary
        unless (defined $pdata->{$piece}{$commit_id}{'summary'}) {
            $pdata->{$piece}{$commit_id}{'summary'} = standardize_summary($line);
        }
    }

    close $git;
    unlink $commit_ids_log if ($git_cherry);
}

# Detects the name of the repository from the first "url = ..." line in
# <repo_dir>/.git/config.
#
# Parameters:
#   $repo_dir - directory with the git repository
#
# Returns the part of the URL after the last slash.
# Dies when the config file can't be read or includes no URL.
sub get_repo_name($)
{
    my $repo_dir = shift;

    my $config = "$repo_dir/.git/config";
    open (my $git_config, '<', $config) ||
        die "can't open \"$config\" for reading: $!\n";

    my $repo_name;
    while (my $line = <$git_config>) {
        chomp $line;

        if ( $line =~ /^\s*url\s*=\s*(\S+)$/ ) {
            $repo_name = "$1";
            # the greedy match removes everything up to the last slash
            $repo_name =~ s/.*\///;
            last;
        }
    }
    close $git_config;

    die "Error: can't find repo name in \"$config\"\n" unless defined ($repo_name);

    return $repo_name;
}

# Loads the git log from the requested repositories into the data hash.
#
# When $piece is defined, only this repository is analyzed; otherwise the
# main repository and all repositories listed in @pieces are analyzed. The
# main repository is expected in $top_dir, the others in
# $top_dir/clone/<piece>. The remaining parameters are passed to
# load_git_log() unchanged.
sub load_data($$$$$$$)
{
    my ($pdata, $top_dir, $piece, $branch_name, $git_command, $git_cherry, $git_args) = @_;

    # [ name, directory ] of every repository to analyze, in the order of processing
    my @repos;

    if (defined $piece) {
        my $single_dir = ("$piece" eq "$main_repo") ? "$top_dir" : "$top_dir/clone/$piece";
        push @repos, [ $piece, $single_dir ];
    } else {
        push @repos, [ $main_repo, $top_dir ];
        foreach my $name (sort @pieces) {
            push @repos, [ $name, "$top_dir/clone/$name" ];
        }
    }

    foreach my $repo (@repos) {
        my ($name, $dir) = @{$repo};
        load_git_log($pdata, $dir, $name, $branch_name, $git_command, $git_cherry, $git_args);
    }
}

# Detects the branch checked out in the given git repository.
#
# The output of "git branch" is searched for the lines starting with an
# asterisk; the first word after the asterisk on the last such line is
# returned.
#
# Dies when the command can't be started or when no such line is found.
sub get_branch_name($)
{
    my ($top_dir) = @_;

    my $cmd = "cd $top_dir && git branch";
    open (my $git, '-|', "$cmd 2>&1") || die "Can't run $cmd: $!";

    my $branch;
    while (my $line = <$git>) {
        chomp $line;
        $branch = "$1" if ( $line =~ m/^\*\s*(\S+)/ );
    }

    close $git;

    defined ($branch) || die "Error: did not detect git branch name in $top_dir\n";

    return $branch;
}

# Opens the log file for writing.
#
# The file name is <log_prefix>-<branch_name>-<log_suffix>.log, or .wiki
# when $wiki is true; it is created in $log_dir when defined, otherwise the
# name is used as it is. When the file already exists, the user is asked on
# STDIN whether it should be overwritten; an empty answer (or no answer at
# all) means yes.
#
# Parameters:
#   $log_dir, $log_prefix, $log_suffix, $branch_name - parts of the file name
#   $top_dir - not used here
#   $wiki    - selects the ".wiki" extension
#
# Returns the file handle.
# Dies when the user refuses to overwrite the file or when the file can't
# be opened.
sub open_log_file($$$$$$)
{
    my ($log_dir, $log_prefix, $log_suffix, $top_dir, $branch_name, $wiki) = @_;

    my $logfilename = "$log_prefix-$branch_name-$log_suffix";
    $logfilename = "$log_dir/$logfilename" if (defined $log_dir);
    $logfilename .= $wiki ? ".wiki" : ".log";

    if (-f $logfilename) {
        print "WARNING: The log file already exists: $logfilename\n";
        print "Do you want to overwrite it? (Y/n)?\n";
        my $answer = <STDIN>;
        # end of input is handled like an empty answer
        $answer = "" unless (defined $answer);
        $answer =~ s/^\s+//;
        $answer =~ s/\s+$//;
        # test for the empty string, not for the truth; "0" is not an agreement
        $answer = "y" if ($answer eq "");
        die "Please, rename the file or choose another log suffix\n" unless ( $answer =~ m/^y(?:es)?$/i );
    }

    my $log;
    open($log, '>', $logfilename) || die "Can't open \"$logfilename\" for writing: $!\n";

    return $log;
}

# Prints one summary line into the log:
#
#   <prefix><summary> (<bug>, <bug>...) [<author>, <author>...]
#
# The bugs and the authors are sorted, so that the same data always produce
# the same output. Nothing is printed for an empty summary.
#
# Parameters:
#   $summary      - summary text
#   $ppiece_title - reference to the title of the piece; a defined title is
#                   printed before the summary and then set to undef, so
#                   that it is printed only once
#   $pbugs        - reference to a hash; the keys are the bug ids
#   $pauthors     - reference to a hash; the keys are the author names
#   $prefix       - string printed in front of the summary
#   $log          - file handle of the log
sub print_commit_summary($$$$$$)
{
    my ($summary, $ppiece_title, $pbugs, $pauthors, $prefix, $log) = @_;

    return if ( !defined $summary || $summary eq "" );

    # print piece title if not done yet
    if ( defined ${$ppiece_title} ) {
        print $log "${$ppiece_title}\n";
        ${$ppiece_title} = undef;
    }

    # finally print the summary line
    my $bugs = "";
    if ( %{$pbugs} ) {
        $bugs = " (" . join (", ", sort { $a cmp $b } keys %{$pbugs}) . ")";
    }

    my $authors = "";
    if ( %{$pauthors} ) {
        $authors = " [" . join (", ", sort { $a cmp $b } keys %{$pauthors}) . "]";
    }

    print $log $prefix, $summary, $bugs, $authors, "\n";
}

# Prints the log with all commits, piece by piece. The commits of a piece
# are sorted by the summary; commits with the same summary are printed as
# one line with the bugs and the authors of all of them. Pieces without any
# commit are skipped.
#
# Parameters:
#   $pdata - reference to the data hash filled by load_data()
#   $log   - file handle of the log
#   $wiki  - not used here; all print functions are called the same way
sub print_commits($$$)
{
    my ($pdata, $log, $wiki) = @_;

    foreach my $piece ( sort { $a cmp $b } keys %{$pdata}) {
        my $pcommits = $pdata->{$piece};

        # check if this piece has any entries at all
        next unless ( %{$pcommits} );

        my $piece_title = "+ $piece";

        # a commit without any message is handled like the one with an empty summary
        my %summary_of = ();
        foreach my $id (keys %{$pcommits}) {
            my $summary = $pcommits->{$id}{'summary'};
            $summary_of{$id} = (defined $summary) ? $summary : "";
        }

        # the summaries are compared case-sensitively below, so summaries
        # that differ only in the case need a fixed order as well; otherwise
        # the same summary might be printed more times
        my @ids = sort { (lc ($summary_of{$a}) cmp lc ($summary_of{$b})) ||
                         ($summary_of{$a} cmp $summary_of{$b}) ||
                         ($a cmp $b) } keys %summary_of;

        my $old_summary = "";
        my %authors = ();
        my %bugs = ();
        foreach my $id (@ids) {
            my $summary = $summary_of{$id};
            if ($summary ne $old_summary) {
                # a new summary starts; flush the previous one
                print_commit_summary($old_summary, \$piece_title, \%bugs, \%authors, "    + ", $log);
                $old_summary = $summary;
                %authors = ();
                %bugs = ();
            }
            # collect bug numbers
            if (defined $pcommits->{$id}{'bugs'}) {
                foreach my $bug (keys %{$pcommits->{$id}{'bugs'}}) {
                    $bugs{$bug} = 1;
                }
            }
            # collect author names
            my $author = $pcommits->{$id}{'author'}{'name'};
            $authors{$author} = 1 if (defined $author);
        }
        print_commit_summary($old_summary, \$piece_title, \%bugs, \%authors, "    + ", $log);
    }
}

# Tries to get the title of the bug from the bug tracker.
#
# The bug id has the form <tracker>#<number>, e.g. fdo#12345. The page
# <URL prefix from %bugzillas><number> is downloaded and the page title
# without the leading "Bug <number> <separator> " is used as the bug title.
# The progress is printed on the standard output.
#
# Parameters:
#   $bug     - bug id
#   $summary - text returned when the title can't be detected
#
# Returns the bug title, or $summary when the tracker is unknown, the page
# can't be downloaded, or the page title does not have the expected form.
sub get_bug_name($$)
{
    my ($bug, $summary) = @_;
    print "$bug: ";

    # fdo#12345 -> (fdo, 12345); both are undefined when the id has another
    # form, instead of keeping values from an older match
    my ($bugzilla, $bug_number) = $bug =~ m/(\w*)\#+(\d+)/;

    unless ( defined $bugzilla && $bugzillas{$bugzilla} ) {
        # unknown bug tracker
        print "\n";
        return $summary;
    }

    my $url = $bugzillas{$bugzilla} . $bug_number;
    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    my $response = $ua->get($url);

    unless ($response->is_success) {
        print "\n";
        return $summary;
    }

    my $title = $response->title;
    $title = "" unless (defined $title);
    if ( $title =~ s/^Bug \d+ \S+ // ) {
        print "$title\n";
        return $title;
    }

    print "warning: not found; using commit message (only got $title)\n";
    return $summary;
}

# Prints the log with the fixed bugs, one bug per line:
#
#   <bug> <summary> [<author>, <author>...]
#
# The summary is the bug title returned by get_bug_name(), which falls back
# to the summary of a commit mentioning the bug. When more commits mention
# the same bug, the last one in the order of the sorted pieces and commit
# ids is used, so that the result does not change between two runs.
#
# Parameters:
#   $pdata - reference to the data hash filled by load_data()
#   $log   - file handle of the log
#   $wiki  - when true, the bug is printed as "* {{<tracker>|<number>}}"
sub print_bugs($$$)
{
    my ($pdata, $log, $wiki) = @_;

    # associate bugs with their summaries and fixers
    my %bugs = ();
    foreach my $piece ( sort { $a cmp $b } keys %{$pdata}) {
        foreach my $id ( sort { $a cmp $b } keys %{$pdata->{$piece}}) {
            foreach my $bug (keys %{$pdata->{$piece}{$id}{'bugs'}}) {
                my $author = $pdata->{$piece}{$id}{'author'}{'name'};
                my $summary = $pdata->{$piece}{$id}{'summary'};
                $bugs{$bug}{'summary'} = $summary;
                $bugs{$bug}{'author'} = {} unless (defined $bugs{$bug}{'author'});
                $bugs{$bug}{'author'}{$author} = 1 if (defined $author);
            }
        }
    }

    # try to replace summaries with bug names from bugzilla
    print "Getting bug titles:\n";
    foreach my $bug ( sort { $a cmp $b } keys %bugs) {
        $bugs{$bug}{'summary'} = get_bug_name($bug, $bugs{$bug}{'summary'});
    }

    # print
    foreach my $bug ( sort { $a cmp $b } keys %bugs) {
        my $summary = $bugs{$bug}{'summary'};

        my $authors = "";
        if ( %{$bugs{$bug}{'author'}} ) {
            $authors = " [" . join (", ", sort { $a cmp $b } keys %{$bugs{$bug}{'author'}}) . "]";
        }

        my $label = $bug;
        $label =~ s/(.*)\#(.*)/* {{$1|$2}}/ if ($wiki);
        print $log $label, " ", $summary, $authors, "\n";
    }
}

# Prints the sorted ids of all bugs mentioned in the commits, one id per
# line and every id only once.
#
# Parameters:
#   $pdata - reference to the data hash filled by load_data()
#   $log   - file handle of the log
#   $wiki  - not used here; all print functions are called the same way
sub print_bugnumbers($$$)
{
    my ($pdata, $log, $wiki) = @_;

    # the hash is used as a set to get rid of the duplicates
    my %seen = ();
    foreach my $pcommits ( values %{$pdata} ) {
        foreach my $pcommit ( values %{$pcommits} ) {
            $seen{$_} = 1 foreach (keys %{$pcommit->{'bugs'}});
        }
    }

    my @bug_ids = sort keys %seen;
    print $log join ("\n", @bug_ids), "\n";
}

# Generates one log file: opens it by open_log_file(), lets the given print
# function fill it and closes it.
#
# Parameters:
#   $pdata      - reference to the data hash filled by load_data()
#   $print_func - reference to the function that is called with
#                 ($pdata, <log file handle>, $wiki)
#   $log_dir, $log_prefix, $log_suffix, $top_dir, $branch_name, $wiki
#               - passed to open_log_file()
sub generate_log($$$$$$$$)
{
    my ($pdata, $print_func, $log_dir, $log_prefix, $log_suffix, $top_dir, $branch_name, $wiki) = @_;

    my $log_fh = open_log_file($log_dir, $log_prefix, $log_suffix, $top_dir, $branch_name, $wiki);
    $print_func->($pdata, $log_fh, $wiki);
    close $log_fh;
}

########################################################################
# help

# Prints the help on the standard output.
sub usage()
{
    print "This script generates LO git commit summary\n\n" .

          "Usage: lo-commit-stat [--help] [--no-pieces] [--piece=<piece>] [--commits] [--bugs]\n" .
          "                      [--bugs-wiki] [--bugs-numbers] [--rev-list|--cherry]\n" .
          "                      [--log-dir=<dir>] --log-suffix=<string> topdir [git_arg...]\n\n" .

          "Options:\n" .
          "     --help          print this help\n" .
          "     --no-pieces     read changes just from the main repository, ignore other cloned repos\n" .
          "     --piece=<piece> summarize just changes from the given piece\n" .
          "     --log-dir=<dir> directory where to put the generated log; the current\n" .
          "                     directory is used by default\n" .
          "     --log-suffix=<string> suffix of the log file name; the result will be\n" .
          "                     <kind>-<branch>-<log-name-suffix>.log (.wiki for --bugs-wiki)\n" .
          "                     where <kind> is commits, bugs, or bug-numbers; the branch\n" .
          "                     name is detected automatically\n" .
          "     --commits       generate log with all commits (default)\n" .
          "     --bugs          generate log with bugzilla entries\n" .
          "     --bugs-wiki     generate log with bugzilla entries, use wiki markup\n" .
          "     --bugs-numbers  generate log with bugzilla numbers\n" .
          "     --rev-list      use \"git rev-list\" instead of \"git log\"; useful to check\n" .
          "                     differences between branches\n" .
          "     --cherry        use \"git cherry\" instead of \"git log\"; detects cherry-picked\n" .
          "                     commits between branches\n" .
          "      topdir         directory with the libreoffice/core clone; the piece repos\n" .
          "                     must be cloned in the main-repo-root/clone/<piece> subdirectories\n" .
          "      git_arg        extra parameters passed to the git command to define\n" .
          "                     the area of interest; The default command is \"git log\" and\n" .
          "                     parameters might be, for example, --after=\"2010-09-27\" or\n" .
          "                     TAG..HEAD; with the option --rev-list, useful might be, for\n" .
          "                     example origin/master ^origin/libreoffice-3-3; with the option\n" .
          "                     --cherry, useful might be, for example libreoffice-3.6.3.2\n" .
          "                     libreoffice-3.6.4.1\n";
}


#######################################################################
#######################################################################
# MAIN
#######################################################################
#######################################################################


# settings filled from the command line
my $piece;                  # the only repository to analyze; all of them when undefined
my %generate_log = ();      # kinds of the logs to generate
my $top_dir;                # directory with the clone of the main repository
my $log_dir;                # directory for the generated logs
my $log_suffix;             # suffix of the log file names
my $git_command = "git log";
my $git_cherry;             # filter the commits by "git cherry" when true
my $git_args = "";          # extra arguments for the git command

my $branch_name;
my %data;

foreach my $arg (@ARGV) {
    if ($arg eq '--help') {
        usage();
        exit;
    } elsif ($arg eq '--no-pieces') {
        $piece = $main_repo;
    } elsif ($arg =~ m/^--piece=(.*)/) {
        $piece = $1;
    } elsif ($arg =~ m/^--log-suffix=(.*)/) {
        $log_suffix = "$1";
    } elsif ($arg =~ m/^--log-dir=(.*)/) {
        $log_dir = "$1";
    } elsif ($arg eq '--commits') {
        $generate_log{"commits"} = 1;
    } elsif ($arg eq '--bugs') {
        $generate_log{"bugs"} = 1;
    } elsif ($arg eq '--bugs-wiki' || $arg eq '--wikibugs') {
        $generate_log{"bugs-wiki"} = 1;
    } elsif ($arg eq '--bugs-numbers' || $arg eq '--bug-numbers') {
        $generate_log{"bugs-numbers"} = 1;
    } elsif ($arg eq '--rev-list') {
        $git_command = "git rev-list --pretty=medium";
    } elsif ($arg eq '--cherry') {
        $git_command = "git log";
        $git_cherry = 1;
    } elsif (! defined $top_dir) {
        # the first unknown argument is the top directory
        $top_dir = $arg;
    } else {
        # anything else is passed to git
        # NOTE: the arguments are put into a shell command line as they are;
        #       an argument including a space must be quoted twice by the user
        $git_args .= " $arg";
    }
}

# default log
unless (%generate_log) {
    $generate_log{"commits"} = 1;
}

(defined $top_dir) || die "Error: top directory is not defined\n";
(-d "$top_dir") || die "Error: not a directory: $top_dir\n";
(-f "$top_dir/.git/config") || die "Error: can't find $top_dir/.git/config\n";

(!defined $log_dir) || (-d $log_dir) || die "Error: directory does not exist: $log_dir\n";

(defined $log_suffix) || die "Error: define log suffix using --log-suffix=<string>\n";

$branch_name = get_branch_name($top_dir);

load_data(\%data, $top_dir, $piece, $branch_name, $git_command, $git_cherry, $git_args);

# one log file for every requested kind; the last argument selects the wiki markup
generate_log(\%data, \&print_commits,    $log_dir, "commits",     $log_suffix, $top_dir, $branch_name, 0) if (defined $generate_log{"commits"});
generate_log(\%data, \&print_bugs,       $log_dir, "bugs",        $log_suffix, $top_dir, $branch_name, 0) if (defined $generate_log{"bugs"});
generate_log(\%data, \&print_bugs,       $log_dir, "bugs",        $log_suffix, $top_dir, $branch_name, 1) if (defined $generate_log{"bugs-wiki"});
generate_log(\%data, \&print_bugnumbers, $log_dir, "bug-numbers", $log_suffix, $top_dir, $branch_name, 0) if (defined $generate_log{"bugs-numbers"});