Skip to content

Commit 3d703a7

Browse files
committed
Add initial support for libgit2/rugged involving massive changes
Large change set here! It was difficult to split this up into modular changes, as Grit at the time was the heart of the processing. The initial implementation done here for Rugged works, and all the specs are currently passing. The GPG signing issue is no longer an issue, and Rugged also provides a substantial speed boost and memory reduction compared to the Grit implementation. The current state also runs on Ruby 2.0.x, as we are no longer held back by Grit. Remove: * The use of Grit for git processing * repo.rb, as Rugged::Repository provides sufficient repo-finding capabilities * Mixin for Grit::Blob to include LanguageSniffer functionality * blob_finder.rb, as Rugged::Diff::Patch provides sufficient functionality to acquire the individual SHAs of blobs affected in the diff, which can then be passed to Rugged::Repository.lookup(sha) * FileSummary struct; instead, just use the patch information from DiffSummary Add: * Rugged for git processing * Direct use of LanguageSniffer in diff_summary to detect the language of the diff's content (using the blob in question) * Additional functionality in diff_summary.rb, as it is now responsible for working with patches (provides #additions, #deletions, #status) * Additional information on LanguageSummary (:added_files, :deleted_files, :modified_files) Refactor: * How we find the commits to process: Grit::Commit.find_all(…) was replaced with Rugged::Walker * spec_helper.rb to now use Rugged::Repository instead of our own class * The terminology of :create, :delete to :added_files, :deleted_files Fix: * The ability to handle time_since and time_until for commits to work with Rugged (check on each commit if it is valid -- #valid_commit?) * Specs to work with the new implementation and terminology * Fixtures to use the updated key names
1 parent a680753 commit 3d703a7

19 files changed

+223
-409
lines changed

git_statistics.gemspec

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ Gem::Specification.new do |gem|
1717
gem.required_ruby_version = '>= 1.9.1'
1818

1919
gem.add_dependency('json')
20-
gem.add_dependency('grit')
20+
gem.add_dependency('rugged')
2121
gem.add_dependency('language_sniffer')
2222

2323
gem.add_development_dependency "rspec", "~> 2.12.0"

lib/git_statistics.rb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ class CLI
55
attr_reader :repository, :options
66

77
def initialize(dir)
8-
@repository = dir.nil? ? Repo.new(Dir.pwd) : Repo.new(dir)
8+
repository_location = dir.nil? ? Rugged::Repository.discover(Dir.pwd) : Rugged::Repository.discover(dir)
9+
@repository = Rugged::Repository.new(repository_location)
910
@collected = false
1011
@collector = nil
1112
@options = OpenStruct.new(
@@ -35,14 +36,13 @@ def collect_and_only_update
3536
if options.update
3637
# Ensure commit directory is present
3738
@collector = Collector.new(repository, options.limit, false, options.pretty)
38-
commits_directory = repository.working_dir + ".git_statistics"
39+
commits_directory = repository.workdir + ".git_statistics/"
3940
FileUtils.mkdir_p(commits_directory)
4041
file_count = Utilities.number_of_matching_files(commits_directory, /\d+\.json/) - 1
4142

4243
if file_count >= 0
43-
time_since = Utilities.get_modified_time(commits_directory + "#{file_count}.json")
44-
# Only use --since if there is data present
45-
@collector.collect(options.branch, {:since => time_since})
44+
time_since = Utilities.get_modified_time(commits_directory + "#{file_count}.json").to_s
45+
@collector.collect(options.branch, {:time_since => time_since})
4646
@collected = true
4747
end
4848
end

lib/git_statistics/blob_finder.rb

Lines changed: 0 additions & 54 deletions
This file was deleted.

lib/git_statistics/collector.rb

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,52 +4,65 @@ class Collector
44
attr_accessor :repo, :commits_path, :commits
55

66
def initialize(repo, limit, fresh, pretty)
7-
Grit::Git.git_timeout = 0
8-
Grit::Git.git_max_size = 0
9-
107
@repo = repo
11-
@commits_path = repo.working_dir + ".git_statistics"
8+
@commits_path = repo.workdir + ".git_statistics"
129
@commits = Commits.new(@commits_path, fresh, limit, pretty)
1310
end
1411

1512
def collect(branch, options = {})
16-
# Collect branches to use for git log
17-
branches = branch ? [] : @repo.branches.compact.map(&:name)
18-
Grit::Commit.find_all(@repo, nil, options).each do |commit|
19-
extract_commit(commit)
20-
@commits.flush_commits
13+
walker = Rugged::Walker.new(repo)
14+
master_head = Rugged::Branch.lookup(repo, "master").tip
15+
16+
walker.push(master_head)
17+
walker.each_with_index do |commit, count|
18+
if valid_commit?(commit, options)
19+
extract_commit(commit, count + 1)
20+
@commits.flush_commits
21+
end
2122
end
23+
2224
@commits.flush_commits(true)
2325
end
2426

27+
def valid_commit?(commit, options)
28+
if !options[:time_since].nil?
29+
return false unless commit.author[:time] > DateTime.parse(options[:time_since].to_s).to_time
30+
end
31+
32+
if !options[:time_until].nil?
33+
return false unless commit.author[:time] < DateTime.parse(options[:time_until].to_s).to_time
34+
end
35+
36+
return true
37+
end
38+
2539
def acquire_commit_meta(commit_summary)
2640
# Initialize commit data
27-
data = (@commits[commit_summary.sha] ||= Hash.new(0))
41+
data = (@commits[commit_summary.oid] ||= Hash.new(0))
2842

29-
data[:author] = commit_summary.author.name
30-
data[:author_email] = commit_summary.author.email
31-
data[:time] = commit_summary.authored_date.to_s
43+
data[:author] = commit_summary.author[:name]
44+
data[:author_email] = commit_summary.author[:email]
45+
data[:time] = commit_summary.author[:time].to_s
3246
data[:merge] = commit_summary.merge?
3347
data[:additions] = commit_summary.additions
3448
data[:deletions] = commit_summary.deletions
3549
data[:net] = commit_summary.net
36-
data[:new_files] = commit_summary.new_files
37-
data[:removed_files] = commit_summary.removed_files
38-
data[:files] = commit_summary.files
50+
data[:added_files] = commit_summary.added_files
51+
data[:deleted_files] = commit_summary.deleted_files
52+
data[:modified_files] = commit_summary.modified_files
53+
data[:files] = commit_summary.file_stats.map{ |file| file.to_json }
3954

4055
return data
4156
end
4257

43-
def extract_commit(commit)
44-
unless commit.nil?
45-
commit_summary = CommitSummary.new(commit)
46-
Log.info "Extracting #{commit_summary.sha}"
58+
def extract_commit(commit, count)
59+
Log.info "Extracting(#{count}) #{commit.oid}"
60+
commit_summary = CommitSummary.new(@repo, commit)
4761

48-
# Acquire meta information about commit
49-
commit_data = acquire_commit_meta(commit_summary)
62+
# Acquire meta information about commit
63+
commit_data = acquire_commit_meta(commit_summary)
5064

51-
return commit_data
52-
end
65+
return commit_data
5366
end
5467

5568
end

lib/git_statistics/commit_summary.rb

Lines changed: 19 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
module GitStatistics
22
class CommitSummary < SimpleDelegator
3-
def initialize(commit)
3+
def initialize(repo, commit)
44
super(commit)
5+
@repo = repo
6+
@diff = diff(commit.parents.first)
7+
@patches = @diff.patches
58
end
69

710
# A Git commit is a merge if it has more than one parent
@@ -10,13 +13,18 @@ def merge?
1013
end
1114

1215
# How many files were removed in this commit
13-
def removed_files
14-
cached_show.select { |diff| diff.deleted_file == true }.count
16+
def deleted_files
17+
file_stats.select { |file| file.status == :deleted }.count
1518
end
1619

1720
# How many files were added in this commit
18-
def new_files
19-
cached_show.select { |diff| diff.new_file == true }.count
21+
def added_files
22+
file_stats.select { |file| file.status == :added }.count
23+
end
24+
25+
# How many files were modified (not added/deleted) in this commit
26+
def modified_files
27+
file_stats.select { |file| file.status == :modified }.count
2028
end
2129

2230
# How many total additions in this commit?
@@ -35,18 +43,18 @@ def net
3543
end
3644

3745
def file_stats
38-
@cached_file_stats ||= diffstats.map { |diff| DiffSummary.new(diff, current_tree) }
46+
@cached_file_stats ||= diffstats.map { |diff| DiffSummary.new(@repo, diff) }
3947
end
4048

41-
LanguageSummary = Struct.new(:name, :additions, :deletions, :net)
49+
LanguageSummary = Struct.new(:name, :additions, :deletions, :net, :added_files, :deleted_files, :modified_files)
4250

4351
# Array of LanguageSummary objects (one for each language) for simple calculations
4452
def languages
4553
grouped_language_files.collect do |language, stats|
4654
additions = summarize(stats, :additions)
4755
deletions = summarize(stats, :deletions)
4856
net = summarize(stats, :net)
49-
LanguageSummary.new(language, additions, deletions, net)
57+
LanguageSummary.new(language, additions, deletions, net, added_files, deleted_files, modified_files)
5058
end
5159
end
5260

@@ -55,72 +63,13 @@ def grouped_language_files
5563
file_stats.group_by(&:language)
5664
end
5765

58-
FileSummary = Struct.new(:name, :language, :additions, :deletions, :net, :filestatus)
59-
60-
# Array of FileSummary objects (one for each file) for simple calculations
61-
def files
62-
file_stats.collect{ |stats| determine_file_summary(stats) }
63-
end
64-
65-
def cached_show
66-
@cached_commit_show ||= show
67-
end
68-
6966
# Files touched in this commit
70-
def file_names
71-
diffstats.map(&:filename)
72-
end
73-
74-
# Fetch the current Grit::Repo tree from this commit
75-
def current_tree
76-
@current_tree ||= repo.tree(sha)
67+
def filenames
68+
file_stats.map(&:filename)
7769
end
7870

7971
private
8072

81-
def determine_file_summary(stats)
82-
filestatus = :modified
83-
language = stats.language
84-
85-
# Determine if this file could be a new or deleted file
86-
if (stats.additions > 0 && stats.deletions == 0) || (stats.additions == 0 && stats.deletions > 0)
87-
# Extract file status from commit's diff object
88-
cached_show.each do |diff|
89-
if stats.filename == diff.b_path
90-
filestatus = :create if diff.new_file
91-
filestatus = :delete if diff.deleted_file
92-
break
93-
end
94-
end
95-
end
96-
97-
# Determine language of blob
98-
if stats.tree?
99-
# Trees have no language (the tree's blobs are still processed via the remainder diffstats)
100-
language = "Unknown"
101-
elsif stats.submodule?
102-
language = "Submodule"
103-
elsif stats.blob.nil?
104-
# If blob is nil (i.e., deleted file) grab the previous version of this blob using the parents of the current commit
105-
blob = BlobFinder.get_blob(self.parents.first, stats.filename)
106-
blob = BlobFinder.get_blob(self.parents.last, stats.filename) if blob.nil?
107-
108-
# Determine language of newly found blob
109-
if blob.kind_of? Grit::Tree
110-
language = "Unknown"
111-
elsif blob.kind_of? Grit::Submodule
112-
language = "Submodule"
113-
elsif blob.nil? || blob.language.nil?
114-
language = "Unknown"
115-
else
116-
language = blob.language.to_s
117-
end
118-
end
119-
120-
# TODO Converts file summary into hash to keep json compatibility (for now)
121-
Hash[FileSummary.new(stats.filename, language, stats.additions, stats.deletions, stats.net, filestatus).each_pair.to_a]
122-
end
123-
12473
def summarize(stats, what)
12574
stats.map(&what).inject(0, :+)
12675
end
@@ -130,19 +79,7 @@ def commit_summary(what)
13079
end
13180

13281
def diffstats
133-
if merge?
134-
merge_diffstats
135-
else
136-
stats.to_diffstat
137-
end
138-
end
139-
140-
# Hackery coming...
141-
DIFFSTAT_REGEX = /([-|\d]+)\s+([-|\d]+)\s+(.+)/i
142-
def merge_diffstats
143-
native_diff = repo.git.native(:diff, {numstat: true}, parents.join("..."))
144-
per_file_info = native_diff.scan(DIFFSTAT_REGEX)
145-
per_file_info.map { |add, del, file| Grit::DiffStat.new(file, add.to_i, del.to_i) }
82+
@patches
14683
end
14784

14885
end

lib/git_statistics/commits.rb

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,8 +117,9 @@ def add_language_stats(data, file)
117117
data[:languages][file[:language].to_sym][:additions] += file[:additions]
118118
data[:languages][file[:language].to_sym][:deletions] += file[:deletions]
119119

120-
if file[:filestatus] != nil
121-
data[:languages][file[:language].to_sym][file[:filestatus].to_sym] += 1
120+
# Keep count of languages status (i.e., added, deleted) and keep keys consistent (i.e., added_files, deleted_files)
121+
if file[:status] != nil
122+
data[:languages][file[:language].to_sym][(file[:status]+'_files').to_sym] += 1
122123
end
123124

124125
return data
@@ -130,8 +131,8 @@ def add_commit_stats(data, commit)
130131
data[:commits] += 1
131132
data[:additions] += commit[:additions]
132133
data[:deletions] += commit[:deletions]
133-
data[:create] += commit[:new_files] if commit[:new_files] > 0
134-
data[:delete] += commit[:removed_files] if commit[:removed_files] > 0
134+
data[:added_files] += commit[:added_files] if !commit[:added_files].nil?
135+
data[:deleted_files] += commit[:deleted_files] if !commit[:deleted_files].nil?
135136
return data
136137
end
137138

0 commit comments

Comments
 (0)