如何解析git diff的输出并获取行信息(即哪些行已被添加/修改)?
我想要类似的东西
# Desired usage, as sketched by the question: this is the wished-for API,
# not working code -- `Git.Diff.parse` is not defined anywhere in this text.
# Backticks run the shell command and capture its standard output.
raw = `git diff`
# Hypothetical parser: expected to yield one object per changed file.
parsed = Git.Diff.parse(raw)
parsed.each do |file|
# Each file object is expected to enumerate its changed lines.
file.each do |line|
puts "#{file.name} - #{line.number} - #{line.type}"
end
end
[
{
"file": "path/to/file1",
"lines": [
{ number: "1", type: "modified"},
{ number: "4", type: "deleted"},
{ number: "9", type: "added"}
]
},
{
"file": "path/to/file2",
"lines": [
{ number: "4", type: "modified"},
{ number: "5", type: "added"}
]
}
]
您需要的是将输出正确分组到文件块中并保留所需内容。
你只需运行 `git diff` 即可获得差异输出。
其中需要保留的行有:
- 以 'diff --git' 开头的行,您可以从中获取文件的名称;
- 以 '+ ' 开头的行,即添加的行;
- 以 '- ' 开头的行,即删除的行。
对于这类分组需求,我首先想到的是 Enumerable#slice_before。
我最终得到了这个原型:
# Prototype: count how many lines were added and removed in each file
# reported by `git diff`.
raw_data = `git diff`.split("\n")

# Keep what is needed: the 'diff --git' header of every file plus the added
# ('+') and removed ('-') lines found inside hunks. In unified diff output the
# marker is followed directly by the line content (no separating space), so
# matching on '+ ' / '- ' would miss most changes. Tracking whether we are
# inside a hunk (i.e. after an '@@' line) keeps the '--- a/...' and
# '+++ b/...' file headers out of the counts.
in_hunk = false
clean_data = raw_data.select do |li|
  if li.start_with?('diff --git')
    in_hunk = false
    true
  elsif li.start_with?('@@')
    in_hunk = true
    false
  else
    in_hunk && li.start_with?('+', '-')
  end
end

# Group by file:
# [[file_1_header, line1, line2, line3], [file_2_header, line1]]
file_data = clean_data.slice_before { |li| li.start_with?('diff --git') }

# This is the output format: file name => counters, created on first access.
output = Hash.new { |h, k| h[k] = { added: 0, removed: 0 } }

# Populate the output
file_data.each_with_object(output) do |f_data, memo|
  header, *file_info = f_data
  # 'diff --git a/path b/path' -> 'path' (the a/ side of the header)
  file = header.split(' b/').first.sub('diff --git a/', '')
  file_info.each do |f_info|
    memo[file][f_info.start_with?('+') ? :added : :removed] += 1
  end
end
{
"file_1" => { added: 1, removed: 12 },
"file_2" => { added: 0, removed: 1 }
}
我相信它还可以改进得更好 :-)
这就是我最终的结果
# Parses the textual output of `git diff` into one Hunk per `diff --git`
# section, recording every added and deleted line together with its line
# number. Relies on the Hunk and SourceLine classes being defined elsewhere.
class Parser
  # Parses a complete diff.
  #
  # @param text [String] raw `git diff` output; input that is not tagged as
  #   UTF-8 is transcoded to UTF-8, replacing bytes that cannot be converted
  # @return [Array<Hunk>] one Hunk per `diff --git` header, in input order
  # @raise [RuntimeError] if a `diff --git` header is the last line of input
  def parse(text)
    encoded_text =
      if text.encoding.name != "UTF-8"
        # Transcode the argument itself; options must be keyword arguments.
        text.encode("UTF-8", "binary", invalid: :replace, undef: :replace)
      else
        text
      end

    hunks = []
    hunk = nil
    # Both counters stay nil until the first '@@' header of the current file,
    # so file-level header lines ('index', '---', '+++', ...) are ignored.
    added_line_number = nil
    deleted_line_number = nil
    lines = encoded_text.strip.split("\n")

    lines.each_with_index do |line, index|
      if (m = /^diff --git a\/(.*?) b\/(.*?)$/.match(line))
        raise "Diff formatting error, 'diff --git' is the last line" if index + 1 >= lines.length

        # A new file section starts: forget the previous file's positions.
        added_line_number = nil
        deleted_line_number = nil
        hunk = Hunk.new(m[1], m[2])
        # The line right after the header tells new/deleted files apart.
        hunk.type = hunk_type(lines[index + 1], m[1], m[2])
        hunks.push(hunk)
      elsif /^Binary files /.match(line)
        hunk.is_binary = true
      elsif (m = /^@@ \-(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@/.match(line))
        # (e.g. @@ -19,6 +19,7 @@) gives the starting line on each side.
        deleted_line_number = Integer(m[1])
        added_line_number = Integer(m[2])
      elsif added_line_number.nil? || line.start_with?('\\')
        # Either not inside a hunk yet, or a "\ No newline at end of file"
        # marker, which is not a source line and must not advance counters.
        next
      elsif line.start_with?('+')
        # added line
        hunk.lines.push SourceLine.new(added_line_number, SourceLine::Type::Added, line[1..-1])
        added_line_number += 1
      elsif line.start_with?('-')
        # deleted line
        hunk.lines.push SourceLine.new(deleted_line_number, SourceLine::Type::Deleted, line[1..-1])
        deleted_line_number += 1
      else
        # unmodified (context) line exists on both sides
        added_line_number += 1
        deleted_line_number += 1
      end
    end

    hunks
  end

  private

  # Classifies a file section.
  #
  # @param line [String] the line immediately following the `diff --git` header
  # @param original [String] path taken from the a/ side of the header
  # @param renamed [String] path taken from the b/ side of the header
  # @return [String] one of the Hunk::Type constants
  def hunk_type(line, original, renamed)
    case line
    when /^new file /
      Hunk::Type::Added
    when /^deleted file /
      Hunk::Type::Deleted
    else
      original == renamed ? Hunk::Type::Modified : Hunk::Type::Renamed
    end
  end
end
# Top-level change-type labels. The values are the same strings as the ones in
# Hunk::Type; the visible Parser code only refers to the namespaced variants.
# NOTE(review): possibly a leftover duplicate -- confirm before relying on it.
# The strings are frozen so the shared constants cannot be mutated in place.
module Type
  Added = 'added'.freeze
  Deleted = 'deleted'.freeze
  Modified = 'modified'.freeze
  Renamed = 'renamed'.freeze
end
# The changes made to a single file in a diff: both paths from the
# `diff --git` header, the kind of change, and the changed lines.
class Hunk
  # Kinds of change a file section can represent. Frozen so the shared
  # constant strings cannot be mutated in place.
  module Type
    Added = 'added'.freeze
    Deleted = 'deleted'.freeze
    Modified = 'modified'.freeze
    Renamed = 'renamed'.freeze
  end

  # original_path / renamed_path: the two paths given to the constructor
  # type:      not set by the constructor (nil until assigned)
  # lines:     collection of changed lines, starts out as an empty Array
  # is_binary: flag, false until set by the caller
  attr_accessor :original_path, :renamed_path, :type, :lines, :is_binary
  alias_method :is_binary?, :is_binary

  # @param original_path [String] path before the change
  # @param renamed_path [String] path after the change
  def initialize(original_path, renamed_path)
    self.is_binary = false
    self.lines = []
    self.original_path = original_path
    self.renamed_path = renamed_path
  end
end
# One changed line of a diff: its line number, whether it was added or
# deleted, and its text.
class SourceLine
  # Kinds of line change. Frozen so the shared constant strings cannot be
  # mutated in place.
  module Type
    Added = 'added'.freeze
    Deleted = 'deleted'.freeze
  end

  attr_accessor :number, :type, :text

  # @param number [Integer] line number of the line
  # @param type [String] one of the SourceLine::Type constants
  # @param text [String] content of the line
  def initialize(number, type, text)
    self.number = number
    self.type = type
    self.text = text
  end
end