我是 Ruby 新手……
我有一个 csv 文件 (logevents.csv),其中有一个“message”列,该列的每一行都是一段 JSON 数据。使用 Ruby,我想把 JSON 数据中的名称:值对转换为第二个 csv 文件中的列名和行值。这是 csv 文件的表头和第一行:
message "{""version"":""0"",""id"":""fdd11d8a-ef17-75ae-cf50-077285bb7e15"",""detail-type"":""Auth0 log"",""source"":""aws.partner/auth0.com/website-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs"",""account"":""654654277766"",""time"":""2024-03-27T12:30:51Z"",""region"":""us-east-2"",""resources"":[],""detail"":{""log_id"":""90020240327123051583073000000000000001223372067726119722"",""data"":{""date"":""2024-03-27T12:30:51.531Z"",""type"":""seaft"",""description"":"""",""connection_id"":"""",""client_id"":""v00a8B5f1sgCDjVhneXMbMmwxlsbYoHq"",""client_name"":""Website Dev"",""ip"":""32.174.36.217"",""user_agent"":""有一天 v1.10.3"",""details"":{""code"":""******************************************5kW""},""hostname"":""website-dev.us.auth0.com"",""user_id"":""auth0|648a230ee5ad48ee2ebfb212"",""user_name"":""[email protected]"",""auth0_client"":{""name"":""omniauth-auth0"",""version"":""2.6.0"",""env"":{""ruby"":""2.6.5"",""rails"":""6.1.7.4""}},""$event_schema"":{""version"":""1.0.0""},""log_id"":""90020240327123051583073000000000000001223372067726119722""}}}"
对于每一行,我希望将上面的内容写入另一个 csv 文件,把名称:值对转换为列名和行值,并使用“,”(逗号)作为列名和行值的分隔符,例如:
version,id,detail-type,source,account,... 等等
0,fdd11d8a-ef17-75ae-cf50-077285bb7e15,Auth0 log,aws.partner/auth0.com/website-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs,654654277766,... 等等
我一直在尝试通过这个 ruby 脚本(runtimetest.rb)来完成此任务:
require 'csv'
require 'json'
# Create (or truncate) the destination CSV and hand its writer to the block.
CSV.open("C:/Ruby/dev/logevents2.csv", "w") do |writer|
  # Slurp the whole source file and feed it to the JSON parser as one document.
  # NOTE(review): the input path is a .csv file, yet its full text is parsed
  # as JSON here -- confirm this is intended.
  raw_text = File.open("C:/Ruby/dev/logevents.csv").read
  records = JSON.parse(raw_text)
  # Emit one CSV row per parsed element, using only its values.
  records.each { |record| writer << record.values }
end
但运行时,屏幕上会输出 csv 文件 (logevents.csv) 的内容,并伴随一条“unexpected token”(意外标记)错误消息:
C:\Users\dclad>runtimetest.rb
C:/Ruby32-x64/lib/ruby/3.2.0/json/common.rb:216:in `parse': unexpected token at '"version"":""0"",""id"":""fdd11d8a-ef17-75ae-cf50-077285bb7e15"",""detail-type"":""Auth0 log"",""source"":""aws.partner/auth0.com/trulab-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs"",""account"":""654654277766"", ........
以下是我尝试过的做法。我一直在尝试通过这个 ruby 脚本(runtimetest.rb)来完成此任务:
require 'csv'
require 'json'
# Create (or truncate) the destination CSV and hand its writer to the block.
CSV.open("C:/Ruby/dev/logevents2.csv", "w") do |writer|
  # Slurp the whole source file and feed it to the JSON parser as one document.
  # NOTE(review): the input path is a .csv file, yet its full text is parsed
  # as JSON here -- confirm this is intended.
  raw_text = File.open("C:/Ruby/dev/logevents.csv").read
  records = JSON.parse(raw_text)
  # Emit one CSV row per parsed element, using only its values.
  records.each { |record| writer << record.values }
end
期望输出为第二个 csv 中的列、行表:
version,id,detail-type,source,account,... 等等
0,fdd11d8a-ef17-75ae-cf50-077285bb7e15,Auth0 log,aws.partner/auth0.com/trulab-dev-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs,654654277766,... 等等
我的做法可能全错了。 任何建议将不胜感激!
最诚挚的问候, 唐纳德
我已经能够让 Ruby 解析只含单条记录的 JSON 文件,并把其中的名称:值对(包括嵌套的键值对)作为标题行和数据行写入第二个 .csv 文件。
require 'csv'
require 'json'
# Breadth-first search of a parsed JSON value for the first Array, which the
# caller then treats as the list of rows to export.
#
# @param json [Object] a parsed JSON value (Hash, Array or scalar)
# @return [Array] the first Array met in breadth-first order (+json+ itself
#   when it already is an Array), or +[json]+ when no Array exists anywhere
def array_from(json)
  queue = [json]
  # Loop on queue emptiness rather than on the dequeued item being non-nil:
  # a JSON null pulled off the queue must not end the search while other
  # values are still waiting behind it.
  until queue.empty?
    item = queue.shift
    return item if item.is_a?(Array)

    # Only Hash values are enqueued; scalars have nothing to descend into.
    queue.concat(item.values) if item.is_a?(Hash)
  end
  [json]
end
# Flattens a nested JSON-like structure into a single-level Hash whose keys
# are "|"-joined paths and whose values are the scalar leaves.
#
# Hash keys and Array indexes become path segments, so
#   { "a" => { "b" => 1 }, "t" => ["x", "y"] }
# becomes
#   { "a|b" => 1, "t|0" => "x", "t|1" => "y" }
#
# @param object [Object] Hash, Array or scalar to flatten
# @param path [String] path prefix accumulated so far; every segment is
#   followed by "|" (callers normally leave the default)
# @return [Hash] column path => scalar value. Empty Hashes/Arrays and values
#   of any unlisted class contribute no column. A scalar passed with an
#   empty path is stored under the "" key.
def flatten(object, path = '')
  case object
  when Hash
    object.each_with_object({}) do |(key, value), columns|
      columns.merge!(flatten(value, "#{path}#{key}|"))
    end
  when Array
    # Use the position, not the element, as the path segment: elements may be
    # Hashes (whose string form is not a usable column name) or may repeat.
    object.each_with_index.with_object({}) do |(element, index), columns|
      columns.merge!(flatten(element, "#{path}#{index}|"))
    end
  when String, Numeric, true, false, nil
    # Numeric covers Integer and Float; nil is kept so a null value still
    # yields its column instead of vanishing from the row.
    # Strip the trailing "|" separator left by the caller.
    { path.chomp('|') => object }
  else
    {}
  end
end
# Load the input document. File.read opens, reads and closes the file in one
# call, so no handle is left open.
json = JSON.parse(File.read('logevent.json'))

# Locate the list of records, then flatten each one into a column => value Hash.
in_array = array_from json
out_array = in_array.map { |row| flatten row }

# The header is the union of every row's columns in first-seen order, so rows
# that lack (or add) a column still line up with the header.
headers = out_array.flat_map(&:keys).uniq

CSV.open('logevent.csv', 'w') do |csv|
  # Write the header once; write nothing at all when there are no rows.
  csv << headers unless out_array.empty?
  # values_at yields nil (an empty cell) for any column a row does not have.
  out_array.each { |row| csv << row.values_at(*headers) }
end
这是输入文件“logevent.json”的内容:
{
"Message": [
{
"version": "0",
"id": "fdd11d8a-ef17-75ae-cf50-077285bb7e15",
"detail-type": "Auth0 log",
"source": "aws.partner/auth0.com/website-c36bb924-cf05-4a5b-8400-7bdfbfe0806c/auth0.logs",
"account": "654654277766",
"time": "2024-03-27T12:30:51Z",
"region": "us-east-2",
"resources": [],
"detail": {
"log_id": "90020240327123051583073000000000000001223372067726119722",
"data": {
"date": "2024-03-27T12:30:51.531Z",
"type": "seacft",
"description": "",
"connection_id": "",
"client_id": "v00a8B5f1sgCDjVhneXMbMmwxlsbYoHq",
"client_name": "Website Dev",
"ip": "3.17.36.227",
"user_agent": "JohnDoe v1.10.3",
"details": {
"code": "******************************************5kW"
},
"hostname": "website.us.auth0.com",
"user_id": "auth0|648a230ee5ad48ee2ebfb212",
"user_name": "[email protected]",
"auth0_client": {
"name": "omniauth-auth0",
"version": "2.6.0",
"env": {
"ruby": "2.6.5",
"rails": "6.1.7.4"
}
},
"$event_schema": {
"version": "1.0.0"
},
"log_id": "90020240327123051583073000000000000001223372067726119722"
}
}
}
]
}
请参阅上面的“更新”...