CSV 文件中不需要的空白行

问题描述 投票:0回答:2

我这里有一个命令,是在 Stack Overflow 社区的帮助下拼凑出来的。现在这个脚本有一个小问题,虽然很小,但一直困扰着我。

以下是我使用的脚本:

#!/bin/bash
# Prompts for an output path (FILENAME) and for a job name or wildcard, runs
# `autorep -j <job> -q` for the first line read from stdin, and has awk turn
# that output into a quoted CSV written to "$FILENAME.csv".

  read -p "Enter /DIR/PATH/FILENAME where you wish to copy the data: " FILENAME
  echo "Enter the JOB_NAME or %SEARCHSTRING%"


 # Only one line is ever processed: the loop body ends with `break`.
 while read -r i;
   do

  # The awk program prints the header row in BEGIN, flushes the previous job
  # each time a line containing job_type is seen, and flushes the last job in END.
  # NOTE(review): `NR>1` is already true on the first job_type line whenever
  # any line (such as the `/* --- name --- */` banner) precedes it, so a row of
  # still-unset variables is printed right after the header.
  # NOTE(review): $i and $FILENAME are expanded unquoted on the redirection line.
  awk '
    BEGIN {
    print "\"insert_job\",\"job_type\",\"box_name\",\"command\",\"machine\",\"owner\",\"date_conditions\",\"condition\",\"run_calendar\",\"exclude_calendar\",\"days_of_week\",\"run_window\",\"start_times\",\"start_mins\",\"resources\",\"profile\",\"term_run_time\",\"watch_file\",\"watch_interval\"" }

/job_type/ {
    if (NR>1){printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", jn, jt, box, cmd, mcn, own, dc, c, rc, ec, dow, ruw, st, sm, res, prof, trt, wf, wi} jn="\""$2"\""; jt="\""$4"\""; box="\" \""; cmd="\" \""; mcn="\" \""; own="\" \""; dc="\" \""; c="\" \""; rc="\" \""; ec="\" \""; dow="\" \""; ruw="\" \""; st="\" \""; sm="\" \""; res="\" \""; prof="\" \""; trt="\" \""; wf="\" \""; wi="\" \""}
    /box_name/ {box="\""$2"\""}
    /command/ {$0=substr($0,index($0,$2)); cmd="\""$0"\""}
    /machine/ {mcn="\""$2"\""}
    /owner/   {own="\""$2"\""}
    /date_conditions/ {dc="\""$2"\""}
    /condition/ {$0=substr($0,index($0,$2)); c="\""$0"\""}
    /run_calendar/ {rc="\""$2"\""}
    /exclude_calendar/ {ec="\""$2"\""}
    /days_of_week/ {dow="\""$2"\""}
    /run_window/ {ruw="\""$2"\""}
    /start_times/ {gsub("\"",""); st="\""$2"\""}
    /^start_mins/ {sm="\""$2"\""}
    /profile/ {prof="\""$2"\""}
    /term_run_time/ {trt="\""$2"\""}
    /watch_file/ {wf="\""$2"\""}
    /watch_interval/ {wi="\""$2"\""}
    /resources/ {res="\""$2"\""}
    END{printf "%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n", jn, jt, box, cmd, mcn, own, dc, c, rc, ec, dow, ruw, st, sm, res, prof, trt, wf, wi}
' < <(autorep -j $i -q) > $FILENAME.csv

break
done

$i 接受一个通配符(wildcard)条目,并根据该条目给出输出。

例如:以下 4 个作业的名称中都包含 Test,因此我会输入 Test% 作为通配符值,脚本就会给出全部 4 个作业的输出。

这些是我正在使用的测试作业:

/* ----------------- Test_A ----------------- */

insert_job: Test_A  job_type: CMD
command: sleep 3000
machine: machine1
owner: user1
permission:
date_conditions: 1
days_of_week: mo,tu,we,th,FR
start_times: “06:00,08:00,10:00,12:00,14:00,16:00”
term_run_time: 1
alarm_if_fail: 1
alarm_if_terminated: 1


/* ----------------- Test_B ----------------- */

insert_job: Test_B    job_type: CMD
command: echo
machine: machine2
owner: user2
permission:
date_conditions: 0
description: "Test"
std_out_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.out"
std_err_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.err"
max_run_alarm: 1
alarm_if_fail: 0
alarm_if_terminated: 0
send_notification: 1



/* ----------------- Test_c ----------------- */

insert_job: Test_c   job_type: CMD
command: sleep 10
machine: machine3
owner: user3
permission:
date_conditions: 0
alarm_if_fail: 0
alarm_if_terminated: 0


/* ----------------- Test_d ----------------- */

insert_job: Test_d   job_type: CMD
command: ls
machine: machine4
owner: user4
permission:
date_conditions: 0
alarm_if_fail: 1
alarm_if_terminated: 1

但问题是,csv 文件输出在列名和数据之间有一个空白行,如下所示:

"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
,,,,,,,,,,,,,,,,,,
"Test_A","CMD"," ","sleep 3000","machine1","user1","0","0"," "," "," "," "," "," "," "," ","1"," "," "
"Test_B","CMD"," ","echo","machine2","user2","0","0"," "," "," "," "," "," "," "," "," "," "," "
"Test_c","CMD"," ","sleep 10","machine3","user3","0","0"," "," "," "," "," "," "," "," "," "," "," "
"Test_d","CMD"," ","ls","machine4","user4","0","0"," "," "," "," "," "," "," "," "," "," "," "

所需输出为:

"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
"Test_A","CMD"," ","sleep 3000","machine1","user1","0","0"," "," "," "," "," "," "," "," ","1"," "," "
"Test_B","CMD"," ","echo","machine2","user2","0","0"," "," "," "," "," "," "," "," "," "," "," "
"Test_c","CMD"," ","sleep 10","machine3","user3","0","0"," "," "," "," "," "," "," "," "," "," "," "
"Test_d","CMD"," ","ls","machine4","user4","0","0"," "," "," "," "," "," "," "," "," "," "," "

我尝试过改用 (NR>=1),但不起作用。我知道这个问题很琐碎,但我实在想不明白,有人能帮帮我吗?

linux bash shell awk autosys
2个回答
0
投票

下面演示如何稳健地(您现有的脚本在多种可能的输入值下都会失败)且可维护地(如果想添加/删除要打印的字段或调整它们的顺序,只需修改第一个 split() 的参数)完成您想做的事情。由于我的系统上没有 autorep -j $i -q,这里用 cat file 代替:

$ cat tst.sh
#!/usr/bin/env bash
#
# Convert job definitions written as "tag: value" lines (one job per
# "insert_job: NAME  job_type: TYPE" line) into CSV on stdout.
# `cat file` stands in for the command that produces the definitions.
#
# Output: one header row naming the selected tags, then one row per job with
# every field double-quoted; tags that a job does not set are emitted as "".
# Embedded double quotes are doubled so each row stays valid CSV, and no data
# row is printed when the input contains no job at all.

cat file |
awk '
    BEGIN {
        OFS = ","
        # The single place that decides which columns are printed and in what order.
        numTags = split("insert_job job_type box_name command machine owner date_conditions condition run_calendar exclude_calendar days_of_week run_window start_times start_mins resources profile term_run_time watch_file watch_interval",tags)
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            printf "\"%s\"%s", tag, (tagNr<numTags ? OFS : ORS)
        }
    }

    # Skip blank lines and the "/* ---- name ---- */" banner lines.
    !NF || /^\/\*/ { next }

    # Trim leading and trailing white space from every remaining line.
    { gsub(/^[[:space:]]+|[[:space:]]+$/,"") }

    # A line carrying "job_type:" opens a new job.
    match($0,/[[:space:]]job_type:/) {
        if ( jobNr++ ) {
            # Flush the previous job before collecting this one.
            prt()
            delete tag2val
        }

        # save "insert_job" value
        tag = substr($1,1,length($1)-1)
        val = substr($0,length($1)+1,RSTART-(length($1)+2))
        gsub(/^[[:space:]]+|[[:space:]]+$/,"",val)
        tag2val[tag] = val

        # update $0 to start with "job_type" to look like all other input
        $0 = substr($0,RSTART+1)
    }

    # Every line is now "tag: value"; everything after the first colon
    # (minus leading white space) is the value.
    {
        tag = val = $0
        sub(/:.*/,"",tag)
        sub(/[^:]+:[[:space:]]*/,"",val)
        tag2val[tag] = val
    }

    # Emit the last job, but only if at least one job was seen: with empty
    # input there is nothing to flush and a row of empty fields would be wrong.
    END { if ( jobNr ) prt() }

    # Print the job currently held in tag2val as one CSV row, in header order.
    function prt(    tagNr,tag,val) {
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            val = tag2val[tag]
            # Double embedded quotes so the quoted field stays valid CSV.
            gsub(/"/,"\"\"",val)
            printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
        }
    }
'

$ ./tst.sh
"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
"Test_A","CMD","","sleep 3000","machine1","user1","0","","","","","","","","","","1","",""
"Test_B","CMD","","echo","machine2","user2","0","","","","","","","","","","","",""
"Test_c","CMD","","sleep 10","machine3","user3","0","","","","","","","","","","","",""
"Test_d","CMD","","ls","machine4","user4","0","","","","","","","","","","","",""

以上是我对您上一个问题的回答的简化版本;那个回答中的脚本会把所有字段输出为 CSV,而上面的脚本只输出指定的字段列表。

目前尚不清楚您现有的脚本为什么要在循环中调用 autorep;但如果出于某种原因确实需要循环,那么您很可能应该在循环内调用 autorep,并将整个循环的输出通过管道传给 awk:

# Preferred shape: autorep runs once per line read inside the loop, and the
# combined output of the whole loop is piped into a single awk process.
while IFS= read -r i; do
    autorep -j "$i" -q
done |
awk '...'

而不是在循环内调用 autorep 并将其输出通过管道传给同在循环内的 awk:

# Shape to avoid: awk is started inside the loop, once for every line read.
while IFS= read -r i; do
    autorep -j "$i" -q |
    awk '...'
done

以上脚本是在从您问题中复制/粘贴的输入上运行的:

$ cat file
/* ----------------- Test_A ----------------- */

insert_job: Test_A  job_type: CMD
command: sleep 3000
machine: machine1
owner: user1
permission:
date_conditions: 0
term_run_time: 1
alarm_if_fail: 1
alarm_if_terminated: 1


/* ----------------- Test_B ----------------- */

insert_job: Test_B    job_type: CMD
command: echo
machine: machine2
owner: user2
permission:
date_conditions: 0
description: "Test"
std_out_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.out"
std_err_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.err"
max_run_alarm: 1
alarm_if_fail: 0
alarm_if_terminated: 0
send_notification: 1



/* ----------------- Test_c ----------------- */

insert_job: Test_c   job_type: CMD
command: sleep 10
machine: machine3
owner: user3
permission:
date_conditions: 0
alarm_if_fail: 0
alarm_if_terminated: 0


/* ----------------- Test_d ----------------- */

insert_job: Test_d   job_type: CMD
command: ls
machine: machine4
owner: user4
permission:
date_conditions: 0
alarm_if_fail: 1
alarm_if_terminated: 1

0
投票

下面是用 Ruby 实现这一点的方法:

# Ruby one-liner: reads `file`, cuts it into one block per `/* ... */` banner
# (scan1), collects `key: value` pairs from each block (scan2), then prints the
# fixed header row followed by one quoted row per block.
# NOTE(review): the rows are assembled from the keys found in the input, not
# from `headers`, so the data columns are not aligned with the header row;
# scan2 also stops a value at the first white space.
ruby -e '

scan1=/^\/\*[ \ta-zA-Z_-]+\*\/\R+([\s\S]*?)(?=(?:^\/\*[ \t-]+)|\z)/
scan2=/([^ \t\n\r:]+): ([^ \t\n\r:]+)/

headers=  "insert_job   job_type    box_name    command machine owner   
        date_conditions condition   run_calendar    exclude_calendar    
        days_of_week    run_window  start_times start_mins  resources   
        profile term_run_time   watch_file  watch_interval".
        split.map{|e| "\"#{e}\""}

data=$<.read.
    scan(scan1).
    map{|block| block[0].scan(scan2)}

hsh=Hash.new {|h,k| h[k] = [""]*data.length}

data.each_with_index{|r,i|
    r.to_h.each{|k,v| hsh[k][i]=v}
}

puts headers.join(",")
hsh.values.transpose.
   each{|r| puts r.map{|e| e.match(/^".*"$/) ? e : "\"#{e}\""}.join(",")}

' file 

打印:

"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
"Test_A","CMD","sleep","machine1","user1","0","1","1","1","","","","",""
"Test_B","CMD","echo","machine2","user2","0","","0","0","Test","/tmp/$AUTO_JOB_NAME.$AUTORUN.out","/tmp/$AUTO_JOB_NAME.$AUTORUN.err","1","1"
"Test_c","CMD","sleep","machine3","user3","0","","0","0","","","","",""
"Test_d","CMD","ls","machine4","user4","0","","1","1","","","","",""

下面是用 Ruby 完成第二项的方法:

# Ruby one-liner: reads `file`, splits it into blank-line-separated blocks
# (scan1), turns the `key: value` lines of each block into a hash (scan2), and
# prints the collected keys as a header row followed by the transposed values.
# NOTE(review): `[""]*data.length` appears to repeat one and the same String
# object and `<<` appends in place, which would explain values from different
# blocks running together in the printed output; confirm before reuse.
ruby -e '
scan1=/(\S[\s\S]*?)(?=(?:^$)|\z)/
scan2=/(^[^:]+):[ \t]+(.*)/

data=$<.read.
    scan(scan1).
    map{|block| block[0].scan(scan2)}.
    map{|s| s.to_h}

hsh=Hash.new{|h,k| h[k] = [""]*data.length}
data.each_with_index{|h, i| h.each{|k,v| hsh[k][i]<<v}}

puts hsh.keys.map{|e| "\"#{e}\""}.join(",")
hsh.values.transpose.
    each{|r| puts r.map{|e| e.match(/^".*"$/) ? e : "\"#{e}\""}.join(",")}
' file 

打印:

"Machine Name","Type","Node Name","Agent Name","Operating System","Agent Release","Agent Build"
"machine1machine2","aa","machine1.testmachine2.test","AGENTAGENT","Windows Server 2012 Windows Server 2012 for amd64","12.012.0","6181, Service Pack 00, Maintenance Level 006181, Service Pack 00, Maintenance Level 00"
"machine1machine2","aa","machine1.testmachine2.test","AGENTAGENT","Windows Server 2012 Windows Server 2012 for amd64","12.012.0","6181, Service Pack 00, Maintenance Level 006181, Service Pack 00, Maintenance Level 00"
© www.soinside.com 2019 - 2024. All rights reserved.