删除重复行保留前面一行

问题描述 投票:0回答:1

我想删除连续重复的标题行(以 “from unit” 或 “within unit” 开头的行):当多行标题连续出现时,只保留紧挨着数据行的那一行(即这一组中的最后一行),如下所示:

输入文件:

 within unit  1
   9. BD (   2) C   3 - C   4        /***. BD*(   2) C   3 - C   4           14.88    0.25    
 173. LP (   2) O  11                /***. BD*(   1) C   3 - C   4           11.15    0.81 
 from unit  1 to unit  2
   2. BD (   1) C   1 - C   4        /171. LP*(   1) H   8                    8.16    0.66 
 ***. BD*(   2) C   3 - C   4        /171. LP*(   1) H   8                  204.76    0.07 
 from unit  1 to unit  3
 from unit  1 to unit  4
 172. LP (   1) O  11                /***. BD*(   1) N  53 - H  73           10.56    1.15    
 173. LP (   2) O  11                /***. BD*(   1) N  53 - H  73           15.67    0.74    
 from unit  1 to unit  5
 from unit  1 to unit  6
 172. LP (   1) O  11                /***. BD*(   1) O  84 - H  85           11.04    1.19    
 from unit  1 to unit  7
 from unit  2 to unit  1
 within unit  2
 from unit  2 to unit  3
 from unit  3 to unit  1
 from unit  3 to unit  2
  40. BD (   2) C  21 - N  22        /171. LP*(   1) H   8                   40.26    0.40  

输出文件:

 within unit  1
   9. BD (   2) C   3 - C   4        /***. BD*(   2) C   3 - C   4           14.88    0.25   
 173. LP (   2) O  11                /***. BD*(   1) C   3 - C   4           11.15    0.81  
 from unit  1 to unit  2
   2. BD (   1) C   1 - C   4        /171. LP*(   1) H   8                    8.16    0.66   
 ***. BD*(   2) C   3 - C   4        /171. LP*(   1) H   8                  204.76    0.07  
 from unit  1 to unit  4
 172. LP (   1) O  11                /***. BD*(   1) N  53 - H  73           10.56    1.15   
 173. LP (   2) O  11                /***. BD*(   1) N  53 - H  73           15.67    0.74  
 from unit  1 to unit  6
 172. LP (   1) O  11                /***. BD*(   1) O  84 - H  85           11.04    1.19  
 from unit  3 to unit  2
  40. BD (   2) C  21 - N  22        /171. LP*(   1) H   8                   40.26    0.40 

#!/bin/bash

declare -A m f

#######################################
# Read whitespace-separated tokens from stdin into the global map m.
# The token in column j of input line i (both 0-based) is stored as m[i,j].
# Blank lines still consume a row index but store no tokens.
# Globals:   m (written) - must already be declared with `declare -A`
# Arguments: none; input is taken from stdin
# Returns:   0
#######################################
read_matrix() {
    local i=0
    local j
    local v
    local -a words

    # `read -a` splits on IFS without pathname expansion, so tokens such as
    # "***." or "BD*(" are stored literally instead of being globbed against
    # the current directory. The `|| (( ${#words[@]} ))` part keeps a final
    # line that lacks a trailing newline.
    while IFS=$' \t' read -r -a words || (( ${#words[@]} )); do
        j=0
        for v in "${words[@]}"; do
            m[$i,$j]="$v"
            j=$((j+1))
        done
        i=$((i+1))
    done
}

 # Usage: <script> INPUT_FILE
#
# Appends a filtered copy of INPUT_FILE to ./out.1. In every run of
# consecutive "from unit ..." / "within unit ..." header lines only the last
# header (the one directly followed by data) is kept.
#
# The filter works on the raw lines rather than on a token matrix so that the
# original column alignment of the data lines is preserved in the output.

#######################################
# Print a file, dropping each unit-header line that is immediately followed
# by another unit-header line. A header that is the very last line of the
# file has no successor and is therefore kept.
# Arguments: $1 - path of the file to filter
# Outputs:   filtered lines on stdout
# Returns:   0 on success, non-zero if the file cannot be opened
#######################################
squeeze_unit_headers() {
  local file=$1
  local header_re='^[[:space:]]*(from|within)[[:space:]]+unit'
  local held=''
  local have_held=0
  local line

  # One-line lookahead: a line is only emitted once its successor is known.
  # `|| [[ -n "$line" ]]` keeps a final line without a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if (( have_held )) \
      && ! [[ "$held" =~ $header_re && "$line" =~ $header_re ]]; then
      printf '%s\n' "$held"
    fi
    held=$line
    have_held=1
  done < "$file"

  # The last line of the file is never followed by a header.
  if (( have_held )); then
    printf '%s\n' "$held"
  fi
}

#######################################
# Validate the command line and append the filtered input to out.1.
# Arguments: $1 - path of the input file
# Outputs:   diagnostics on stderr; result appended to ./out.1
# Returns:   0 on success, 1 if the input is unreadable, 2 on usage error
#######################################
main() {
  if (( $# < 1 )); then
    printf 'usage: %s INPUT_FILE\n' "${0##*/}" >&2
    return 2
  fi
  if [[ ! -r "$1" ]]; then
    printf '%s: cannot read %s\n' "${0##*/}" "$1" >&2
    return 1
  fi
  squeeze_unit_headers "$1" >> out.1
}

main "$@"
bash sh csh
1个回答
0
投票

这可能就是您想要的做法,使用任意版本的 `awk` 再配合 `tac`:

$ tac file | awk -v bad='(from|within) unit' '!($0 ~ bad && prev ~ bad); {prev=$0}' | tac
 within unit  1
   9. BD (   2) C   3 - C   4        /***. BD*(   2) C   3 - C   4           14.88    0.25
 173. LP (   2) O  11                /***. BD*(   1) C   3 - C   4           11.15    0.81
 from unit  1 to unit  2
   2. BD (   1) C   1 - C   4        /171. LP*(   1) H   8                    8.16    0.66
 ***. BD*(   2) C   3 - C   4        /171. LP*(   1) H   8                  204.76    0.07
 from unit  1 to unit  4
 172. LP (   1) O  11                /***. BD*(   1) N  53 - H  73           10.56    1.15
 173. LP (   2) O  11                /***. BD*(   1) N  53 - H  73           15.67    0.74
 from unit  1 to unit  6
 172. LP (   1) O  11                /***. BD*(   1) O  84 - H  85           11.04    1.19
 from unit  3 to unit  2
  40. BD (   2) C  21 - N  22        /171. LP*(   1) H   8                   40.26    0.40
© www.soinside.com 2019 - 2024. All rights reserved.