我试图对一个大的类似床的文件的第 6 7 列进行一些操作,这是文件的前 100 行。
head -n 100 Sample1.bed
chr1 10000 10150 150 0.506667 0.0503 0.0639
chr1 10002 10144 142 0.507042 0.0308 0.0639
chr1 10003 10139 136 0.507353 0.0246 0.0639
chr1 10008 10156 148 0.513514 0.0375 0.0639
chr1 10012 10138 126 0.500000 0.0156 0.0639
chr1 10013 10194 181 0.497238 0.0737 0.0599
chr1 10016 10169 153 0.509804 0.0503 0.0639
chr1 10031 10161 130 0.507692 0.0205 0.0639
chr1 10038 10211 173 0.502890 0.0968 0.0639
chr1 10044 10179 135 0.511111 0.0246 0.0639
chr1 10071 10145 74 0.500000 0.0001 0.0639
chr1 10071 10240 169 0.508876 0.1022 0.0639
chr1 11178 11334 156 0.679487 0.0704 0.0032
chr1 11561 11733 172 0.418605 0.0968 0.0998
chr1 11597 11776 179 0.435754 0.0867 0.0839
chr1 11641 11796 155 0.432258 0.0704 0.0839
chr1 12218 12376 158 0.651899 0.0704 0.0051
chr1 12368 12551 183 0.617486 0.0737 0.0114
chr1 12486 12658 172 0.645349 0.0968 0.0051
chr1 12770 12952 182 0.582418 0.0737 0.0156
chr1 12813 12943 130 0.561538 0.0205 0.0240
chr1 12870 13043 173 0.583815 0.0968 0.0156
chr1 12882 13089 207 0.599034 0.0214 0.0156
chr1 13378 13569 191 0.581152 0.0463 0.0156
chr1 13381 13592 211 0.578199 0.0160 0.0240
chr1 13435 13580 145 0.593103 0.0375 0.0156
chr1 13515 13688 173 0.578035 0.0968 0.0240
chr1 13550 13705 155 0.574194 0.0704 0.0240
chr1 13808 13944 136 0.514706 0.0246 0.0639
chr1 14004 14174 170 0.529412 0.0968 0.0507
chr1 14231 14393 162 0.580247 0.0884 0.0156
chr1 14249 14385 136 0.588235 0.0246 0.0156
chr1 14262 14432 170 0.523529 0.0968 0.0507
chr1 14267 14409 142 0.549296 0.0308 0.0358
chr1 14272 14385 113 0.584071 0.0070 0.0156
chr1 14288 14451 163 0.527607 0.0884 0.0507
chr1 14291 14425 134 0.522388 0.0205 0.0507
chr1 14316 14481 165 0.509091 0.1022 0.0639
chr1 14556 14768 212 0.622642 0.0160 0.0076
chr1 14635 14830 195 0.656410 0.0365 0.0051
chr1 14654 14795 141 0.673759 0.0308 0.0032
chr1 14786 14931 145 0.579310 0.0375 0.0240
chr1 15073 15258 185 0.643243 0.0585 0.0051
chr1 15078 15223 145 0.627586 0.0375 0.0076
chr1 15113 15279 166 0.632530 0.1022 0.0076
chr1 15286 15512 226 0.584071 0.0069 0.0156
chr1 15353 15482 129 0.565891 0.0156 0.0240
chr1 15379 15545 166 0.560241 0.1022 0.0240
chr1 15440 15692 252 0.595238 0.0027 0.0156
chr1 15478 15648 170 0.594118 0.0968 0.0156
chr1 15492 15659 167 0.592814 0.1022 0.0156
chr1 15633 15776 143 0.685315 0.0308 0.0019
chr1 16119 16267 148 0.601351 0.0375 0.0114
chr1 16478 16646 168 0.488095 0.1022 0.0599
chr1 16786 17014 228 0.618421 0.0069 0.0114
chr1 16818 17069 251 0.609562 0.0027 0.0114
chr1 16843 16999 156 0.628205 0.0704 0.0076
chr1 16947 17105 158 0.563291 0.0704 0.0240
chr1 16951 17078 127 0.551181 0.0156 0.0358
chr1 16961 17148 187 0.572192 0.0585 0.0240
chr1 17076 17216 140 0.614286 0.0308 0.0114
chr1 17149 17311 162 0.617284 0.0884 0.0114
chr1 17259 17442 183 0.562842 0.0737 0.0240
chr1 17327 17518 191 0.638743 0.0463 0.0076
chr1 17335 17522 187 0.641711 0.0585 0.0051
chr1 17386 17556 170 0.652941 0.0968 0.0051
chr1 17432 17598 166 0.698795 0.1022 0.0019
chr1 17546 17662 116 0.646552 0.0093 0.0051
chr1 17547 17682 135 0.637037 0.0246 0.0076
chr1 17587 17728 141 0.595745 0.0308 0.0156
chr1 17729 17940 211 0.630332 0.0160 0.0076
chr1 17729 17967 238 0.634454 0.0043 0.0076
chr1 17871 18115 244 0.655738 0.0036 0.0051
chr1 17880 18038 158 0.683544 0.0704 0.0019
chr1 17906 18074 168 0.672619 0.1022 0.0032
chr1 18146 18318 172 0.593023 0.0968 0.0156
chr1 18269 18432 163 0.595092 0.0884 0.0156
chr1 18328 18518 190 0.621053 0.0463 0.0076
chr1 18392 18568 176 0.619318 0.0867 0.0114
chr1 18417 18600 183 0.601093 0.0737 0.0114
chr1 18432 18652 220 0.590909 0.0088 0.0156
chr1 18516 18663 147 0.578231 0.0375 0.0240
chr1 18521 18709 188 0.558511 0.0585 0.0358
chr1 18588 18751 163 0.539877 0.0884 0.0507
chr1 18618 18770 152 0.546053 0.0503 0.0358
chr1 18897 19067 170 0.594118 0.0968 0.0156
chr1 18906 19084 178 0.584270 0.0867 0.0156
chr1 18958 19165 207 0.545894 0.0214 0.0358
chr1 19131 19303 172 0.622093 0.0968 0.0076
chr1 19192 19372 180 0.661111 0.0737 0.0032
chr1 19244 19394 150 0.653333 0.0503 0.0051
chr1 19271 19409 138 0.652174 0.0246 0.0051
chr1 19371 19525 154 0.558442 0.0503 0.0358
chr1 19452 19654 202 0.549505 0.0282 0.0358
chr1 19476 19683 207 0.560386 0.0214 0.0240
chr1 19479 19621 142 0.514085 0.0308 0.0639
chr1 19497 19638 141 0.553191 0.0308 0.0358
chr1 19777 19965 188 0.579787 0.0585 0.0240
chr1 19795 19943 148 0.547297 0.0375 0.0358
chr1 19797 19924 127 0.551181 0.0156 0.0358
我使用这个 bash 脚本来执行此操作,但是无法获得所需的结果,我只得到
iScore="nan"
。请纠正我哪里错了。
#!/bin/bash
dir=$1
file_count=$(ls "$dir" | wc -l)
for filename in "$dir"/*.bed; do
parts="${filename##*/}"
sample_id="${parts%%.*}"
echo "$sample_id"
# Filter dataframe based on conditions
filtered_df="$(awk -F, '($6 > 0 && $6 < 1 && $7 > 0 && $7 < 1)' $filename)"
echo "$filtered_df"
# Check if filtered dataframe is not empty
if [ -n "$filtered_df" ]; then
# Calculate a and b values
a=$(echo "$filtered_df" | awk -F, '{print log(1 / ($6 * $7))/log(2)}' | paste -sd+ - | bc)
b=$(echo "$filtered_df" | awk -F, '{print log(1 / ((1 - $6) * (1 - $7)))/log(2)}' | paste -sd+ - | bc)
# Calculate iScore value
if (( $(echo "$a + $b > 0" | bc -l) )); then
iScore=$(echo "scale=10; $a / ($a + $b)" | bc)
else
iScore="nan"
fi
else
iScore="nan"
fi
echo "iScore: $iScore ($sample_id)\n"
done
尽管如此,当我在终端中手动尝试时,我得到了所需的结果。 这里,
Sample1.csv
是$filtered_df
的前100行
$ a=$(cat Sample1.csv | awk -F, '{print log(1 / ($6 * $7))/log(2)}' | paste -sd+ - | bc)
$ b=$(cat Sample1.csv | awk -F, '{print log(1 / ((1 - $6) * (1 - $7)))/log(2)}' | paste -sd+ - | bc)
$ iScore=$(echo "scale=10; $a / ($b + $a)" | bc)
$ echo "$iScore"
$.9842574596
.bed
文件中的字段分隔符是任意数量的空格;这与您在 bash 脚本中指定给 awk 的逗号分隔符 -F,
不一致。
尝试删除
-F,
awk 分隔符选项,这对于 CSV(逗号分隔值)文件很有用,但对于“.bed”文件则不起作用。通过这种方式,您可以指示 awk 使用它的默认字段分隔符(任何空格或制表符序列)。