import pandas as pd
# Values for the 'x(m)' column of the first frame.
a = [
    0.0,
    0.6900000000000001,
    1.3800000000000001,
    1.3800000000000001,
    2.0700000000000003,
    2.7600000000000002,
    2.7600000000000002,
    3.45,
    4.1262,
    4.140000000000001,
    4.140000000000001,
    4.83,
    5.5200000000000005,
    5.5200000000000005,
    5.9754000000000005,
    6.4308000000000005,
    6.6654,
    6.9,
]
# The second list is the first one with the value at position 15
# (6.4308000000000005) repeated once, which adds one extra row at index 16.
b = a[:16] + [a[15]] + a[16:]

# One single-column frame per list; both get the default 0..n-1 row index.
df1 = pd.DataFrame({'x(m)': a})
df2 = pd.DataFrame({'x(m)': b})
df1:
x(m)
0 0.0000
1 0.6900
2 1.3800
3 1.3800
4 2.0700
5 2.7600
6 2.7600
7 3.4500
8 4.1262
9 4.1400
10 4.1400
11 4.8300
12 5.5200
13 5.5200
14 5.9754
15 6.4308
16 6.6654
17 6.9000
df2:
x(m)
0 0.0000
1 0.6900
2 1.3800
3 1.3800
4 2.0700
5 2.7600
6 2.7600
7 3.4500
8 4.1262
9 4.1400
10 4.1400
11 4.8300
12 5.5200
13 5.5200
14 5.9754
15 6.4308
16 6.4308
17 6.6654
18 6.9000
两个数据帧的差异是:`row 16 in df2 is not in df1`(df2 的第 16 行不在 df1 中)。
我怎样才能得到这个差异?输出可以是行索引 16 加上对应的值 6.4308,
或者是一个合并后的数据框,其中用第三列标明 `not in df1` 或类似的内容。谢谢。
一种可能的解决方案如下:
import pandas as pd
# Sample data: compared with ``a``, ``b`` has an extra value 3 at position 3
# and a second copy of 6.4308000000000005 at position 17.
a = [0.0, 0.6900000000000001, 1.3800000000000001, 1.3800000000000001, 2.0700000000000003, 2.7600000000000002, 2.7600000000000002, 3.45, 4.1262, 4.140000000000001, 4.140000000000001, 4.83, 5.5200000000000005, 5.5200000000000005, 5.9754000000000005, 6.4308000000000005, 6.6654, 6.9]
b = [0.0, 0.6900000000000001, 1.3800000000000001, 3, 1.3800000000000001, 2.0700000000000003, 2.7600000000000002, 2.7600000000000002, 3.45, 4.1262, 4.140000000000001, 4.140000000000001, 4.83, 5.5200000000000005, 5.5200000000000005, 5.9754000000000005, 6.4308000000000005, 6.4308000000000005, 6.6654, 6.9]
df1 = pd.DataFrame(a, columns=['x(m)'])
df2 = pd.DataFrame(b, columns=['x(m)'])


def _with_occurrence(frame):
    """Return *frame* with its row index as a column plus a repeat counter.

    The added ``occurrence`` column numbers the repeats of each ``x(m)``
    value in row order (0 for the first time a value appears, 1 for the
    second, ...), so that equal values can be told apart when merging.
    """
    numbered = frame.reset_index()
    numbered['occurrence'] = numbered.groupby('x(m)').cumcount()
    return numbered


# Merging on the value alone would pair every duplicate on the left with
# every duplicate on the right (a value present twice in both frames yields
# four rows) and would report an extra repeat as "both". Matching on
# (value, occurrence) pairs the n-th copy with the n-th copy only, so a
# surplus copy in one frame shows up as present in that frame only.
df = pd.merge(
    _with_occurrence(df1),
    _with_occurrence(df2),
    on=['x(m)', 'occurrence'],
    how='outer',
    indicator=True,
)
# The counter is only a matching aid; drop it so the result keeps the columns
# index_x, x(m), index_y, present_in.
df = df.drop(columns='occurrence').rename(columns={'_merge': 'present_in'})
# The indicator column is categorical. Rename its categories and assign the
# result back instead of calling an in-place method on a column selection,
# which does not modify ``df`` when pandas Copy-on-Write is active.
df['present_in'] = df['present_in'].cat.rename_categories(
    {'left_only': 'only in df1', 'right_only': 'only in df2', 'both': 'in both'}
)
print(df)