我正在尝试把一个函数改写为 lambda 表达式,以尽量减少计算时间。具体来说,我想为 plot1 中的每个点求出它到 plot2 所有点的最小半正矢(haversine)距离(plot1、plot2 都是带有 longitude、latitude 列的数据帧)。这是我的代码:
def calculate_min_haversine_distance(plot1, plot2):
    """Annotate each row of ``plot1`` with its smallest distance to ``plot2``.

    For every row of ``plot1`` the distance to every row of ``plot2`` is
    computed with ``haversine_distance`` and the smallest value is written
    to the ``'Min Haversine Distance'`` column of ``plot1``.

    Args:
        plot1: DataFrame with ``latitude`` and ``longitude`` columns. It is
            modified in place.
        plot2: DataFrame with ``latitude`` and ``longitude`` columns. It is
            only read.

    Returns:
        ``plot1`` itself, with ``'Min Haversine Distance'`` filled in. A row
        receives ``inf`` when ``plot2`` has no rows to compare against.
    """
    # plot2's coordinates are the same for every row of plot1, so extract
    # them once instead of running iterrows() over plot2 in the inner loop.
    targets = list(zip(plot2['latitude'].values, plot2['longitude'].values))

    for index, row in plot1.iterrows():
        # .loc is an indexer and must be subscripted with square brackets;
        # the call form `plot1.loc(...) = value` is a SyntaxError.
        plot1.loc[index, 'Min Haversine Distance'] = min(
            (
                haversine_distance(row.latitude, row.longitude, lat, lon)
                for lat, lon in targets
            ),
            # No arbitrary "large number" sentinel: inf is never smaller
            # than a real distance.
            default=float('inf'),
        )
    return plot1
我不确定如何摆脱第一个循环,但这应该可以帮助您摆脱第二个循环:
def calculate_min_haversine_distance(plot1, plot2):
    """Annotate each row of ``plot1`` with its smallest distance to ``plot2``.

    The inner loop over ``plot2`` is replaced by a row-wise ``apply`` that
    calls ``haversine_distance`` for the current ``plot1`` row against every
    ``plot2`` row.

    Args:
        plot1: DataFrame with ``latitude`` and ``longitude`` columns. It is
            modified in place.
        plot2: DataFrame with ``latitude`` and ``longitude`` columns. It is
            left untouched.

    Returns:
        ``plot1`` itself, with the ``'Min Haversine Distance'`` column
        filled in.
    """
    for index, row in plot1.iterrows():
        # Keep the distances in a standalone Series rather than in a
        # temporary 'dist' column on plot2: writing and then dropping that
        # column would destroy a 'dist' column the caller already had and
        # would leave the temporary column behind if an error occurred.
        distances = plot2.apply(
            lambda x: haversine_distance(
                row.latitude, row.longitude, x.latitude, x.longitude
            ),
            axis=1,
        )
        # Series.min() skips NaN by default, so the result does not depend
        # on where a NaN happens to sit (builtin min() is order-dependent).
        plot1.loc[index, 'Min Haversine Distance'] = distances.min()
    return plot1
我会尝试下面的做法,希望对您有所帮助:
import pandas as pd
import numpy as np
# Two small sample frames of (lat, lon) points.
df1 = pd.DataFrame({'lat': [1, 2, 3, 4], 'lon': [5, 6, 7, 8]})
df2 = pd.DataFrame({'lat': [9, 10, 11, 12], 'lon': [13, 14, 15, 16]})

# Give both frames the same constant join key so that merging on it pairs
# every row of df1 with every row of df2; the helper key is then removed
# from the merged result (it stays on df1 and df2).
df1['key'] = 1
df2['key'] = 1
df_c = df1.merge(df2, on='key').drop(columns='key')
# Adapted from: https://stackoverflow.com/a/43577275/4450090
def haversine(lat1, lon1, lat2, lon2, to_radians=True, earth_radius=6371):
    """Return the haversine (great-circle) distance between two points.

    Each coordinate may be a scalar or an array-like; the four inputs are
    combined with NumPy broadcasting, so a single point can be compared
    against whole arrays of points in one call.

    Args:
        lat1: Latitude of the first point(s).
        lon1: Longitude of the first point(s).
        lat2: Latitude of the second point(s).
        lon2: Longitude of the second point(s).
        to_radians: When true, the inputs are passed through ``np.radians``
            (i.e. they are taken to be in degrees). When false they are used
            as given and must already be in radians.
        earth_radius: Sphere radius; the result is expressed in the same
            unit (the default 6371 corresponds to kilometres).

    Returns:
        The distance(s) as a NumPy scalar or array, following the
        broadcast shape of the inputs.
    """
    if to_radians:
        # Convert each coordinate on its own. Packing all four into a single
        # list for one np.radians call forces them to share one shape, which
        # breaks as soon as a scalar is mixed with an array.
        lat1, lon1, lat2, lon2 = (
            np.radians(np.asarray(angle)) for angle in (lat1, lon1, lat2, lon2)
        )

    a = (
        np.sin((lat2 - lat1) / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2
    )
    # Mathematically a <= 1, but rounding can push it just above 1 for
    # (near-)antipodal points, which would make arcsin return NaN.
    a = np.minimum(a, 1.0)
    return earth_radius * 2 * np.arcsin(np.sqrt(a))
# Row-wise distance for each (df1 row, df2 row) pairing held in df_c.
df_c['dist'] = df_c.apply(
    lambda pair: haversine(
        pair['lat_x'], pair['lon_x'], pair['lat_y'], pair['lon_y']
    ),
    axis=1,
)

# Cap the column: any value that is not strictly below min_val is replaced
# by min_val.
# NOTE(review): this only bounds each pairwise distance from above; it does
# not reduce the pairs to one minimum per df1 row.
min_val = 1000000
df_c['dist'] = df_c['dist'].apply(
    lambda d: min_val if not d < min_val else d
)