我目前正在研究如何加速我的 Python 函数。
def d_lat(dlat, R=6.371*1e6):
    """Return the distance corresponding to a latitude difference.

    Evaluates ``2 * R * sqrt(sin(dlat / 2) ** 2)`` after converting
    ``dlat`` from degrees to radians, i.e. ``2 * R * |sin(dlat / 2)|``.

    Args:
        dlat: Latitude difference in degrees; scalar or NumPy array.
        R: Sphere radius. The default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm before relying on units.

    Returns:
        Non-negative value(s), shaped like ``dlat``, in the units of ``R``.
    """
    half_angle = np.deg2rad(dlat) / 2
    return 2 * R * np.sqrt(np.sin(half_angle)**2)
def d_lon(lat1, lat2, dlon, R=6.371*1e6):
    """Return the distance corresponding to a longitude difference.

    Evaluates
    ``2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2)``
    with all three angles converted from degrees to radians.

    Args:
        lat1: Latitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        dlon: Longitude difference in degrees.
        R: Sphere radius. The default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm before relying on units.

    Returns:
        Value(s) in the units of ``R``; inputs broadcast as NumPy arrays.
    """
    cos_product = np.cos(np.deg2rad(lat1)) * np.cos(np.deg2rad(lat2))
    return 2 * R * np.sqrt(cos_product * np.sin(np.deg2rad(dlon)/2)**2)
def distance(u, v, lon1, lat1):
    """Step each point one degree in the direction given by sign(u), sign(v).

    A copy of ``lat1`` is shifted by +1 where ``v > 0`` and by -1 where
    ``v < 0``; a copy of ``lon1`` is shifted the same way using ``u``.
    Entries where the component is zero (or NaN) are left unchanged.

    NOTE(review): this variant returns the raw degree differences and does
    not call ``d_lon`` / ``d_lat``, so it does less work than the variants
    that return metric distances.

    Args:
        u: Array whose sign selects the longitude step.
        v: Array whose sign selects the latitude step.
        lon1: Starting longitudes (not modified).
        lat1: Starting latitudes (not modified).

    Returns:
        Tuple ``(dlon, dlat)`` of arrays ``lon2 - lon1`` and ``lat2 - lat1``.
    """
    lat2, lon2 = lat1.copy(), lon1.copy()

    # Compute each boolean mask once instead of once per side of the
    # assignment.
    north, south = v > 0, v < 0
    lat2[north] = lat1[north] + 1
    lat2[south] = lat1[south] - 1

    east, west = u > 0, u < 0
    lon2[east] = lon1[east] + 1
    lon2[west] = lon1[west] - 1

    dlon = lon2 - lon1
    dlat = lat2 - lat1
    return dlon, dlat
如你所见,这是一段基于 NumPy 的简单代码。我在网上看了不少文章,它们都说只要在函数前面加上 `@numba.jit` 装饰器,就可以用 Numba 来加速代码。
下面是我做的测试。
# Benchmark inputs: four 1-D arrays of 10000 random samples each.
# u and v are standard-normal draws; distance() only looks at their sign.
u = np.random.randn(10000)
v = np.random.randn(10000)
# Longitudes drawn uniformly from [-99, -96), latitudes from [23, 25).
lon1 = np.random.uniform(low=-99, high=-96, size=10000)
lat1 = np.random.uniform(low=23, high=25, size=10000)
print(u)
%%timeit
for i in range(10000):
distance(u,v,lon1,lat1)
每次循环 5.61 秒 ± 58.7 毫秒(7 次运行的平均值 ± 标准差,每次运行 1 个循环)。
@numba.njit()
def d_lat(dlat, R=6.371*1e6):
    """Return ``2 * R * |sin(dlat / 2)|`` for ``dlat`` given in degrees.

    Compiled with ``numba.njit``.

    Args:
        dlat: Latitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.sin(np.deg2rad(dlat)/2)**2)
@numba.njit()
def d_lon(lat1, lat2, dlon, R=6.371*1e6):
    """Return the distance for a longitude difference between two latitudes.

    Compiled with ``numba.njit``. Evaluates
    ``2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2)`` with all
    angles converted from degrees to radians.

    Args:
        lat1: Latitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        dlon: Longitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.cos(np.deg2rad(lat1)) *
                           np.cos(np.deg2rad(lat2)) *
                           np.sin(np.deg2rad(dlon)/2)**2)
@numba.njit()
def distance(u, v, lon1, lat1, R=6.371*1e6):
    """Return the distances of a one-degree step chosen by sign(u), sign(v).

    Compiled with ``numba.njit``. Copies of ``lat1`` / ``lon1`` are shifted
    by +1 where the matching component is positive and by -1 where it is
    negative; the resulting differences are passed to ``d_lon`` / ``d_lat``.

    Args:
        u: Array whose sign selects the longitude step.
        v: Array whose sign selects the latitude step.
        lon1: Starting longitudes in degrees (not modified).
        lat1: Starting latitudes in degrees (not modified).
        R: Unused in the body; ``d_lon`` and ``d_lat`` are called with
            their own defaults. Kept so existing callers keep working.

    Returns:
        Tuple ``(d_lon(lat1, lat2, dlon), d_lat(dlat))``.
    """
    lat2, lon2 = lat1.copy(), lon1.copy()

    # Build each mask once and reuse it on both sides of the assignment.
    north, south = v > 0, v < 0
    lat2[north] = lat1[north] + 1
    lat2[south] = lat1[south] - 1

    east, west = u > 0, u < 0
    lon2[east] = lon1[east] + 1
    lon2[west] = lon1[west] - 1

    dlat = lat2 - lat1
    dlon = lon2 - lon1
    return d_lon(lat1, lat2, dlon), d_lat(dlat)
%%timeit
for i in range(10000):
a,b = distance(u,v,lon1,lat1)
每次循环 7.76 秒 ± 64.9 毫秒(7 次运行的平均值 ± 标准差,每次运行 1 个循环)。
如上所示,我的 Numba 版本比纯 Python(NumPy)版本还要慢。谁能帮我解决这个问题?
PS:所用版本为 llvmlite 0.32.0rc1、numba 0.49.0rc2。
根据他的回答,尽管 Numba 现在已经足够智能,但如果代码要用 Numba 装饰,最好还是采用朴素的 Fortran/C 风格(显式循环)来编写。下面是我尝试的几种方法的计算时间对比。
def d_lat(dlat, R=6.371*1e6):
    """Return the distance corresponding to a latitude difference.

    Computes ``2 * R * sqrt(sin(dlat / 2) ** 2)`` with ``dlat`` converted
    from degrees to radians.

    Args:
        dlat: Latitude difference in degrees; scalar or NumPy array.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Non-negative value(s) in the units of ``R``.
    """
    sin_half = np.sin(np.deg2rad(dlat) / 2)
    return 2 * R * np.sqrt(sin_half**2)
def d_lon(lat1, lat2, dlon, R=6.371*1e6):
    """Return the distance corresponding to a longitude difference.

    Computes ``2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2)``
    with every angle converted from degrees to radians.

    Args:
        lat1: Latitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        dlon: Longitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Value(s) in the units of ``R``.
    """
    cos_lat1 = np.cos(np.deg2rad(lat1))
    cos_lat2 = np.cos(np.deg2rad(lat2))
    return 2 * R * np.sqrt(cos_lat1 * cos_lat2 * np.sin(np.deg2rad(dlon)/2)**2)
def distance(u, v, lon1, lat1):
    """Return the per-point degree offsets selected by sign(u) and sign(v).

    Latitudes move by +1 where ``v > 0`` and -1 where ``v < 0``; longitudes
    move the same way according to ``u``. Other entries stay put.

    NOTE(review): only the degree differences are returned; ``d_lon`` and
    ``d_lat`` are not called here, unlike in the variants that return
    metric distances.

    Args:
        u: Array whose sign selects the longitude step.
        v: Array whose sign selects the latitude step.
        lon1: Starting longitudes (not modified).
        lat1: Starting latitudes (not modified).

    Returns:
        Tuple ``(dlon, dlat)`` of arrays ``lon2 - lon1`` and ``lat2 - lat1``.
    """
    lat2, lon2 = lat1.copy(), lon1.copy()

    # Each mask is evaluated once and reused for both read and write.
    v_pos, v_neg = v > 0, v < 0
    lat2[v_pos] = lat1[v_pos] + 1
    lat2[v_neg] = lat1[v_neg] - 1

    u_pos, u_neg = u > 0, u < 0
    lon2[u_pos] = lon1[u_pos] + 1
    lon2[u_neg] = lon1[u_neg] - 1

    dlon = lon2 - lon1
    dlat = lat2 - lat1
    return dlon, dlat
%%timeit
for i in range(10000):
distance(u,v,lon1,lat1)
@numba.jit(nogil=True)
def d_lat(dlat, R=6.371*1e6):
    """Return ``2 * R * |sin(dlat / 2)|`` for ``dlat`` given in degrees.

    Decorated with ``numba.jit(nogil=True)``.

    Args:
        dlat: Latitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.sin(np.deg2rad(dlat)/2)**2)
@numba.jit(nogil=True)
def d_lon(lat1, lat2, dlon, R=6.371*1e6):
    """Return the distance for a longitude difference between two latitudes.

    Decorated with ``numba.jit(nogil=True)``. Evaluates
    ``2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2)`` with all
    angles converted from degrees to radians.

    Args:
        lat1: Latitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        dlon: Longitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.cos(np.deg2rad(lat1)) *
                           np.cos(np.deg2rad(lat2)) *
                           np.sin(np.deg2rad(dlon)/2)**2)
def distance(u, v, lon1, lat1, R=6.371*1e6):
    """Return the distances of a one-degree step chosen by sign(u), sign(v).

    This function itself carries no decorator; the mask arithmetic runs as
    ordinary NumPy code and only ``d_lon`` / ``d_lat`` are called for the
    final conversion.

    Args:
        u: Array whose sign selects the longitude step.
        v: Array whose sign selects the latitude step.
        lon1: Starting longitudes in degrees (not modified).
        lat1: Starting latitudes in degrees (not modified).
        R: Unused in the body; ``d_lon`` and ``d_lat`` are called with
            their own defaults. Kept so existing callers keep working.

    Returns:
        Tuple ``(d_lon(lat1, lat2, dlon), d_lat(dlat))``.
    """
    lat2, lon2 = lat1.copy(), lon1.copy()

    # Build each mask once and reuse it on both sides of the assignment.
    north, south = v > 0, v < 0
    lat2[north] = lat1[north] + 1
    lat2[south] = lat1[south] - 1

    east, west = u > 0, u < 0
    lon2[east] = lon1[east] + 1
    lon2[west] = lon1[west] - 1

    dlat = lat2 - lat1
    dlon = lon2 - lon1
    return d_lon(lat1, lat2, dlon), d_lat(dlat)
%%timeit
for i in range(10000):
a,b = distance(u,v,lon1,lat1)
def d_lat(dlat, R=6.371*1e6):
    """Return the distance corresponding to a latitude difference.

    Evaluates ``2 * R * sqrt(sin(dlat / 2) ** 2)`` where ``dlat`` is first
    converted from degrees to radians.

    Args:
        dlat: Latitude difference in degrees; scalar or NumPy array.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Non-negative value(s) in the units of ``R``.
    """
    half_angle = np.deg2rad(dlat) / 2
    return 2 * R * np.sqrt(np.sin(half_angle)**2)
def d_lon(lat1, lat2, dlon, R=6.371*1e6):
    """Return the distance corresponding to a longitude difference.

    Evaluates
    ``2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2)``
    with all angles converted from degrees to radians.

    Args:
        lat1: Latitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        dlon: Longitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Value(s) in the units of ``R``.
    """
    cos_product = np.cos(np.deg2rad(lat1)) * np.cos(np.deg2rad(lat2))
    return 2 * R * np.sqrt(cos_product * np.sin(np.deg2rad(dlon)/2)**2)
@numba.njit(nogil=True)
def distance(u, v, lon1, lat1, R=6.371*1e6):
    """Return the distances of a one-degree step chosen by sign(u), sign(v).

    Compiled with ``numba.njit(nogil=True)``. The two conversion helpers
    are defined inside this function rather than called from module level.

    Args:
        u: Array whose sign selects the longitude step.
        v: Array whose sign selects the latitude step.
        lon1: Starting longitudes in degrees (not modified).
        lat1: Starting latitudes in degrees (not modified).
        R: Unused in the body; the nested helpers are called with their
            own defaults. Kept so existing callers keep working.

    Returns:
        Tuple ``(dlon, dlat)`` where both entries have already been passed
        through the nested ``d_lon`` / ``d_lat`` helpers.
    """
    # Nested helper: 2 * R * |sin(dlat / 2)| with dlat in degrees.
    def d_lat(dlat, R=6.371*1e6):
        return 2 * R * np.sqrt(np.sin(np.deg2rad(dlat)/2)**2)

    # Nested helper: 2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2)**2).
    def d_lon(lat1, lat2, dlon, R=6.371*1e6):
        return 2 * R * np.sqrt(np.cos(np.deg2rad(lat1)) *
                               np.cos(np.deg2rad(lat2)) *
                               np.sin(np.deg2rad(dlon)/2)**2)

    lat2, lon2 = lat1.copy(), lon1.copy()
    lat2[v>0], lat2[v<0], = lat1[v>0]+1, lat1[v<0]-1,
    lon2[u>0], lon2[u<0], = lon1[u>0]+1, lon1[u<0]-1,
    dlat = d_lat(lat2 - lat1)
    dlon = d_lon(lat1, lat2, lon2 - lon1)
    return dlon, dlat
%%timeit
for i in range(10000):
a,b = distance(u,v,lon1,lat1)
@numba.njit()
def d_lat(dlat, R=6.371*1e6):
    """Return ``2 * R * |sin(dlat / 2)|`` for ``dlat`` given in degrees.

    Compiled with ``numba.njit``.

    Args:
        dlat: Latitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.sin(np.deg2rad(dlat)/2)**2)
@numba.njit()
def d_lon(lat1, lat2, dlon, R=6.371*1e6):
    """Return the distance for a longitude difference between two latitudes.

    Compiled with ``numba.njit``. Evaluates
    ``2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2)`` with all
    angles converted from degrees to radians.

    Args:
        lat1: Latitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        dlon: Longitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.cos(np.deg2rad(lat1)) *
                           np.cos(np.deg2rad(lat2)) *
                           np.sin(np.deg2rad(dlon)/2)**2)
@numba.njit()
def distance(u, v, lon1, lat1):
    """Return the distances of a one-degree step chosen by sign(u), sign(v).

    Loop-based variant compiled with ``numba.njit``. For every index the
    latitude step is +1, -1 or 0 depending on the sign of ``v[i]``, and the
    longitude step is chosen the same way from ``u[i]``.

    NOTE(review): the output arrays are created with ``np.empty_like`` and
    filled over ``range(len(v))`` / ``range(len(u))``, so all four inputs
    are assumed to have the same length -- confirm against callers.

    Args:
        u: Array whose sign selects the longitude step.
        v: Array whose sign selects the latitude step.
        lon1: Starting longitudes in degrees; only its shape/dtype is used.
        lat1: Starting latitudes in degrees.

    Returns:
        Tuple ``(d_lon(lat1, lat2, dlon), d_lat(dlat))``.
    """
    lat2 = np.empty_like(lat1)
    dlon = np.empty_like(lon1)
    dlat = np.empty_like(lat1)

    for i in range(len(v)):
        vi = v[i]
        if vi > 0:
            lat2[i] = lat1[i] + 1
            dlat[i] = 1
        elif vi < 0:
            lat2[i] = lat1[i] - 1
            dlat[i] = -1
        else:
            lat2[i] = lat1[i]
            dlat[i] = 0

    # Only the longitude *difference* feeds d_lon, so the shifted longitude
    # array that used to be filled here was dead work and is not built.
    for i in range(len(u)):
        ui = u[i]
        if ui > 0:
            dlon[i] = 1
        elif ui < 0:
            dlon[i] = -1
        else:
            dlon[i] = 0

    return d_lon(lat1, lat2, dlon), d_lat(dlat)
%%timeit
for i in range(10000):
distance(u,v,lon1,lat1)
有几个问题比较突出。
首先,你在 `distance` 函数中的计算不必要地复杂,而且其编写风格(大量花哨索引,如 `lat2[v>0]`)对 Numba 编译器来说可能并不理想。虽然 Numba 越来越智能,但我发现用简单的、面向循环的方式写代码仍然有很高的回报。
其次,可选参数会让 Numba 稍微变慢。我发现,这主要是由你的 `distance` 函数中的可选参数 `R` 造成的。
修正这两个问题——特别是用更简单的循环代替矢量化代码,并尽量减少运算次数——之后,得到如下形式的代码:
@numba.njit()
def d_lat(dlat, R=6.371*1e6):
    """Return ``2 * R * |sin(dlat / 2)|`` for ``dlat`` given in degrees.

    Compiled with ``numba.njit``.

    Args:
        dlat: Latitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.sin(np.deg2rad(dlat)/2)**2)
@numba.njit()
def d_lon(lat1, lat2, dlon, R=6.371*1e6):
    """Return the distance for a longitude difference between two latitudes.

    Compiled with ``numba.njit``. Evaluates
    ``2 * R * sqrt(cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2)`` with all
    angles converted from degrees to radians.

    Args:
        lat1: Latitude of the first point, in degrees.
        lat2: Latitude of the second point, in degrees.
        dlon: Longitude difference in degrees.
        R: Sphere radius; the default 6.371e6 is presumably the Earth's
            mean radius in metres -- confirm.

    Returns:
        Distance value(s) in the units of ``R``.
    """
    return 2 * R * np.sqrt(np.cos(np.deg2rad(lat1)) *
                           np.cos(np.deg2rad(lat2)) *
                           np.sin(np.deg2rad(dlon)/2)**2)
@numba.njit()
def distance(u, v, lon1, lat1):
    """Return the distances of a one-degree step chosen by sign(u), sign(v).

    Loop-based variant compiled with ``numba.njit``. The latitude step at
    each index is +1, -1 or 0 according to the sign of ``v[i]``; the
    longitude step is picked the same way from ``u[i]``.

    NOTE(review): the output arrays come from ``np.empty_like`` and are
    filled over ``range(len(v))`` / ``range(len(u))``, so all four inputs
    are assumed to have the same length -- confirm against callers.

    Args:
        u: Array whose sign selects the longitude step.
        v: Array whose sign selects the latitude step.
        lon1: Starting longitudes in degrees; only its shape/dtype is used.
        lat1: Starting latitudes in degrees.

    Returns:
        Tuple ``(d_lon(lat1, lat2, dlon), d_lat(dlat))``.
    """
    lat2 = np.empty_like(lat1)
    dlon = np.empty_like(lon1)
    dlat = np.empty_like(lat1)

    for i in range(len(v)):
        vi = v[i]
        if vi > 0:
            lat2[i] = lat1[i] + 1
            dlat[i] = 1
        elif vi < 0:
            lat2[i] = lat1[i] - 1
            dlat[i] = -1
        else:
            lat2[i] = lat1[i]
            dlat[i] = 0

    # d_lon only needs the longitude difference, so no shifted-longitude
    # array is allocated or filled; the original one was never read.
    for i in range(len(u)):
        ui = u[i]
        if ui > 0:
            dlon[i] = 1
        elif ui < 0:
            dlon[i] = -1
        else:
            dlon[i] = 0

    return d_lon(lat1, lat2, dlon), d_lat(dlat)
在我的(较慢的)系统上,扣除首次编译的开销之后,这将运行时间从 7 秒左右降低到 4 秒左右。到了这一步,我认为耗时主要由 `np.sin`、`np.cos`、`np.exp` 等函数本身的计算成本决定。