我想根据给定范围创建直方图 (1d),并且希望它能够快速。这就是为什么也可以考虑 cython 和 numba。
给定两个边界、权重和步长。
例如:
boundaries1 = np.array([511.4, 517.5, 517.2, 520.1, 519.8])
boundaries2 = np.array([517.3, 517.1, 517.8, 518.2, 515.0])
weights = np.array([0.2, 0.4, 0.6, 0.8, 1])
stepsize = 0.1
请注意,边界不按其值排序,这意味着例如边界 1 可以大于或小于对应的边界 2。
此示例的结果应返回具有给定步长、最小值和最大值的直方图。
histogram = np.array([0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.2, 1.2, 1.2,
1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2,
1.2, 1.2, 1.2, 1.2, 1.2, 1.6, 2.2, 2.2, 2. , 2. , 1.6, 1.6, 1.6,
1. , 1. , 1. , 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8,
1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 1.8, 0.8, 0.8, 0.8])
minimum = 511.4
maximum = 520.1
对于如何实现这个以尽可能快地运行有创意吗?
这是我目前的做法:
@njit(cache=True)
def create_histogram(boundaries1, boundaries2, weights, stepsize):
minimum = np.minimum(np.min(boundaries1),np.min(boundaries2))
maximum = np.maximum(np.max(boundaries1),np.max(boundaries2))
num_bins = int(np.round((maximum-minimum+stepsize)/stepsize))
hist = np.zeros(num_bins, dtype=np.float64)
lower = np.round((np.minimum(boundaries1, boundaries2)-minimum)/stepsize).astype(np.int32)
upper = np.round((np.maximum(boundaries1, boundaries2)-minimum)/stepsize).astype(np.int32)
for i in range(lower.shape[0]):
hist[lower[i]:upper[i]+1] += weights[i]
return hist, minimum, maximum
我使用相同的数据针对您当前的方法测试了以下解决方案。看来花费的时间减少了近一半。
测试数据:
boundaries1 = np.array([511.4, 517.5, 517.2, 520.1, 519.8])
boundaries2 = np.array([517.3, 517.1, 517.8, 518.2, 515.0])
weights = np.array([0.2, 0.4, 0.6, 0.8, 1])
stepsize = 0.1
import numpy as np
from numba import jit, prange
import time
@jit(nopython=True, parallel=True, cache=True)
def create_histogram_numba(boundaries1, boundaries2, weights, stepsize):
min_boundary = min(min(boundaries1), min(boundaries2))
max_boundary = max(max(boundaries1), max(boundaries2))
num_bins = int((max_boundary - min_boundary) / stepsize) + 1
hist = np.zeros(num_bins, dtype=np.float64)
for i in prange(len(boundaries1)):
start = int((min(boundaries1[i], boundaries2[i]) - min_boundary) / stepsize)
end = int((max(boundaries1[i], boundaries2[i]) - min_boundary) / stepsize)
for j in range(start, end):
hist[j] += weights[i]
return hist, min_boundary, max_boundary
start = time.time()
print("Faster approach")
hist = create_histogram_numba(boundaries1, boundaries2, weights, stepsize)
end = time.time()
print("Time taken(s): {}".format(end-start))
print("Historgam: ", hist[0], "\nMinimum: ", hist[1], "\nMaximum: ", hist[2])
输出:
Faster approach
Time taken(s): 1.2022387981414795
Historgam: [0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2
0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2
1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2
1.2 1.2 1.2 1.6 2. 2. 2. 1.6 1.6 1. 1. 1. 1. 1. 1.8 1.8 1.8 1.8
1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 0.8 0.8 0.8 0.8 0. ]
Minimum: 511.4
Maximum: 520.1
import numpy as np
from numba import jit, prange
import time
@jit(nopython=True, parallel=True, cache=True)
def create_histogram(boundaries1, boundaries2, weights, stepsize):
minimum = np.minimum(np.min(boundaries1),np.min(boundaries2))
maximum = np.maximum(np.max(boundaries1),np.max(boundaries2))
num_bins = int(np.round((maximum-minimum+stepsize)/stepsize))
hist = np.zeros(num_bins, dtype=np.float64)
lower = np.round((np.minimum(boundaries1, boundaries2)-minimum)/stepsize).astype(np.int32)
upper = np.round((np.maximum(boundaries1, boundaries2)-minimum)/stepsize).astype(np.int32)
for i in range(lower.shape[0]):
hist[lower[i]:upper[i]+1] += weights[i]
return hist, minimum, maximum
start = time.time()
print("your current appraoch")
hist = create_histogram(boundaries1, boundaries2, weights, stepsize)
end = time.time()
print("Time taken(s): {}".format(end-start))
print("Historgam: ", hist[0], "\nMinimum: ", hist[1], "\nMaximum: ", hist[2])
输出:
your current appraoch
Time taken(s): 2.93923277854919434
Historgam: [0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2
0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2 0.2
1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2 1.2
1.2 1.2 1.2 1.6 2.2 2.2 2. 2. 1.6 1.6 1.6 1. 1. 1. 1.8 1.8 1.8 1.8
1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 1.8 0.8 0.8 0.8]
Minimum: 511.4
Maximum: 520.1