# Numpy 比 Mathematica 慢？

##### 问题描述 · 投票：0 · 回答：1

``floatMatrix``
，其维度为 (750000, 4)。函数
``testFunction[x,y,w,z]``

``Compile``
``Parallelization->False``

`````` Compile[{{f, _Real, 1}},
{
{0.011904761904761973` f[[2]]f[[1]]^3 +
0.002976190476190474` f[[1]]f[[2]]^3 - 0.020833333333333325` f[[3]] +
0.002976190476190474` f[[3]]^3 +
f[[2]]^2 (0.0029761904761904778` f[[3]] +...
{0.002976190476190483` f[[1]]^3 + 0.011904761904761906` f[[2]]^3 -
0.0875` f[[3]] + 0.0029761904761904765` f[[3]]^3 +
f[[1]]^2 (0.005952380952380952` f[[2]] +...
},CompilationTarget -> "C", RuntimeAttributes -> {Listable},
Parallelization -> True];

time = RepeatedTiming[testFunction[floatMatrix]];
Print["In Mathematica-C it takes an average of ", time[[1]], " secs."]
``````

``````def testFunction(data):
f1, f2, f3, f4 = data.T

results = np.zeros((data.shape[0], 4))  # Initialize a results array

results[:, 0] = (0.011904761904761973*f2*f1**3 + 0.002976190476190474*f1*f2**3 -
0.020833333333333325*f3 + 0.002976190476190474*f3**3 + f2**2*
(0.0029761904761904778*f3 +...
results[:, 1] = (0.002976190476190483*f1**3 + 0.011904761904761906*f2**3 - 0.0875*f3
+ 0.0029761904761904765*f3**3 + f1**2*(0.005952380952380952*f2 +
0.002976190476190469*f3 + 0.0029761904761904726*f4) +...
return results

# Time several consecutive calls and report the mean wall-clock duration.
n_runs = 10
duration = 0.0

for _ in range(n_runs):
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    testFunction(floatMatrix)
    end_time = time.perf_counter()
    duration += end_time - start_time

# Turn the accumulated total into the per-call average.
duration /= n_runs

print(f"With numpy it takes an average of: {duration} seconds")
``````

``````
def testFunction(f1, f2, f3, f4):
results = np.zeros((f1.shape[0], 4))  # Initialize a results array

results[:, 0] = ...
results[:, 1] = ...
results[:, 2] = ...
results[:, 3] = ...

return results

# Transpose the data outside the function
f1, f2, f3, f4 = floatMatrix.T

# Time several consecutive calls and report the mean wall-clock duration.
n_runs = 10
duration = 0.0

for _ in range(n_runs):
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    testFunction(f1, f2, f3, f4)
    end_time = time.perf_counter()
    duration += end_time - start_time

# Turn the accumulated total into the per-call average.
duration /= n_runs

print(f"With numpy vectorized operations it takes an average of: {duration} seconds")
``````

• Mathematica：0.119938 秒
• Numpy（内部转置）：0.206754 秒
• Numpy（外部转置）：0.20789377 秒

numpy performance optimization
##### 1个回答
0

``````import numba as nb

# njit compiles the decorated function to machine code in nopython mode.
@nb.njit()
def test_function():
    ...  # placeholder body: put the numerical kernel here
``````

``````import numba as nb

# fastmath=True relaxes strict IEEE 754 semantics so the compiler may
# reorder and fuse floating-point operations.
@nb.njit(fastmath=True)
def test_function():
    ...  # placeholder body: put the numerical kernel here
``````

``f1``

### 代码

``````floatMatrix = np.random.rand(1200000, 4)
def test_function_transpose_inside(data):
f1, f2, f3, f4 = data.T

results = np.zeros((data.shape[0], 4))  # Initialize a results array

results[:, 0] = (0.011904761904761973*f2*f1**3 + 0.002976190476190474*f1*f2**3 -
0.020833333333333325*f3 + 0.002976190476190474*f3**3 + f2**2*
(0.0029761904761904778*f3))
results[:, 1] = (0.002976190476190483*f1**3 + 0.011904761904761906*f2**3 - 0.0875*f3
+ 0.0029761904761904765*f3**3 + f1**2*(0.005952380952380952*f2 +
0.002976190476190469*f3 + 0.0029761904761904726*f4))
return results
``````

``````import numba as nb

@nb.njit()
def test_function_numba(data):
    """Numba-compiled version of the whole-array NumPy expressions.

    Args:
        data: 2-D array with exactly four columns (it is unpacked into four
            names via ``data.T``).

    Returns:
        Array of shape ``(data.shape[0], 4)``. Only columns 0 and 1 are
        filled in here; columns 2 and 3 keep their initial zeros.
    """
    f1, f2, f3, f4 = data.T

    results = np.zeros((data.shape[0], 4))  # Initialize a results array

    results[:, 0] = (
        0.011904761904761973 * f2 * f1**3
        + 0.002976190476190474 * f1 * f2**3
        - 0.020833333333333325 * f3
        + 0.002976190476190474 * f3**3
        + f2**2 * (0.0029761904761904778 * f3)
    )
    results[:, 1] = (
        0.002976190476190483 * f1**3
        + 0.011904761904761906 * f2**3
        - 0.0875 * f3
        + 0.0029761904761904765 * f3**3
        + f1**2
        * (
            0.005952380952380952 * f2
            + 0.002976190476190469 * f3
            + 0.0029761904761904726 * f4
        )
    )
    return results
``````

Numba 和 fastmath（`fastmath=True`）

``````import numba as nb

@nb.njit(fastmath=True)
def test_function_numba_fastmath(data):
    """Numba-compiled whole-array version with ``fastmath`` enabled.

    Args:
        data: 2-D array with exactly four columns (it is unpacked into four
            names via ``data.T``).

    Returns:
        Array of shape ``(data.shape[0], 4)``. Only columns 0 and 1 are
        filled in here; columns 2 and 3 keep their initial zeros.
    """
    f1, f2, f3, f4 = data.T

    results = np.zeros((data.shape[0], 4))  # Initialize a results array

    results[:, 0] = (
        0.011904761904761973 * f2 * f1**3
        + 0.002976190476190474 * f1 * f2**3
        - 0.020833333333333325 * f3
        + 0.002976190476190474 * f3**3
        + f2**2 * (0.0029761904761904778 * f3)
    )
    results[:, 1] = (
        0.002976190476190483 * f1**3
        + 0.011904761904761906 * f2**3
        - 0.0875 * f3
        + 0.0029761904761904765 * f3**3
        + f1**2
        * (
            0.005952380952380952 * f2
            + 0.002976190476190469 * f3
            + 0.0029761904761904726 * f4
        )
    )
    return results
``````

Numba、fastmath 和显式循环

``````import numba as nb

@nb.njit(fastmath=True)
def test_function_numba_loop(data):
    """Numba-compiled version that walks the rows in an explicit loop.

    Each row is unpacked into four scalars and the two polynomials are
    evaluated per row, instead of as whole-array expressions.

    Args:
        data: 2-D array whose rows each unpack into exactly four values.

    Returns:
        Array of shape ``(data.shape[0], 4)``. Only columns 0 and 1 are
        filled in here; columns 2 and 3 keep their initial zeros.
    """
    results = np.zeros((data.shape[0], 4))  # Initialize a results array
    for i in range(data.shape[0]):
        f1, f2, f3, f4 = data[i]
        results[i, 0] = (
            0.011904761904761973 * f2 * f1**3
            + 0.002976190476190474 * f1 * f2**3
            - 0.020833333333333325 * f3
            + 0.002976190476190474 * f3**3
            + f2**2 * (0.0029761904761904778 * f3)
        )
        results[i, 1] = (
            0.002976190476190483 * f1**3
            + 0.011904761904761906 * f2**3
            - 0.0875 * f3
            + 0.0029761904761904765 * f3**3
            + f1**2
            * (
                0.005952380952380952 * f2
                + 0.002976190476190469 * f3
                + 0.0029761904761904726 * f4
            )
        )
    return results
``````

### 基准结果

``````%timeit test_function_transpose_inside(floatMatrix)
215 ms ± 14.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit test_function_numba(floatMatrix)
33.6 ms ± 344 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit test_function_numba_fastmath(floatMatrix)
32.6 ms ± 452 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit test_function_numba_loop(floatMatrix)
22.5 ms ± 68.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
``````