我是新手。
我想请大家帮忙:能否把这段代码(或其中一部分)改写成运行更快的形式,同时仍然可以在 Python 程序中调用?这样做能提高速度吗?
在我的系统上,处理一张 224x224 的彩色图像需要 360 毫秒。
#!/usr/bin/env python3
# coding: utf-8
# original code https://github.com/verhovsky/squircle/blob/master/squircle.py
import cv2
import math
import time
import numpy
# Coordinates whose magnitude is below this threshold are treated as zero.
_epsilon = 1e-10
def _sgn(x):
    """Return the sign of ``x`` as a float.

    Returns ``0.0`` when ``x`` equals zero, ``-1.0`` when ``x`` is negative
    and ``1.0`` in every other case (this includes NaN, for which both
    comparisons are false).
    """
    if x == 0.0:
        return 0.0
    return -1.0 if x < 0 else 1.0
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Rescale ``coordinate`` linearly from ``[0, max_value]`` to ``[-1, 1]``.

    ``0`` maps to ``-1``, ``max_value / 2`` maps to ``0`` and ``max_value``
    maps to ``1``. ``max_value`` must be non-zero.
    """
    return coordinate / max_value * 2 - 1
def _one_coordinates_to_pixels(coordinate, max_value):
    """Rescale ``coordinate`` linearly from ``[-1, 1]`` to ``[0, max_value]``.

    ``-1`` maps to ``0``, ``0`` maps to ``max_value / 2`` and ``1`` maps to
    ``max_value``. The result is not rounded; callers floor it themselves.
    """
    return (coordinate + 1) / 2 * max_value
def _stretch_square_to_disc(x, y):
    """Scale the point ``(x, y)`` by ``max(|x|, |y|) / sqrt(x*x + y*y)``.

    Both coordinates are multiplied by the same factor: the magnitude of the
    larger coordinate divided by the point's distance from the origin.

    Args:
        x: First coordinate.
        y: Second coordinate.

    Returns:
        A ``(u, v)`` tuple. When either coordinate is closer to zero than
        ``_epsilon`` the point is returned unchanged; the early return also
        prevents a division by zero at the origin.
    """
    if abs(x) < _epsilon or abs(y) < _epsilon:
        return x, y
    x2 = x * x
    y2 = y * y
    reciprocal_hypotenuse = 1.0 / math.sqrt(x2 + y2)
    # sign(c) * c is just |c|; pick the coordinate with the larger magnitude.
    dominant = abs(x) if x2 > y2 else abs(y)
    multiplier = dominant * reciprocal_hypotenuse
    return x * multiplier, y * multiplier
def _transform(inp):
    """Remap every pixel of ``inp`` through ``_stretch_square_to_disc``.

    For each destination index ``(x, y)`` the indices are normalised with
    ``_pixel_coordinates_to_unit``, passed through
    ``_stretch_square_to_disc``, converted back with
    ``_one_coordinates_to_pixels`` and floored; the source element found at
    that position is copied into the result.

    Args:
        inp: Image indexed as ``inp[x][y]`` (presumably a NumPy image array
            such as the one returned by ``cv2.imread``).

    Returns:
        A new array created with ``numpy.zeros_like(inp)``. Destination
        positions whose computed source index falls outside the image keep
        their zero value.
    """
    result = numpy.zeros_like(inp)
    height = len(inp)
    for x, row in enumerate(inp):
        width = len(row)
        unit_x = _pixel_coordinates_to_unit(x, height)
        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)
            u, v = _stretch_square_to_disc(unit_x, unit_y)
            src_x = math.floor(_one_coordinates_to_pixels(u, height))
            src_y = math.floor(_one_coordinates_to_pixels(v, width))
            # Explicit range check instead of swallowing IndexError: a
            # negative index would not raise, it would silently wrap around.
            if 0 <= src_x < height and 0 <= src_y < width:
                result[x][y] = inp[src_x][src_y]
    return result
# -- load and test
img = cv2.imread('circle.png')
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising, which would otherwise surface as an opaque
    # TypeError on the slice below.
    raise FileNotFoundError("could not read image 'circle.png'")
# perf_counter is monotonic and high-resolution, so it is the right clock
# for measuring an interval (time.time can jump with system clock changes).
started = time.perf_counter()
squareImage = _transform(img[0:224, 0:224])
elapsed_ms = round((time.perf_counter() - started) * 1000)
print(str(elapsed_ms) + ' ms to squareImage')
cv2.imshow('square', squareImage)
key = cv2.waitKey(0)
cv2.destroyAllWindows()
我希望把这段代码(或其中一部分)改写成更快的实现,例如用 CUDA 直接在 GPU 上运行,或者使用 Numba、Cython、其他库等。
我使用 numba 来装饰这些函数(删除了 _sgn,改用 np.sign;还删除了 try..except——它是否必要?):
import math
import time
import cv2
import numpy
from numba import njit
# Coordinates whose magnitude is below this threshold are treated as zero.
_epsilon = 1e-10
@njit
def _pixel_coordinates_to_unit(coordinate, max_value):
    """Rescale ``coordinate`` linearly from ``[0, max_value]`` to ``[-1, 1]``.

    ``0`` maps to ``-1`` and ``max_value`` maps to ``1``. ``max_value`` must
    be non-zero.
    """
    return coordinate / max_value * 2 - 1
@njit
def _one_coordinates_to_pixels(coordinate, max_value):
    """Rescale ``coordinate`` linearly from ``[-1, 1]`` to ``[0, max_value]``.

    ``-1`` maps to ``0`` and ``1`` maps to ``max_value``. The result is not
    rounded; callers floor it themselves.
    """
    return (coordinate + 1) / 2 * max_value
@njit
def _stretch_square_to_disc(x, y):
    """Scale the point ``(x, y)`` by ``max(|x|, |y|) / sqrt(x*x + y*y)``.

    Both coordinates are multiplied by the same factor: the magnitude of the
    larger coordinate divided by the point's distance from the origin.

    Args:
        x: First coordinate.
        y: Second coordinate.

    Returns:
        A ``(u, v)`` tuple. When either coordinate is closer to zero than
        ``_epsilon`` the point is returned unchanged; the early return also
        prevents a division by zero at the origin.
    """
    if abs(x) < _epsilon or abs(y) < _epsilon:
        return x, y
    x2 = x * x
    y2 = y * y
    # The module is imported as ``numpy`` (there is no ``np`` alias), so the
    # functions must be referenced through that name.
    reciprocal_hypotenuse = 1.0 / numpy.sqrt(x2 + y2)
    if x2 > y2:
        multiplier = numpy.sign(x) * x * reciprocal_hypotenuse
    else:
        multiplier = numpy.sign(y) * y * reciprocal_hypotenuse
    return x * multiplier, y * multiplier
@njit
def _transform(inp):
    """Remap every pixel of ``inp`` through ``_stretch_square_to_disc``.

    For each destination index ``(x, y)`` the indices are normalised with
    ``_pixel_coordinates_to_unit``, passed through
    ``_stretch_square_to_disc``, converted back with
    ``_one_coordinates_to_pixels`` and floored; the source element found at
    that position is copied into the result.

    Args:
        inp: NumPy array with at least two dimensions, indexed as
            ``inp[x][y]``.

    Returns:
        A new array created with ``numpy.zeros_like(inp)``. Destination
        positions whose computed source index falls outside the image keep
        their zero value.
    """
    result = numpy.zeros_like(inp)
    height = inp.shape[0]
    width = inp.shape[1]
    for x in range(height):
        unit_x = _pixel_coordinates_to_unit(x, height)
        for y in range(width):
            unit_y = _pixel_coordinates_to_unit(y, width)
            u, v = _stretch_square_to_disc(unit_x, unit_y)
            src_x = math.floor(_one_coordinates_to_pixels(u, height))
            src_y = math.floor(_one_coordinates_to_pixels(v, width))
            # Compiled numba code does not bounds-check array indexing by
            # default, so guard the read explicitly instead of relying on an
            # IndexError being raised.
            if 0 <= src_x < height and 0 <= src_y < width:
                result[x][y] = inp[src_x][src_y]
    return result
# -- load and test
img = cv2.imread("circle.png")
if img is None:
    # cv2.imread reports a missing or unreadable file by returning None
    # rather than raising.
    raise FileNotFoundError("could not read image 'circle.png'")
# Warm up the JIT: the first call lets numba compile the functions, so a
# "cold" run takes noticeably longer than later calls.
# Ahead-of-time compilation is an alternative:
# https://numba.pydata.org/numba-doc/dev/user/pycc.html
squareImage = _transform(img[0:224, 0:224])
elapsed = time.perf_counter_ns()
squareImage = _transform(img[0:224, 0:224])
print(str((time.perf_counter_ns() - elapsed) / 1000) + " us to squareImage")
cv2.imwrite("shashed.png", squareImage)
在我的计算机(AMD 5700X)上打印:
528.596 us to squareImage
# without using numba:
# 47928.774 us to squareImage
使用的图片:
circle.png