我正在给定坐标周围做 2D 或 3D 二进制掩码,然后使用 scipy.ndimage.label 将它们识别为标签。 现在,我有一个 cupy 解决方案,一个 numpy 解决方案。 Cupy 很快,numpy 很慢,两者都各司其职。我正在尝试让它在其他非 nvidia GPU 或 CPU 上运行,使用 openCl 或任何其他更“跨平台”的形式,例如 pyopencl 或 numba,甚至是tensorflow(或以上所有)
这是我当前的功能:
def make_labels_links(shape,j,radius=5,_algo="GPU"):
import scipy.ndimage as ndi
if _algo == "GPU":
import cupy as cp
if 'z' in j:
pos = cp.dstack((j.z,j.y,j.x))[0]#.astype(int)
print("3D",j)
else:
pos = cp.dstack((j.y,j.x))[0]#.astype(int)
print("2D",j)
ndim = len(shape)
# radius = validate_tuple(radius, ndim)
pos = cp.atleast_2d(pos)
# if include_edge:
in_mask = cp.array([cp.sum(((cp.indices(shape).T - p) / radius)**2, -1) <= 1
for p in pos])
# else:
# in_mask = [np.sum(((np.indices(shape).T - p) / radius)**2, -1) < 1
# for p in pos]
mask_total = cp.any(in_mask, axis=0).T
##if they overlap the labels won't match the points
#we can make np.ones * ID of the point and then np.max(axis=-1)
labels, nb = ndi.label(cp.asnumpy(mask_total))
#this is super slow
elif _algo=='CPU':
if 'z' in j:
# "Need to loop each t and do one at a time"
pos = np.dstack((j.z,j.y,j.x))[0]#.astype(int)
print("3D",j)
else:
pos = np.dstack((j.y,j.x))[0]#.astype(int)
print("2D",j)
ndim = len(shape)
pos = np.atleast_2d(pos)
in_mask = np.array([np.sum(((np.indices(shape).T - p) / radius)**2, -1) <= 1
for p in pos])
mask_total = np.any(in_mask, axis=0).T
labels, nb = ndi.label(mask_total)
形状例如 (1024,1024) 或 (20,1024,1024) j 是 pandas 数据框的位置 例如:
ID Frame z y x mass size ecc
0 14 0 9.121369 24.139295 297.156056 5311.661985 1.383264 NaN
1 7 0 8.270742 24.880990 308.757851 6341.082129 1.403242 NaN
2 1 0 4.881552 28.963021 291.900410 6587.918081 1.330116 NaN
3 8 0 8.015537 31.137655 304.999176 5423.140504 1.341228 NaN
4 15 0 10.910010 32.275774 298.940183 6033.069086 1.333235 NaN
5 4 0 7.057360 33.958352 313.252877 8123.989610 1.395905 NaN
6 9 0 7.922786 39.966382 308.092925 6683.742777 1.398941 NaN
7 5 0 7.163435 40.774834 329.106137 5225.643216 1.402079 NaN
8 10 0 7.665061 49.844910 306.998559 6054.428176 1.408336 NaN
9 11 0 7.909787 50.951857 320.125061 5057.551819 1.314976 NaN
10 6 0 7.148229 52.051917 312.929513 9427.345719 1.351461 NaN
11 12 0 7.599882 55.917379 303.916436 6220.891194 1.401507 NaN
12 0 0 3.992557 59.060168 298.188021 7489.421410 1.391892 NaN
13 13 0 7.684166 62.945226 303.940392 7970.470413 1.384929 NaN
14 3 0 6.102017 70.796726 316.141957 5091.926046 1.393204 NaN
15 2 0 4.937488 87.916144 192.836998 5068.320508 1.330015 NaN
我尝试使用 pyopencl 来实现,但无法实现,有什么建议吗? 谢谢
尝试用数组操作替换
for
循环。例如,在掩码计算中,我们可以按如下方式重新排列平方和:
inarr = np.indices(shape).T
a = np.sum(inarr, axis=-1, keepdims=True)
a2 = np.sum(inarr ** 2, axis=-1, keepdims=True)
b = pos
for _ in range(len(shape)):
b = np.expand_dims(b, axis=0)
b2 = b ** 2
sq_sum = a2 - a * 2 * b + b2 * len(shape)
mask_total = np.any(sq_sum <= radius ** 2, axis=-1).T
这适用于 pyopencl
def make_labels_links_opencl(shape, j, radius=5):
import numpy as np
import pyopencl as cl
from scipy.ndimage import label
if 'z' in j:
# "Need to loop each t and do one at a time"
positions = np.dstack((j.z,j.y,j.x))[0]#.astype(int)
print("3D",j)
else:
positions = np.dstack((j.y,j.x))[0]#.astype(int)
print("2D",j)
# Prepare data
mask = np.zeros(shape, dtype=np.uint8)
positions_flat = positions.flatten().astype(np.float32)
radius = np.float32(radius)
# platform = cl.get_platforms()
# my_gpu_devices = platform[1].get_devices(device_type=cl.device_type.GPU)
# ctx = cl.Context(devices=my_gpu_devices)
# PyOpenCL setup
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)
mf = cl.mem_flags
# Create buffers
mask_buf = cl.Buffer(ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=mask)
positions_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=positions_flat)
# Kernel code
kernel_code = """
__kernel void fill_mask(__global uchar *mask, __global const float *positions, const float radius, const int num_positions) {
int x = get_global_id(0);
int y = get_global_id(1);
int z = get_global_id(2);
int width = get_global_size(0);
int height = get_global_size(1);
int depth = get_global_size(2);
int idx = x + y * width + z * width * height;
for (int i = 0; i < num_positions; ++i) {
float pz = positions[i * 3];
float py = positions[i * 3 + 1];
float px = positions[i * 3 + 2];
float distance = sqrt(pow(px - x, 2) + pow(py - y, 2) + pow(pz - z, 2));
if (distance <= radius) {
mask[idx] = 1;
break;
}
}
}
"""
# Build kernel
prg = cl.Program(ctx, kernel_code).build()
# Execute kernel
global_size = shape[::-1] # Note: PyOpenCL uses column-major order, so we reverse the dimensions
prg.fill_mask(queue, global_size, None, mask_buf, positions_buf, radius, np.int32(len(positions)))
# Read back the results
cl.enqueue_copy(queue, mask, mask_buf)
labels,_=label(mask)
return labels,positions