我正在实现一个 memset 函数,该函数应该将字节对象缓冲区设置为零。
从 Python 3.11 开始,缓冲区 API 函数 PyObject_GetBuffer() 和 PyBuffer_Release() 已成为稳定 ABI 的一部分。
下面的代码有效,但是:
def memset(bytes_object):
    """Overwrite the buffer exported by *bytes_object* with zero bytes.

    A view of the object's memory is requested through the C-level buffer
    protocol (``PyObject_GetBuffer`` with flags ``0``), filled with
    ``ctypes.memset`` and then handed back with ``PyBuffer_Release``.

    Args:
        bytes_object: The object whose buffer is zeroed in place.

    Returns:
        None. The object is modified in place.

    Note:
        The ``readonly`` field of the view is never inspected, so the
        memory is overwritten even if the exporter reports a read-only
        buffer.
    """
    import ctypes

    # Define the Py_buffer structure
    class Py_buffer(ctypes.Structure):
        """ctypes description of the view filled in by PyObject_GetBuffer."""

        _fields_ = [
            ('buf', ctypes.c_void_p),
            ('obj', ctypes.py_object),
            ('len', ctypes.c_ssize_t),
            ('itemsize', ctypes.c_ssize_t),
            ('readonly', ctypes.c_int),
            ('ndim', ctypes.c_int),
            ('format', ctypes.c_char_p),
            ('shape', ctypes.POINTER(ctypes.c_ssize_t)),
            ('strides', ctypes.POINTER(ctypes.c_ssize_t)),
            ('suboffsets', ctypes.POINTER(ctypes.c_ssize_t)),
            ('internal', ctypes.c_void_p),
        ]

    buf = Py_buffer()
    # Acquire the view before entering the try block, so the release in
    # ``finally`` only ever runs for a view that was actually obtained.
    ctypes.pythonapi.PyObject_GetBuffer(
        ctypes.py_object(bytes_object), ctypes.byref(buf), ctypes.c_int(0)
    )
    try:
        # buf.buf is the start address and buf.len the size in bytes.
        ctypes.memset(buf.buf, 0, buf.len)
    finally:
        # Always give the view back, even if the fill above raised.
        ctypes.pythonapi.PyBuffer_Release(ctypes.byref(buf))
# Demo: build a bytes object at run time, zero it with memset(), and print
# its repr before and after the call.
obj = bytes("hello world", "ascii")
print("before:", repr(obj))
memset(obj)  # modifies obj's buffer in place; the call has no return value
print("after:", repr(obj))
给出这个输出:
before: b'hello world'
after: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
作为参考,这里有一种使用已弃用的 PyObject_AsCharBuffer 函数对缓冲区执行 memset 的旧方法。它在 Python 3.11 中仍然有效:
def memset_old(bytes_object: bytes) -> None:
    """Zero *bytes_object* in place using ``PyObject_AsCharBuffer``.

    The C function reports the buffer's start address and length through
    two out-parameters; the whole reported range is then overwritten with
    zero bytes via ``ctypes.memset``.

    Args:
        bytes_object: The ``bytes`` instance whose contents are zeroed.

    Raises:
        TypeError: If *bytes_object* is not a ``bytes`` instance.
    """
    import ctypes

    if not isinstance(bytes_object, bytes):
        raise TypeError(f"expected bytes, not {type(bytes_object)}")

    data = ctypes.POINTER(ctypes.c_char)()
    # The length out-parameter of PyObject_AsCharBuffer is a Py_ssize_t*.
    # Receiving it in a c_int would let the C side write past the variable
    # on platforms where Py_ssize_t is wider than int.
    size = ctypes.c_ssize_t()
    ctypes.pythonapi.PyObject_AsCharBuffer(
        ctypes.py_object(bytes_object),
        ctypes.pointer(data),
        ctypes.pointer(size),
    )
    ctypes.memset(data, 0, size.value)
# Demo for memset_old(): build a bytes object at run time, zero it, and
# print its repr before and after the call.
obj = bytes("hello world", "ascii")
print("old before:", repr(obj))
memset_old(obj)  # modifies obj's buffer in place; the call has no return value
print("old after:", repr(obj))
感觉很奇怪,必须自己定义 Py_buffer 类。难道没有在某处预定义好的吗?
Py_buffer 在 Python 层面没有任何预定义的类;它只作为 C 结构体定义在 Python include 目录下的 pybuffer.h 头文件中。
我对 PyBuffer_Release 的使用正确吗?
是的,但是定义 .argtypes 和 .restype 是个好习惯,这样 ctypes 就可以对参数进行类型检查。另外,如前所述,bytes 对象的缓冲区是只读的,因此修改它属于未定义行为。
带有类型检查的完整示例:
import ctypes as ct
# Flag values for the third (flags) argument of PyObject_GetBuffer.
PyBUF_SIMPLE = 0
# NOTE(review): not referenced by the code shown here; presumably the flag
# that requests a writable buffer — confirm against the CPython headers.
PyBUF_WRITABLE = 0x0001
class Py_buffer(ct.Structure):
    '''ctypes structure describing the view filled in by PyObject_GetBuffer.

    ``buf`` holds the start address and ``len`` the length that callers pass
    on to ``ct.memset``; ``obj`` is the exporting object and ``readonly``
    the exporter's read-only flag.
    '''

    _fields_ = (('buf', ct.c_void_p),
                ('obj', ct.py_object),
                ('len', ct.c_ssize_t),
                ('itemsize', ct.c_ssize_t),
                ('readonly', ct.c_int),
                ('ndim', ct.c_int),
                ('format', ct.c_char_p),
                ('shape', ct.POINTER(ct.c_ssize_t)),
                ('strides', ct.POINTER(ct.c_ssize_t)),
                ('suboffsets', ct.POINTER(ct.c_ssize_t)),
                ('internal', ct.c_void_p))

    def __repr__(self):
        '''Display representation of a buffer.

        A view whose ``obj`` field is still NULL (for example a freshly
        created, never filled structure) is shown as ``obj=NULL``.
        '''
        try:
            exporter = self.obj
        except ValueError:
            # ctypes raises ValueError when a py_object field holds NULL;
            # repr() must not fail just because the view is empty.
            return f'Py_buffer(obj=NULL, readonly={self.readonly})'
        return f'Py_buffer(obj={exporter!r}, readonly={self.readonly})'
# Declare the prototype of every C-API function used below. With argtypes
# and restype in place, ctypes can type-check the arguments of each call.

# Takes one Python object, returns a C int.
PyObject_CheckBuffer = ct.pythonapi.PyObject_CheckBuffer
PyObject_CheckBuffer.restype = ct.c_int
PyObject_CheckBuffer.argtypes = (ct.py_object,)

# Takes a Python object, a pointer to a Py_buffer and an int of flags;
# returns a C int.
PyObject_GetBuffer = ct.pythonapi.PyObject_GetBuffer
PyObject_GetBuffer.restype = ct.c_int
PyObject_GetBuffer.argtypes = (ct.py_object, ct.POINTER(Py_buffer), ct.c_int)

# Takes a pointer to a Py_buffer; has no return value.
PyBuffer_Release = ct.pythonapi.PyBuffer_Release
PyBuffer_Release.restype = None
PyBuffer_Release.argtypes = (ct.POINTER(Py_buffer),)
def memset(obj):
    '''Zero the buffer exported by ``obj`` in place.

    Objects for which PyObject_CheckBuffer reports no buffer support are
    left untouched and no error is raised. Otherwise a view is requested
    with PyBUF_SIMPLE, printed, filled with zero bytes and released.

    Args:
        obj: The object whose buffer should be overwritten.

    Returns:
        None in every case.
    '''
    if not PyObject_CheckBuffer(obj):  # ensure object passed supports the buffer interface
        return

    buf = Py_buffer()
    # PyObject_GetBuffer can throw an exception if it fails. It is called
    # before the try block so that PyBuffer_Release only ever runs on a
    # view that was actually obtained.
    PyObject_GetBuffer(obj, ct.byref(buf), PyBUF_SIMPLE)
    try:
        print(buf)  # View resulting buffer
        ct.memset(buf.buf, 0, buf.len)  # zero it
    finally:
        PyBuffer_Release(ct.byref(buf))
# Demo: zero a bytes literal and print its repr before and after the call.
# NOTE(review): the buffer reported for this object is read-only
# (readonly=1 in the printed Py_buffer), yet memset() overwrites it anyway.
obj = b'hello, world!'
print("before:", repr(obj))
memset(obj)  # also prints the Py_buffer view it obtained
print("after:", repr(obj))
输出:
before: b'hello, world!'
Py_buffer(obj=b'hello, world!', readonly=1)
after: b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'