我需要某种缓存来存储一个函数的结果。f
在Cython中,以备将来重用。一个简单的FIFO缓存策略,当缓存满时丢弃最近计算的最小结果就可以了。每当我从Python中调用另一个使用缓存的函数时,我需要重新初始化缓存,并调用 f
. 我想出了以下解决方案,使用 std::map
包裹在一个扩展类型中。
# distutils: language = c++
import sys
import time
from libcpp.map cimport map as cppmap
from libcpp.utility cimport pair as cpppair
from libcpp.queue cimport queue as cppqueue
from cython.operator cimport dereference as deref
ctypedef cpppair[long, long] mapitem_t
ctypedef cppmap[long, long].iterator mi_t
cdef class Cache_map:
"""Cache container"""
cdef:
cppmap[long, long] _cache_data
cppqueue[long] _order
long _cachesize
long _size
def __init__(self, long cachesize=100):
self._cachesize = cachesize
self._size = 0
cdef mi_t setitem(
self, mi_t it, long key, long value):
"""Insert key/value pair into cache and return position"""
if self._size >= self._cachesize:
self._cache_data.erase(self._order.front())
self._order.pop()
else:
self._size += 1
self._order.push(key)
return self._cache_data.insert(it, mapitem_t(key, value))
@property
def cache_data(self):
return self._cache_data
cdef long f(long x):
"""Expensive function"""
time.sleep(0.01)
return x**2
cdef long cached_f(long x, Cache_map Cache):
cdef mi_t search = Cache._cache_data.lower_bound(x)
if search != Cache._cache_data.end() and x == deref(search).first:
return deref(search).second
return deref(Cache.setitem(search, x, f(x))).second
def use_cache():
# Output container
cdef list cache_size = []
cdef list timings = []
cdef list results = []
cdef long i, r
cdef Cache_map Cache = Cache_map(10) # Initialise cache
cache_size.append(sys.getsizeof(Cache))
go = time.time()
for i in range(100):
# Silly loop using the cache
for r in range(2):
results.append(cached_f(i, Cache))
timings.append(time.time() - go)
go = time.time()
cache_size.append(sys.getsizeof(Cache))
go = time.time()
return cache_size, timings, results
虽然这在原则上是可行的,但它有一些缺点。
cached_f
包裹 f
(不太能重复使用)Cache
到 cached_f
(不必要的昂贵吗??)。Cached_map
明确写入缓存结果,从 f
(不太能重复使用)我想这是一个很标准的任务,那么有没有更好的方法呢?
例如,我试过将一个指向Cache的指针传递给 cached_f
但我似乎不能创建一个指向扩展类型对象的指针?下面的内容。
cdef Cache_map Cache = Cache_map(10)
cdef Cache_map *Cache_ptr
Cache_ptr = &Cache
抛出 cache_map.pyx:66:16: Cannot take address of Python variable 'Cache'
.
我认为从软件工程的角度来看,将函数(在Ccdef-Cython中是一个函数-指针函数)和它的memoization捆绑在一起放在一个objectclass中是一个不错的主意。
我的方法是写一个cdef类(我们称它为 FunWithMemoization
),它有一个函数指针和一个用于存储已知结果的memoization-data-structure。
由于时间太短,无法用Cython写c++代码,我用纯c++写了memoization-class(整个代码可以在下面找到),和你的方法差不多(不过是使用了 unordered_map
),并用Cython进行封装。
%%cython -+
from libcpp cimport bool
cdef extern from *:
"""
// see full code bellow
"""
struct memoization_result:
long value;
bool found;
cppclass memoization:
memoization()
void set_value(long, long)
memoization_result find_value(long key)
ctypedef long(*f_type)(long)
cdef long id_fun(long x):
return x
cdef class FunWithMemoization:
cdef memoization mem
cdef f_type fun
def __cinit__(self):
self.fun = id_fun
cpdef long evaluate(self, long x):
cdef memoization_result look_up = self.mem.find_value(x)
if look_up.found:
return look_up.value
cdef long val = self.fun(x)
self.mem.set_value(x, val)
return val
我已经用 id_fun
默认为初始化 fun
-成员,但我们需要进一步的功能来使 FunWithMemoization
例如,有用的。
import time
cdef long f(long x):
"""Expensive function"""
time.sleep(0.01)
return x**2
def create_f_with_memoization():
fun = FunWithMemoization()
fun.fun = f
return fun
显然还有其他方法来创造一个有用的。FunWithMemoization
,可以使用 ctypes
来获取函数的地址,或者这个 收据.
而现在。
f = create_f_with_memoization()
# first time really calculated:
%timeit -r 1 -n 1 f.evaluate(2)
#10.5 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
# second time - from memoization:
%timeit -r 1 -n 1 f.evaluate(2)
1.4 µs ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
整个代码。
%%cython -+
from libcpp cimport bool
cdef extern from *:
"""
#include<unordered_map>
#include <queue>
struct memoization_result{
long value;
bool found;
};
class memoization{
private:
std::unordered_map<long, long> map;
std::queue<long> key_order;
size_t max_size;
public:
memoization(): max_size(128){}
void set_value(long key, long val){
//assumes key isn't yet in map
map[key]=val;
key_order.push(key);
if(key_order.size()>max_size){
key_order.pop();
}
}
memoization_result find_value(long key) const{
auto it = map.find(key);
if(it==map.cend()){
return {0, false};
}
else{
return {it->second, true};
}
}
};
"""
struct memoization_result:
long value;
bool found;
cppclass memoization:
memoization()
void set_value(long, long)
memoization_result find_value(long key)
ctypedef long(*f_type)(long)
cdef long id_fun(long x):
return x
cdef class FunWithMemoization:
cdef memoization mem
cdef f_type fun
def __cinit__(self):
self.fun = id_fun
cpdef long evaluate(self, long x):
cdef memoization_result look_up = self.mem.find_value(x)
if look_up.found:
return look_up.value
cdef long val = self.fun(x)
self.mem.set_value(x, val)
return val
import time
cdef long f(long x):
"""Expensive function"""
time.sleep(0.01)
return x**2
def create_f_with_memoization():
fun = FunWithMemoization()
fun.fun = f
return fun