我知道这个问题看起来像是重复提问,但在看过《dask apply: AttributeError: 'DataFrame' object has no attribute 'name'》这个问题之后,我仍然不清楚我的代码到底错在哪里。下面是一个最小可复现示例,我认为它反映了我在更大的代码中遇到的问题。
import pandas as pd
from dask import dataframe as dd
dummy_df = pd.DataFrame({'a': [1, 2, 3, 4, 5]})
dd_dummy = dd.from_pandas(dummy_df, npartitions=1)


def test(x, bleh):
    """Arbitrary function that returns dataframe, taking keyword argument from apply.

    Args:
        x: The value handed over by ``apply``; it is not used.
        bleh: Value repeated in every row of the result.

    Returns:
        A pandas DataFrame with a single column, ``'test'``, holding
        ``bleh`` seven times.
    """
    return pd.DataFrame({'test': 7 * [bleh]})


# NOTE: this call is the reproduction of the reported problem; ``.compute()``
# raises the AttributeError shown in the traceback below.
ok = dd_dummy.apply(
    test,
    axis=1,
    bleh='fish',
    meta={'test': 'str'},
).compute()
我认为自己已经严格按照文档(https://docs.dask.org/en/latest/dataframe-api.html#dask.dataframe.DataFrame.apply )操作了,所以不确定接下来还能尝试什么。
错误消息:
AttributeError Traceback (most recent call last)
<ipython-input-55-76aefefcffe4> in <module>()
12 ok = dd_dummy.apply(test, axis = 1
13 , bleh = 'fish'
---> 14 , meta = {'test' : 'str'}).compute()
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/base.py in compute(self, **kwargs)
164 dask.base.compute
165 """
--> 166 (result,) = compute(self, traverse=False, **kwargs)
167 return result
168
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/base.py in compute(*args, **kwargs)
435 keys = [x.__dask_keys__() for x in collections]
436 postcomputes = [x.__dask_postcompute__() for x in collections]
--> 437 results = schedule(dsk, keys, **kwargs)
438 return repack([f(r, *a) for r, (f, a) in zip(results, postcomputes)])
439
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/threaded.py in get(dsk, result, cache, num_workers, pool, **kwargs)
82 get_id=_thread_get_id,
83 pack_exception=pack_exception,
---> 84 **kwargs
85 )
86
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/local.py in get_async(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)
484 _execute_task(task, data) # Re-execute locally
485 else:
--> 486 raise_exception(exc, tb)
487 res, worker_id = loads(res_info)
488 state["cache"][key] = res
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/local.py in reraise(exc, tb)
314 if exc.__traceback__ is not tb:
315 raise exc.with_traceback(tb)
--> 316 raise exc
317
318
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/local.py in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
220 try:
221 task, data = loads(task_info)
--> 222 result = _execute_task(task, data)
223 id = get_id()
224 result = dumps((result, id))
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/optimization.py in __call__(self, *args)
989 if not len(args) == len(self.inkeys):
990 raise ValueError("Expected %d args, got %d" % (len(self.inkeys), len(args)))
--> 991 return core.get(self.dsk, self.outkey, dict(zip(self.inkeys, args)))
992
993 def __reduce__(self):
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/core.py in get(dsk, out, cache)
149 for key in toposort(dsk):
150 task = dsk[key]
--> 151 result = _execute_task(task, cache)
152 cache[key] = result
153 result = _execute_task(out, cache)
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/core.py in _execute_task(arg, cache, dsk)
119 # temporaries by their reference count and can execute certain
120 # operations in-place.
--> 121 return func(*(_execute_task(a, cache) for a in args))
122 elif not ishashable(arg):
123 return arg
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/utils.py in apply(func, args, kwargs)
28 def apply(func, args, kwargs=None):
29 if kwargs:
---> 30 return func(*args, **kwargs)
31 else:
32 return func(*args)
~/anaconda3/envs/python3/lib/python3.6/site-packages/dask/dataframe/core.py in apply_and_enforce(*args, **kwargs)
5141 c = meta.columns
5142 else:
-> 5143 c = meta.name
5144 return _rename(c, df)
5145 return df
~/anaconda3/envs/python3/lib/python3.6/site-packages/pandas/core/generic.py in __getattr__(self, name)
5065 if self._info_axis._can_hold_identifiers_and_holds_name(name):
5066 return self[name]
-> 5067 return object.__getattribute__(self, name)
5068
5069 def __setattr__(self, name, value):
AttributeError: 'DataFrame' object has no attribute 'name'
谢谢!
# ``meta`` is given as a (name, dtype) tuple, i.e. an unnamed object-dtype
# Series, instead of a dict describing DataFrame columns.
# NOTE(review): presumably this works because the row-wise apply yields a
# Series holding one DataFrame object per row -- confirm against dask docs.
ok = dd_dummy.apply(
    test,
    axis=1,
    bleh='fish',
    meta=(None, 'object'),  # closing parenthesis of apply() was missing
).compute()
我暂时先保留这个问题,看看是否有人可以提供解释,因为我仍然不确定为什么这样做可行。