Exception:写入 .h5 文件时报错"cannot find the correct atom type"(找不到正确的原子类型)?

问题描述 投票:0回答:1

将 DataFrame(数据框)转换为 HDF 文件时会报错,我不知道背后的原因。我尝试过填充 NaN 值,但仍然出现相同的错误。

注意:我的数据框中只有文本数据(object 类型的列),没有纯数字的列。

df.to_hdf('df.h5', 'df', format='table')

在同一次回溯(traceback)中,我遇到了两个错误。

---------------------------------------------------------------------------
MemoryError                               Traceback (most recent call last)
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
   3932                     errors=self.errors,
-> 3933                     info=self.info,
   3934                 )

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in set_atom(self, block, block_items, existing_col, min_itemsize, nan_rep, info, encoding, errors)
   2179                 encoding,
-> 2180                 errors,
   2181             )

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in set_atom_string(self, block, block_items, existing_col, min_itemsize, nan_rep, encoding, errors)
   2218         # itemsize is the maximum length of a string (along any dimension)
-> 2219         data_converted = _convert_string_array(data, encoding, errors)
   2220         itemsize = data_converted.itemsize

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in _convert_string_array(data, encoding, errors, itemsize)
   4881 
-> 4882     data = np.asarray(data, dtype="S{size}".format(size=itemsize))
   4883     return data

~/miniconda3/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
     84     
---> 85     return array(a, dtype, copy=False, order=order)
     86 

MemoryError: Unable to allocate 62.5 GiB for an array with shape (4, 1000000) and data type |S11414

During handling of the above exception, another exception occurred:

Exception                                 Traceback (most recent call last)
<ipython-input-12-618772a3e197> in <module>
----> 1 df.to_hdf('df.h5', 'df', format='table')

~/miniconda3/lib/python3.7/site-packages/pandas/core/generic.py in to_hdf(self, path_or_buf, key, **kwargs)
   2528         from pandas.io import pytables
   2529 
-> 2530         pytables.to_hdf(path_or_buf, key, self, **kwargs)
   2531 
   2532     def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs):

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
    276             path_or_buf, mode=mode, complevel=complevel, complib=complib
    277         ) as store:
--> 278             f(store)
    279     else:
    280         f(path_or_buf)

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in <lambda>(store)
    269         f = lambda store: store.append(key, value, **kwargs)
    270     else:
--> 271         f = lambda store: store.put(key, value, **kwargs)
    272 
    273     path_or_buf = _stringify_path(path_or_buf)

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in put(self, key, value, format, append, **kwargs)
    957             format = get_option("io.hdf.default_format") or "fixed"
    958         kwargs = self._validate_format(format, kwargs)
--> 959         self._write_to_group(key, value, append=append, **kwargs)
    960 
    961     def remove(self, key, where=None, start=None, stop=None):

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
   1523 
   1524         # write the object
-> 1525         s.write(obj=value, append=append, complib=complib, **kwargs)
   1526 
   1527         if s.is_table and index:

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
   4192         # create the axes
   4193         self.create_axes(
-> 4194             axes=axes, obj=obj, validate=append, min_itemsize=min_itemsize, **kwargs
   4195         )
   4196 

~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
   3942                     "cannot find the correct atom type -> "
   3943                     "[dtype->{name},items->{items}] {detail!s}".format(
-> 3944                         name=b.dtype.name, items=b_items, detail=detail
   3945                     )
   3946                 )

Exception: cannot find the correct atom type -> [dtype->object,items->Index(['a', 'b', 'c', 'd', 'e'], dtype='object')] Unable to allocate 62.5 GiB for an array with shape (4, 1000000) and data type |S11414
python python-3.x pandas hdf5 hdf
1个回答
0
投票

我认为您可能缺少 mode 参数(例如 mode='w')。

下面的代码对我有效:

pd.DataFrame({'a': ['testing to here'], 'b':['testing to the'], 'c': ['here is the'], 'd':'testing'}).to_hdf('data.h5', key='df', mode='w')
© www.soinside.com 2019 - 2024. All rights reserved.