将 DataFrame 写入 HDF 文件时出现了错误,我不知道背后的原因。我尝试过填充 NaN 值,但仍然出现相同的错误。
注意:我的 DataFrame 中只有文本数据,没有纯数字类型的列。
df.to_hdf('df.h5', 'df', format='table')
在同一次回溯中出现了两个错误:
---------------------------------------------------------------------------
MemoryError Traceback (most recent call last)
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3932 errors=self.errors,
-> 3933 info=self.info,
3934 )
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in set_atom(self, block, block_items, existing_col, min_itemsize, nan_rep, info, encoding, errors)
2179 encoding,
-> 2180 errors,
2181 )
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in set_atom_string(self, block, block_items, existing_col, min_itemsize, nan_rep, encoding, errors)
2218 # itemsize is the maximum length of a string (along any dimension)
-> 2219 data_converted = _convert_string_array(data, encoding, errors)
2220 itemsize = data_converted.itemsize
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in _convert_string_array(data, encoding, errors, itemsize)
4881
-> 4882 data = np.asarray(data, dtype="S{size}".format(size=itemsize))
4883 return data
~/miniconda3/lib/python3.7/site-packages/numpy/core/_asarray.py in asarray(a, dtype, order)
84
---> 85 return array(a, dtype, copy=False, order=order)
86
MemoryError: Unable to allocate 62.5 GiB for an array with shape (4, 1000000) and data type |S11414
During handling of the above exception, another exception occurred:
Exception Traceback (most recent call last)
<ipython-input-12-618772a3e197> in <module>
----> 1 df.to_hdf('df.h5', 'df', format='table')
~/miniconda3/lib/python3.7/site-packages/pandas/core/generic.py in to_hdf(self, path_or_buf, key, **kwargs)
2528 from pandas.io import pytables
2529
-> 2530 pytables.to_hdf(path_or_buf, key, self, **kwargs)
2531
2532 def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs):
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in to_hdf(path_or_buf, key, value, mode, complevel, complib, append, **kwargs)
276 path_or_buf, mode=mode, complevel=complevel, complib=complib
277 ) as store:
--> 278 f(store)
279 else:
280 f(path_or_buf)
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in <lambda>(store)
269 f = lambda store: store.append(key, value, **kwargs)
270 else:
--> 271 f = lambda store: store.put(key, value, **kwargs)
272
273 path_or_buf = _stringify_path(path_or_buf)
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in put(self, key, value, format, append, **kwargs)
957 format = get_option("io.hdf.default_format") or "fixed"
958 kwargs = self._validate_format(format, kwargs)
--> 959 self._write_to_group(key, value, append=append, **kwargs)
960
961 def remove(self, key, where=None, start=None, stop=None):
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in _write_to_group(self, key, value, format, index, append, complib, encoding, **kwargs)
1523
1524 # write the object
-> 1525 s.write(obj=value, append=append, complib=complib, **kwargs)
1526
1527 if s.is_table and index:
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in write(self, obj, axes, append, complib, complevel, fletcher32, min_itemsize, chunksize, expectedrows, dropna, **kwargs)
4192 # create the axes
4193 self.create_axes(
-> 4194 axes=axes, obj=obj, validate=append, min_itemsize=min_itemsize, **kwargs
4195 )
4196
~/miniconda3/lib/python3.7/site-packages/pandas/io/pytables.py in create_axes(self, axes, obj, validate, nan_rep, data_columns, min_itemsize, **kwargs)
3942 "cannot find the correct atom type -> "
3943 "[dtype->{name},items->{items}] {detail!s}".format(
-> 3944 name=b.dtype.name, items=b_items, detail=detail
3945 )
3946 )
Exception: cannot find the correct atom type -> [dtype->object,items->Index(['a', 'b', 'c', 'd', 'e'], dtype='object')] Unable to allocate 62.5 GiB for an array with shape (4, 1000000) and data type |S11414
我认为您可能缺少 mode 参数。
下面的写法对我有效:
pd.DataFrame({'a': ['testing to here'], 'b':['testing to the'], 'c': ['here is the'], 'd':'testing'}).to_hdf('data.h5', key='df', mode='w')