对数据进行分组聚合时出现 TypeError: unhashable type: 'numpy.ndarray'

问题描述 投票:0回答:1

我正在处理从 YouTube 频道统计信息中获取的数据。在对数据进行聚合时，我收到了一条错误消息，提示 numpy.ndarray 是不可哈希（unhashable）的类型。“body_focus”和“type_of_workout”两列是我用 np.where 函数创建的；我没有贴出这部分脚本，因为我认为它不是导致这个 TypeError 的原因。我的代码如下：

workout_df = videos_df[['Year','body_focus','type_of_workout','viewCount','commentCount','likeCount']]
workout_df  

workout_df.groupby(by = ['Year','body_focus'])['viewCount','commentCount','likeCount'].sum()\
                  .sort('Year', ascending = True)  

然后我得到错误信息:

----------------------------------------------------------------
TypeError                      Traceback (most recent call last)
Cell In[166], line 1
----> 1 videos_df.groupby(by = ['Year','body_focus'])['viewCount','commentCount','likeCount'].sum()\
      2          .sort('Year', ascending = True)

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\groupby.py:2434, in GroupBy.sum(self, numeric_only, min_count, engine, engine_kwargs)
   2429 else:
   2430     # If we are grouping on categoricals we want unobserved categories to
   2431     # return zero, rather than the default of NaN which the reindexing in
   2432     # _agg_general() returns. GH #31422
   2433     with com.temp_setattr(self, "observed", True):
-> 2434         result = self._agg_general(
   2435             numeric_only=numeric_only,
   2436             min_count=min_count,
   2437             alias="sum",
   2438             npfunc=np.sum,
   2439         )
   2441     return self._reindex_output(result, fill_value=0)

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\groupby.py:1692, in GroupBy._agg_general(self, numeric_only, min_count, alias, npfunc)
   1680 @final
   1681 def _agg_general(
   1682     self,
   (...)
   1687     npfunc: Callable,
   1688 ):
   1690     with self._group_selection_context():
   1691         # try a cython aggregation if we can
-> 1692         result = self._cython_agg_general(
   1693             how=alias,
   1694             alt=npfunc,
   1695             numeric_only=numeric_only,
   1696             min_count=min_count,
   1697         )
   1698         return result.__finalize__(self.obj, method="groupby")

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\groupby.py:1796, in GroupBy._cython_agg_general(self, how, alt, numeric_only, min_count, ignore_failures, **kwargs)
   1793 if not is_ser and len(new_mgr) < orig_len:
   1794     warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only)
-> 1796 res = self._wrap_agged_manager(new_mgr)
   1797 if is_ser:
   1798     res.index = self.grouper.result_index

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\generic.py:1511, in DataFrameGroupBy._wrap_agged_manager(self, mgr)
   1509     result = result._consolidate()
   1510 else:
-> 1511     index = self.grouper.result_index
   1512     mgr.set_axis(1, index)
   1513     result = self.obj._constructor(mgr)

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\_libs\properties.pyx:36, in pandas._libs.properties.CachedProperty.__get__()

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\ops.py:995, in BaseGrouper.result_index(self)
    992 if len(self.groupings) == 1:
    993     return self.groupings[0].result_index.rename(self.names[0])
--> 995 codes = self.reconstructed_codes
    996 levels = [ping.result_index for ping in self.groupings]
    997 return MultiIndex(
    998     levels=levels, codes=codes, verify_integrity=False, names=self.names
    999 )

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\ops.py:986, in BaseGrouper.reconstructed_codes(self)
    984 @property
    985 def reconstructed_codes(self) -> list[npt.NDArray[np.intp]]:
--> 986     codes = self.codes
    987     ids, obs_ids, _ = self.group_info
    988     return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True)

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\ops.py:897, in BaseGrouper.codes(self)
    894 @final
    895 @property
    896 def codes(self) -> list[npt.NDArray[np.signedinteger]]:
--> 897     return [ping.codes for ping in self.groupings]

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\ops.py:897, in <listcomp>(.0)
    894 @final
    895 @property
    896 def codes(self) -> list[npt.NDArray[np.signedinteger]]:
--> 897     return [ping.codes for ping in self.groupings]

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\grouper.py:621, in Grouping.codes(self)
    617 if self._codes is not None:
    618     # _codes is set in __init__ for MultiIndex cases
    619     return self._codes
--> 621 return self._codes_and_uniques[0]

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\_libs\properties.pyx:36, in pandas._libs.properties.CachedProperty.__get__()

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\groupby\grouper.py:692, in Grouping._codes_and_uniques(self)
    685     uniques = (
    686         self.grouping_vector.result_index._values  # type: ignore[assignment]
    687     )
    688 else:
    689     # GH35667, replace dropna=False with use_na_sentinel=False
    690     # error: Incompatible types in assignment (expression has type "Union[
    691     # ndarray[Any, Any], Index]", variable has type "Categorical")
--> 692     codes, uniques = algorithms.factorize(  # type: ignore[assignment]
    693         self.grouping_vector, sort=self._sort, use_na_sentinel=self._dropna
    694     )
    695 return codes, uniques

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\algorithms.py:818, in factorize(values, sort, na_sentinel, use_na_sentinel, size_hint)
    815             # Don't modify (potentially user-provided) array
    816             values = np.where(null_mask, na_value, values)
--> 818     codes, uniques = factorize_array(
    819         values,
    820         na_sentinel=na_sentinel_arg,
    821         size_hint=size_hint,
    822     )
    824 if sort and len(uniques) > 0:
    825     if na_sentinel is None:
    826         # TODO: Can remove when na_sentinel=na_sentinel as in TODO above

File ~\AppData\Roaming\Python\Python310\site-packages\pandas\core\algorithms.py:574, in factorize_array(values, na_sentinel, size_hint, na_value, mask)
    571 hash_klass, values = _get_hashtable_algo(values)
    573 table = hash_klass(size_hint or len(values))
--> 574 uniques, codes = table.factorize(
    575     values,
    576     na_sentinel=na_sentinel,
    577     na_value=na_value,
    578     mask=mask,
    579     ignore_na=ignore_na,
    580 )
    582 # re-cast e.g. i8->dt64/td64, uint8->bool
    583 uniques = _reconstruct_data(uniques, original.dtype, original)

File pandas\_libs\hashtable_class_helper.pxi:5943, in pandas._libs.hashtable.PyObjectHashTable.factorize()

File pandas\_libs\hashtable_class_helper.pxi:5857, in pandas._libs.hashtable.PyObjectHashTable._unique()

TypeError: unhashable type: 'numpy.ndarray'

我检查了每个变量的数据类型，聚合所需的各列数据类型都是正确的。请帮我找出代码中的错误，并告诉我应该如何完成这个聚合。

group-by numpy-ndarray aggregation
1个回答
0
投票

试试下面这段代码：

videos_grouped = videos_df.groupby(by=['Year', 'body_focus'], as_index=False)['viewCount', 'commentCount', 'likeCount'].sum()

videos_sorted = videos_grouped.sort_values(by='Year', ascending=True)

© www.soinside.com 2019 - 2024. All rights reserved.