目标
我正在尝试将来自谷歌实体情绪分析的响应对象展平,该对象位于 python 笔记本中熊猫数据框(
entitysentiment
)中名为df
的字段中。下面是单行 entitysentiment
字段的响应对象条目之一的示例 [1].
需要遍历df的每一行,找到
entitysentiment
字段,将对象及其嵌套对象压平
我试过的最新功能如下[2]。结果错误消息是 [3].
在[2]中,我之前尝试的版本被注释掉了,但是收到错误消息
AttributeError: Unknown field for AnalyzeEntitySentimentResponse: split
.
任何关于如何解决这个问题的意见或对我做错了什么的想法都将不胜感激。
[1]
entities {\n name: "login page"\n type_: OTHER\n salience: 0.5467509031295776\n mentions {\n text {\n content: "login page"\n begin_offset: 24\n }\n type_: COMMON\n sentiment {\n magnitude: 0.4000000059604645\n score: -0.4000000059604645\n }\n }\n sentiment {\n magnitude: 0.4000000059604645\n score: -0.4000000059604645\n }\n}\nentities {\n name: "app"\n type_: CONSUMER_GOOD\n salience: 0.45324909687042236\n mentions {\n text {\n content: "app"\n begin_offset: 52\n }\n type_: COMMON\n sentiment {\n magnitude: 0.4000000059604645\n score: -0.4000000059604645\n }\n }\n sentiment {\n magnitude: 0.4000000059604645\n score: -0.4000000059604645\n }\n}\nlanguage: "en"\n
[2]
"""
# Define a function to extract entity mentions from entitysentiment
def extract_entities(text):
entities = []
for line in text.split('\n'):
if 'content:' in line:
entity = line.strip().split(':')[-1].strip().replace("'", "")
entities.append(entity)
return entities
"""
def extract_entities(text):
entities = []
if 'entity_mentions' not in text:
return entities
for entity in text['entity_mentions']:
entities.append(entity['content'])
return entities
# Apply the function to the entitysentiment column
df['entity_mentions'] = df['entitysentiment'].apply(extract_entities)
# Convert the entity mentions to separate columns
entity_mentions_df = pd.DataFrame(df['entity_mentions'].to_list(), columns=['entity_mention_1', 'entity_mention_2', 'entity_mention_3'])
# Concatenate the original dataframe with the entity mentions dataframe
result = pd.concat([df, entity_mentions_df], axis=1)
# Drop the original entitysentiment and entity_mentions columns
result.drop(['entitysentiment', 'entity_mentions'], axis=1, inplace=True)
# Show the result
print(result)
[3]
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
/tmp/ipykernel_1/3330714854.py in <module>
22
23 # Apply the function to the entitysentiment column
---> 24 df['entity_mentions'] = df['entitysentiment'].apply(extract_entities)
25
26 # Convert the entity mentions to separate columns
/opt/conda/lib/python3.7/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwargs)
4355 dtype: float64
4356 """
-> 4357 return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
4358
4359 def _reduce(
/opt/conda/lib/python3.7/site-packages/pandas/core/apply.py in apply(self)
1041 return self.apply_str()
1042
-> 1043 return self.apply_standard()
1044
1045 def agg(self):
/opt/conda/lib/python3.7/site-packages/pandas/core/apply.py in apply_standard(self)
1099 values,
1100 f, # type: ignore[arg-type]
-> 1101 convert=self.convert_dtype,
1102 )
1103
/opt/conda/lib/python3.7/site-packages/pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()
/tmp/ipykernel_1/3330714854.py in extract_entities(text)
13 def extract_entities(text):
14 entities = []
---> 15 if 'entity_mentions' not in text:
16 return entities
17 for entity in text['entity_mentions']:
/opt/conda/lib/python3.7/site-packages/proto/message.py in __contains__(self, key)
686 wire serialization.
687 """
--> 688 pb_value = getattr(self._pb, key)
689 try:
690 # Protocol buffers "HasField" is unfriendly; it only works
AttributeError: entity_mentions