我已经能够从特定的 subreddit 中抓取某个特定日期之后的最热门 reddit 帖子。我将这些帖子的标题、帖子正文和其他属性收集到了一个数据框(DataFrame)中。
但是,我还想收集每个帖子作者的属性。我首先尝试为每个帖子的作者收集评论 karma(comment karma),但遇到了错误。
import praw
import pandas as pd
import datetime
# Collect the top posts of r/SuicideWatch created after a cutoff date, together
# with a few attributes of each post's author, into a pandas DataFrame.
# NOTE(review): `reddit_read_only` is not defined in this snippet; it is
# presumably a read-only `praw.Reddit` instance created earlier -- confirm.
subreddit = reddit_read_only.subreddit("SuicideWatch")

# Scraping the top posts of all time
posts = subreddit.top(time_filter="all", limit=None)

# One list per output column; every kept post appends exactly one value to
# each list so the columns stay aligned.
posts_dict = {
    "Title": [],
    "Post Text": [],
    "Author": [],
    "Author Karma": [],
    "ID": [],
    "Score": [],
    "Total Comments": [],
    "Created On": [],
    "Post URL": [],
    "Original Content": [],
    "Edited": [],
    "Saved": [],
}

# Cutoff expressed as a Unix timestamp so it can be compared with
# `post.created_utc`. The parsed datetime is naive, so `.timestamp()`
# interprets it in the machine's local time zone.
start_date = '01-01-20 00:00:00'
start_date = datetime.datetime.strptime(start_date, '%d-%m-%y %H:%M:%S').timestamp()

for post in posts:
    # Skip posts created at or before the cutoff.
    if post.created_utc <= start_date:
        continue

    # Title of each post
    posts_dict["Title"].append(post.title)
    # Text inside a post
    posts_dict["Post Text"].append(post.selftext)

    # Author of the post
    author = post.author
    posts_dict["Author"].append(author)

    # Author comment karma.
    # `post.author` can be None (e.g. the account was deleted); passing that
    # None to `reddit_read_only.redditor(...)` is what raised
    # "Exactly one of 'name', 'fullname', or '_data' must be provided."
    # The karma is read directly from the author object, and the attribute
    # access happens *before* `append` (which itself returns None).
    # NOTE(review): reading `comment_karma` presumably triggers one extra API
    # request per author, and accounts that can no longer be fetched may still
    # raise a PRAW/prawcore exception here -- confirm and handle if needed.
    if author is None:
        author_karma = None
    else:
        # Falls back to None when the attribute is missing on the account.
        author_karma = getattr(author, "comment_karma", None)
    posts_dict["Author Karma"].append(author_karma)

    # Unique ID of each post
    posts_dict["ID"].append(post.id)
    # The score of a post
    posts_dict["Score"].append(post.score)
    # Total number of comments inside the post
    posts_dict["Total Comments"].append(post.num_comments)
    # Date the post was created (Unix timestamp, converted below)
    posts_dict["Created On"].append(post.created_utc)
    # URL of each post
    posts_dict["Post URL"].append(post.url)
    # Original-content flag of each post
    posts_dict["Original Content"].append(post.is_original_content)
    # Edited check for each post
    posts_dict["Edited"].append(post.edited)
    # Saved check for each post
    posts_dict["Saved"].append(post.saved)

# Saving the data in a pandas dataframe
all_posts = pd.DataFrame(posts_dict)
# Convert the epoch seconds in "Created On" to pandas datetimes.
all_posts['Created On'] = pd.to_datetime(all_posts['Created On'], unit='s')
这是我添加的用于收集 redditor 信息的行:
# Author karma
# NOTE(review): the closing parenthesis of `append(` comes before
# `.comment_karma`, so the attribute is looked up on the return value of
# `list.append(...)` rather than on the redditor object.
# NOTE(review): the TypeError in the traceback below is raised inside
# `redditor(...)`, whose check requires exactly one of name/fullname/_data;
# presumably `post.author` was None for that post -- confirm.
posts_dict["Author Karma"].append(reddit_read_only.redditor(post.author)).comment_karma
这是由此产生的错误:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[11], line 33
30 posts_dict["Author"].append(post.author)
32 # Author Karama
---> 33 posts_dict["Author Karma"].append(reddit_read_only.redditor(post.author)).comment_karma
35 # Unique ID of each post
36 posts_dict["ID"].append(post.id)
File ~\anaconda3\lib\site-packages\praw\util\deprecate_args.py:43, in _deprecate_args.<locals>.wrapper.<locals>.wrapped(*args, **kwargs)
36 arg_string = _generate_arg_string(_old_args[: len(args)])
37 warn(
38 f"Positional arguments for {func.__qualname__!r} will no longer be"
39 f" supported in PRAW 8.\nCall this function with {arg_string}.",
40 DeprecationWarning,
41 stacklevel=2,
42 )
---> 43 return func(**dict(zip(_old_args, args)), **kwargs)
File ~\anaconda3\lib\site-packages\praw\reddit.py:908, in Reddit.redditor(self, name, fullname)
896 @_deprecate_args("name", "fullname")
897 def redditor(
898 self, name: Optional[str] = None, *, fullname: Optional[str] = None
899 ) -> "praw.models.Redditor":
900 """Return a lazy instance of :class:`.Redditor`.
901
902 :param name: The name of the redditor.
(...)
906
907 """
--> 908 return models.Redditor(self, name=name, fullname=fullname)
File ~\anaconda3\lib\site-packages\praw\models\reddit\redditor.py:156, in Redditor.__init__(self, reddit, name, fullname, _data)
146 """Initialize a :class:`.Redditor` instance.
147
148 :param reddit: An instance of :class:`.Reddit`.
(...)
153
154 """
155 if (name, fullname, _data).count(None) != 2:
--> 156 raise TypeError(
157 "Exactly one of 'name', 'fullname', or '_data' must be provided."
158 )
159 if _data:
160 assert (
161 isinstance(_data, dict) and "name" in _data
162 ), "Please file a bug with PRAW."
TypeError: Exactly one of 'name', 'fullname', or '_data' must be provided.