如何获取从 subreddit 抓取到的帖子的作者(Redditor)信息?

问题描述 投票:0回答:0

我已经能够从特定的 subreddit 中抓取特定日期之后的最热门帖子,并将这些帖子的标题、帖子正文和其他属性收集到一个数据框(DataFrame)中。

但是,我还想收集每个帖子作者的属性。我首先尝试获取每个帖子作者的评论 karma(comment karma),但遇到了错误。

import praw
import pandas as pd
import datetime

subreddit = reddit_read_only.subreddit("SuicideWatch")

# Top posts of all time; limit=None asks for as many posts as the listing
# will return.
posts = subreddit.top(time_filter="all", limit=None)

# Column name -> list of values, one entry per kept post. Every list must
# grow in lockstep so that pd.DataFrame(posts_dict) gets equal-length columns.
posts_dict = {"Title": [], "Post Text": [], "Author": [],
              "Author Karma": [], "ID": [], "Score": [],
              "Total Comments": [], "Created On": [], "Post URL": [],
              "Original Content": [], "Edited": [], "Saved": []
              }

# Cut-off converted to an epoch timestamp so it can be compared directly with
# post.created_utc. The string carries no timezone, so strptime() returns a
# naive datetime and .timestamp() interprets it in the machine's local zone.
start_date = '01-01-20 00:00:00'
start_date = datetime.datetime.strptime(start_date, '%d-%m-%y %H:%M:%S').timestamp()

for post in posts:
    # Keep only posts created after the cut-off.
    if post.created_utc <= start_date:
        continue

    # post.author is already a Redditor object, so there is no need to pass it
    # back through reddit_read_only.redditor(). It is None when the author is
    # no longer available; handing that None to redditor() is what raised
    # "Exactly one of 'name', 'fullname', or '_data' must be provided."
    author = post.author
    if author is None:
        author_name = None
        author_karma = None
    else:
        author_name = author.name
        # Reading comment_karma makes PRAW fetch the author's profile, i.e.
        # one extra request per post. Per the PRAW Redditor docs, suspended
        # accounts do not expose comment_karma, hence the default.
        # NOTE(review): the fetch itself may still raise for accounts Reddit
        # refuses to serve - confirm whether that needs handling here.
        author_karma = getattr(author, "comment_karma", None)

    # Title of the post
    posts_dict["Title"].append(post.title)

    # Text inside the post
    posts_dict["Post Text"].append(post.selftext)

    # Username of the post's author (None when the author is unavailable)
    posts_dict["Author"].append(author_name)

    # Comment karma of the author. The value is computed first and then
    # appended: list.append() returns None, so an attribute chained after
    # the append() call would never reach the Redditor.
    posts_dict["Author Karma"].append(author_karma)

    # Unique ID of the post
    posts_dict["ID"].append(post.id)

    # Score of the post
    posts_dict["Score"].append(post.score)

    # Total number of comments on the post
    posts_dict["Total Comments"].append(post.num_comments)

    # Creation time as an epoch timestamp (converted to datetime below)
    posts_dict["Created On"].append(post.created_utc)

    # URL of the post
    posts_dict["Post URL"].append(post.url)

    # Whether the post is marked as original content
    posts_dict["Original Content"].append(post.is_original_content)

    # Edited check for the post
    posts_dict["Edited"].append(post.edited)

    # Saved check for the post
    posts_dict["Saved"].append(post.saved)

# Saving the data in a pandas dataframe
all_posts = pd.DataFrame(posts_dict)
# created_utc is in seconds since the epoch, hence unit='s'.
all_posts['Created On'] = pd.to_datetime(all_posts['Created On'], unit='s')

这是我添加的用于收集 redditor 信息的行:

 # Author karma
 # NOTE: this is the line that raises the TypeError shown below. Two problems:
 #   1. post.author was None here, so reddit_read_only.redditor() received
 #      name=None and rejected it ("Exactly one of 'name', 'fullname', or
 #      '_data' must be provided.").
 #   2. `.comment_karma` sits outside the append(...) parentheses, so it is
 #      read from the return value of list.append() (always None) instead of
 #      from the Redditor.
 posts_dict["Author Karma"].append(reddit_read_only.redditor(post.author)).comment_karma

这是由此产生的错误:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[11], line 33
     30 posts_dict["Author"].append(post.author)
     32 # Author Karama
---> 33 posts_dict["Author Karma"].append(reddit_read_only.redditor(post.author)).comment_karma
     35 # Unique ID of each post
     36 posts_dict["ID"].append(post.id)

File ~\anaconda3\lib\site-packages\praw\util\deprecate_args.py:43, in _deprecate_args.<locals>.wrapper.<locals>.wrapped(*args, **kwargs)
     36     arg_string = _generate_arg_string(_old_args[: len(args)])
     37     warn(
     38         f"Positional arguments for {func.__qualname__!r} will no longer be"
     39         f" supported in PRAW 8.\nCall this function with {arg_string}.",
     40         DeprecationWarning,
     41         stacklevel=2,
     42     )
---> 43 return func(**dict(zip(_old_args, args)), **kwargs)

File ~\anaconda3\lib\site-packages\praw\reddit.py:908, in Reddit.redditor(self, name, fullname)
    896 @_deprecate_args("name", "fullname")
    897 def redditor(
    898     self, name: Optional[str] = None, *, fullname: Optional[str] = None
    899 ) -> "praw.models.Redditor":
    900     """Return a lazy instance of :class:`.Redditor`.
    901 
    902     :param name: The name of the redditor.
   (...)
    906 
    907     """
--> 908     return models.Redditor(self, name=name, fullname=fullname)

File ~\anaconda3\lib\site-packages\praw\models\reddit\redditor.py:156, in Redditor.__init__(self, reddit, name, fullname, _data)
    146 """Initialize a :class:`.Redditor` instance.
    147 
    148 :param reddit: An instance of :class:`.Reddit`.
   (...)
    153 
    154 """
    155 if (name, fullname, _data).count(None) != 2:
--> 156     raise TypeError(
    157         "Exactly one of 'name', 'fullname', or '_data' must be provided."
    158     )
    159 if _data:
    160     assert (
    161         isinstance(_data, dict) and "name" in _data
    162     ), "Please file a bug with PRAW."

TypeError: Exactly one of 'name', 'fullname', or '_data' must be provided.
python pandas web-scraping reddit praw
© www.soinside.com 2019 - 2024. All rights reserved.