我目前正在做一个项目,想进一步了解人们在 Instagram 公开帖子下的评论内容。我已经尝试过 instaloader 模块,但没能让它正常运行。有人知道该如何解决这个问题吗?我不想下载用户的完整个人资料,只希望能把帖子的链接粘贴到代码中,然后获取该帖子下的评论。
非常感谢任何帮助! :)
import instaloader
# Replace 'username' and 'password' with your Instagram credentials.
username = 'username'
password = 'password'

# Replace with the shortcode of the Instagram post you want to scrape
# comments from.
post_shortcode = 'ABCDEF12345'

L = instaloader.Instaloader()

# Log in to Instagram, reusing a previously saved session when one exists.
try:
    L.load_session_from_file(username)
except FileNotFoundError:
    # Session file not found: log in with credentials and save the session
    # so the next run can skip the login. Instaloader.login() is the login
    # entry point; the previous `L.context.log_in(...)` call is not part of
    # the Instaloader API.
    L.login(username, password)
    L.save_session_to_file()

# Get comments for the post.
post = instaloader.Post.from_shortcode(L.context, post_shortcode)

# get_comments() can hand back a one-shot iterator. Materialize it once so
# that both lists below are built from the same, complete set of comments
# (iterating the raw result twice would leave the second list empty).
comments = list(post.get_comments())

# Extract usernames and comment texts.
usernames = [comment.owner.username for comment in comments]
comment_texts = [comment.text for comment in comments]

# Print or process the extracted data as needed. The loop variable is named
# `commenter` so it does not overwrite the login `username` above.
for commenter, comment_text in zip(usernames, comment_texts):
    print(f"{commenter}: {comment_text}")

# No explicit logout is performed; the saved session file is reused on the
# next run.
使用下面的代码,我只能获取 Instagram 帖子的一部分评论(大约 100 条),但我希望能获取该帖子的全部评论:
def scrape_post(url_or_shortcode: str) -> str:
    """Build the GraphQL query URL for a single Instagram post.

    Despite its name, this function performs no network request: it derives
    the post shortcode from its argument and returns the URL to fetch.

    Args:
        url_or_shortcode: Either a bare shortcode, or a post URL in which
            the shortcode follows a ``p``, ``reel``, ``reels`` or ``tv``
            path segment. Query strings and fragments are ignored.

    Returns:
        The ``https://www.instagram.com/graphql/query/`` URL carrying the
        query hash and the URL-encoded JSON variables.

    Raises:
        ValueError: If the argument looks like a URL but contains no
            recognizable post path segment.
    """
    if "/" in url_or_shortcode:
        # A bare shortcode is assumed not to contain "/", so a slash means
        # we were handed a URL (with or without the scheme).
        path = url_or_shortcode.split("?", 1)[0].split("#", 1)[0]
        segments = [part for part in path.split("/") if part]
        shortcode = None
        for marker, candidate in zip(segments, segments[1:]):
            if marker in ("p", "reel", "reels", "tv"):
                # Keep the last match, mirroring the previous
                # split("/p/")[-1] behavior.
                shortcode = candidate
        if shortcode is None:
            raise ValueError(
                f"could not find a post shortcode in: {url_or_shortcode!r}"
            )
    else:
        shortcode = url_or_shortcode
    print(f"scraping instagram post: {shortcode}")

    # The *_count values are upper bounds requested from the server.
    # NOTE(review): the server may return fewer comments than requested;
    # fetching every comment likely requires cursor-based pagination.
    variables = {
        "shortcode": shortcode,
        "child_comment_count": 10000,
        "fetch_comment_count": 10000,
        "parent_comment_count": 10000,
        "has_threaded_comments": True,
    }
    query_hash = "b3055c01b4b222b8a47dc12b090e4e64"
    encoded_variables = quote(json.dumps(variables))
    url = f"https://www.instagram.com/graphql/query/?query_hash={query_hash}&variables={encoded_variables}"
    return url
def configure_driver():
    """Create and return a Chrome WebDriver.

    The browser is started with the ``--ignore-certificate-errors``
    command-line argument.
    """
    opts = Options()
    opts.add_argument('--ignore-certificate-errors')
    return webdriver.Chrome(options=opts)
def save_in_file(data, file_name):
    """Serialize *data* as indented JSON into the file *file_name*.

    The file is opened in write mode with UTF-8 encoding, so any existing
    content is replaced. Non-ASCII characters are written as-is instead of
    being escaped.
    """
    # Write the results to the text file.
    with open(file_name, mode="w", encoding="utf-8") as out:
        json.dump(data, out, ensure_ascii=False, indent=2)
def analyze_content(content):
    """Extract and decode the JSON payload embedded in an HTML page.

    The JSON text is read from the first ``<pre>`` element of the page.

    Args:
        content: HTML source of the page, as passed to BeautifulSoup.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If the page has no ``<pre>`` element, or if its text is
            not valid JSON (``json.JSONDecodeError`` is a ``ValueError``
            subclass).
    """
    soup = BeautifulSoup(content, "html.parser")
    pre_tag = soup.find("pre")
    if pre_tag is None:
        # find() returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on `.text`.
        raise ValueError(
            "no <pre> element found in page content; expected a JSON response"
        )
    return json.loads(pre_tag.text)
INSTAGRAM_APP_ID = "936619743392459"  # this is the public app id for instagram.com
url = 'https://www.instagram.com/p/C2SGS95NQH_/'

driver = configure_driver()
try:
    # Open the post first so the user can log in inside the browser window.
    driver.get(url)
    # Pause until the user presses Enter; the typed answer is not inspected.
    wait_for_login = input('continue? y/n')
    # Load the GraphQL query URL in the same browser session and grab the
    # rendered page source.
    driver.get(scrape_post(url))
    content = driver.page_source
finally:
    # Always close the browser, even if a step above fails, so no Chrome
    # process is left running.
    driver.quit()

parsed_data = analyze_content(content)
save_in_file(parsed_data, 'conteudo.txt')