在 Python 中获取 Instagram 的粉丝(followers)和关注(following)列表

问题描述 投票:0回答:1

我有一段代码用于获取 Instagram 的粉丝(followers)和关注(following)列表,最后会生成一份对比列表,但有一个问题:它通常无法获取全部的粉丝和关注对象,例如实际有 45 个粉丝,却只能获取到 41 或 42 个。我该如何避免这种情况?下面是我的代码;其中的 if 条件是用于线程控制的,请忽略它们。

 def compare_profile(self,profile):
        """Scrape a profile's followers and following lists and export a comparison.

        Loads the followers and following dialogs of ``profile`` in
        ``self.driver``, scrolls each one to the end, and writes a four-column
        table (followers, following, and the two set differences) to
        ``{profile}.xlsx``. A message box reports whether the run finished or
        was stopped.

        ``self.should_stop`` is reset to False on entry and polled between
        steps; presumably another thread sets it to request an abort.

        Args:
            profile: Instagram username whose lists are compared.

        Raises:
            Whatever the driver raises when the page, the dialog panel, or the
            user rows cannot be found; only the best-effort waits inside the
            scroll loop are tolerated.
        """
        self.should_stop = False
        print("Profil analiz fonksiyonu başladı.")
        self._export_follow_comparison(profile)
        if self.should_stop:
            mbox.showinfo("Stopped", "The bot has stopped.")
        else:
            mbox.showinfo("Function Finished", "Function successfully finished.")

 def _export_follow_comparison(self, profile):
        """Collect both lists for ``profile`` and write the comparison sheet.

        Returns early, without writing anything, as soon as
        ``self.should_stop`` is seen to be set.
        """
        followers_panel_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[2]'
        following_panel_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]'

        list_of_followers = self._collect_usernames(profile, 'followers', followers_panel_xpath)
        if list_of_followers is None:
            return
        time.sleep(5)
        print("Takipçiler çekildi")
        print("Toplam takipçi: ",len(list_of_followers))
        if self.should_stop:
            return

        list_of_followings = self._collect_usernames(profile, 'following', following_panel_xpath)
        if list_of_followings is None:
            return
        print("Takip edilenler çekildi")
        print("Toplam takip edilen: ",len(list_of_followings))
        if self.should_stop:
            return

        # Build both set differences before padding, because padding appends ''.
        yoursnofollow = list(set(list_of_followers) - set(list_of_followings))
        theynofollow = list(set(list_of_followings) - set(list_of_followers))

        # pandas needs equally long columns: pad the shorter ones with ''.
        columns = (list_of_followers, list_of_followings, theynofollow, yoursnofollow)
        max_len = max(len(column) for column in columns)
        for column in columns:
            column.extend([''] * (max_len - len(column)))
        if self.should_stop:
            return

        tablo = {'Takipçiler': list_of_followers, 'Takip Ettiklerin': list_of_followings,
                 'Seni Takip Etmeyenler': theynofollow, 'Senin Takip Etmediklerin': yoursnofollow}
        df = pd.DataFrame(data=tablo)
        print(df)
        df.to_excel(f"{profile}.xlsx")
        self.driver.get('https://www.instagram.com/')

 def _collect_usernames(self, profile, list_path, panel_xpath):
        """Open one list dialog of ``profile`` and return the visible row texts.

        Args:
            profile: Instagram username.
            list_path: URL segment of the dialog, ``'followers'`` or ``'following'``.
            panel_xpath: Absolute XPath of the scrollable dialog panel.

        Returns:
            A list with the ``.text`` of every matched user row, or None when
            ``self.should_stop`` was set before the list was complete.
        """
        # Generated class list that identifies one user row in the dialog.
        row_xpath = '(//div[@class="x1i10hfl x1qjc9v5 xjbqb8w xjqpnuy xa49m3k xqeqjp1 x2hbi6w x13fuv20 xu3j5b3 x1q0q8m5 x26u7qi x972fbf xcfux6l x1qhh985 xm0m39n x9f619 x1ypdohk xdl72j9 x2lah0s xe8uvvx xdj266r x11i5rnm xat24cr x1mh8g0r x2lwn1j xeuugli xexx8yu x4uap5 x18d9i69 xkhd6sd x1n2onr6 x16tdsg8 x1hl2dhg xggy1nq x1ja2u2z x1t137rt x1q0g3np x87ps6o x1lku1pv x1a2a7pz xh8yej3 x193iq5w x1lliihq x1dm5mii x16mil14 xiojian x1yutycm"])'

        self.driver.get(f'https://www.instagram.com/{profile}/')
        if self.should_stop:
            return None
        time.sleep(10)
        if self.should_stop:
            return None
        self.driver.get(f'https://www.instagram.com/{profile}/{list_path}/')
        if self.should_stop:
            return None
        time.sleep(10)

        panel = self.driver.find_element(By.XPATH, panel_xpath)
        if not self._scroll_panel_to_end(panel, row_xpath):
            return None

        WebDriverWait(self.driver, 45).until(EC.presence_of_all_elements_located((By.XPATH, row_xpath)))
        if self.should_stop:
            return None
        usernames = [row.text for row in self.driver.find_elements(By.XPATH, row_xpath)]
        if self.should_stop:
            return None
        return usernames

 def _scroll_panel_to_end(self, panel, row_xpath, stable_reads_needed=2):
        """Scroll ``panel`` to the bottom until its height stops growing.

        The height read right after a scroll reflects what the previous pass
        loaded, so a single unchanged reading can simply mean the next batch
        is still loading. The loop therefore ends only after
        ``stable_reads_needed`` consecutive unchanged readings.

        Args:
            panel: The scrollable dialog element.
            row_xpath: XPath of the user rows awaited after every pass.
            stable_reads_needed: Consecutive unchanged ``scrollHeight``
                readings required before the list counts as fully loaded.

        Returns:
            True when the end was reached, False when ``self.should_stop``
            interrupted the scrolling.
        """
        last_height = 0
        stable_reads = 0
        while stable_reads < stable_reads_needed:
            if self.should_stop:
                return False
            height = self.driver.execute_script(""" arguments[0].scrollTo(0, arguments[0].scrollHeight);return arguments[0].scrollHeight; """,panel)
            stable_reads = stable_reads + 1 if height == last_height else 0
            last_height = height
            try:
                if self.should_stop:
                    return False
                WebDriverWait(self.driver, 15).until(EC.visibility_of_element_located((By.XPATH, '//div[@class="_aanq"]')))
                if self.should_stop:
                    return False
                # NOTE(review): this value contains spaces, but By.CLASS_NAME is
                # documented to take a single class name, so this wait probably
                # never matches the intended element. Left unchanged; confirm
                # the loading indicator's real locator before tightening it.
                WebDriverWait(self.driver, 60).until(EC.invisibility_of_element_located((By.CLASS_NAME, "_ab8w  _ab94 _ab97 _ab9f _ab9m _ab9p  _abc0 _abcm")))
                if self.should_stop:
                    return False
                WebDriverWait(self.driver, 45).until(EC.presence_of_all_elements_located((By.XPATH, '(//div[@class="_aano"])')))
            except Exception:
                # These waits are best-effort: on a timeout or lookup error,
                # pause and keep scrolling. KeyboardInterrupt and SystemExit
                # are no longer swallowed here.
                if self.should_stop:
                    return False
                time.sleep(random.randint(6,10))
            if self.should_stop:
                return False
            time.sleep(random.randint(4,8))
            if self.should_stop:
                return False
            WebDriverWait(self.driver, 45).until(EC.presence_of_all_elements_located((By.XPATH, row_xpath)))
        return True
selenium-webdriver web-scraping instagram instagram-api
1个回答
0
投票

使用 Instagram。 我也面临这个问题,但经过一番研究后我明白了。

© www.soinside.com 2019 - 2024. All rights reserved.