From da18780b98a84d8a3257b23e8e33bb96c10af556 Mon Sep 17 00:00:00 2001 From: David Wicker Date: Mon, 18 Jan 2021 23:00:02 +0100 Subject: [PATCH] Fixed GraphQL issues --- README.md | 19 ++++- instaclient/client/constants.py | 8 +- instaclient/client/scraper.py | 138 ++++++++------------------------ 3 files changed, 56 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index 92dcd24..d7e03db 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,18 @@ # instaclient +--- +

+ + PyPi + + + Downloads + + + GitHub license + + + GitHub Repo Size +

**instaclient** is a Python library for accessing Instagram's features. With this library you can create Instagram Bots with ease and simplicity. The InstaClient takes advantage of the selenium library to execute tasks which are not allowed in the Instagram Graph API (such as sending DMs). @@ -16,11 +30,12 @@ The only thing you need to worry about is to spread your requests throughout the 6. [Help - Community](#help-community) 7. [Credits](#credits) 8. [License](#license) +--- ## Features - Scraping - Scrape a user's followers (Via scrolling or with GraphQL) - - Scraoe a user's following (Via scrolling or with GraphQL) + - Scrape a user's following (Via scrolling or with GraphQL) - Scrape a Hashtag - Scrape a Location - Scrape a Profile @@ -48,6 +63,8 @@ The only thing you need to worry about is to spread your requests throughout the - [x] Like post by shorturl - [x] Unlike post by shorturl - [x] Scrape Location +- [ ] Save cookies +- [ ] Share/Forward a post - [ ] Scrape explore page - [ ] Upload posts - [ ] Scrape feed diff --git a/instaclient/client/constants.py b/instaclient/client/constants.py index f1eb902..7eff800 100644 --- a/instaclient/client/constants.py +++ b/instaclient/client/constants.py @@ -118,11 +118,11 @@ class GraphUrls: GRAPH_SEARCH = 'https://www.instagram.com/web/search/topsearch/?query={}' GRAPH_LOCATION = 'https://www.instagram.com/explore/locations/{}/{}/?__a=1' - GRAPH_FIRST_FOLLOWERS = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Atrue%2C%22first%22%3A24%7D' - GRAPH_CURSOR_FOLLOWERS = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A12%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D' + GRAPH_FIRST_FOLLOWERS = 
'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Atrue%2C%22first%22%3A50%7D' + GRAPH_CURSOR_FOLLOWERS = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A50%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D' - GRAPH_FIRST_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A24%7D' - GRAPH_CURSOR_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A12%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D' + GRAPH_FIRST_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A50%7D' + GRAPH_CURSOR_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A50%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D' class QueryHashes: diff --git a/instaclient/client/scraper.py b/instaclient/client/scraper.py index 8f8f0c3..7262745 100644 --- a/instaclient/client/scraper.py +++ b/instaclient/client/scraper.py @@ -67,7 +67,7 @@ def get_notifications(self:'InstaClient', types:list=None, count:int=None) -> Op return notifications - @Component._driver_required + @Component._login_required def get_profile(self:'InstaClient', username:str, context:bool=True) -> Optional['Profile']: if context and not self.logged_in and None not in (self.username, self.password): @@ -376,7 +376,8 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de 
Args: user (str): User to scrape - count (int): Number of followers to scrape + count (int): Number of followers to scrape. Insert None + to scrape all of the profile's followers. use_api (bool): If set to True, the instaclient module will take advantage of instagram graphql requests to scrape followers. Defaults to False. callback_frequency (int, optional): Number of scraped followers between updates @@ -395,6 +396,9 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de if not profile: raise InvalidUserError(user) + if not count: + count = profile.follower_count + followers = list() failed = list() last_callback = 0 @@ -463,16 +467,25 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de except Exception as error: LOGGER.error('ERROR IN SCRAPING FOLLOWERS', exc_info=error) else: + requests = 1 request = GraphUrls.GRAPH_FIRST_FOLLOWERS.format(QUERY_HASH=QueryHashes.FOLLOWERS_HASH, ID=profile.id) looping = True + stopping = False while looping: result = self._request(request, use_driver=True) + requests += 1 if not result: break status = result.get('status') if not status == 'ok': + if result.get('message') == 'rate limited': + if stopping: + break + LOGGER.debug('Waiting 120 seconds') + time.sleep(120) + continue break data = result['data']['user']['edge_followed_by'] @@ -498,6 +511,7 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de followers.append(follower) if len(followers) % callback_frequency == 0: + LOGGER.debug(f'Requests made: {requests}') if callable(callback): LOGGER.debug('Called Callback') callback(scraped = followers, **callback_args) @@ -512,9 +526,9 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de cursor = page_info['end_cursor'].replace('==', '') request = GraphUrls.GRAPH_CURSOR_FOLLOWERS.format(QUERY_HASH=QueryHashes.FOLLOWERS_HASH, ID=profile.id, END_CURSOR=cursor) continue - - end = time.time() # TODO + 
LOGGER.debug(f'Requests made: {requests}') + LOGGER.info(f'Scraped Followers: Total: {len(followers)}') if not deep_scrape: @@ -542,7 +556,8 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de Args: user (str): User to scrape - count (int): Number of followers to scrape + count (int): Number of followers to scrape. Insert + None to get all of the profile's following. use_api (bool): If set to True, the instaclient module will take advantage of instagram graphql requests to scrape followers. Defaults to False. callback_frequency (int, optional): Number of scraped followers between updates @@ -561,6 +576,9 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de if not profile: raise InvalidUserError(user) + if not count: + count = profile.followed_count + following = list() failed = list() last_callback = 0 @@ -629,16 +647,25 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de except Exception as error: LOGGER.error('ERROR IN SCRAPING FOLLOWERS', exc_info=error) else: + requests = 1 request = GraphUrls.GRAPH_FIRST_FOLLOWING.format(QUERY_HASH=QueryHashes.FOLLOWING_HASH, ID=profile.id) looping = True + stopping = False while looping: result = self._request(request, use_driver=True) + requests += 1 if not result: break status = result.get('status') if not status == 'ok': + if result.get('message') == 'rate limited': + if stopping: + break + LOGGER.debug('Waiting 120 seconds') + time.sleep(120) + continue break data = result['data']['user']['edge_follow'] @@ -678,9 +705,9 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de cursor = page_info['end_cursor'].replace('==', '') request = GraphUrls.GRAPH_CURSOR_FOLLOWING.format(QUERY_HASH=QueryHashes.FOLLOWING_HASH, ID=profile.id, END_CURSOR=cursor) continue - - end = time.time() # TODO + LOGGER.debug(f'Requests made: {requests}') + LOGGER.info(f'Scraped Followers: Total: {len(following)}') if not 
deep_scrape: @@ -701,103 +728,6 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de LOGGER.warning(f'Failed: {len(failed)}') return profiles - """Scrape an instagram user's following. - - Args: - user (str): User to scrape - count (int): Number of followers to scrape - check_user (bool, optional): If set to True, checks if the `user` is a valid instagram username. Defaults to True. - callback_frequency (int, optional): Number of scraped followers between updates - callback (function): Function with no parameters that gets called with the frequency set by ``callback_frequency``. This method must take a ``scraped`` argument. - - Returns: - Optional[Union[List[Profile], List[str]]]: List of instagram usernames or of instagram profile objects. - - Raises: - NotLoggedInError: Raised if you are not logged into any account - InvalidUserError: Raised if the user is invalid - PrivateAccountError: Raised if the user is a private account - NoSuchElementException: Raised if an element is not found when compiling operation. - """ - self._nav_user(user, check_user=check_user) - following_btn:WebElement = self._find_element(EC.presence_of_element_located((By.XPATH, Paths.FOLLOWED_BTN)), url=ClientUrls.NAV_USER.format(user)) - # Click followers btn - self._press_button(following_btn) - time.sleep(2) - LOGGER.debug(f'Got Following page for <{user}>') - - following = list() - failed = list() - last_callback = 0 - finished_warning = False - - start = time.time() # TODO - - try: - while len(following) < count: - loop = time.time() # TODO - LOGGER.debug(f'Starting Scrape Loop. 
Followers: {len(following)}') - - scraped_count = len(following) - divs = self._find_element(EC.presence_of_all_elements_located((By.XPATH, Paths.FOLLOWER_USER_DIV)), wait_time=2) - - got_elements = time.time() # TODO - LOGGER.debug(f'Got Divs in {got_elements - loop}') - - new = 0 - for div in divs: - try: - username = div.text.split('\n')[0] - if username not in following and username not in('Follow',) and len(following) < count: - following.append(username) - new += 1 - - if (last_callback + new) % callback_frequency == 0: - if callable(callback): - LOGGER.debug('Called Callback') - callback(scraped = following, **callback_args) - - except: - failed.append(div) - pass - - if len(following) >= count: - break - - if not finished_warning and len(following) == scraped_count: - LOGGER.info('Detected End of Followers Page') - finished_warning = True - time.sleep(3) - elif finished_warning: - LOGGER.info('Finished Followers') - break - else: - finished_warning = False - - LOGGER.debug('Scroll') - self.scroll(mode=self.END_PAGE_SCROLL, times=2, interval=1) - except Exception as error: - LOGGER.error('ERROR IN SCRAPING FOLLOWERS', exc_info=error) - - - end = time.time() # TODO - LOGGER.info(f'Scraped Followers: Total: {len(following)}') - - if not deep_scrape: - return following - else: - LOGGER.info('Deep scraping profiles...') - # For every shortlink, scrape Post - profiles = list() - for index, follower in enumerate(following): - try: - LOGGER.debug(f'Deep scraped {index} profiles out of {len(following)}') - profiles.append(self.get_profile(follower)) - except: - failed.append(follower) - LOGGER.warning(f'Failed: {len(failed)}') - return profiles - # SCRAPE HASHTAG @Component._driver_required