Skip to content

Commit

Permalink
Fixed GraphQL issues
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwickerhf committed Jan 18, 2021
1 parent c3cc3a1 commit da18780
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 109 deletions.
19 changes: 18 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
# instaclient
---
<p>
<a href="https://pypi.org/project/instaclient/">
<img alt="PyPi" src="https://img.shields.io/pypi/v/instaclient.svg"/>
</a>
<a href="https://pepy.tech/project/instaclient">
<img alt="Downloads" src="https://pepy.tech/badge/instaclient"/>
</a>
<a href="https://github.com/wickerdevs/instaclient/blob/master/LICENSE.txt">
<img alt="GitHub license" src="https://img.shields.io/github/license/wickerdevs/instaclient?style=plastic"/>
  </a>
  <img alt="GitHub Repo Size" src="https://img.shields.io/github/repo-size/wickerdevs/instaclient"/>
</p>

**instaclient** is a Python library for accessing Instagram's features.
With this library you can create Instagram Bots with ease and simplicity. The InstaClient takes advantage of the selenium library to execute tasks which are not allowed in the Instagram Graph API (such as sending DMs).
Expand All @@ -16,11 +30,12 @@ The only thing you need to worry about is to spread your requests throughout the
6. [Help - Community](#help-community)
7. [Credits](#credits)
8. [License](#license)
---

## Features
- Scraping
- Scrape a user's followers (Via scrolling or with GraphQL)
- Scraoe a user's following (Via scrolling or with GraphQL)
- Scrape a user's following (Via scrolling or with GraphQL)
- Scrape a Hashtag
- Scrape a Location
- Scrape a Profile
Expand Down Expand Up @@ -48,6 +63,8 @@ The only thing you need to worry about is to spread your requests throughout the
- [x] Like post by shorturl
- [x] Unlike post by shorturl
- [x] Scrape Location
- [ ] Save cookies
- [ ] Share/Forward a post
- [ ] Scrape explore page
- [ ] Upload posts
- [ ] Scrape feed
Expand Down
8 changes: 4 additions & 4 deletions instaclient/client/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,11 +118,11 @@ class GraphUrls:
GRAPH_SEARCH = 'https://www.instagram.com/web/search/topsearch/?query={}'
GRAPH_LOCATION = 'https://www.instagram.com/explore/locations/{}/{}/?__a=1'

GRAPH_FIRST_FOLLOWERS = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Atrue%2C%22first%22%3A24%7D'
GRAPH_CURSOR_FOLLOWERS = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A12%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D'
GRAPH_FIRST_FOLLOWERS = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Atrue%2C%22first%22%3A50%7D'
GRAPH_CURSOR_FOLLOWERS = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A50%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D'

GRAPH_FIRST_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A24%7D'
GRAPH_CURSOR_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A12%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D'
GRAPH_FIRST_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A50%7D'
GRAPH_CURSOR_FOLLOWING = 'https://www.instagram.com/graphql/query/?query_hash={QUERY_HASH}8&variables=%7B%22id%22%3A%22{ID}%22%2C%22include_reel%22%3Atrue%2C%22fetch_mutual%22%3Afalse%2C%22first%22%3A50%2C%22after%22%3A%22{END_CURSOR}%3D%3D%22%7D'


class QueryHashes:
Expand Down
138 changes: 34 additions & 104 deletions instaclient/client/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def get_notifications(self:'InstaClient', types:list=None, count:int=None) -> Op
return notifications


@Component._driver_required
@Component._login_required
def get_profile(self:'InstaClient', username:str, context:bool=True) -> Optional['Profile']:

if context and not self.logged_in and None not in (self.username, self.password):
Expand Down Expand Up @@ -376,7 +376,8 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de
Args:
user (str): User to scrape
count (int): Number of followers to scrape
count (int): Number of followers to scrape. Insert None
to scrape all of the profile's followers.
use_api (bool): If set to True, the instaclient module will take advantage
of instagram graphql requests to scrape followers. Defaults to False.
callback_frequency (int, optional): Number of scraped followers between updates
Expand All @@ -395,6 +396,9 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de
if not profile:
raise InvalidUserError(user)

if not count:
count = profile.follower_count

followers = list()
failed = list()
last_callback = 0
Expand Down Expand Up @@ -463,16 +467,25 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de
except Exception as error:
LOGGER.error('ERROR IN SCRAPING FOLLOWERS', exc_info=error)
else:
requests = 1
request = GraphUrls.GRAPH_FIRST_FOLLOWERS.format(QUERY_HASH=QueryHashes.FOLLOWERS_HASH, ID=profile.id)
looping = True
stopping = False
while looping:
result = self._request(request, use_driver=True)
requests += 1

if not result:
break

status = result.get('status')
if not status == 'ok':
if result.get('message') == 'rate limited':
if stopping:
break
LOGGER.debug('Waiting 120 seconds')
time.sleep(120)
continue
break

data = result['data']['user']['edge_followed_by']
Expand All @@ -498,6 +511,7 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de
followers.append(follower)

if len(followers) % callback_frequency == 0:
LOGGER.debug(f'Requests made: {requests}')
if callable(callback):
LOGGER.debug('Called Callback')
callback(scraped = followers, **callback_args)
Expand All @@ -512,9 +526,9 @@ def get_followers(self:'InstaClient', user:str, count:int, use_api:bool=True, de
cursor = page_info['end_cursor'].replace('==', '')
request = GraphUrls.GRAPH_CURSOR_FOLLOWERS.format(QUERY_HASH=QueryHashes.FOLLOWERS_HASH, ID=profile.id, END_CURSOR=cursor)
continue


end = time.time() # TODO
LOGGER.debug(f'Requests made: {requests}')

LOGGER.info(f'Scraped Followers: Total: {len(followers)}')

if not deep_scrape:
Expand Down Expand Up @@ -542,7 +556,8 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de
Args:
user (str): User to scrape
count (int): Number of followers to scrape
count (int): Number of followers to scrape. Insert
None to get all of the profile's following.
use_api (bool): If set to True, the instaclient module will take advantage
of instagram graphql requests to scrape followers. Defaults to False.
callback_frequency (int, optional): Number of scraped followers between updates
Expand All @@ -561,6 +576,9 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de
if not profile:
raise InvalidUserError(user)

if not count:
count = profile.followed_count

following = list()
failed = list()
last_callback = 0
Expand Down Expand Up @@ -629,16 +647,25 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de
except Exception as error:
LOGGER.error('ERROR IN SCRAPING FOLLOWERS', exc_info=error)
else:
requests = 1
request = GraphUrls.GRAPH_FIRST_FOLLOWING.format(QUERY_HASH=QueryHashes.FOLLOWING_HASH, ID=profile.id)
looping = True
stopping = False
while looping:
result = self._request(request, use_driver=True)
requests += 1

if not result:
break

status = result.get('status')
if not status == 'ok':
if result.get('message') == 'rate limited':
if stopping:
break
LOGGER.debug('Waiting 120 seconds')
time.sleep(120)
continue
break

data = result['data']['user']['edge_follow']
Expand Down Expand Up @@ -678,9 +705,9 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de
cursor = page_info['end_cursor'].replace('==', '')
request = GraphUrls.GRAPH_CURSOR_FOLLOWING.format(QUERY_HASH=QueryHashes.FOLLOWING_HASH, ID=profile.id, END_CURSOR=cursor)
continue


end = time.time() # TODO
LOGGER.debug(f'Requests made: {requests}')

LOGGER.info(f'Scraped Followers: Total: {len(following)}')

if not deep_scrape:
Expand All @@ -701,103 +728,6 @@ def get_following(self:'InstaClient', user:str, count:int, use_api:bool=True, de
LOGGER.warning(f'Failed: {len(failed)}')
return profiles

"""Scrape an instagram user's following.
Args:
user (str): User to scrape
count (int): Number of followers to scrape
check_user (bool, optional): If set to True, checks if the `user` is a valid instagram username. Defaults to True.
callback_frequency (int, optional): Number of scraped followers between updates
callback (function): Function with no parameters that gets called with the frequency set by ``callback_frequency``. This method must take a ``scraped`` argument.
Returns:
Optional[Union[List[Profile], List[str]]]: List of instagram usernames or of instagram profile objects.
Raises:
NotLoggedInError: Raised if you are not logged into any account
InvalidUserError: Raised if the user is invalid
PrivateAccountError: Raised if the user is a private account
NoSuchElementException: Raised if an element is not found when compiling operation.
"""
self._nav_user(user, check_user=check_user)
following_btn:WebElement = self._find_element(EC.presence_of_element_located((By.XPATH, Paths.FOLLOWED_BTN)), url=ClientUrls.NAV_USER.format(user))
# Click followers btn
self._press_button(following_btn)
time.sleep(2)
LOGGER.debug(f'Got Following page for <{user}>')

following = list()
failed = list()
last_callback = 0
finished_warning = False

start = time.time() # TODO

try:
while len(following) < count:
loop = time.time() # TODO
LOGGER.debug(f'Starting Scrape Loop. Followers: {len(following)}')

scraped_count = len(following)
divs = self._find_element(EC.presence_of_all_elements_located((By.XPATH, Paths.FOLLOWER_USER_DIV)), wait_time=2)

got_elements = time.time() # TODO
LOGGER.debug(f'Got Divs in {got_elements - loop}')

new = 0
for div in divs:
try:
username = div.text.split('\n')[0]
if username not in following and username not in('Follow',) and len(following) < count:
following.append(username)
new += 1

if (last_callback + new) % callback_frequency == 0:
if callable(callback):
LOGGER.debug('Called Callback')
callback(scraped = following, **callback_args)

except:
failed.append(div)
pass

if len(following) >= count:
break

if not finished_warning and len(following) == scraped_count:
LOGGER.info('Detected End of Followers Page')
finished_warning = True
time.sleep(3)
elif finished_warning:
LOGGER.info('Finished Followers')
break
else:
finished_warning = False

LOGGER.debug('Scroll')
self.scroll(mode=self.END_PAGE_SCROLL, times=2, interval=1)
except Exception as error:
LOGGER.error('ERROR IN SCRAPING FOLLOWERS', exc_info=error)


end = time.time() # TODO
LOGGER.info(f'Scraped Followers: Total: {len(following)}')

if not deep_scrape:
return following
else:
LOGGER.info('Deep scraping profiles...')
# For every shortlink, scrape Post
profiles = list()
for index, follower in enumerate(following):
try:
LOGGER.debug(f'Deep scraped {index} profiles out of {len(following)}')
profiles.append(self.get_profile(follower))
except:
failed.append(follower)
LOGGER.warning(f'Failed: {len(failed)}')
return profiles


# SCRAPE HASHTAG
@Component._driver_required
Expand Down

0 comments on commit da18780

Please sign in to comment.