From ada6e874a49c8c87739bfdf658388d14133769a9 Mon Sep 17 00:00:00 2001 From: ArshansGithub <111618520+ArshansGithub@users.noreply.github.com> Date: Fri, 12 May 2023 15:12:16 -0700 Subject: [PATCH 1/3] Update __init__.py added feature for GOOGLE_ABUSE_EXEMPTION cookie which bypasses the captcha presented by google --- yagooglesearch/__init__.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/yagooglesearch/__init__.py b/yagooglesearch/__init__.py index 9c7723d..ae9526e 100644 --- a/yagooglesearch/__init__.py +++ b/yagooglesearch/__init__.py @@ -86,6 +86,7 @@ def __init__( verify_ssl=True, verbosity=5, verbose_output=False, + google_exemption=None ): """ @@ -118,7 +119,8 @@ def __init__( This may need to be disabled in some HTTPS proxy instances. :param int verbosity: Logging and console output verbosity. :param bool verbose_output: False (only URLs) or True (rank, title, description, and URL). Defaults to False. - + :param str google_exemption: Google cookie exemption string. This is a string that Google uses to allow certain google searches. Defaults to None + :rtype: List of str :return: List of URLs found or list of {"rank", "title", "description", "url"} """ @@ -142,6 +144,7 @@ def __init__( self.verify_ssl = verify_ssl self.verbosity = verbosity self.verbose_output = verbose_output + self.google_exemption = google_exemption # Assign log level. ROOT_LOGGER.setLevel((6 - self.verbosity) * 10) @@ -152,7 +155,10 @@ def __init__( self.num = 100 # Initialize cookies to None, will be updated with each request in get_page(). - self.cookies = None + if self.google_exemption: + self.cookies = {'GOOGLE_ABUSE_EXEMPTION': self.google_exemption} + else: + self.cookies = None # Used later to ensure there are not any URL parameter collisions. 
self.url_parameters = ( From 4be1c20e08a4038120f7f3b52e8b475330549451 Mon Sep 17 00:00:00 2001 From: opsdisk Date: Sat, 10 Jun 2023 14:38:01 -0500 Subject: [PATCH 2/3] Bumped requests version --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7463f32..c51383f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ beautifulsoup4>=4.9.3 -requests>=2.26.0 +requests>=2.31.0 requests[socks] From c4cac70dc977f44494241d03218aca5fee1114bf Mon Sep 17 00:00:00 2001 From: opsdisk Date: Sat, 10 Jun 2023 14:43:50 -0500 Subject: [PATCH 3/3] Minor fixes, formatting, and documentation updates --- README.md | 6 ++++++ yagooglesearch/__init__.py | 29 ++++++++++++++--------------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 59121e8..ae582dd 100644 --- a/README.md +++ b/README.md @@ -242,6 +242,11 @@ for search_query in search_queries: proxy_rotation_index += 1 ``` +## GOOGLE_ABUSE_EXEMPTION cookie + +If you have a `GOOGLE_ABUSE_EXEMPTION` cookie value, it can be passed into `google_exemption` when instantiating the +`SearchClient` object. + ## &tbs= URL filter clarification The `&tbs=` parameter is used to specify either verbatim or time-based filters. @@ -291,3 +296,4 @@ Project Link: [https://github.com/opsdisk/yagooglesearch](https://github.com/ops ## Contributors * [KennBro](https://github.com/KennBro) - +* [ArshansGithub](https://github.com/ArshansGithub) - diff --git a/yagooglesearch/__init__.py b/yagooglesearch/__init__.py index ae9526e..3ee141a 100644 --- a/yagooglesearch/__init__.py +++ b/yagooglesearch/__init__.py @@ -12,7 +12,7 @@ # Custom Python libraries. 
-__version__ = "1.6.1" +__version__ = "1.7.0" # Logging ROOT_LOGGER = logging.getLogger("yagooglesearch") @@ -86,9 +86,8 @@ def __init__( verify_ssl=True, verbosity=5, verbose_output=False, - google_exemption=None + google_exemption=None, ): - """ SearchClient :param str query: Query string. Must NOT be url-encoded. @@ -119,8 +118,9 @@ def __init__( This may need to be disabled in some HTTPS proxy instances. :param int verbosity: Logging and console output verbosity. :param bool verbose_output: False (only URLs) or True (rank, title, description, and URL). Defaults to False. - :param str google_exemption: Google cookie exemption string. This is a string that Google uses to allow certain google searches. Defaults to None - + :param str google_exemption: Google cookie exemption string. This is a string that Google uses to allow certain + google searches. Defaults to None. + :rtype: List of str :return: List of URLs found or list of {"rank", "title", "description", "url"} """ @@ -154,9 +154,10 @@ def __init__( ROOT_LOGGER.warning("The largest value allowed by Google for num is 100. Setting num to 100.") self.num = 100 - # Initialize cookies to None, will be updated with each request in get_page(). + # Populate cookies with GOOGLE_ABUSE_EXEMPTION if it is provided. Otherwise, initialize cookies to None. + # It will be updated with each request in get_page(). if self.google_exemption: - self.cookies = {'GOOGLE_ABUSE_EXEMPTION': self.google_exemption} + self.cookies = {"GOOGLE_ABUSE_EXEMPTION": self.google_exemption} else: self.cookies = None @@ -184,7 +185,6 @@ def __init__( # Update proxy_dict if a proxy is provided. if proxy: - # Standardize case since the scheme will be checked against a hard-coded list. 
self.proxy = proxy.lower() @@ -327,7 +327,12 @@ def get_page(self, url): ROOT_LOGGER.info(f"Requesting URL: {url}") response = requests.get( - url, proxies=self.proxy_dict, headers=headers, cookies=self.cookies, timeout=15, verify=self.verify_ssl + url, + proxies=self.proxy_dict, + headers=headers, + cookies=self.cookies, + timeout=15, + verify=self.verify_ssl, ) # Update the cookies. @@ -347,7 +352,6 @@ def get_page(self, url): # See https://github.com/benbusby/whoogle-search/issues/311 try: if response.cookies["CONSENT"].startswith("PENDING+"): - ROOT_LOGGER.warning( "Looks like your IP address is sourcing from a European Union location...your search results may " "vary, but I'll try and work around this by updating the cookie." @@ -387,7 +391,6 @@ def get_page(self, url): html = response.text elif http_response_code == 429: - ROOT_LOGGER.warning("Google is blocking your IP for making too many requests in a specific time period.") # Calling script does not want yagooglesearch to handle HTTP 429 cool off and retry. Just return a @@ -437,7 +440,6 @@ def search(self): # Loop until we reach the maximum result results found or there are no more search results found to reach # max_search_result_urls_to_return. while total_valid_links_found <= self.max_search_result_urls_to_return: - ROOT_LOGGER.info( f"Stats: start={self.start}, num={self.num}, total_valid_links_found={total_valid_links_found} / " f"max_search_result_urls_to_return={self.max_search_result_urls_to_return}" @@ -490,7 +492,6 @@ def search(self): # Process every anchored URL. for a in anchors: - # Get the URL from the anchor tag. try: link = a["href"] @@ -504,7 +505,6 @@ def search(self): continue if self.verbose_output: - # Extract the URL title. try: title = a.get_text() @@ -526,7 +526,6 @@ def search(self): # Check if URL has already been found. if link not in self.search_result_list: - # Increase the counters. valid_links_found_in_this_search += 1 total_valid_links_found += 1