From 70b81a967438325cacbb33a28ac511d654c3f29d Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 29 Aug 2024 10:53:02 +0000 Subject: [PATCH 01/12] added condition to check stream metadata --- tap_google_sheets/sync.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py index eb0942c..7b6597e 100644 --- a/tap_google_sheets/sync.py +++ b/tap_google_sheets/sync.py @@ -14,6 +14,8 @@ def sync(client, config, catalog, state): "sheets_loaded" & "sheet_metadata" -> get the data lists from the "spreadsheet_metadata" stream and sync the records if selected """ last_stream = singer.get_currently_syncing(state) + # preset to none + file_modified_time = None LOGGER.info("last/currently syncing stream: %s", last_stream) selected_streams = [] @@ -61,7 +63,8 @@ def sync(client, config, catalog, state): stream_obj.sync(catalog, state, sheets_loaded_records) # sync file metadata - elif stream_name == "file_metadata": + elif stream_name == "file_metadata" and "file_metadata" in selected_streams: + LOGGER.warning("This Stream might not work, please de-select if you face any issues") file_changed, file_modified_time = stream_obj.sync(catalog, state, selected_streams) if not file_changed: break @@ -70,4 +73,5 @@ def sync(client, config, catalog, state): # write "file_metadata" bookmark, as we have successfully synced all the sheet's records # it will force to re-sync of there is any interrupt between the sync - write_bookmark(state, 'file_metadata', strftime(file_modified_time)) + if file_modified_time: + write_bookmark(state, 'file_metadata', strftime(file_modified_time)) From 892d3f62be0d06c6cdc5b68d82fb182718144e2b Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Fri, 30 Aug 2024 07:06:19 +0000 Subject: [PATCH 02/12] Doc changes --- CHANGELOG.md | 3 +++ setup.py | 2 +- tap_google_sheets/sync.py | 11 ++++++++--- 3 files changed, 12 
insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7ab2220..e57631e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## 3.1.0 + * Updates Sync condition and exception handling for file_metadata stream[#96](https://github.com/singer-io/tap-google-sheets/pull/95) + ## 3.0.0 * Remove support for date datatype [#95](https://github.com/singer-io/tap-google-sheets/pull/95) diff --git a/setup.py b/setup.py index 3f42238..67f69b1 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import setup, find_packages setup(name='tap-google-sheets', - version='3.0.0', + version='3.1.0', description='Singer.io tap for extracting data from the Google Sheets v4 API', author='jeff.huth@bytecode.io', classifiers=['Programming Language :: Python :: 3 :: Only'], diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py index 7b6597e..c50259c 100644 --- a/tap_google_sheets/sync.py +++ b/tap_google_sheets/sync.py @@ -1,5 +1,6 @@ import singer from tap_google_sheets.streams import STREAMS, SheetsLoadData, write_bookmark, strftime +from tap_google_sheets.client import GoogleForbiddenError LOGGER = singer.get_logger() @@ -65,9 +66,13 @@ def sync(client, config, catalog, state): # sync file metadata elif stream_name == "file_metadata" and "file_metadata" in selected_streams: LOGGER.warning("This Stream might not work, please de-select if you face any issues") - file_changed, file_modified_time = stream_obj.sync(catalog, state, selected_streams) - if not file_changed: - break + try: + file_changed, file_modified_time = stream_obj.sync(catalog, state, selected_streams) + if not file_changed: + break + except GoogleForbiddenError as err: + LOGGER.info("Stream file_metadata cannot be synced due to insufficeint permissions, please de-select it") + raise GoogleForbiddenError("Stream file_metadata cannot be synced due to insufficeint permissions, please de-select it") LOGGER.info("FINISHED Syncing: %s", stream_name) From 
a55a103f340b7c45ac4393457c25bacce911a4c3 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Fri, 30 Aug 2024 07:06:57 +0000 Subject: [PATCH 03/12] updated changelog link --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e57631e..d607961 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog ## 3.1.0 - * Updates Sync condition and exception handling for file_metadata stream[#96](https://github.com/singer-io/tap-google-sheets/pull/95) + * Updates Sync condition and exception handling for file_metadata stream[#96](https://github.com/singer-io/tap-google-sheets/pull/96) ## 3.0.0 * Remove support for date datatype [#95](https://github.com/singer-io/tap-google-sheets/pull/95) From e8fa9e977fe7adfaadad8b840bbb9ab8f029fb9e Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Fri, 30 Aug 2024 07:13:49 +0000 Subject: [PATCH 04/12] updated log statement --- tap_google_sheets/sync.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py index c50259c..c8ec7ff 100644 --- a/tap_google_sheets/sync.py +++ b/tap_google_sheets/sync.py @@ -71,7 +71,6 @@ def sync(client, config, catalog, state): if not file_changed: break except GoogleForbiddenError as err: - LOGGER.info("Stream file_metadata cannot be synced due to insufficeint permissions, please de-select it") raise GoogleForbiddenError("Stream file_metadata cannot be synced due to insufficeint permissions, please de-select it") LOGGER.info("FINISHED Syncing: %s", stream_name) From de26fb69a708090f5a356c75e124d17787b15bbf Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 00:49:06 +0000 Subject: [PATCH 05/12] remove stream --- CHANGELOG.md | 2 +- tap_google_sheets/streams.py | 44 -------- tap_google_sheets/sync.py | 18 +--- tap_google_sheets/transform.py | 14 --- tests/base.py | 5 
- tests/test_google_sheets_bookmarks.py | 142 +++++++++++++------------- 6 files changed, 72 insertions(+), 153 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d607961..20a85ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,7 @@ # Changelog ## 3.1.0 - * Updates Sync condition and exception handling for file_metadata stream[#96](https://github.com/singer-io/tap-google-sheets/pull/96) + * Remove deprecated stream file_metadata [#96](https://github.com/singer-io/tap-google-sheets/pull/96) ## 3.0.0 * Remove support for date datatype [#95](https://github.com/singer-io/tap-google-sheets/pull/95) diff --git a/tap_google_sheets/streams.py b/tap_google_sheets/streams.py index 721c8fd..69f1c83 100644 --- a/tap_google_sheets/streams.py +++ b/tap_google_sheets/streams.py @@ -243,49 +243,6 @@ def sync_stream(self, records, catalog, time_extracted=None): LOGGER.info('FINISHED Syncing {}, Total Records: {}'.format(self.stream_name, record_count)) update_currently_syncing(self.state, None) -class FileMetadata(GoogleSheets): - stream_name = "file_metadata" - api = "files" - path = "files/{spreadsheet_id}" - key_properties = ["id"] - replication_method = "INCREMENTAL" - replication_keys = ["modifiedTime"] - params = { - "fields": "id,name,createdTime,modifiedTime,version,teamDriveId,driveId,lastModifyingUser", - "supportsAllDrives": True - } - - def sync(self, catalog, state, selected_streams): - """ - sync file's metadata - """ - self.state = state - # variable to check if file is changed or not - - # get date to start sync from, ie. 
start date or bookmark date - start_date = strptime_to_utc(get_bookmark(state, self.stream_name, self.config_start_date)) - - LOGGER.info("GET file_metadata") - file_metadata, time_extracted = self.get_data(stream_name=self.stream_name) - LOGGER.info("Transform file_metadata") - - file_modified_time = strptime_to_utc(file_metadata.get("modifiedTime")) - LOGGER.info("last_datetime = {}, file_modified_time = {}".format(start_date, file_modified_time)) - if file_modified_time <= start_date: - # if file is not changed, update the variable - LOGGER.info("file_modified_time <= last_datetime, FILE NOT CHANGED. EXITING.") - # return and stop syncing the next streams, as the file is not changed - return False, file_modified_time - - # only perform sync if file metadata stream is selected and file is changed - if self.stream_name in selected_streams: - # transform file metadata records - file_metadata_transformed = internal_transform.transform_file_metadata(file_metadata) - # do sync - self.sync_stream(file_metadata_transformed, catalog, time_extracted) - - return True, file_modified_time - class SpreadSheetMetadata(GoogleSheets): stream_name = "spreadsheet_metadata" api = "sheets" @@ -645,7 +602,6 @@ def sync(self, catalog, state, sheets_loaded_records): # "spreadsheet_metadata" -> get sheets in the spreadsheet and load sheet's records # and prepare records for "sheet_metadata" and "sheets_loaded" streams STREAMS = OrderedDict() -STREAMS['file_metadata'] = FileMetadata STREAMS['spreadsheet_metadata'] = SpreadSheetMetadata STREAMS['sheet_metadata'] = SheetMetadata STREAMS['sheets_loaded'] = SheetsLoaded diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py index c8ec7ff..f89c1cd 100644 --- a/tap_google_sheets/sync.py +++ b/tap_google_sheets/sync.py @@ -7,7 +7,6 @@ def sync(client, config, catalog, state): """ Sync the streams, loop over STREAMS - "file_metadata" -> get the file's metadata and if the spreadsheet file is updated then continue the sync else stop 
the sync "spreadsheet_metadata" -> get the spreadsheet's metadata - sync the spreadsheet_metadata stream if selected - get the sheets in the spreadsheet and loop over the sheets and sync the sheet's records if selected @@ -63,19 +62,4 @@ def sync(client, config, catalog, state): else: stream_obj.sync(catalog, state, sheets_loaded_records) - # sync file metadata - elif stream_name == "file_metadata" and "file_metadata" in selected_streams: - LOGGER.warning("This Stream might not work, please de-select if you face any issues") - try: - file_changed, file_modified_time = stream_obj.sync(catalog, state, selected_streams) - if not file_changed: - break - except GoogleForbiddenError as err: - raise GoogleForbiddenError("Stream file_metadata cannot be synced due to insufficeint permissions, please de-select it") - - LOGGER.info("FINISHED Syncing: %s", stream_name) - - # write "file_metadata" bookmark, as we have successfully synced all the sheet's records - # it will force to re-sync of there is any interrupt between the sync - if file_modified_time: - write_bookmark(state, 'file_metadata', strftime(file_modified_time)) + LOGGER.info("FINISHED Syncing: %s", stream_name) \ No newline at end of file diff --git a/tap_google_sheets/transform.py b/tap_google_sheets/transform.py index cebeca2..9fd74eb 100644 --- a/tap_google_sheets/transform.py +++ b/tap_google_sheets/transform.py @@ -33,20 +33,6 @@ def transform_spreadsheet_metadata(spreadsheet_metadata): spreadsheet_metadata_arr.append(spreadsheet_metadata_tf) return spreadsheet_metadata_arr -# Tranform file_metadata: remove nodes from lastModifyingUser, format as array -def transform_file_metadata(file_metadata): - # Convert to dict - file_metadata_tf = json.loads(json.dumps(file_metadata)) - # Remove keys - if file_metadata_tf.get('lastModifyingUser'): - file_metadata_tf['lastModifyingUser'].pop('photoLink', None) - file_metadata_tf['lastModifyingUser'].pop('me', None) - 
file_metadata_tf['lastModifyingUser'].pop('permissionId', None) - # Add record to an array of 1 - file_metadata_arr = [] - file_metadata_arr.append(file_metadata_tf) - return file_metadata_arr - # Convert Excel Date Serial Number (excel_date_sn) to datetime string # timezone_str: defaults to UTC (which we assume is the timezone for ALL datetimes) def excel_to_dttm_str(string_value, excel_date_sn, timezone_str=None): diff --git a/tests/base.py b/tests/base.py index 42db120..e4f6c0f 100644 --- a/tests/base.py +++ b/tests/base.py @@ -72,11 +72,6 @@ def expected_metadata(self): # self.REPLICATION_KEYS: {"modified_at"} } return { - "file_metadata": { - self.PRIMARY_KEYS: {"id", }, - self.REPLICATION_METHOD: self.INCREMENTAL, - self.REPLICATION_KEYS: {"modifiedTime"} - }, "sheet_metadata": { self.PRIMARY_KEYS: {"sheetId"}, # "spreadsheetId"}, # BUG? | This is not in the real tap, "spreadsheetId"}, self.REPLICATION_METHOD: self.FULL_TABLE, diff --git a/tests/test_google_sheets_bookmarks.py b/tests/test_google_sheets_bookmarks.py index 26e4761..484615f 100644 --- a/tests/test_google_sheets_bookmarks.py +++ b/tests/test_google_sheets_bookmarks.py @@ -17,78 +17,76 @@ class BookmarksTest(GoogleSheetsBaseTest): def name(): return "tap_tester_google_sheets_bookmarks" - def test_run(self): - """ - Run check mode, perform table and field selection, and run a sync. - Replication can be triggered by pushing back state to prior 'file_metadata' state. - Run a second sync after not updating state to verify no streams are being synced - Run a 3rd sync and ensure full table streams are triggered by the simulated bookmark value. 
- - - Verify initial sync message actions include activate versions and the upserts - - Verify no streams are synced when 'file_metadata' bookmark does not change - - Verify that the third sync with the updated simulated bookmark has the same synced streams as the first sync - - Verify that streams will sync based off of 'file_metadata' even when it is not selected - """ - skipped_streams = {stream - for stream in self.expected_streams() - if stream.startswith('sadsheet')}.union({ - 'file_metadata' # testing case without file_metadata selected, but still providing bookmark - }) - self.expected_test_streams = self.expected_streams() - skipped_streams - - # Grab connection, and run discovery and initial sync - self.starter() - - synced_records_1 = runner.get_records_from_target_output() - - # Grab state to be updated later - state = menagerie.get_state(self.conn_id) - - # BUG full table streams are saving bookmarks unnecessarily https://jira.talendforge.org/browse/TDL-14343 - - # BUG there are no activate version messages in the sheet_metadata, spreadsheet_metadata - # or sheets_loaded streams, even though they are full table https://jira.talendforge.org/browse/TDL-14346 - # verify message actions are correct - for stream in self.expected_test_streams.difference({'sheet_metadata', 'spreadsheet_metadata', 'sheets_loaded'}): - with self.subTest(stream=stream): - sync1_message_actions = [message['action'] for message in synced_records_1[stream]['messages']] - self.assertEqual('activate_version', sync1_message_actions[0]) - self.assertEqual('activate_version', sync1_message_actions[-1]) - self.assertSetEqual({'upsert'}, set(sync1_message_actions[1:-1])) - - # run a sync again, this time we shouldn't get any records back - sync_job_name = runner.run_sync_mode(self, self.conn_id) - exit_status = menagerie.get_exit_status(self.conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - record_count_by_stream_2 = 
runner.examine_target_output_file( - self, self.conn_id, self.expected_streams(), self.expected_primary_keys()) - - # verify we do not sync any unexpected streams - self.assertSetEqual(set(), set(record_count_by_stream_2.keys())) - - # verify no records were synced for our expected streams - for stream in self.expected_test_streams: - with self.subTest(stream=stream): - self.assertEqual(0, record_count_by_stream_2.get(stream, 0)) - - # roll back the state of the file_metadata stream to ensure that we sync sheets - # based off of this state - file_metadata_stream = 'file_metadata' - file_metadata_bookmark = state['bookmarks'][file_metadata_stream] - bookmark_datetime = datetime.datetime.strptime(file_metadata_bookmark, self.BOOKMARK_COMPARISON_FORMAT) - target_datetime = bookmark_datetime + datetime.timedelta(days=-1) - target_bookmark = datetime.datetime.strftime(target_datetime, self.BOOKMARK_COMPARISON_FORMAT) - - new_state = copy.deepcopy(state) - new_state['bookmarks'][file_metadata_stream] = target_bookmark - - menagerie.set_state(self.conn_id, new_state) - - record_count_by_stream_3 = self.run_and_verify_sync(self.conn_id) - synced_records_3 = runner.get_records_from_target_output() - - # verify we sync sheets based off the state of file_metadata - self.assertDictEqual(self.record_count_by_stream_1, record_count_by_stream_3) + # def test_run(self): + # """ + # Run check mode, perform table and field selection, and run a sync. + # Replication can be triggered by pushing back state to prior 'file_metadata' state. + # Run a second sync after not updating state to verify no streams are being synced + # Run a 3rd sync and ensure full table streams are triggered by the simulated bookmark value. 
+ + # - Verify initial sync message actions include activate versions and the upserts + # - Verify no streams are synced when 'file_metadata' bookmark does not change + # - Verify that the third sync with the updated simulated bookmark has the same synced streams as the first sync + # - Verify that streams will sync based off of 'file_metadata' even when it is not selected + # """ + # skipped_streams = {stream + # for stream in self.expected_streams() + # if stream.startswith('sadsheet')} + # self.expected_test_streams = self.expected_streams() - skipped_streams + + # # Grab connection, and run discovery and initial sync + # self.starter() + + # synced_records_1 = runner.get_records_from_target_output() + + # # Grab state to be updated later + # state = menagerie.get_state(self.conn_id) + + # # BUG full table streams are saving bookmarks unnecessarily https://jira.talendforge.org/browse/TDL-14343 + + # # BUG there are no activate version messages in the sheet_metadata, spreadsheet_metadata + # # or sheets_loaded streams, even though they are full table https://jira.talendforge.org/browse/TDL-14346 + # # verify message actions are correct + # for stream in self.expected_test_streams.difference({'sheet_metadata', 'spreadsheet_metadata', 'sheets_loaded'}): + # with self.subTest(stream=stream): + # sync1_message_actions = [message['action'] for message in synced_records_1[stream]['messages']] + # self.assertEqual('activate_version', sync1_message_actions[0]) + # self.assertEqual('activate_version', sync1_message_actions[-1]) + # self.assertSetEqual({'upsert'}, set(sync1_message_actions[1:-1])) + + # # run a sync again, this time we shouldn't get any records back + # sync_job_name = runner.run_sync_mode(self, self.conn_id) + # exit_status = menagerie.get_exit_status(self.conn_id, sync_job_name) + # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + # record_count_by_stream_2 = runner.examine_target_output_file( + # self, self.conn_id, 
self.expected_streams(), self.expected_primary_keys()) + + # # verify we do not sync any unexpected streams + # self.assertSetEqual(set(), set(record_count_by_stream_2.keys())) + + # # verify no records were synced for our expected streams + # for stream in self.expected_test_streams: + # with self.subTest(stream=stream): + # self.assertEqual(0, record_count_by_stream_2.get(stream, 0)) + + # # roll back the state of the file_metadata stream to ensure that we sync sheets + # # based off of this state + # file_metadata_stream = 'file_metadata' + # file_metadata_bookmark = state['bookmarks'][file_metadata_stream] + # bookmark_datetime = datetime.datetime.strptime(file_metadata_bookmark, self.BOOKMARK_COMPARISON_FORMAT) + # target_datetime = bookmark_datetime + datetime.timedelta(days=-1) + # target_bookmark = datetime.datetime.strftime(target_datetime, self.BOOKMARK_COMPARISON_FORMAT) + + # new_state = copy.deepcopy(state) + # new_state['bookmarks'][file_metadata_stream] = target_bookmark + + # menagerie.set_state(self.conn_id, new_state) + + # record_count_by_stream_3 = self.run_and_verify_sync(self.conn_id) + # synced_records_3 = runner.get_records_from_target_output() + + # # verify we sync sheets based off the state of file_metadata + # self.assertDictEqual(self.record_count_by_stream_1, record_count_by_stream_3) def starter(self): """ From 9cfcd5e1c1d154fe997116b188f0a3bfb67e841b Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 01:21:23 +0000 Subject: [PATCH 06/12] updated bookmarks test --- tap_google_sheets/sync.py | 3 +- tests/test_google_sheets_bookmarks.py | 117 +++++++++----------------- 2 files changed, 41 insertions(+), 79 deletions(-) diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py index f89c1cd..7e3204f 100644 --- a/tap_google_sheets/sync.py +++ b/tap_google_sheets/sync.py @@ -1,6 +1,5 @@ import singer from tap_google_sheets.streams import STREAMS, SheetsLoadData, write_bookmark, 
strftime -from tap_google_sheets.client import GoogleForbiddenError LOGGER = singer.get_logger() @@ -62,4 +61,4 @@ def sync(client, config, catalog, state): else: stream_obj.sync(catalog, state, sheets_loaded_records) - LOGGER.info("FINISHED Syncing: %s", stream_name) \ No newline at end of file + LOGGER.info("FINISHED Syncing: %s", stream_name) diff --git a/tests/test_google_sheets_bookmarks.py b/tests/test_google_sheets_bookmarks.py index 484615f..11a526c 100644 --- a/tests/test_google_sheets_bookmarks.py +++ b/tests/test_google_sheets_bookmarks.py @@ -8,7 +8,7 @@ class BookmarksTest(GoogleSheetsBaseTest): """Ensure all sheets streams will replicate based off of the most recent bookmarked state for 'file_metadata'""" - + conn_id = "" expected_test_streams = "" record_count_by_stream_1 = "" @@ -17,76 +17,43 @@ class BookmarksTest(GoogleSheetsBaseTest): def name(): return "tap_tester_google_sheets_bookmarks" - # def test_run(self): - # """ - # Run check mode, perform table and field selection, and run a sync. - # Replication can be triggered by pushing back state to prior 'file_metadata' state. - # Run a second sync after not updating state to verify no streams are being synced - # Run a 3rd sync and ensure full table streams are triggered by the simulated bookmark value. 
- - # - Verify initial sync message actions include activate versions and the upserts - # - Verify no streams are synced when 'file_metadata' bookmark does not change - # - Verify that the third sync with the updated simulated bookmark has the same synced streams as the first sync - # - Verify that streams will sync based off of 'file_metadata' even when it is not selected - # """ - # skipped_streams = {stream - # for stream in self.expected_streams() - # if stream.startswith('sadsheet')} - # self.expected_test_streams = self.expected_streams() - skipped_streams - - # # Grab connection, and run discovery and initial sync - # self.starter() - - # synced_records_1 = runner.get_records_from_target_output() - - # # Grab state to be updated later - # state = menagerie.get_state(self.conn_id) - - # # BUG full table streams are saving bookmarks unnecessarily https://jira.talendforge.org/browse/TDL-14343 - - # # BUG there are no activate version messages in the sheet_metadata, spreadsheet_metadata - # # or sheets_loaded streams, even though they are full table https://jira.talendforge.org/browse/TDL-14346 - # # verify message actions are correct - # for stream in self.expected_test_streams.difference({'sheet_metadata', 'spreadsheet_metadata', 'sheets_loaded'}): - # with self.subTest(stream=stream): - # sync1_message_actions = [message['action'] for message in synced_records_1[stream]['messages']] - # self.assertEqual('activate_version', sync1_message_actions[0]) - # self.assertEqual('activate_version', sync1_message_actions[-1]) - # self.assertSetEqual({'upsert'}, set(sync1_message_actions[1:-1])) - - # # run a sync again, this time we shouldn't get any records back - # sync_job_name = runner.run_sync_mode(self, self.conn_id) - # exit_status = menagerie.get_exit_status(self.conn_id, sync_job_name) - # menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - # record_count_by_stream_2 = runner.examine_target_output_file( - # self, self.conn_id, 
self.expected_streams(), self.expected_primary_keys()) - - # # verify we do not sync any unexpected streams - # self.assertSetEqual(set(), set(record_count_by_stream_2.keys())) - - # # verify no records were synced for our expected streams - # for stream in self.expected_test_streams: - # with self.subTest(stream=stream): - # self.assertEqual(0, record_count_by_stream_2.get(stream, 0)) - - # # roll back the state of the file_metadata stream to ensure that we sync sheets - # # based off of this state - # file_metadata_stream = 'file_metadata' - # file_metadata_bookmark = state['bookmarks'][file_metadata_stream] - # bookmark_datetime = datetime.datetime.strptime(file_metadata_bookmark, self.BOOKMARK_COMPARISON_FORMAT) - # target_datetime = bookmark_datetime + datetime.timedelta(days=-1) - # target_bookmark = datetime.datetime.strftime(target_datetime, self.BOOKMARK_COMPARISON_FORMAT) - - # new_state = copy.deepcopy(state) - # new_state['bookmarks'][file_metadata_stream] = target_bookmark - - # menagerie.set_state(self.conn_id, new_state) - - # record_count_by_stream_3 = self.run_and_verify_sync(self.conn_id) - # synced_records_3 = runner.get_records_from_target_output() - - # # verify we sync sheets based off the state of file_metadata - # self.assertDictEqual(self.record_count_by_stream_1, record_count_by_stream_3) + def test_run(self): + """ + Run check mode, perform table and field selection, and run a sync. 
+ + - Verify initial sync message actions include activate versions and the upserts + """ + skipped_streams = {stream + for stream in self.expected_streams() + if stream.startswith('sadsheet')} + self.expected_test_streams = self.expected_streams() - skipped_streams + + # Grab connection, and run discovery and initial sync + self.starter() + + synced_records_1 = runner.get_records_from_target_output() + + # Grab state to be updated later + state = menagerie.get_state(self.conn_id) + + # BUG full table streams are saving bookmarks unnecessarily https://jira.talendforge.org/browse/TDL-14343 + + # BUG there are no activate version messages in the sheet_metadata, spreadsheet_metadata + # or sheets_loaded streams, even though they are full table https://jira.talendforge.org/browse/TDL-14346 + # verify message actions are correct + for stream in self.expected_test_streams.difference({'sheet_metadata', 'spreadsheet_metadata', 'sheets_loaded'}): + with self.subTest(stream=stream): + sync1_message_actions = [message['action'] for message in synced_records_1[stream]['messages']] + self.assertEqual('activate_version', sync1_message_actions[0]) + self.assertEqual('activate_version', sync1_message_actions[-1]) + self.assertSetEqual({'upsert'}, set(sync1_message_actions[1:-1])) + + # run a sync again, this time we shouldn't get any records back + sync_job_name = runner.run_sync_mode(self, self.conn_id) + exit_status = menagerie.get_exit_status(self.conn_id, sync_job_name) + menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) + record_count_by_stream_2 = runner.examine_target_output_file( + self, self.conn_id, self.expected_streams(), self.expected_primary_keys()) def starter(self): """ @@ -100,7 +67,7 @@ def starter(self): ### Instantiate connection ########################################################################## self.conn_id = connections.ensure_connection(self) - + ########################################################################## ### 
Discovery without the backoff ########################################################################## @@ -116,7 +83,7 @@ def starter(self): self.assertSetEqual(self.expected_streams(), found_catalog_names, msg="discovered schemas do not match") LOGGER.info("discovered schemas are OK") - + # table and field selection test_catalogs = [catalog for catalog in found_catalogs if catalog.get('stream_name') in self.expected_test_streams] @@ -141,7 +108,3 @@ def starter(self): msg="failed to replicate any data: {}".format(self.record_count_by_stream_1) ) LOGGER.info("total replicated row count: %s", sum(self.record_count_by_stream_1.values())) - - - - From 447605dcc40b3538c083be87b3918b02215d0145 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 01:38:00 +0000 Subject: [PATCH 07/12] removed unused vars --- tap_google_sheets/sync.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tap_google_sheets/sync.py b/tap_google_sheets/sync.py index 7e3204f..561d9dc 100644 --- a/tap_google_sheets/sync.py +++ b/tap_google_sheets/sync.py @@ -13,8 +13,6 @@ def sync(client, config, catalog, state): "sheets_loaded" & "sheet_metadata" -> get the data lists from the "spreadsheet_metadata" stream and sync the records if selected """ last_stream = singer.get_currently_syncing(state) - # preset to none - file_modified_time = None LOGGER.info("last/currently syncing stream: %s", last_stream) selected_streams = [] From 5d7d3880ff564714032bf8752685dad1708676cf Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 01:42:20 +0000 Subject: [PATCH 08/12] updated test --- tests/test_google_sheets_bookmarks.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_google_sheets_bookmarks.py b/tests/test_google_sheets_bookmarks.py index 11a526c..5110d22 100644 --- a/tests/test_google_sheets_bookmarks.py +++ b/tests/test_google_sheets_bookmarks.py @@ -48,13 +48,6 @@ def test_run(self): 
self.assertEqual('activate_version', sync1_message_actions[-1]) self.assertSetEqual({'upsert'}, set(sync1_message_actions[1:-1])) - # run a sync again, this time we shouldn't get any records back - sync_job_name = runner.run_sync_mode(self, self.conn_id) - exit_status = menagerie.get_exit_status(self.conn_id, sync_job_name) - menagerie.verify_sync_exit_status(self, exit_status, sync_job_name) - record_count_by_stream_2 = runner.examine_target_output_file( - self, self.conn_id, self.expected_streams(), self.expected_primary_keys()) - def starter(self): """ Instantiate connection, run discovery, and initial sync. From c5d5da8ffc24b6b424b7aae489e0347dbd568262 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 07:49:29 +0000 Subject: [PATCH 09/12] update test bookmark --- tests/test_google_sheets_bookmarks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_google_sheets_bookmarks.py b/tests/test_google_sheets_bookmarks.py index 5110d22..2ecd4f2 100644 --- a/tests/test_google_sheets_bookmarks.py +++ b/tests/test_google_sheets_bookmarks.py @@ -7,7 +7,7 @@ class BookmarksTest(GoogleSheetsBaseTest): - """Ensure all sheets streams will replicate based off of the most recent bookmarked state for 'file_metadata'""" + """Ensure all sheets streams will replicate full table """ conn_id = "" expected_test_streams = "" @@ -20,7 +20,6 @@ def name(): def test_run(self): """ Run check mode, perform table and field selection, and run a sync. 
- - Verify initial sync message actions include activate versions and the upserts """ skipped_streams = {stream @@ -47,6 +46,7 @@ def test_run(self): self.assertEqual('activate_version', sync1_message_actions[0]) self.assertEqual('activate_version', sync1_message_actions[-1]) self.assertSetEqual({'upsert'}, set(sync1_message_actions[1:-1])) + self.assertIn(stream, state["bookmark"].keys()) def starter(self): """ From 9626d4ea4544fabfe43ce552815c9521c9b6a0ad Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 08:11:28 +0000 Subject: [PATCH 10/12] update key --- tests/test_google_sheets_bookmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_google_sheets_bookmarks.py b/tests/test_google_sheets_bookmarks.py index 2ecd4f2..1566a55 100644 --- a/tests/test_google_sheets_bookmarks.py +++ b/tests/test_google_sheets_bookmarks.py @@ -46,7 +46,7 @@ def test_run(self): self.assertEqual('activate_version', sync1_message_actions[0]) self.assertEqual('activate_version', sync1_message_actions[-1]) self.assertSetEqual({'upsert'}, set(sync1_message_actions[1:-1])) - self.assertIn(stream, state["bookmark"].keys()) + self.assertIn(stream, state["bookmarks"].keys()) def starter(self): """ From bb8e62d374c04a9d8cd3ba6963083d0b153e93e1 Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 08:32:40 +0000 Subject: [PATCH 11/12] removed refereces to file metadata --- README.md | 7 +------ state.json.example | 5 +---- tap_google_sheets/streams.py | 1 - tests/base.py | 5 +---- tests/test_google_sheets_all_fields.py | 15 +-------------- tests/test_google_sheets_automatic_fields.py | 4 ---- tests/test_google_sheets_bookmarks.py | 1 + tests/test_google_sheets_discovery.py | 2 -- 8 files changed, 5 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 772c28d..8d79e24 100644 --- a/README.md +++ b/README.md @@ -68,7 +68,6 @@ This tap: ## 
Authentication The [**Google Sheets Setup & Authentication**](https://drive.google.com/open?id=1FojlvtLwS0-BzGS37R0jEXtwSHqSiO1Uw-7RKQQO-C4) Google Doc provides instructions show how to configure the Google Cloud API credentials to enable Google Drive and Google Sheets APIs, configure Google Cloud to authorize/verify your domain ownership, generate an API key (client_id, client_secret), authenticate and generate a refresh_token, and prepare your tap config.json with the necessary parameters. -- Enable Googe Drive APIs and Authorization Scope: https://www.googleapis.com/auth/drive.metadata.readonly - Enable Google Sheets API and Authorization Scope: https://www.googleapis.com/auth/spreadsheets.readonly - Tap config.json parameters: - client_id: identifies your application @@ -122,10 +121,7 @@ The [**Google Sheets Setup & Authentication**](https://drive.google.com/open?id= ```json { - "currently_syncing": "file_metadata", - "bookmarks": { - "file_metadata": "2019-09-27T22:34:39.000000Z" - } + "currently_syncing": "sheet_metadata", } ``` @@ -185,7 +181,6 @@ The [**Google Sheets Setup & Authentication**](https://drive.google.com/open?id= +----------------------+---------+---------+ | stream | records | schemas | +----------------------+---------+---------+ - | file_metadata | 1 | 1 | | spreadsheet_metadata | 1 | 1 | | Test-1 | 9 | 1 | | Test 2 | 2 | 1 | diff --git a/state.json.example b/state.json.example index a5ccda7..8599a7f 100644 --- a/state.json.example +++ b/state.json.example @@ -1,6 +1,3 @@ { - "currently_syncing": "file_metadata", - "bookmarks": { - "file_metadata": "2019-09-27T22:34:39.000000Z" - } + "currently_syncing": "sheet_metadata" } diff --git a/tap_google_sheets/streams.py b/tap_google_sheets/streams.py index 69f1c83..75f3ad4 100644 --- a/tap_google_sheets/streams.py +++ b/tap_google_sheets/streams.py @@ -598,7 +598,6 @@ def sync(self, catalog, state, sheets_loaded_records): # create OrderDict, as the order matters for syncing the streams -# 
"file_metadata" -> do not sync other streams, if file is not changed # "spreadsheet_metadata" -> get sheets in the spreadsheet and load sheet's records # and prepare records for "sheet_metadata" and "sheets_loaded" streams STREAMS = OrderedDict() diff --git a/tests/base.py b/tests/base.py index e4f6c0f..0ad6ecf 100644 --- a/tests/base.py +++ b/tests/base.py @@ -290,9 +290,6 @@ def perform_and_verify_table_and_field_selection(self, # Verify only automatic fields are selected expected_automatic_fields = self.expected_automatic_fields().get(cat['stream_name']) selected_fields = self.get_selected_fields_from_metadata(catalog_entry['metadata']) - # BUG TDL-14241 | Replication keys are not automatic - if cat['stream_name'] == "file_metadata": - expected_automatic_fields.remove('modifiedTime') self.assertEqual(expected_automatic_fields, selected_fields) @staticmethod @@ -368,7 +365,7 @@ def timedelta_formatted(self, dtime, days=0): ########################################################################## def is_sheet(self, stream): - non_sheets_streams = {'sheet_metadata', 'file_metadata', 'sheets_loaded', 'spreadsheet_metadata'} + non_sheets_streams = {'sheet_metadata', 'sheets_loaded', 'spreadsheet_metadata'} return stream in self.expected_streams().difference(non_sheets_streams) def undiscoverable_sheets(self): diff --git a/tests/test_google_sheets_all_fields.py b/tests/test_google_sheets_all_fields.py index f2edde2..836dcb1 100644 --- a/tests/test_google_sheets_all_fields.py +++ b/tests/test_google_sheets_all_fields.py @@ -79,17 +79,4 @@ def test_run(self): # verify all fields for a stream were replicated self.assertGreater(len(expected_all_keys), len(expected_automatic_keys)) self.assertTrue(expected_automatic_keys.issubset(expected_all_keys), msg=f'{expected_automatic_keys-expected_all_keys} is not in "expected_all_keys"') - if stream == "file_metadata": - - # As per google documentation https://developers.google.com/drive/api/v3/reference/files `teamDriveId` is 
deprecated. There is mentioned that use `driveId` instead. - # `driveId` is populated from items in the team shared drives. But stitch integration does not support shared team drive. So replicating driveid is not possible. - # So, these two fields will not be synced. - expected_all_keys.remove('teamDriveId') - expected_all_keys.remove('driveId') - # Earlier field `emailAddress` was defined as `emailAddress`(typo mismatch) in file_metadata.json. - # So, this particular field did not collected. Because API response contain `emailAddress` field. - # Now, typo has been corrected and verifying that `emailAddress` field collected. - lastModifyingUser_fields = set(messages['messages'][0].get('data', {}).get('lastModifyingUser', {}).keys()) # Get `lastModifyingUser` from file_metadata records - # Verify that `emailAddress` field under `lastModifyingUser` collected. - self.assertTrue({'emailAddress'}.issubset(lastModifyingUser_fields), msg="emailAddress does not found in lastModifyingUser") - self.assertSetEqual(expected_all_keys, actual_all_keys) + self.assertSetEqual(expected_all_keys, actual_all_keys) diff --git a/tests/test_google_sheets_automatic_fields.py b/tests/test_google_sheets_automatic_fields.py index dcb6f9c..e3de71a 100644 --- a/tests/test_google_sheets_automatic_fields.py +++ b/tests/test_google_sheets_automatic_fields.py @@ -58,9 +58,5 @@ def test_run(self): # Verify that you get some records for each stream self.assertGreater(record_count_by_stream.get(stream, -1), 0) - # Verify that only the automatic fields are sent to the target - # BUG TDL-14241 | Replication keys are not automatic - if stream == "file_metadata": - expected_keys.remove('modifiedTime') for actual_keys in record_messages_keys: self.assertSetEqual(expected_keys, actual_keys) diff --git a/tests/test_google_sheets_bookmarks.py b/tests/test_google_sheets_bookmarks.py index 1566a55..5574e12 100644 --- a/tests/test_google_sheets_bookmarks.py +++ b/tests/test_google_sheets_bookmarks.py @@ -21,6 
+21,7 @@ def test_run(self): """ Run check mode, perform table and field selection, and run a sync. - Verify initial sync message actions include activate versions and the upserts + - Verify that the state bookmarks include an entry for each selected stream """ skipped_streams = {stream for stream in self.expected_streams() diff --git a/tests/test_google_sheets_discovery.py b/tests/test_google_sheets_discovery.py index 3f6b6dc..e58c92c 100644 --- a/tests/test_google_sheets_discovery.py +++ b/tests/test_google_sheets_discovery.py @@ -123,8 +123,6 @@ def test_run(self): # verify that primary keys and replication keys # are given the inclusion of automatic in metadata. # BUG TDL-14241 | Replication keys are not automatic - if stream == 'file_metadata': - expected_automatic_fields.remove('modifiedTime') self.assertSetEqual(expected_automatic_fields, actual_automatic_fields) # verify missing values where __sdc_row = 2 From 583a79795b51ca1aac15b51aaab4f5c353f95bba Mon Sep 17 00:00:00 2001 From: Vi6hal <20889199+Vi6hal@users.noreply.github.com> Date: Thu, 5 Sep 2024 08:53:20 +0000 Subject: [PATCH 12/12] updated bookmark test docs --- tests/test_google_sheets_bookmarks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_google_sheets_bookmarks.py b/tests/test_google_sheets_bookmarks.py index 5574e12..e0d8cda 100644 --- a/tests/test_google_sheets_bookmarks.py +++ b/tests/test_google_sheets_bookmarks.py @@ -7,7 +7,7 @@ class BookmarksTest(GoogleSheetsBaseTest): - """Ensure all sheets streams will replicate full table """ + """Ensure all sheets streams will replicate in full table mode and create appropriate bookmarks""" conn_id = "" expected_test_streams = ""