From ae457d9c98274e049f00c0c44d234ba6b9bdc321 Mon Sep 17 00:00:00 2001
From: kasperstorgaard <kasper.storgaard@gmail.com>
Date: Thu, 2 Nov 2023 08:19:41 +0100
Subject: [PATCH 1/3] add gsheet extractor plugin

---
 package-lock.json                         | 232 ++++++++++++++++++++-
 plugins/gsheet-extractor/CHANGELOG.md     |   1 +
 plugins/gsheet-extractor/README.md        |  20 ++
 plugins/gsheet-extractor/package.json     |  42 ++++
 plugins/gsheet-extractor/src/extractor.ts | 234 ++++++++++++++++++++++
 plugins/gsheet-extractor/src/index.ts     |   9 +
 plugins/gsheet-extractor/src/parser.ts    |  63 ++++++
 7 files changed, 596 insertions(+), 5 deletions(-)
 create mode 100644 plugins/gsheet-extractor/CHANGELOG.md
 create mode 100644 plugins/gsheet-extractor/README.md
 create mode 100644 plugins/gsheet-extractor/package.json
 create mode 100644 plugins/gsheet-extractor/src/extractor.ts
 create mode 100644 plugins/gsheet-extractor/src/index.ts
 create mode 100644 plugins/gsheet-extractor/src/parser.ts

diff --git a/package-lock.json b/package-lock.json
index e42ded02c..eae4ccbb9 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -908,6 +908,10 @@
       "resolved": "plugins/graphql",
       "link": true
     },
+    "node_modules/@flatfile/plugin-gsheet-extractor": {
+      "resolved": "plugins/gsheet-extractor",
+      "link": true
+    },
     "node_modules/@flatfile/plugin-job-handler": {
       "resolved": "plugins/job-handler",
       "link": true
@@ -3573,8 +3577,12 @@
       "license": "MIT"
     },
     "node_modules/@types/node": {
-      "version": "20.5.9",
-      "license": "MIT"
+      "version": "20.8.10",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-20.8.10.tgz",
+      "integrity": "sha512-TlgT8JntpcbmKUFzjhsyhGfP2fsiz1Mv56im6enJ905xG1DAYesxJaeSbGqQmAw8OWPdhyJGhGSQGKRNJ45u9w==",
+      "dependencies": {
+        "undici-types": "~5.26.4"
+      }
     },
     "node_modules/@types/normalize-package-data": {
       "version": "2.4.1",
@@ -4044,6 +4052,14 @@
         "node": ">=4"
       }
     },
+    "node_modules/bignumber.js": {
+      "version": "9.1.2",
+      "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.1.2.tgz",
+      "integrity": "sha512-2/mKyZH9K85bzOEfhXDBFZTGd1CTs+5IHpeFQo9luiBG7hghdC851Pj2WAhb6E3R6b9tZj/XKhbg4fum+Kepug==",
+      "engines": {
+        "node": "*"
+      }
+    },
     "node_modules/boolbase": {
       "version": "1.0.0",
       "dev": true,
@@ -4145,6 +4161,11 @@
         "ieee754": "^1.2.1"
       }
     },
+    "node_modules/buffer-equal-constant-time": {
+      "version": "1.0.1",
+      "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz",
+      "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA=="
+    },
     "node_modules/buffer-from": {
       "version": "1.1.2",
       "dev": true,
@@ -4874,6 +4895,14 @@
         "node": ">=10"
       }
     },
+    "node_modules/ecdsa-sig-formatter": {
+      "version": "1.0.11",
+      "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz",
+      "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==",
+      "dependencies": {
+        "safe-buffer": "^5.0.1"
+      }
+    },
     "node_modules/effect": {
       "version": "2.0.0-next.31",
       "resolved": "https://registry.npmjs.org/effect/-/effect-2.0.0-next.31.tgz",
@@ -5134,6 +5163,11 @@
         "node": "^14.15.0 || ^16.10.0 || >=18.0.0"
       }
     },
+    "node_modules/extend": {
+      "version": "3.0.2",
+      "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz",
+      "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g=="
+    },
     "node_modules/extendable-error": {
       "version": "0.1.7",
       "dev": true,
@@ -5335,6 +5369,32 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/gaxios": {
+      "version": "6.1.1",
+      "resolved": "https://registry.npmjs.org/gaxios/-/gaxios-6.1.1.tgz",
+      "integrity": "sha512-bw8smrX+XlAoo9o1JAksBwX+hi/RG15J+NTSxmNPIclKC3ZVK6C2afwY8OSdRvOK0+ZLecUJYtj2MmjOt3Dm0w==",
+      "dependencies": {
+        "extend": "^3.0.2",
+        "https-proxy-agent": "^7.0.1",
+        "is-stream": "^2.0.0",
+        "node-fetch": "^2.6.9"
+      },
+      "engines": {
+        "node": ">=14"
+      }
+    },
+    "node_modules/gcp-metadata": {
+      "version": "6.0.0",
+      "resolved": "https://registry.npmjs.org/gcp-metadata/-/gcp-metadata-6.0.0.tgz",
+      "integrity": "sha512-Ozxyi23/1Ar51wjUT2RDklK+3HxqDr8TLBNK8rBBFQ7T85iIGnXnVusauj06QyqCXRFZig8LZC+TUddWbndlpQ==",
+      "dependencies": {
+        "gaxios": "^6.0.0",
+        "json-bigint": "^1.0.0"
+      },
+      "engines": {
+        "node": ">=14"
+      }
+    },
     "node_modules/gensync": {
       "version": "1.0.0-beta.2",
       "license": "MIT",
@@ -5498,6 +5558,50 @@
         "url": "https://github.com/sponsors/sindresorhus"
       }
     },
+    "node_modules/google-auth-library": {
+      "version": "9.2.0",
+      "resolved": "https://registry.npmjs.org/google-auth-library/-/google-auth-library-9.2.0.tgz",
+      "integrity": "sha512-1oV3p0JhNEhVbj26eF3FAJcv9MXXQt4S0wcvKZaDbl4oHq5V3UJoSbsGZGQNcjoCdhW4kDSwOs11wLlHog3fgQ==",
+      "dependencies": {
+        "base64-js": "^1.3.0",
+        "ecdsa-sig-formatter": "^1.0.11",
+        "gaxios": "^6.0.0",
+        "gcp-metadata": "^6.0.0",
+        "gtoken": "^7.0.0",
+        "jws": "^4.0.0"
+      },
+      "engines": {
+        "node": ">=14"
+      }
+    },
+    "node_modules/googleapis": {
+      "version": "128.0.0",
+      "resolved": "https://registry.npmjs.org/googleapis/-/googleapis-128.0.0.tgz",
+      "integrity": "sha512-+sLtVYNazcxaSD84N6rihVX4QiGoqRdnlz2SwmQQkadF31XonDfy4ufk3maMg27+FiySrH0rd7V8p+YJG6cknA==",
+      "dependencies": {
+        "google-auth-library": "^9.0.0",
+        "googleapis-common": "^7.0.0"
+      },
+      "engines": {
+        "node": ">=14.0.0"
+      }
+    },
+    "node_modules/googleapis-common": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/googleapis-common/-/googleapis-common-7.0.1.tgz",
+      "integrity": "sha512-mgt5zsd7zj5t5QXvDanjWguMdHAcJmmDrF9RkInCecNsyV7S7YtGqm5v2IWONNID88osb7zmx5FtrAP12JfD0w==",
+      "dependencies": {
+        "extend": "^3.0.2",
+        "gaxios": "^6.0.3",
+        "google-auth-library": "^9.0.0",
+        "qs": "^6.7.0",
+        "url-template": "^2.0.8",
+        "uuid": "^9.0.0"
+      },
+      "engines": {
+        "node": ">=14.0.0"
+      }
+    },
     "node_modules/gopd": {
       "version": "1.0.1",
       "dev": true,
@@ -5518,6 +5622,18 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/gtoken": {
+      "version": "7.0.1",
+      "resolved": "https://registry.npmjs.org/gtoken/-/gtoken-7.0.1.tgz",
+      "integrity": "sha512-KcFVtoP1CVFtQu0aSk3AyAt2og66PFhZAlkUOuWKwzMLoulHXG5W5wE5xAnHb+yl3/wEFoqGW7/cDGMU8igDZQ==",
+      "dependencies": {
+        "gaxios": "^6.0.0",
+        "jws": "^4.0.0"
+      },
+      "engines": {
+        "node": ">=14.0.0"
+      }
+    },
     "node_modules/handlebars": {
       "version": "4.7.8",
       "dev": true,
@@ -6068,7 +6184,6 @@
     },
     "node_modules/is-stream": {
       "version": "2.0.1",
-      "dev": true,
       "license": "MIT",
       "engines": {
         "node": ">=8"
@@ -7699,6 +7814,14 @@
         "node": ">=4"
       }
     },
+    "node_modules/json-bigint": {
+      "version": "1.0.0",
+      "resolved": "https://registry.npmjs.org/json-bigint/-/json-bigint-1.0.0.tgz",
+      "integrity": "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==",
+      "dependencies": {
+        "bignumber.js": "^9.0.0"
+      }
+    },
     "node_modules/json-parse-even-better-errors": {
       "version": "2.3.1",
       "dev": true,
@@ -7732,6 +7855,25 @@
         "graceful-fs": "^4.1.6"
       }
     },
+    "node_modules/jwa": {
+      "version": "2.0.0",
+      "resolved": "https://registry.npmjs.org/jwa/-/jwa-2.0.0.tgz",
+      "integrity": "sha512-jrZ2Qx916EA+fq9cEAeCROWPTfCwi1IVHqT2tapuqLEVVDKFDENFw1oL+MwrTvH6msKxsd1YTDVw6uKEcsrLEA==",
+      "dependencies": {
+        "buffer-equal-constant-time": "1.0.1",
+        "ecdsa-sig-formatter": "1.0.11",
+        "safe-buffer": "^5.0.1"
+      }
+    },
+    "node_modules/jws": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/jws/-/jws-4.0.0.tgz",
+      "integrity": "sha512-KDncfTmOZoOMTFG4mBlG0qUIOlc03fmzH+ru6RgYVZhPkyiy/92Owlt/8UEN+a4TXR1FQetfIpJE8ApdvdVxTg==",
+      "dependencies": {
+        "jwa": "^2.0.0",
+        "safe-buffer": "^5.0.1"
+      }
+    },
     "node_modules/kind-of": {
       "version": "6.0.3",
       "dev": true,
@@ -8152,6 +8294,25 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/node-fetch": {
+      "version": "2.7.0",
+      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
+      "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
+      "dependencies": {
+        "whatwg-url": "^5.0.0"
+      },
+      "engines": {
+        "node": "4.x || >=6.0.0"
+      },
+      "peerDependencies": {
+        "encoding": "^0.1.0"
+      },
+      "peerDependenciesMeta": {
+        "encoding": {
+          "optional": true
+        }
+      }
+    },
     "node_modules/node-gyp-build-optional-packages": {
       "version": "5.0.6",
       "dev": true,
@@ -9148,7 +9309,6 @@
     },
     "node_modules/safe-buffer": {
       "version": "5.2.1",
-      "dev": true,
       "funding": [
         {
           "type": "github",
@@ -9804,6 +9964,11 @@
         "node": ">=8.0"
       }
     },
+    "node_modules/tr46": {
+      "version": "0.0.3",
+      "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
+      "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
+    },
     "node_modules/trim-newlines": {
       "version": "3.0.1",
       "dev": true,
@@ -10107,6 +10272,11 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
+    "node_modules/undici-types": {
+      "version": "5.26.5",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
+      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
+    },
     "node_modules/universalify": {
       "version": "0.1.2",
       "license": "MIT",
@@ -10146,6 +10316,11 @@
       "version": "4.0.1",
       "license": "MIT"
     },
+    "node_modules/url-template": {
+      "version": "2.0.8",
+      "resolved": "https://registry.npmjs.org/url-template/-/url-template-2.0.8.tgz",
+      "integrity": "sha512-XdVKMF4SJ0nP/O7XIPB0JwAEuT9lDIYnNsK8yGVe43y0AWoKeJNdv3ZNWh7ksJ6KqQFjOO6ox/VEitLnaVNufw=="
+    },
     "node_modules/utility-types": {
       "version": "3.10.0",
       "dev": true,
@@ -10154,6 +10329,18 @@
         "node": ">= 4"
       }
     },
+    "node_modules/uuid": {
+      "version": "9.0.1",
+      "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
+      "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
+      "funding": [
+        "https://github.com/sponsors/broofa",
+        "https://github.com/sponsors/ctavan"
+      ],
+      "bin": {
+        "uuid": "dist/bin/uuid"
+      }
+    },
     "node_modules/v8-to-istanbul": {
       "version": "9.1.0",
       "dev": true,
@@ -10201,6 +10388,20 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/webidl-conversions": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
+      "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
+    },
+    "node_modules/whatwg-url": {
+      "version": "5.0.0",
+      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
+      "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
+      "dependencies": {
+        "tr46": "~0.0.3",
+        "webidl-conversions": "^3.0.0"
+      }
+    },
     "node_modules/which": {
       "version": "2.0.2",
       "dev": true,
@@ -10555,6 +10756,27 @@
       "version": "0.0.2",
       "license": "ISC"
     },
+    "plugins/gsheet-extractor": {
+      "name": "@flatfile/plugin-gsheet-extractor",
+      "version": "0.1.0",
+      "license": "ISC",
+      "dependencies": {
+        "@flatfile/api": "^1.5.30",
+        "@flatfile/hooks": "^1.3.0",
+        "@flatfile/listener": "^0.3.15",
+        "@flatfile/util-common": "^0.2.2",
+        "@flatfile/util-extractor": "^0.4.6",
+        "@flatfile/util-file-buffer": "^0.1.2",
+        "googleapis": "^128.0.0",
+        "remeda": "^1.14.0"
+      },
+      "devDependencies": {
+        "@types/node": "^20.8.10"
+      },
+      "engines": {
+        "node": ">= 12"
+      }
+    },
     "plugins/job-handler": {
       "name": "@flatfile/plugin-job-handler",
       "version": "0.1.4",
@@ -10679,7 +10901,7 @@
     },
     "plugins/record-hook": {
       "name": "@flatfile/plugin-record-hook",
-      "version": "1.1.8",
+      "version": "1.1.9",
       "license": "ISC",
       "dependencies": {
         "@flatfile/api": "^1.5.33",
diff --git a/plugins/gsheet-extractor/CHANGELOG.md b/plugins/gsheet-extractor/CHANGELOG.md
new file mode 100644
index 000000000..653759bc8
--- /dev/null
+++ b/plugins/gsheet-extractor/CHANGELOG.md
@@ -0,0 +1 @@
+# @flatfile/plugin-gsheet-extractor
\ No newline at end of file
diff --git a/plugins/gsheet-extractor/README.md b/plugins/gsheet-extractor/README.md
new file mode 100644
index 000000000..78275c08f
--- /dev/null
+++ b/plugins/gsheet-extractor/README.md
@@ -0,0 +1,20 @@
+# @flatfile/plugin-gsheet-extractor
+
+This package parses Google Sheets (`.gsheet`) files and extracts their contents into Flatfile.
+
+`npm i @flatfile/plugin-gsheet-extractor`
+
+## Prerequisites (WIP)
+1. Create a Google service account and save its JSON key file (which contains the private key) locally.
+2. Create the following Flatfile secrets using the values from the Google service account JSON:
+  - google-cloud-project-id
+  - google-cloud-private-key-id
+  - google-cloud-private-key-1 (first half of the private key; it is too long for a single secret, so split it in half)
+  - google-cloud-private-key-2 (second half of the private key)
+  - google-cloud-client-email
+  - google-cloud-client-id
+  - google-cloud-client-cert-url
+3. Share one or more folders with your Google service account to give it access (this can be the root folder).
+
+## Get Started
+TODO
\ No newline at end of file
diff --git a/plugins/gsheet-extractor/package.json b/plugins/gsheet-extractor/package.json
new file mode 100644
index 000000000..035a1589d
--- /dev/null
+++ b/plugins/gsheet-extractor/package.json
@@ -0,0 +1,42 @@
+{
+  "name": "@flatfile/plugin-gsheet-extractor",
+  "version": "0.1.0",
+  "description": "A plugin for parsing gsheet files in Flatfile.",
+  "registryMetadata": {
+    "category": "extractors"
+  },
+  "engines": {
+    "node": ">= 12"
+  },
+  "source": "src/index.ts",
+  "main": "dist/main.js",
+  "module": "dist/module.mjs",
+  "types": "dist/types.d.ts",
+  "scripts": {
+    "build": "parcel build",
+    "dev": "parcel watch",
+    "check": "tsc ./**/*.ts --noEmit --esModuleInterop",
+    "test": "jest ./**/*.spec.ts --config=../../jest.config.js --runInBand"
+  },
+  "keywords": [],
+  "author": "David Boskovic",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/FlatFilers/flatfile-plugins.git",
+    "directory": "plugins/gsheet-extractor"
+  },
+  "license": "ISC",
+  "dependencies": {
+    "@flatfile/api": "^1.5.30",
+    "@flatfile/hooks": "^1.3.0",
+    "@flatfile/listener": "^0.3.15",
+    "@flatfile/util-common": "^0.2.2",
+    "@flatfile/util-extractor": "^0.4.6",
+    "@flatfile/util-file-buffer": "^0.1.2",
+    "googleapis": "^128.0.0",
+    "remeda": "^1.14.0"
+  },
+  "devDependencies": {
+    "@types/node": "^20.8.10"
+  }
+}
diff --git a/plugins/gsheet-extractor/src/extractor.ts b/plugins/gsheet-extractor/src/extractor.ts
new file mode 100644
index 000000000..a93da249c
--- /dev/null
+++ b/plugins/gsheet-extractor/src/extractor.ts
@@ -0,0 +1,234 @@
+import api, { Flatfile } from "@flatfile/api";
+import { JobStatus, JobType } from "@flatfile/api/api";
+import type { FlatfileEvent, FlatfileListener } from "@flatfile/listener";
+import { asyncBatch } from "@flatfile/util-common";
+import { SheetCapture, WorkbookCapture } from "@flatfile/util-extractor";
+import { getFileBuffer } from "@flatfile/util-file-buffer";
+import { mapValues } from "remeda";
+
+type Config = {
+  sheetName: string;
+  range: string;
+};
+
+/**
+ * File extractor, adapted heavily from flatfile extractor plugin.
+ *
+ * Main differences:
+ * - parseBuffer is async
+ * - get google cloud service account secrets from flatfile
+ */
+export const Extractor = (
+  fileExt: string | RegExp,
+  parseBuffer: (
+    buffer: Buffer,
+    options: Config & {
+      serviceAccount: Record<string, string>;
+    },
+  ) => Promise<WorkbookCapture>,
+  options?: Config,
+) => {
+  return (listener: FlatfileListener) => {
+    listener.on("file:created", async event => {
+      const { data: file } = await api.files.get(event.context.fileId);
+      if (file.mode === "export") return false;
+
+      if (typeof fileExt === "string" && !file.name.endsWith(fileExt)) {
+        return false;
+      }
+
+      if (fileExt instanceof RegExp && !fileExt.test(file.name)) return false;
+
+      const jobs = await api.jobs.create({
+        type: JobType.File,
+        operation: `extract-plugin-gsheet`,
+        status: JobStatus.Ready,
+        source: event.context.fileId,
+      });
+
+      await api.jobs.execute(jobs.data.id);
+    });
+
+    listener.on(
+      "job:ready",
+      { operation: `extract-plugin-gsheet` },
+      async event => {
+        const { data: file } = await api.files.get(event.context.fileId);
+
+        const buffer = await getFileBuffer(event);
+
+        const { jobId } = event.context;
+
+        try {
+          await api.jobs.ack(jobId, { progress: 3, info: "Parsing Sheets" });
+
+          /**
+           * TODO: figure out a way to get this service account data from secrets earlier,
+           * not as part of the extractor...
+           */
+          const serviceAccount = await getServiceAccount(event);
+
+          const capture = await parseBuffer(buffer, {
+            ...options,
+            serviceAccount,
+          });
+
+          const workbook = await createWorkbook(
+            event.context.environmentId,
+            file,
+            capture,
+          );
+
+          await api.jobs.ack(jobId, {
+            progress: 10,
+            info: "Adding records to Sheets",
+          });
+
+          let processedRecords = 0;
+
+          const totalLength = Object.values(capture).reduce(
+            (
+              acc: number,
+              sheet: {
+                data: unknown[];
+              },
+            ) => acc + (sheet?.data?.length || 0),
+            0,
+          );
+
+          for (const sheet of workbook.sheets) {
+            if (!capture[sheet.name]) continue;
+
+            await asyncBatch(
+              capture[sheet.name].data,
+              async chunk => {
+                await api.records.insert(sheet.id, chunk);
+
+                processedRecords += chunk.length;
+
+                const progress = Math.min(
+                  99,
+                  Math.round(10 + (90 * processedRecords) / totalLength),
+                );
+
+                await api.jobs.ack(jobId, {
+                  progress,
+                  info: "Adding records to Sheets",
+                });
+              },
+              { chunkSize: 10000, parallel: 1, debug: false },
+            );
+          }
+
+          await api.files.update(file.id, {
+            workbookId: workbook.id,
+          });
+
+          await api.jobs.complete(jobId, {
+            info: "Extraction complete",
+            outcome: {
+              message: "Extracted file",
+            },
+          });
+        } catch (error) {
+          await api.jobs.fail(jobId, {
+            info: `Extraction failed ${error.message}`,
+          });
+        }
+      },
+    );
+  };
+};
+
+async function createWorkbook(
+  environmentId: string,
+  file: Flatfile.File_,
+  workbookCapture: WorkbookCapture,
+): Promise<Flatfile.Workbook> {
+  const workbookConfig = getWorkbookConfig(
+    file.name,
+    file.spaceId,
+    environmentId,
+    workbookCapture,
+  );
+  const workbook = await api.workbooks.create(workbookConfig);
+
+  if (!workbook.data.sheets || workbook.data.sheets.length === 0) {
+    throw new Error("No sheets found");
+  }
+
+  return workbook.data;
+}
+
+function getWorkbookConfig(
+  name: string,
+  spaceId: string,
+  environmentId: string,
+  workbookCapture: WorkbookCapture,
+): Flatfile.CreateWorkbookConfig {
+  const sheets = Object.values(
+    mapValues(workbookCapture, (sheet, sheetName) => {
+      return getSheetConfig(sheetName, sheet);
+    }),
+  );
+
+  return {
+    name: `[file] ${name}`,
+    labels: ["file"],
+    spaceId,
+    environmentId,
+    sheets,
+  };
+}
+
+function getSheetConfig(
+  name: string,
+  { headers, required, descriptions }: SheetCapture,
+): Flatfile.SheetConfig {
+  return {
+    name,
+    fields: headers.map(key => ({
+      key,
+      label: key,
+      description: descriptions?.[key] || "",
+      type: "string",
+      constraints: required?.[key] ? [{ type: "required" }] : [],
+    })),
+  };
+}
+
+async function getServiceAccount(event: FlatfileEvent) {
+  const [
+    projectId,
+    privateKeyId,
+    privateKey1,
+    privateKey2,
+    clientEmail,
+    clientId,
+    clientCertUrl,
+  ] = await Promise.all([
+    event.secrets("google-cloud-project-id"),
+    event.secrets("google-cloud-private-key-id"),
+    // Flatfile secrets can "only" hold 1024 characters, so had to cut it in half.
+    event.secrets("google-cloud-private-key-1"),
+    event.secrets("google-cloud-private-key-2"),
+    event.secrets("google-cloud-client-email"),
+    event.secrets("google-cloud-client-id"),
+    event.secrets("google-cloud-client-cert-url"),
+  ]);
+
+  return {
+    type: "service_account",
+    project_id: projectId,
+    private_key_id: privateKeyId,
+    // Seems that flatfile are escaping newlines in a weird way, so we remove it again.
+    private_key: (privateKey1 + privateKey2).replace(/\\n/g, "\n"),
+    client_email: clientEmail,
+    client_id: clientId,
+    auth_uri: "https://accounts.google.com/o/oauth2/auth",
+    token_uri: "https://oauth2.googleapis.com/token",
+    auth_provider_x509_cert_url: "https://www.googleapis.com/oauth2/v1/certs",
+    client_x509_cert_url: clientCertUrl,
+    universe_domain: "googleapis.com",
+  };
+}
diff --git a/plugins/gsheet-extractor/src/index.ts b/plugins/gsheet-extractor/src/index.ts
new file mode 100644
index 000000000..224ca5f9e
--- /dev/null
+++ b/plugins/gsheet-extractor/src/index.ts
@@ -0,0 +1,9 @@
+import { Extractor } from "./extractor";
+import { parseBuffer } from "./parser";
+
+export const GSheetExtractor = (options?: {
+  sheetName: string;
+  range: string;
+}) => {
+  return Extractor(/\.gsheet$/i, parseBuffer, options);
+};
diff --git a/plugins/gsheet-extractor/src/parser.ts b/plugins/gsheet-extractor/src/parser.ts
new file mode 100644
index 000000000..2523ba2e4
--- /dev/null
+++ b/plugins/gsheet-extractor/src/parser.ts
@@ -0,0 +1,63 @@
+import { RecordData } from "@flatfile/api/api";
+import { WorkbookCapture } from "@flatfile/util-extractor";
+import { google } from "googleapis";
+
+type GsheetFile = {
+  doc_id: string;
+  resource_key: string; // probably empty
+  email: string;
+};
+
+const sheets = google.sheets("v4");
+
+export async function parseBuffer(
+  buffer: Buffer,
+  options: {
+    sheetName: string;
+    range: string;
+    serviceAccount: Record<string, string>;
+  },
+): Promise<WorkbookCapture> {
+  const auth = getAuth(options.serviceAccount);
+
+  const data = JSON.parse(buffer.toString()) as GsheetFile;
+
+  const response = await sheets.spreadsheets.values.get({
+    auth,
+    spreadsheetId: data.doc_id,
+    range: `${options.sheetName}!${options.range}`,
+  });
+
+  const headers = response.data.values[0] as string[];
+
+  const values = [] as RecordData[];
+
+  for (const row of response.data.values.slice(1)) {
+    const value = {} as RecordData;
+
+    for (let index = 0; index < row.length; index++) {
+      // Don't want to save empty headers.
+      if (!headers[index]) continue;
+
+      value[headers[index]] = {
+        value: row[index],
+      };
+    }
+
+    values.push(value);
+  }
+
+  return {
+    [options.sheetName]: {
+      headers: response.data.values[0] as string[],
+      data: values,
+    },
+  };
+}
+
+function getAuth(serviceAccount: Record<string, string>) {
+  return new google.auth.GoogleAuth({
+    credentials: serviceAccount,
+    scopes: ["https://www.googleapis.com/auth/spreadsheets.readonly"],
+  });
+}

From 7caad86d8c7deee4ff03ba08c2059656d33acbe0 Mon Sep 17 00:00:00 2001
From: kasperstorgaard <kasper.storgaard@gmail.com>
Date: Thu, 2 Nov 2023 08:48:15 +0100
Subject: [PATCH 2/3] make the plugin more generic

---
 plugins/gsheet-extractor/src/extractor.ts | 195 +++++++++-------------
 plugins/gsheet-extractor/src/index.ts     |  18 +-
 plugins/gsheet-extractor/src/parser.ts    | 143 +++++++++++-----
 3 files changed, 195 insertions(+), 161 deletions(-)

diff --git a/plugins/gsheet-extractor/src/extractor.ts b/plugins/gsheet-extractor/src/extractor.ts
index a93da249c..d603223fd 100644
--- a/plugins/gsheet-extractor/src/extractor.ts
+++ b/plugins/gsheet-extractor/src/extractor.ts
@@ -1,15 +1,14 @@
-import api, { Flatfile } from "@flatfile/api";
-import { JobStatus, JobType } from "@flatfile/api/api";
-import type { FlatfileEvent, FlatfileListener } from "@flatfile/listener";
-import { asyncBatch } from "@flatfile/util-common";
-import { SheetCapture, WorkbookCapture } from "@flatfile/util-extractor";
-import { getFileBuffer } from "@flatfile/util-file-buffer";
-import { mapValues } from "remeda";
+import api, { Flatfile } from '@flatfile/api'
+import { JobStatus, JobType } from '@flatfile/api/api'
+import type { FlatfileEvent, FlatfileListener } from '@flatfile/listener'
+import { asyncBatch } from '@flatfile/util-common'
+import { SheetCapture, WorkbookCapture } from '@flatfile/util-extractor'
+import { getFileBuffer } from '@flatfile/util-file-buffer'
+import { mapValues } from 'remeda'
 
 type Config = {
-  sheetName: string;
-  range: string;
-};
+  sheetRange: Record<string, string>
+}
 
 /**
  * File extractor, adapted heavily from flatfile extractor plugin.
@@ -23,212 +22,172 @@ export const Extractor = (
   parseBuffer: (
     buffer: Buffer,
     options: Config & {
-      serviceAccount: Record<string, string>;
+      getSecret: (key: string) => Promise<string>
     },
   ) => Promise<WorkbookCapture>,
-  options?: Config,
+  config: Config,
 ) => {
   return (listener: FlatfileListener) => {
-    listener.on("file:created", async event => {
-      const { data: file } = await api.files.get(event.context.fileId);
-      if (file.mode === "export") return false;
+    listener.on('file:created', async (event) => {
+      const { data: file } = await api.files.get(event.context.fileId)
+      if (file.mode === 'export') return false
 
-      if (typeof fileExt === "string" && !file.name.endsWith(fileExt)) {
-        return false;
+      if (typeof fileExt === 'string' && !file.name.endsWith(fileExt)) {
+        return false
       }
 
-      if (fileExt instanceof RegExp && !fileExt.test(file.name)) return false;
+      if (fileExt instanceof RegExp && !fileExt.test(file.name)) return false
 
       const jobs = await api.jobs.create({
         type: JobType.File,
         operation: `extract-plugin-gsheet`,
         status: JobStatus.Ready,
         source: event.context.fileId,
-      });
+      })
 
-      await api.jobs.execute(jobs.data.id);
-    });
+      await api.jobs.execute(jobs.data.id)
+    })
 
     listener.on(
-      "job:ready",
+      'job:ready',
       { operation: `extract-plugin-gsheet` },
-      async event => {
-        const { data: file } = await api.files.get(event.context.fileId);
+      async (event) => {
+        const { data: file } = await api.files.get(event.context.fileId)
 
-        const buffer = await getFileBuffer(event);
+        const buffer = await getFileBuffer(event)
 
-        const { jobId } = event.context;
+        const { jobId } = event.context
 
         try {
-          await api.jobs.ack(jobId, { progress: 3, info: "Parsing Sheets" });
-
-          /**
-           * TODO: figure out a way to get this service account data from secrets earlier,
-           * not as part of the extractor...
-           */
-          const serviceAccount = await getServiceAccount(event);
+          await api.jobs.ack(jobId, { progress: 3, info: 'Parsing Sheets' })
 
           const capture = await parseBuffer(buffer, {
-            ...options,
-            serviceAccount,
-          });
+            ...config,
+            getSecret: (key: string) => event.secrets(key),
+          })
 
           const workbook = await createWorkbook(
             event.context.environmentId,
             file,
-            capture,
-          );
+            capture
+          )
 
           await api.jobs.ack(jobId, {
             progress: 10,
-            info: "Adding records to Sheets",
-          });
+            info: 'Adding records to Sheets',
+          })
 
-          let processedRecords = 0;
+          let processedRecords = 0
 
           const totalLength = Object.values(capture).reduce(
             (
               acc: number,
               sheet: {
-                data: unknown[];
-              },
+                data: unknown[]
+              }
             ) => acc + (sheet?.data?.length || 0),
-            0,
-          );
+            0
+          )
 
           for (const sheet of workbook.sheets) {
-            if (!capture[sheet.name]) continue;
+            if (!capture[sheet.name]) continue
 
             await asyncBatch(
               capture[sheet.name].data,
-              async chunk => {
-                await api.records.insert(sheet.id, chunk);
+              async (chunk) => {
+                await api.records.insert(sheet.id, chunk)
 
-                processedRecords += chunk.length;
+                processedRecords += chunk.length
 
                 const progress = Math.min(
                   99,
-                  Math.round(10 + (90 * processedRecords) / totalLength),
-                );
+                  Math.round(10 + (90 * processedRecords) / totalLength)
+                )
 
                 await api.jobs.ack(jobId, {
                   progress,
-                  info: "Adding records to Sheets",
-                });
+                  info: 'Adding records to Sheets',
+                })
               },
-              { chunkSize: 10000, parallel: 1, debug: false },
-            );
+              { chunkSize: 10000, parallel: 1, debug: false }
+            )
           }
 
           await api.files.update(file.id, {
             workbookId: workbook.id,
-          });
+          })
 
           await api.jobs.complete(jobId, {
-            info: "Extraction complete",
+            info: 'Extraction complete',
             outcome: {
-              message: "Extracted file",
+              message: 'Extracted file',
             },
-          });
+          })
         } catch (error) {
+          console.error(error.message)
+
           await api.jobs.fail(jobId, {
             info: `Extraction failed ${error.message}`,
-          });
+          })
         }
-      },
-    );
-  };
-};
+      }
+    )
+  }
+}
 
 async function createWorkbook(
   environmentId: string,
   file: Flatfile.File_,
-  workbookCapture: WorkbookCapture,
+  workbookCapture: WorkbookCapture
 ): Promise<Flatfile.Workbook> {
   const workbookConfig = getWorkbookConfig(
     file.name,
     file.spaceId,
     environmentId,
-    workbookCapture,
-  );
-  const workbook = await api.workbooks.create(workbookConfig);
+    workbookCapture
+  )
+  const workbook = await api.workbooks.create(workbookConfig)
 
   if (!workbook.data.sheets || workbook.data.sheets.length === 0) {
-    throw new Error("No sheets found");
+    throw new Error('No sheets found')
   }
 
-  return workbook.data;
+  return workbook.data
 }
 
 function getWorkbookConfig(
   name: string,
   spaceId: string,
   environmentId: string,
-  workbookCapture: WorkbookCapture,
+  workbookCapture: WorkbookCapture
 ): Flatfile.CreateWorkbookConfig {
   const sheets = Object.values(
     mapValues(workbookCapture, (sheet, sheetName) => {
-      return getSheetConfig(sheetName, sheet);
-    }),
-  );
+      return getSheetConfig(sheetName, sheet)
+    })
+  )
 
   return {
     name: `[file] ${name}`,
-    labels: ["file"],
+    labels: ['file'],
     spaceId,
     environmentId,
     sheets,
-  };
+  }
 }
 
 function getSheetConfig(
   name: string,
-  { headers, required, descriptions }: SheetCapture,
+  { headers, required, descriptions }: SheetCapture
 ): Flatfile.SheetConfig {
   return {
     name,
-    fields: headers.map(key => ({
+    fields: headers.map((key) => ({
       key,
       label: key,
-      description: descriptions?.[key] || "",
-      type: "string",
-      constraints: required?.[key] ? [{ type: "required" }] : [],
+      description: descriptions?.[key] || '',
+      type: 'string',
+      constraints: required?.[key] ? [{ type: 'required' }] : [],
     })),
-  };
-}
-
-async function getServiceAccount(event: FlatfileEvent) {
-  const [
-    projectId,
-    privateKeyId,
-    privateKey1,
-    privateKey2,
-    clientEmail,
-    clientId,
-    clientCertUrl,
-  ] = await Promise.all([
-    event.secrets("google-cloud-project-id"),
-    event.secrets("google-cloud-private-key-id"),
-    // Flatfile secrets can "only" hold 1024 characters, so had to cut it in half.
-    event.secrets("google-cloud-private-key-1"),
-    event.secrets("google-cloud-private-key-2"),
-    event.secrets("google-cloud-client-email"),
-    event.secrets("google-cloud-client-id"),
-    event.secrets("google-cloud-client-cert-url"),
-  ]);
-
-  return {
-    type: "service_account",
-    project_id: projectId,
-    private_key_id: privateKeyId,
-    // Seems that flatfile are escaping newlines in a weird way, so we remove it again.
-    private_key: (privateKey1 + privateKey2).replace(/\\n/g, "\n"),
-    client_email: clientEmail,
-    client_id: clientId,
-    auth_uri: "https://accounts.google.com/o/oauth2/auth",
-    token_uri: "https://oauth2.googleapis.com/token",
-    auth_provider_x509_cert_url: "https://www.googleapis.com/oauth2/v1/certs",
-    client_x509_cert_url: clientCertUrl,
-    universe_domain: "googleapis.com",
-  };
+  }
 }
diff --git a/plugins/gsheet-extractor/src/index.ts b/plugins/gsheet-extractor/src/index.ts
index 224ca5f9e..fd043d3ad 100644
--- a/plugins/gsheet-extractor/src/index.ts
+++ b/plugins/gsheet-extractor/src/index.ts
@@ -1,9 +1,21 @@
 import { Extractor } from "./extractor";
 import { parseBuffer } from "./parser";
 
-export const GSheetExtractor = (options?: {
-  sheetName: string;
-  range: string;
-}) => {
+/**
+ * Plugin config options.
+ */
+export interface GsheetExtractorOptions {
+  /** Use if you need a custom subset of columns + rows (example C4:Z). */
+  readonly sheetRange?: string
+}
+
+/**
+ * Builds the extractor by calling `Extractor` with the `.gsheet` file-name
+ * pattern (case-insensitive), `parseBuffer` and the given options.
+ *
+ * @param options - optional plugin config; defaults to an empty object so
+ * callers that pass no argument (allowed before this change) keep working.
+ */
+export const GSheetExtractor = (options: GsheetExtractorOptions = {}) => {
   return Extractor(/\.gsheet$/i, parseBuffer, options);
 };
diff --git a/plugins/gsheet-extractor/src/parser.ts b/plugins/gsheet-extractor/src/parser.ts
index 2523ba2e4..98fa27662 100644
--- a/plugins/gsheet-extractor/src/parser.ts
+++ b/plugins/gsheet-extractor/src/parser.ts
@@ -1,63 +1,166 @@
-import { RecordData } from "@flatfile/api/api";
-import { WorkbookCapture } from "@flatfile/util-extractor";
-import { google } from "googleapis";
+import { RecordData } from '@flatfile/api/api'
+import { WorkbookCapture } from '@flatfile/util-extractor'
+import { google } from 'googleapis'
 
 type GsheetFile = {
-  doc_id: string;
-  resource_key: string; // probably empty
-  email: string;
-};
+  doc_id: string
+  resource_key: string // probably empty
+  email: string
+}
 
-const sheets = google.sheets("v4");
+const sheets = google.sheets('v4')
 
+/**
+ * Reads every sheet of the Google spreadsheet referenced by a `.gsheet` file.
+ *
+ * The first returned row of each sheet becomes its header row: empty header
+ * cells are named "EMPTY" and repeated names get a `--<n>` suffix so keys stay
+ * unique. Sheets that return no values are skipped.
+ *
+ * @param buffer - JSON contents of the `.gsheet` file; must contain `doc_id`.
+ * @param options.getSecret - resolves the `google-cloud-*` secrets used to
+ * build the service account credentials.
+ * @param options.sheetRange - optional A1 range (example C4:Z) applied to
+ * every sheet instead of reading the whole sheet.
+ * @returns one capture (headers + rows) per non-empty sheet, keyed by title.
+ * @throws if the buffer is not valid JSON or has no `doc_id`.
+ */
 export async function parseBuffer(
   buffer: Buffer,
   options: {
-    sheetName: string;
-    range: string;
-    serviceAccount: Record<string, string>;
-  },
+    getSecret: (key: string) => Promise<string>
+    sheetRange?: string
+  }
 ): Promise<WorkbookCapture> {
-  const auth = getAuth(options.serviceAccount);
+  const serviceAccount = await getServiceAccount(options)
+
+  const auth = new google.auth.GoogleAuth({
+    credentials: serviceAccount,
+    scopes: ['https://www.googleapis.com/auth/spreadsheets.readonly'],
+  })
 
-  const data = JSON.parse(buffer.toString()) as GsheetFile;
+  const data = JSON.parse(buffer.toString()) as GsheetFile
+
+  if (!data?.doc_id) {
+    throw new Error('Invalid .gsheet file: missing "doc_id"')
+  }
 
-  const response = await sheets.spreadsheets.values.get({
+  const sheetsResponse = await sheets.spreadsheets.get({
     auth,
     spreadsheetId: data.doc_id,
-    range: `${options.sheetName}!${options.range}`,
-  });
+  })
+
+  const workbooks: WorkbookCapture = {}
+
+  for (const sheet of sheetsResponse.data.sheets ?? []) {
+    const title = sheet.properties.title
+
+    // Quote the title: A1 notation needs quotes around sheet names with
+    // spaces or special characters, and an unquoted name such as "A1" would
+    // be read as a cell reference. Embedded single quotes are doubled.
+    const sheetRef = `'${title.replace(/'/g, "''")}'`
 
-  const headers = response.data.values[0] as string[];
+    const valuesResponse = await sheets.spreadsheets.values.get({
+      auth,
+      spreadsheetId: data.doc_id,
+      range: options.sheetRange ? `${sheetRef}!${options.sheetRange}` : sheetRef,
+    })
+
+    // The API omits `values` when the requested range is empty.
+    const rows = valuesResponse.data.values ?? []
+    if (rows.length === 0) continue
 
-  const values = [] as RecordData[];
+    const headers: string[] = []
 
-  for (const row of response.data.values.slice(1)) {
-    const value = {} as RecordData;
+    for (let header of rows[0]) {
+      let renameCount = 0
 
-    for (let index = 0; index < row.length; index++) {
-      // Don't want to save empty headers.
-      if (!headers[index]) continue;
+      // Empty headers can happen, use "EMPTY" instead when encountered.
+      if (header === '') {
+        header = 'EMPTY'
+      }
 
-      value[headers[index]] = {
-        value: row[index],
-      };
+      // Make sure we do not have duplicate header names. The suffix is always
+      // appended to the original name, so it stays correct past "--9".
+      const baseHeader = header
+      while (headers.includes(header)) {
+        renameCount++
+        header = `${baseHeader}--${renameCount}`
+      }
+
+      headers.push(header)
     }
 
-    values.push(value);
-  }
+    const values: RecordData[] = []
 
-  return {
-    [options.sheetName]: {
-      headers: response.data.values[0] as string[],
+    for (const row of rows.slice(1)) {
+      const value: RecordData = {}
+
+      // Cells beyond the header row have no key to be stored under.
+      const width = Math.min(row.length, headers.length)
+
+      for (let index = 0; index < width; index++) {
+        value[headers[index]] = {
+          value: row[index],
+        }
+      }
+
+      values.push(value)
+    }
+
+    workbooks[title] = {
+      headers,
       data: values,
-    },
-  };
+    }
+  }
+
+  return workbooks
 }
 
-function getAuth(serviceAccount: Record<string, string>) {
-  return new google.auth.GoogleAuth({
-    credentials: serviceAccount,
-    scopes: ["https://www.googleapis.com/auth/spreadsheets.readonly"],
-  });
+/**
+ * Assembles Google service account credentials from Flatfile secrets.
+ *
+ * All secrets are fetched in parallel through `options.getSecret`. The private
+ * key is stored as two secrets and joined here, with escaped `\n` sequences
+ * turned back into real newlines.
+ *
+ * @param options.getSecret - resolves a secret value by its key.
+ * @returns a credentials object in the service account key layout.
+ */
+async function getServiceAccount(options: {
+  getSecret: (key: string) => Promise<string>
+}) {
+  const [
+    projectId,
+    privateKeyId,
+    privateKey1,
+    privateKey2,
+    clientEmail,
+    clientId,
+    clientCertUrl,
+  ] = await Promise.all([
+    options.getSecret('google-cloud-project-id'),
+    options.getSecret('google-cloud-private-key-id'),
+    // Flatfile secrets can "only" hold 1024 characters, so had to cut it in half.
+    options.getSecret('google-cloud-private-key-1'),
+    options.getSecret('google-cloud-private-key-2'),
+    options.getSecret('google-cloud-client-email'),
+    options.getSecret('google-cloud-client-id'),
+    options.getSecret('google-cloud-client-cert-url'),
+  ])
+
+  return {
+    type: 'service_account',
+    project_id: projectId,
+    private_key_id: privateKeyId,
+    // Seems that flatfile are escaping newlines in a weird way, so we remove it again.
+    private_key: (privateKey1 + privateKey2).replace(/\\n/g, '\n'),
+    client_email: clientEmail,
+    client_id: clientId,
+    auth_uri: 'https://accounts.google.com/o/oauth2/auth',
+    token_uri: 'https://oauth2.googleapis.com/token',
+    auth_provider_x509_cert_url: 'https://www.googleapis.com/oauth2/v1/certs',
+    client_x509_cert_url: clientCertUrl,
+    universe_domain: 'googleapis.com',
+  }
 }

From 51b2c9141834b153df13e73e51bc8f13dbe877f7 Mon Sep 17 00:00:00 2001
From: kasperstorgaard <kasper.storgaard@gmail.com>
Date: Thu, 2 Nov 2023 09:10:50 +0100
Subject: [PATCH 3/3] add author

---
 plugins/gsheet-extractor/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/plugins/gsheet-extractor/package.json b/plugins/gsheet-extractor/package.json
index 035a1589d..333b133d2 100644
--- a/plugins/gsheet-extractor/package.json
+++ b/plugins/gsheet-extractor/package.json
@@ -19,7 +19,7 @@
     "test": "jest ./**/*.spec.ts --config=../../jest.config.js --runInBand"
   },
   "keywords": [],
-  "author": "David Boskovic",
+  "author": "Kasper Storgaard",
   "repository": {
     "type": "git",
     "url": "https://github.com/FlatFilers/flatfile-plugins.git",