feat(list): list seeder (#4)

* feat(list): list seeder
* chore(list): remove unnecessary check, add test assertion
Pocket · Mar 1, 2022 · 0441e3a · 0441e3a
1 parent 61ad441
commit 0441e3a
Show file tree

Hide file tree

Showing 6 changed files with 245 additions and 0 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -52,6 +52,7 @@
     ]
   },
   "dependencies": {
+    "chance": "^1.1.8",
     "jest": "^27.5.1",
     "ts-jest": "^27.1.3"
   }

diff --git a/src/index.ts b/src/index.ts
@@ -2,3 +2,4 @@
 // anything we want consumable (module, type, class, etc) should be exported here
 
 export * from './timeIt';
+export * as seeds from './seeds';
diff --git a/src/seeds/index.ts b/src/seeds/index.ts
@@ -0,0 +1 @@
+export * from './list';
diff --git a/src/seeds/list.spec.ts b/src/seeds/list.spec.ts
@@ -0,0 +1,87 @@
+import { mockList } from './list';
+
+describe('mockList', () => {
+  it('should produce data when total count is divisible by batch size', () => {
+    const batchLengths: number[] = [];
+    const seenIds: number[] = [];
+    for (const { list } of mockList('abc123', { count: 10, batchSize: 2 })) {
+      // Record the batch size as soon as the batch is produced
+      batchLengths.push(list.length);
+      for (const entry of list) {
+        seenIds.push(entry.item_id);
+      }
+    }
+    // Ids are sequential and never repeated across batches
+    expect(seenIds).toStrictEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
+    expect(batchLengths).toStrictEqual([2, 2, 2, 2, 2]);
+  });
+  it('should produce data when total count is not divisible by batch size', () => {
+    const batchLengths: number[] = [];
+    for (const { list } of mockList('abc123', { count: 10, batchSize: 3 })) {
+      // Record the batch size as soon as the batch is produced
+      batchLengths.push(list.length);
+    }
+    expect(batchLengths).toStrictEqual([3, 3, 3, 1]);
+  });
+  it('should have characteristics according to data options', () => {
+    const lowerBound = 1646096295000;
+    const upperBound = 1646182692000;
+    const archivedFlags: number[] = [];
+    const favoriteFlags: number[] = [];
+    const articleFlags: number[] = [];
+    const epochs: (number | undefined)[] = [];
+    // Flags are 0/1, so their arithmetic mean is the observed rate
+    const mean = (flags: number[]): number =>
+      flags.reduce((total, flag) => total + flag, 0) / flags.length;
+
+    const generator = mockList('abc123', {
+      minTimestamp: lowerBound,
+      maxTimestamp: upperBound,
+    });
+    for (const { list, items_extended } of generator) {
+      for (const entry of list) {
+        archivedFlags.push(entry.status);
+        favoriteFlags.push(entry.favorite);
+        // Unset optional timestamps are kept as undefined so each row adds four slots
+        epochs.push(
+          entry.time_read?.getTime(),
+          entry.time_favorited?.getTime(),
+          entry.time_added.getTime(),
+          entry.time_updated.getTime()
+        );
+      }
+      for (const extended of items_extended) {
+        articleFlags.push(extended.is_article);
+      }
+    }
+    const definedEpochs = epochs.filter(
+      (epoch): epoch is number => epoch != null
+    );
+
+    expect(archivedFlags.length).toEqual(1000);
+    expect(favoriteFlags.length).toEqual(1000);
+    expect(articleFlags.length).toEqual(1000);
+    expect(epochs.length).toEqual(4000);
+    // 0.05 tolerance to reduce flakiness
+    expect(mean(articleFlags)).toBeCloseTo(0.9, 1);
+    expect(mean(favoriteFlags)).toBeCloseTo(0.1, 1);
+    expect(mean(archivedFlags)).toBeCloseTo(0.2, 1);
+    expect(Math.min(...definedEpochs)).toBeGreaterThanOrEqual(lowerBound);
+    expect(Math.max(...definedEpochs)).toBeLessThanOrEqual(upperBound);
+  });
+});
diff --git a/src/seeds/list.ts b/src/seeds/list.ts
@@ -0,0 +1,144 @@
+import Chance from 'chance';
+
+interface ListDataOptions { // Optional tuning for mockList; omitted fields use the defaults noted below.
+  /** The total number of SavedItems to create for a mock user. Defaults to 1000. */
+  count?: number;
+  /** The number of results in each iteration (via `next()`). Defaults to 100. */
+  batchSize?: number;
+  /** Random chance for a SavedItem to be marked as favorite. Defaults to 0.1. */
+  favoriteRate?: number;
+  /** Random chance for a SavedItem to be marked as archived. Defaults to 0.2. */
+  archiveRate?: number;
+  /** Random chance for a SavedItem to be an Article (vs. a Video). Defaults to 0.9. */
+  articleRate?: number;
+  /** Epoch time (ms). A lower boundary for all timestamp fields. Defaults to 1298613211000. */
+  minTimestamp?: number;
+  /** Epoch time (ms). An upper boundary for all timestamp fields. Defaults to 1645768411000. */
+  maxTimestamp?: number;
+}
+
+/**
+ * Subset of list entity which can be inserted into legacy schema.
+ * Element type of `ListDataResponse.list`.
+ */
+interface ListEntity {
+  user_id: number; // NOTE(review): mockList assigns its string `userId` here -- confirm intended type
+  item_id: number; // mockList uses the zero-based position of the item in the generated list
+  resolved_id: number; // mockList sets this to the same value as item_id
+  given_url: string;
+  title: string;
+  time_added: Date;
+  time_updated: Date; // mockList never generates a value earlier than time_added
+  status: number; // mockList writes 1 for archived items and 0 otherwise
+  time_read: Date | undefined; // mockList sets this only when status is 1
+  favorite: number; // mockList writes 1 for favorited items and 0 otherwise
+  time_favorited: Date | undefined; // mockList sets this only when favorite is 1
+  api_id: string;
+  api_id_updated: string;
+}
+
+/**
+ * Subset of ItemsExtended entity which can be inserted into legacy schema.
+ * Element type of `ListDataResponse.items_extended`.
+ */
+interface ItemsExtendedEntity {
+  extended_item_id: number; // mockList uses the same value as the paired ListEntity.item_id
+  video: number; // mockList writes 1 when the item is not an article, else 0
+  is_article: number; // mockList writes 1 for articles, else 0
+}
+
+interface ListDataResponse { // One batch of generated rows, as yielded by mockList.
+  list: ListEntity[]; // at most `batchSize` entries per batch
+  items_extended: ItemsExtendedEntity[]; // mockList fills this at the same indices as `list`
+}
+
+/**
+ * Generate a mock list for a user. This method returns an iterator which
+ * provides data that can be inserted into the list, plus additional metadata
+ * used for filters.
+ * This just returns data which can be inserted into a test database -- the calling
+ * method must handle the database calls.
+ * This function is a generator to avoid memory issues when creating very large lists.
+ * Every batch is yielded in freshly allocated arrays, so batches may be retained
+ * or collected (e.g. `Array.from(mockList(id))`) without being overwritten later.
+ * When the iterator is finished, `value` will be undefined.
+ *
+ * Example:
+ * ```
+ * const listGenerator = mockList('abc123');
+ * // Get the first batch
+ * let batch = listGenerator.next();
+ * while (!batch.done) {
+ *   // handle data insert here
+ *   // await insertData(batch.value);
+ *   batch = listGenerator.next();
+ * }
+ * ```
+ *
+ * @param userId a fake userId to generate the list
+ * @param options options controlling the size of the list, batch in each iteration,
+ *  and the mocks (e.g. chance to be favorited or archived).
+ * @returns a generator of batches holding at most `batchSize` rows each; entries
+ *  at the same index of `list` and `items_extended` describe the same item.
+ * @throws RangeError when `batchSize` is not a positive integer.
+ */
+export function* mockList(
+  userId: string,
+  options?: ListDataOptions
+): Generator<ListDataResponse> {
+  // Set defaults
+  const {
+    count = 1000,
+    batchSize = 100,
+    favoriteRate = 0.1,
+    archiveRate = 0.2,
+    articleRate = 0.9,
+    minTimestamp = 1298613211000,
+    maxTimestamp = 1645768411000,
+  } = options ?? {};
+
+  // `index % 0` is NaN, so a zero batch size would silently yield nothing.
+  if (!Number.isInteger(batchSize) || batchSize < 1) {
+    throw new RangeError(`batchSize must be a positive integer: ${batchSize}`);
+  }
+
+  const chance = new Chance();
+  const apiIds = ['1234', '5678', '1111', '9999'];
+  let index = 0;
+  let listData = Array(batchSize);
+  let extendedData = Array(batchSize);
+  // Populate the data
+  while (index < count) {
+    const timeAdded = chance.integer({ min: minTimestamp, max: maxTimestamp });
+    const timeUpdated = chance.integer({ min: timeAdded, max: maxTimestamp });
+    const isArchived = Math.random() < archiveRate;
+    const isFavorite = Math.random() < favoriteRate;
+    const isArticle = Math.random() < articleRate; // otherwise video
+
+    listData[index % batchSize] = {
+      // NOTE(review): `userId` is a string while ListEntity.user_id is declared
+      // as number; confirm which type the legacy schema expects.
+      user_id: userId,
+      item_id: index,
+      resolved_id: index,
+      given_url: chance.url(),
+      // Title is a random sentence between 4 and 12 words
+      title: chance.sentence({ words: chance.integer({ min: 4, max: 12 }) }),
+      time_added: new Date(timeAdded),
+      time_updated: new Date(timeUpdated),
+      status: isArchived ? 1 : 0,
+      // Read/favorited times fall between when the item was added and last updated
+      time_read: isArchived
+        ? new Date(chance.integer({ min: timeAdded, max: timeUpdated }))
+        : undefined,
+      favorite: isFavorite ? 1 : 0,
+      time_favorited: isFavorite
+        ? new Date(chance.integer({ min: timeAdded, max: timeUpdated }))
+        : undefined,
+      api_id: chance.pickone(apiIds),
+      api_id_updated: chance.pickone(apiIds),
+    };
+    extendedData[index % batchSize] = {
+      extended_item_id: index,
+      video: isArticle ? 0 : 1,
+      is_article: isArticle ? 1 : 0,
+    };
+    index += 1;
+    if (index % batchSize === 0) {
+      yield { list: listData, items_extended: extendedData };
+      // The caller now owns the yielded arrays; start new buffers so later
+      // items cannot overwrite a batch the caller still holds.
+      listData = Array(batchSize);
+      extendedData = Array(batchSize);
+    }
+  }
+  // If the count doesn't evenly divide with batch size, yield what we have left
+  const leftover = index % batchSize;
+  if (leftover) {
+    yield {
+      list: listData.slice(0, leftover),
+      items_extended: extendedData.slice(0, leftover),
+    };
+  }
+}
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,3 +2,4 @@
		// anything we want consumable (module, type, class, etc) should be exported here

		export * from './timeIt';
		export * as seeds from './seeds';