Skip to content
This repository has been archived by the owner on Jan 5, 2024. It is now read-only.

Commit

Permalink
feat(list): list seeder (#4)
Browse files Browse the repository at this point in the history
* feat(list): list seeder

* chore(list): remove unnecessary check

add test assertion
  • Loading branch information
kschelonka authored Mar 1, 2022
1 parent 61ad441 commit 0441e3a
Show file tree
Hide file tree
Showing 6 changed files with 245 additions and 0 deletions.
11 changes: 11 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
]
},
"dependencies": {
"chance": "^1.1.8",
"jest": "^27.5.1",
"ts-jest": "^27.1.3"
}
Expand Down
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
// anything we want consumable (module, type, class, etc) should be exported here

export * from './timeIt';
export * as seeds from './seeds';
1 change: 1 addition & 0 deletions src/seeds/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './list';
87 changes: 87 additions & 0 deletions src/seeds/list.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { mockList } from './list';

describe('mockList', () => {
  it('should produce data when total count is divisible by batch size', () => {
    const batchLengths: number[] = [];
    const seenIds: number[] = [];
    // Inspect every batch as soon as it is yielded.
    for (const { list } of mockList('abc123', { count: 10, batchSize: 2 })) {
      batchLengths.push(list.length);
      for (const row of list) {
        seenIds.push(row.item_id);
      }
    }
    // No dupes, proper indexing
    expect(seenIds).toStrictEqual([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
    expect(batchLengths).toStrictEqual([2, 2, 2, 2, 2]);
  });

  it('should produce data when total count is not divisible by batch size', () => {
    const batchLengths: number[] = [];
    for (const { list } of mockList('abc123', { count: 10, batchSize: 3 })) {
      batchLengths.push(list.length);
    }
    // Three full batches followed by the single leftover row
    expect(batchLengths).toStrictEqual([3, 3, 3, 1]);
  });

  it('should have characteristics according to data options', () => {
    const lowerBound = 1646096295000;
    const upperBound = 1646182692000;
    // Date-valued columns that must all fall inside the requested window
    const timeFields = [
      'time_read',
      'time_favorited',
      'time_added',
      'time_updated',
    ] as const;
    // Share of 1s in a list of 0/1 flags
    const mean = (flags: number[]): number =>
      flags.reduce((total, flag) => total + flag, 0) / flags.length;

    const archivedFlags: number[] = [];
    const favoriteFlags: number[] = [];
    const articleFlags: number[] = [];
    const timestamps: (number | undefined)[] = [];

    const generator = mockList('abc123', {
      minTimestamp: lowerBound,
      maxTimestamp: upperBound,
    });
    for (const { list, items_extended } of generator) {
      for (const row of list) {
        archivedFlags.push(row.status);
        favoriteFlags.push(row.favorite);
        for (const field of timeFields) {
          // Unset optional dates are recorded as undefined so the total count still adds up
          timestamps.push(row[field]?.getTime());
        }
      }
      for (const extended of items_extended) {
        articleFlags.push(extended.is_article);
      }
    }
    const knownTimestamps = timestamps.filter(
      (value): value is number => value != null
    );

    expect(archivedFlags.length).toEqual(1000);
    expect(favoriteFlags.length).toEqual(1000);
    expect(articleFlags.length).toEqual(1000);
    expect(timestamps.length).toEqual(4000);
    // 0.05 tolerance to reduce flakiness
    expect(mean(articleFlags)).toBeCloseTo(0.9, 1);
    expect(mean(favoriteFlags)).toBeCloseTo(0.1, 1);
    expect(mean(archivedFlags)).toBeCloseTo(0.2, 1);
    expect(Math.min(...knownTimestamps)).toBeGreaterThanOrEqual(lowerBound);
    expect(Math.max(...knownTimestamps)).toBeLessThanOrEqual(upperBound);
  });
});
144 changes: 144 additions & 0 deletions src/seeds/list.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import Chance from 'chance';

/**
 * Options accepted by `mockList`. Every field is optional; an omitted field
 * falls back to the default noted on it.
 */
interface ListDataOptions {
/** The total number of SavedItems to create for a mock user. Defaults to 1000. */
count?: number;
/**
 * The number of results in each iteration (via `next()`). Defaults to 100.
 * The last batch is shorter when `count` is not a multiple of `batchSize`.
 */
batchSize?: number;
/**
 * Random chance for a SavedItem to be marked as favorite. Compared against
 * `Math.random()`, so 0 means never and 1 means always. Defaults to 0.1
 */
favoriteRate?: number;
/**
 * Random chance for a SavedItem to be marked as archived. Compared against
 * `Math.random()`, so 0 means never and 1 means always. Defaults to 0.2
 */
archiveRate?: number;
/**
 * Random chance for a SavedItem to be an Article (vs. a Video). Compared
 * against `Math.random()`, so 0 means never and 1 means always. Defaults to 0.9
 */
articleRate?: number;
/** Epoch time (ms). A lower boundary for all timestamp fields. Defaults to 1298613211000 */
minTimestamp?: number;
/** Epoch time (ms). An upper boundary for all timestamp fields. Defaults to 1645768411000 */
maxTimestamp?: number;
}

/**
 * Subset of list entity which can be inserted into legacy schema.
 * One object of this shape is produced per generated SavedItem.
 */
interface ListEntity {
  /**
   * Owner of the row. `mockList` copies its `userId` argument here unchanged,
   * and that argument is a string, so the field is typed as a string too.
   */
  user_id: string;
  /** Zero-based position of the item within the generated list. */
  item_id: number;
  /** Generated with the same value as `item_id`. */
  resolved_id: number;
  /** Randomly generated URL. */
  given_url: string;
  /** Randomly generated sentence of 4 to 12 words. */
  title: string;
  /** When the item was added; drawn from the configured timestamp window. */
  time_added: Date;
  /** Last update; never earlier than `time_added`. */
  time_updated: Date;
  /** 1 when the item is archived, 0 otherwise. */
  status: number;
  /** Set only for archived items; lies between `time_added` and `time_updated`. */
  time_read: Date | undefined;
  /** 1 when the item is a favorite, 0 otherwise. */
  favorite: number;
  /** Set only for favorites; lies between `time_added` and `time_updated`. */
  time_favorited: Date | undefined;
  /** Picked at random from a small fixed set of fake API ids. */
  api_id: string;
  /** Picked at random from the same fixed set, independently of `api_id`. */
  api_id_updated: string;
}

/**
 * Subset of ItemsExtended entity which can be inserted into legacy schema.
 * `mockList` produces one of these for every list row it generates.
 */
interface ItemsExtendedEntity {
/** Generated with the same value as the `item_id` of the matching list row. */
extended_item_id: number;
/** 1 when the item is a video, 0 when it is an article. */
video: number;
/** 1 when the item is an article, 0 when it is a video; always the inverse of `video`. */
is_article: number;
}

/**
 * One batch yielded by `mockList`. The two arrays are index-aligned: entry `i`
 * of `items_extended` describes the same generated item as entry `i` of `list`.
 */
interface ListDataResponse {
/** Generated rows shaped like `ListEntity`. */
list: ListEntity[];
/** Generated rows shaped like `ItemsExtendedEntity`, one per entry in `list`. */
items_extended: ItemsExtendedEntity[];
}

/**
 * Generate a mock list for a user. This method returns an iterator which
 * provides data that can be inserted into the list, plus additional metadata
 * used for filters.
 * This just returns data which can be inserted into a test database -- the calling
 * method must handle the database calls.
 * This function is a generator to avoid memory issues when creating very large lists.
 * It should be consumed until it is finished. When the iterator is finished, `done`
 * is true and `value` is undefined.
 *
 * Every yielded batch owns its arrays: the generator never touches a batch again
 * after handing it out, so callers may keep batches around (or collect them all)
 * without later batches overwriting earlier ones.
 *
 * Example:
 * ```
 * const listGenerator = mockList('abc123');
 * // Get the first batch
 * let batch = listGenerator.next();
 * while (!batch.done) {
 *   // handle data insert here
 *   // await insertData(batch.value);
 *   batch = listGenerator.next();
 * }
 * ```
 *
 * @param userId a fake userId to generate the list
 * @param options options controlling the size of the list, batch in each iteration,
 * and the mocks (e.g. chance to be favorited or archived).
 * @returns a generator yielding batches of at most `batchSize` rows; the last batch
 * is shorter when `count` is not a multiple of `batchSize`.
 * @throws RangeError on the first `next()` call if `batchSize` is not a positive integer.
 */
export function* mockList(
  userId: string,
  options?: ListDataOptions
): Generator<ListDataResponse> {
  // Set defaults
  const {
    count = 1000,
    batchSize = 100,
    favoriteRate = 0.1,
    archiveRate = 0.2,
    articleRate = 0.9,
    minTimestamp = 1298613211000,
    maxTimestamp = 1645768411000,
  } = options ?? {};

  // A batch size of 0 used to make the generator silently yield nothing (the
  // modulo checks became NaN); negative or fractional sizes already failed with
  // a RangeError from the array constructor. Reject all of them the same way.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(
      `batchSize must be a positive integer, received ${batchSize}`
    );
  }

  const chance = new Chance();
  // Fake API ids to pick from for api_id / api_id_updated
  const apiIds = ['1234', '5678', '1111', '9999'];

  // Rows collected for the batch currently being filled. Loosely typed on
  // purpose: the rows are assembled from plain literals exactly as before.
  // NOTE(review): user_id carries the string userId as-is -- confirm the
  // declared entity type agrees before tightening these to the entity types.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let listData: any[] = [];
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  let extendedData: any[] = [];

  // Populate the data
  for (let index = 0; index < count; index += 1) {
    const timeAdded = chance.integer({ min: minTimestamp, max: maxTimestamp });
    const timeUpdated = chance.integer({ min: timeAdded, max: maxTimestamp });
    const isArchived = Math.random() < archiveRate;
    const isFavorite = Math.random() < favoriteRate;
    const isArticle = Math.random() < articleRate; // otherwise video

    listData.push({
      user_id: userId,
      item_id: index,
      resolved_id: index,
      given_url: chance.url(),
      // Title is a random sentence between 4 and 12 words
      title: chance.sentence({ words: chance.integer({ min: 4, max: 12 }) }),
      time_added: new Date(timeAdded),
      time_updated: new Date(timeUpdated),
      status: isArchived ? 1 : 0,
      // Read/favorited times only exist for archived/favorite items and
      // always fall between the added and updated times
      time_read: isArchived
        ? new Date(chance.integer({ min: timeAdded, max: timeUpdated }))
        : undefined,
      favorite: isFavorite ? 1 : 0,
      time_favorited: isFavorite
        ? new Date(chance.integer({ min: timeAdded, max: timeUpdated }))
        : undefined,
      api_id: apiIds[chance.integer({ min: 0, max: apiIds.length - 1 })],
      api_id_updated:
        apiIds[chance.integer({ min: 0, max: apiIds.length - 1 })],
    });
    extendedData.push({
      extended_item_id: index,
      video: isArticle ? 0 : 1,
      is_article: isArticle ? 1 : 0,
    });

    if (listData.length === batchSize) {
      yield { list: listData, items_extended: extendedData };
      // Start fresh arrays instead of reusing the ones just handed out, so a
      // consumer that keeps the batch never sees it change underneath them.
      listData = [];
      extendedData = [];
    }
  }

  // If the count doesn't evenly divide with batch size, yield what we have left
  if (listData.length > 0) {
    yield { list: listData, items_extended: extendedData };
  }
}

0 comments on commit 0441e3a

Please sign in to comment.