Skip to content
This repository has been archived by the owner on Jan 5, 2024. It is now read-only.

feat(list): list seeder #4

Merged
merged 2 commits into from
Mar 1, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
]
},
"dependencies": {
"chance": "^1.1.8",
"jest": "^27.5.1",
"ts-jest": "^27.1.3"
}
Expand Down
1 change: 1 addition & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
// anything we want consumable (module, type, class, etc) should be exported here

export * from './timeIt';
export * as seeds from './seeds';
1 change: 1 addition & 0 deletions src/seeds/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export * from './list';
83 changes: 83 additions & 0 deletions src/seeds/list.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import { mockList } from './list';

describe('mockList', () => {
  /** Drains a generator of `count` items and reports how many list rows each batch held. */
  const batchLengths = (count: number, batchSize: number): number[] => {
    const lengths: number[] = [];
    for (const batch of mockList('abc123', { count, batchSize })) {
      lengths.push(batch['list'].length);
    }
    return lengths;
  };

  /** Arithmetic mean; used to turn 0/1 flag columns into observed rates. */
  const mean = (values: number[]): number =>
    values.reduce((total, value) => total + value, 0) / values.length;

  it('should produce data when total count is divisible by batch size', () => {
    expect(batchLengths(10, 2)).toStrictEqual([2, 2, 2, 2, 2]);
  });

  it('should produce data when total count is not divisible by batch size', () => {
    // The trailing batch carries the remainder
    expect(batchLengths(10, 3)).toStrictEqual([3, 3, 3, 1]);
  });

  it('should have characteristics according to data options', () => {
    const lowerBound = 1646096295000;
    const upperBound = 1646182692000;
    const dateFields = [
      'time_read',
      'time_favorited',
      'time_added',
      'time_updated',
    ] as const;

    const statusFlags: number[] = [];
    const favoriteFlags: number[] = [];
    const articleFlags: number[] = [];
    // One entry per date field per row; dates that were never set stay undefined
    const epochs: (number | undefined)[] = [];

    const generator = mockList('abc123', {
      minTimestamp: lowerBound,
      maxTimestamp: upperBound,
    });
    for (const batch of generator) {
      for (const row of batch['list']) {
        statusFlags.push(row.status);
        favoriteFlags.push(row.favorite);
        for (const field of dateFields) {
          epochs.push(row[field]?.getTime());
        }
      }
      for (const extended of batch['items_extended']) {
        articleFlags.push(extended.is_article);
      }
    }

    const articleRate = mean(articleFlags);
    const favoriteRate = mean(favoriteFlags);
    const archivedRate = mean(statusFlags);
    const knownEpochs = epochs.filter(
      (epoch): epoch is number => epoch != null
    );
    const minTimestamp = Math.min(...knownEpochs);
    const maxTimestamp = Math.max(...knownEpochs);

    expect(statusFlags.length).toEqual(1000);
    expect(favoriteFlags.length).toEqual(1000);
    expect(articleFlags.length).toEqual(1000);
    expect(epochs.length).toEqual(4000);
    // 0.05 tolerance to reduce flakiness
    expect(articleRate).toBeCloseTo(0.9, 1);
    expect(favoriteRate).toBeCloseTo(0.1, 1);
    expect(archivedRate).toBeCloseTo(0.2, 1);
    expect(minTimestamp).toBeGreaterThanOrEqual(lowerBound);
    expect(maxTimestamp).toBeLessThanOrEqual(upperBound);
  });
});
144 changes: 144 additions & 0 deletions src/seeds/list.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import Chance from 'chance';

/** Tuning knobs for `mockList`; every field is optional and falls back to the default noted on it. */
interface ListDataOptions {
/** The total number of SavedItems to create for a mock user. Defaults to 1000. */
count?: number;
/**
 * The number of results in each iteration (via `next()`). Defaults to 100.
 * The final batch is shorter when `count` is not a multiple of this value.
 */
batchSize?: number;
/** Probability (compared against `Math.random()`) that a SavedItem is marked as favorite. Defaults to 0.1 */
favoriteRate?: number;
/** Probability (compared against `Math.random()`) that a SavedItem is marked as archived. Defaults to 0.2 */
archiveRate?: number;
/** Probability (compared against `Math.random()`) that a SavedItem is an Article (vs. a Video). Defaults to 0.9 */
articleRate?: number;
/** Epoch time (ms). A lower boundary for all timestamp fields. Defaults to 1298613211000 (2011-02-25T05:53:31Z) */
minTimestamp?: number;
/** Epoch time (ms). An upper boundary for all timestamp fields. Defaults to 1645768411000 (2022-02-25T05:53:31Z) */
maxTimestamp?: number;
}

/**
 * Subset of list entity which can be inserted into legacy schema.
 * One of these is produced per generated SavedItem by `mockList`.
 */
interface ListEntity {
// NOTE(review): declared as number, but mockList assigns its string `userId`
// parameter to this field -- confirm which type the legacy schema expects.
user_id: number;
// mockList uses the zero-based position of the item in the generated list
item_id: number;
// mockList sets this to the same value as item_id
resolved_id: number;
given_url: string;
title: string;
time_added: Date;
// Generated so that it is never earlier than time_added
time_updated: Date;
// 1 when the item was generated as archived, otherwise 0
status: number;
// Only set for archived items; falls between time_added and time_updated
time_read: Date | undefined;
// 1 when the item was generated as a favorite, otherwise 0
favorite: number;
// Only set for favorited items; falls between time_added and time_updated
time_favorited: Date | undefined;
api_id: string;
api_id_updated: string;
}

/**
 * Subset of ItemsExtended entity which can be inserted into legacy schema.
 * In mockList, `video` and `is_article` are mutually exclusive 0/1 flags.
 */
interface ItemsExtendedEntity {
// Same value as the item_id of the list row generated alongside it
extended_item_id: number;
// 1 when the item was generated as a video, otherwise 0
video: number;
// 1 when the item was generated as an article, otherwise 0
is_article: number;
}

/**
 * One batch of generated data as yielded by `mockList`.
 * The two arrays are filled position-by-position together, so entry `i` of
 * `items_extended` describes the same item as entry `i` of `list`.
 */
interface ListDataResponse {
list: ListEntity[];
items_extended: ItemsExtendedEntity[];
}

/**
 * Generate a mock list for a user. This method returns an iterator which
 * provides data that can be inserted into the list, plus additional metadata
 * used for filters.
 * This just returns data which can be inserted into a test database -- the calling
 * method must handle the database calls.
 * This function is a generator to avoid memory issues when creating very large lists:
 * only one batch of rows is built at a time. It should be consumed until it is
 * finished. When the iterator is finished, `value` will be undefined.
 *
 * Every yielded batch owns its arrays, so a caller may safely keep a batch around
 * (or hand it to an un-awaited insert) after requesting the next one.
 *
 * Example:
 * ```
 * const listGenerator = mockList('abc123');
 * // Get the first batch
 * let batch = listGenerator.next();
 * while (!batch.done) {
 *   // handle data insert here
 *   // await insertData(batch.value);
 *   batch = listGenerator.next();
 * }
 * ```
 *
 * @param userId a fake userId to generate the list
 * @param options options controlling the size of the list, batch in each iteration,
 * and the mocks (e.g. chance to be favorited or archived).
 * @returns a generator yielding `{ list, items_extended }` batches of at most
 * `batchSize` rows each; the last batch holds the remainder.
 * @throws RangeError (on the first `next()`) if `batchSize` is not a positive integer.
 */
export function* mockList(
  userId: string,
  options?: ListDataOptions
): Generator<ListDataResponse> {
  // Set defaults
  const {
    count = 1000,
    batchSize = 100,
    favoriteRate = 0.1,
    archiveRate = 0.2,
    articleRate = 0.9,
    minTimestamp = 1298613211000,
    maxTimestamp = 1645768411000,
  } = options ?? {};

  // A zero batch size would make every modulo below NaN and silently yield
  // nothing; reject it the same way invalid array lengths are rejected.
  if (!Number.isInteger(batchSize) || batchSize < 1) {
    throw new RangeError(
      `batchSize must be a positive integer, received ${batchSize}`
    );
  }

  const chance = new Chance();
  const apiIds = ['1234', '5678', '1111', '9999'];
  const randomApiId = () =>
    apiIds[chance.integer({ min: 0, max: apiIds.length - 1 })];

  let index = 0;
  // Working buffers, refilled from slot 0 for every batch (hence the modulo
  // indexing). They are never handed out directly -- copies are yielded.
  const listData = Array(batchSize);
  const extendedData = Array(batchSize);

  // Populate the data
  while (index < count) {
    const timeAdded = chance.integer({ min: minTimestamp, max: maxTimestamp });
    const timeUpdated = chance.integer({ min: timeAdded, max: maxTimestamp });
    const isArchived = Math.random() < archiveRate;
    const isFavorite = Math.random() < favoriteRate;
    const isArticle = Math.random() < articleRate; // otherwise video
    const slot = index % batchSize;

    listData[slot] = {
      // NOTE(review): ListEntity declares user_id as number while userId is a
      // string; the untyped buffer hides the mismatch -- confirm intended type.
      user_id: userId,
      item_id: index,
      resolved_id: index,
      given_url: chance.url(),
      // Title is a random sentence between 4 and 12 words
      title: chance.sentence({ words: chance.integer({ min: 4, max: 12 }) }),
      time_added: new Date(timeAdded),
      time_updated: new Date(timeUpdated),
      status: isArchived ? 1 : 0,
      time_read: isArchived
        ? new Date(chance.integer({ min: timeAdded, max: timeUpdated }))
        : undefined,
      favorite: isFavorite ? 1 : 0,
      time_favorited: isFavorite
        ? new Date(chance.integer({ min: timeAdded, max: timeUpdated }))
        : undefined,
      api_id: randomApiId(),
      api_id_updated: randomApiId(),
    };
    extendedData[slot] = {
      extended_item_id: index,
      video: isArticle ? 0 : 1,
      is_article: isArticle ? 1 : 0,
    };

    index += 1;
    if (index % batchSize === 0) {
      // Buffer is full: hand out copies so the next batch cannot overwrite
      // rows the caller is still holding.
      yield { list: listData.slice(), items_extended: extendedData.slice() };
    }
  }

  // If the count doesn't evenly divide with batch size, yield what we have left
  const leftover = index % batchSize;
  if (leftover) {
    yield {
      list: listData.slice(0, leftover),
      items_extended: extendedData.slice(0, leftover),
    };
  }
}