This repository has been archived by the owner on Jan 5, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
feat(list): list seeder #4
Merged
Merged
Changes from 1 commit
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -52,6 +52,7 @@ | |
] | ||
}, | ||
"dependencies": { | ||
"chance": "^1.1.8", | ||
"jest": "^27.5.1", | ||
"ts-jest": "^27.1.3" | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
// Barrel file: re-exports the public API of the list seeder module.
export * from './list';
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import { mockList } from './list'; | ||
|
||
describe('mockList', () => { | ||
it('should produce data when total count is divisible by batch size', () => { | ||
const mockListGenerator = mockList('abc123', { count: 10, batchSize: 2 }); | ||
let batch = mockListGenerator.next(); | ||
const results = []; | ||
while (!batch.done) { | ||
// Keep track of batch size | ||
results.push(batch.value['list'].length); | ||
batch = mockListGenerator.next(); | ||
} | ||
expect(results).toStrictEqual([2, 2, 2, 2, 2]); | ||
}); | ||
it('should produce data when total count is not divisible by batch size', () => { | ||
const mockListGenerator = mockList('abc123', { count: 10, batchSize: 3 }); | ||
let batch = mockListGenerator.next(); | ||
const results = []; | ||
while (!batch.done) { | ||
// Keep track of batch size | ||
results.push(batch.value['list'].length); | ||
batch = mockListGenerator.next(); | ||
} | ||
expect(results).toStrictEqual([3, 3, 3, 1]); | ||
}); | ||
it('should have characteristics according to data options', () => { | ||
const mockListGenerator = mockList('abc123', { | ||
minTimestamp: 1646096295000, | ||
maxTimestamp: 1646182692000, | ||
}); | ||
let batch = mockListGenerator.next(); | ||
const archive = []; | ||
const favorite = []; | ||
const article = []; | ||
const timestamps = []; | ||
function flatMapKey<T>(objects: T[], key: keyof T): T[keyof T][] { | ||
return objects.map((obj) => obj[key]); | ||
} | ||
function flatMapKeys<T>( | ||
objects: T[], | ||
keys: (keyof T)[], | ||
callback: (obj: any) => any | ||
): T[keyof T][] { | ||
return objects.reduce((acc, obj) => { | ||
acc.push(...keys.map((key) => callback(obj[key]))); | ||
return acc; | ||
}, [] as any); | ||
} | ||
// I know this is ugly and a lot of code, but JS doesn't have good methods for manipulating data natively | ||
while (!batch.done) { | ||
archive.push(...flatMapKey(batch.value['list'], 'status')); | ||
favorite.push(...flatMapKey(batch.value['list'], 'favorite')); | ||
article.push(...flatMapKey(batch.value['items_extended'], 'is_article')); | ||
timestamps.push( | ||
...flatMapKeys( | ||
batch.value['list'], | ||
['time_read', 'time_favorited', 'time_added', 'time_updated'], | ||
(date: Date) => date?.getTime() | ||
) | ||
); | ||
batch = mockListGenerator.next(); | ||
} | ||
const articleRate = | ||
article.reduce((sum, elem) => sum + elem, 0) / article.length; | ||
const favoriteRate = | ||
favorite.reduce((sum, elem) => sum + elem, 0) / favorite.length; | ||
const archivedRate = | ||
archive.reduce((sum, elem) => sum + elem, 0) / archive.length; | ||
const minTimestamp = Math.min(...timestamps.filter((t) => t != null)); | ||
const maxTimestamp = Math.max(...timestamps.filter((t) => t != null)); | ||
|
||
expect(archive.length).toEqual(1000); | ||
expect(favorite.length).toEqual(1000); | ||
expect(article.length).toEqual(1000); | ||
expect(timestamps.length).toEqual(4000); | ||
// 0.05 tolerance to reduce flakiness | ||
expect(articleRate).toBeCloseTo(0.9, 1); | ||
expect(favoriteRate).toBeCloseTo(0.1, 1); | ||
expect(archivedRate).toBeCloseTo(0.2, 1); | ||
expect(minTimestamp).toBeGreaterThanOrEqual(1646096295000); | ||
expect(maxTimestamp).toBeLessThanOrEqual(1646182692000); | ||
}); | ||
}); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
import Chance from 'chance'; | ||
|
||
/** Options controlling the size, batching, and data characteristics of a generated mock list. */
interface ListDataOptions {
  /** The total number of SavedItems to create for a mock user. Defaults to 1000.*/
  count?: number;
  /** The number of results in each iteration (via `next()`). Defaults to 100.*/
  batchSize?: number;
  /** Random chance for a SavedItem to be marked as favorite. Defaults to 0.1 */
  favoriteRate?: number;
  /** Random chance for a SavedItem to be marked as archived. Defaults to 0.2 */
  archiveRate?: number;
  /** Random chance for a SavedItem to be an Article (vs. a Video). Defaults to 0.9 */
  articleRate?: number;
  /** Epoch time (ms). A lower boundary for all timestamp fields. Defaults to 1298613211000 */
  minTimestamp?: number;
  /** Epoch time (ms). An upper boundary for all timestamp fields. Defaults to 1645768411000 */
  maxTimestamp?: number;
}
|
||
/** Subset of list entity which can be inserted into legacy schema. */
interface ListEntity {
  // NOTE(review): mockList assigns its string `userId` parameter to this
  // field even though it is declared as a number -- confirm the intended
  // column type.
  user_id: number;
  item_id: number;
  resolved_id: number;
  given_url: string;
  title: string;
  time_added: Date;
  time_updated: Date;
  // 1 when archived, 0 otherwise (see mockList's isArchived flag).
  status: number;
  // Only set when the item is archived.
  time_read: Date | undefined;
  // 1 when favorited, 0 otherwise.
  favorite: number;
  // Only set when the item is favorited.
  time_favorited: Date | undefined;
  api_id: string;
  api_id_updated: string;
}
|
||
/** Subset of ItemsExtended entity which can be inserted into legacy schema. */
interface ItemsExtendedEntity {
  extended_item_id: number;
  // As generated by mockList, exactly one of video / is_article is 1:
  // an item is either a video or an article.
  video: number;
  is_article: number;
}
|
||
/** One batch of generated rows, keyed by destination table name. */
interface ListDataResponse {
  list: ListEntity[];
  items_extended: ItemsExtendedEntity[];
}
|
||
/** | ||
* Generate a mock list for a user. This method returns an iterator which | ||
* provides data that can be inserted into the list, plus additional metadata | ||
* used for filters. | ||
* This just returns data which can be inserted into a test database -- the calling | ||
* method must handle the database calls. | ||
* This function is a generator to avoid memory issues when creating very large lists. | ||
* It should be consumed until it is finished. When the iterator is finished, `value` | ||
* will be undefined. | ||
* | ||
* Example: | ||
* ``` | ||
* const listGenerator = mockList('abc123'); | ||
* // Get the first batch | ||
* let batch = myListGenerator.next(); | ||
* while (!batch.done) { | ||
* // handle data insert here | ||
* // await insertData(batch); | ||
* batch = myListGenerator.next(); | ||
* } | ||
* ``` | ||
* | ||
* @param userId a fake userId to generate the list | ||
* @param options options controlling the size of the list, batch in each iteration, | ||
* and the mocks (e.g. chance to be favorited or archived). | ||
*/ | ||
export function* mockList( | ||
userId: string, | ||
options?: ListDataOptions | ||
): Generator<ListDataResponse> { | ||
// Set defaults | ||
const { | ||
count = 1000, | ||
batchSize = 100, | ||
favoriteRate = 0.1, | ||
archiveRate = 0.2, | ||
articleRate = 0.9, | ||
minTimestamp = 1298613211000, | ||
maxTimestamp = 1645768411000, | ||
} = options ?? {}; | ||
|
||
const chance = new Chance(); | ||
let index = 0; | ||
const listData = Array(batchSize); | ||
const extendedData = Array(batchSize); | ||
// Populate the data | ||
while (index < count) { | ||
const timeAdded = chance.integer({ min: minTimestamp, max: maxTimestamp }); | ||
const timeUpdated = chance.integer({ min: timeAdded, max: maxTimestamp }); | ||
const isArchived = Math.random() < archiveRate; | ||
const isFavorite = Math.random() < favoriteRate; | ||
const isArticle = Math.random() < articleRate; // otherwise video | ||
|
||
listData[index % batchSize] = { | ||
user_id: userId, | ||
item_id: index, | ||
resolved_id: index, | ||
given_url: chance.url(), | ||
// Title is a random sentence between 4 and 12 words | ||
title: chance.sentence({ words: chance.integer({ min: 4, max: 12 }) }), | ||
time_added: new Date(timeAdded), | ||
time_updated: new Date(timeUpdated), | ||
status: isArchived ? 1 : 0, | ||
time_read: isArchived | ||
? new Date(chance.integer({ min: timeAdded, max: timeUpdated })) | ||
: undefined, | ||
favorite: isFavorite ? 1 : 0, | ||
time_favorited: isFavorite | ||
? new Date(chance.integer({ min: timeAdded, max: timeUpdated })) | ||
: undefined, | ||
api_id: ['1234', '5678', '1111', '9999'][ | ||
chance.integer({ min: 0, max: 3 }) | ||
], | ||
api_id_updated: ['1234', '5678', '1111', '9999'][ | ||
chance.integer({ min: 0, max: 3 }) | ||
], | ||
}; | ||
extendedData[index % batchSize] = { | ||
extended_item_id: index, | ||
video: isArticle ? 0 : 1, | ||
is_article: isArticle ? 1 : 0, | ||
}; | ||
index += 1; | ||
if (index && index % batchSize === 0) { | ||
kschelonka marked this conversation as resolved.
Show resolved
Hide resolved
|
||
yield { list: listData, items_extended: extendedData }; | ||
} | ||
} | ||
// If the count doesn't evenly divide with batch size, yield what we have left | ||
const leftover = index % batchSize; | ||
if (leftover) { | ||
yield { | ||
list: listData.slice(0, leftover), | ||
items_extended: extendedData.slice(0, leftover), | ||
}; | ||
} | ||
} |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Correct me if I'm wrong: we insert items into this `listData` array from index 0 --> 100 (batch size), yield that `listData`, and then start again?

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep, and it reuses the same array hence the modulo indexing. Using a generator keeps us from accidentally making massive datasets that make your computer run out of memory