diff --git a/src/lib/getPackagesFromZip.ts b/src/lib/getPackagesFromZip.ts index 2e3549ac9..e69de29bb 100644 --- a/src/lib/getPackagesFromZip.ts +++ b/src/lib/getPackagesFromZip.ts @@ -1,40 +0,0 @@ -import { Body } from 'aws-sdk/clients/s3'; -import { ZipHandler } from './anki/zip'; -import { PrepareDeck } from './parser/DeckParser'; -import Package from './parser/Package'; -import { isHTMLFile, hasMarkdownFileName } from './storage/checks'; -import Settings from './parser/Settings'; - -export interface PackageResult { - packages: Package[]; - containsMarkdown: boolean; -} - -export const getPackagesFromZip = async ( - fileContents: Body | undefined, - isPatreon: boolean, - settings: Settings -): Promise => { - const zipHandler = new ZipHandler(); - const packages = []; - - if (!fileContents) { - return { packages: [], containsMarkdown: false }; - } - - zipHandler.build(fileContents as Uint8Array, isPatreon); - - const fileNames = zipHandler.getFileNames(); - - for (const fileName of fileNames) { - if (isHTMLFile(fileName)) { - const deck = await PrepareDeck(fileName, zipHandler.files, settings); - - if (deck) { - packages.push(new Package(deck.name, deck.apkg)); - } - } - } - - return { packages, containsMarkdown: hasMarkdownFileName(fileNames) }; -}; diff --git a/src/lib/misc/ErrorHandler.tsx b/src/lib/misc/ErrorHandler.tsx index b07a6bf4e..21b407a56 100644 --- a/src/lib/misc/ErrorHandler.tsx +++ b/src/lib/misc/ErrorHandler.tsx @@ -1,5 +1,5 @@ -import { renderToStaticMarkup } from 'react-dom/server'; import express from 'express'; +import { renderToStaticMarkup } from 'react-dom/server'; import { sendError } from '../error/sendError'; const NEW_GITHUB_ISSUE = 'https://github.com/2anki/server/issues/new/choose'; @@ -14,19 +14,6 @@ export const NO_PACKAGE_ERROR = new Error( ) ); -const NOTION_INFO_LINK = - 'https://www.notion.so/help/export-your-content#export-as-html'; -export const UNSUPPORTED_FORMAT_MD = new Error( - renderToStaticMarkup( - <> - Markdown support 
/**
 * Re-parses the uploaded files with the experimental FallbackParser and
 * exports the resulting decks. Replaces `this.payload` with the fallback
 * result.
 *
 * @returns whatever `exporter.save()` yields for the fallback payload.
 * @throws NO_PACKAGE_ERROR when the fallback parser produces no deck at all,
 *   instead of failing on `this.payload[0]` with an opaque TypeError.
 */
tryExperimental() {
  const fallback = new FallbackParser(this.files);
  const ws = new Workspace(true, 'fs');
  // NOTE(review): the exporter is set up with the payload from *before* the
  // fallback run and only later configured with the new one — confirm that
  // setupExporter does not retain its first argument.
  const exporter = this.setupExporter(this.payload, ws.location);

  this.payload = fallback.run(this.settings);
  if (this.payload.length === 0) {
    // Same error PrepareDeck raises when nothing could be generated.
    throw NO_PACKAGE_ERROR;
  }
  this.payload[0].settings = this.settings;
  exporter.configure(this.payload);

  return exporter.save();
}

/**
 * Sums `cardCount` over every deck in the payload.
 *
 * @returns the total number of cards; 0 for an empty payload (the seed value
 *   keeps `reduce` from throwing on an empty array).
 */
totalCardCount() {
  return this.payload.reduce((total, deck) => total + deck.cardCount, 0);
}
/**
 * Experimental parser that builds decks from list content when the regular
 * parser finds no cards. HTML files contribute their `<ul>`/`<ol>` items,
 * Markdown and plain-text files contribute their bullet lines; the extracted
 * text is handed to PlainTextParser and the resulting cards become notes.
 */
class FallbackParser {
  constructor(private readonly files: File[]) {}

  /**
   * Renders every `<ul>`/`<ol>` of the document as text.
   *
   * @param html raw HTML document
   * @returns one string per list, each `<li>` rendered as `• text\n`
   */
  htmlToTextWithNewlines(html: string): string[] {
    const $ = cheerio.load(html);

    const renderListItems = (listItems: cheerio.Cheerio): string => {
      let rendered = '';
      listItems.each((_, element) => {
        rendered += `• ${$(element).text().trim()}\n`;
      });
      return rendered;
    };

    const lists: string[] = [];
    // NOTE(review): the selector matches nested lists too and `find('li')`
    // returns all descendants, so items of a nested list presumably appear
    // once per enclosing list — confirm this is intended.
    $('ul, ol').each((_, element) => {
      lists.push(renderListItems($(element).find('li')));
    });

    return lists;
  }

  /** Returns the trimmed text of the document's `<title>` ('' if absent). */
  getTitleFromHTML(html: string): string {
    const $ = cheerio.load(html);
    return $('title').text().trim();
  }

  /** Returns the text of the document's `<style>` tag(s), or '' if none. */
  getStyleTagFromString(html: string): string {
    const $ = cheerio.load(html);
    const styleTag = $('style');

    if (styleTag.length === 0) {
      return ''; // No style tag found, return an empty string
    }

    return styleTag.text() ?? '';
  }

  /**
   * Finds bullet entries: a `-`, `*` or `+` followed by a space and the rest
   * of the line. The marker is kept in each match.
   *
   * @returns the matches, or null when the text contains none
   */
  getMarkdownBulletLists(markdown: string): RegExpMatchArray | null {
    // Same matches as the former /[-*+]( .*)+/g, without the nested quantifier.
    // NOTE(review): not anchored to line start, so "a - b" also matches "- b".
    const bulletListRegex = /[-*+] .*/g;
    return markdown.match(bulletListRegex);
  }

  /**
   * Return the correct title from markdown
   *
   * Notion can have two titles in Markdown files.
   * The first one is the title with the id of the page.
   * The second one is the title of the page only.
   *
   * @param markdown user input markdown
   * @returns deck title; 'Default' when the text has no heading
   */
  getTitleMarkdown(markdown: string): string {
    const headingRegex = /^(#{1,6})\s+(.*)$/gm;
    const matches = [...markdown.matchAll(headingRegex)];
    if (matches.length >= 2) {
      return matches[1][2]; // return second match
    }
    if (matches.length > 0) {
      return matches[0][2];
    }
    return 'Default';
  }

  /**
   * Converts parsed flashcards into notes, numbering them by position.
   * Cloze cards are flagged as such. A basic card without a back uses the
   * first line of its front as the question and the remaining lines as the
   * answer.
   */
  mapCardsToNotes(cards: Flashcard[]): Note[] {
    return cards.filter(Boolean).map((card, index) => {
      const note = new Note(card.front, '');
      note.number = index;

      if (isClozeFlashcard(card)) {
        note.cloze = true;
        return note;
      }

      note.back = card.back;
      if (!note.back || note.back.trim().length === 0) {
        const [firstLine, ...remaining] = note.name.split('\n');
        if (remaining.length > 0) {
          note.name = firstLine;
          note.back = remaining.join('\n');
        }
      }
      return note;
    });
  }

  /**
   * Builds one deck per file that has contents.
   *
   * Files without contents, and Markdown/plain-text files without bullet
   * entries, are skipped. Files of any other type still produce an empty
   * 'Untitled' deck, as before.
   *
   * @param settings settings attached to every created deck
   * @returns the generated decks, possibly an empty array
   */
  run(settings: Settings): Deck[] {
    const decks: Deck[] = [];

    for (const file of this.files) {
      const contents = file.contents?.toString();
      if (!contents) {
        continue;
      }

      let cards: Note[] = [];
      let deckName = 'Untitled';

      if (isHTMLFile(file.name)) {
        const plainText = this.htmlToTextWithNewlines(contents).join('\n');
        const found = new PlainTextParser().parse(plainText);
        cards = this.mapCardsToNotes(found);
        // A document without <title> keeps the default instead of naming
        // the deck with an empty string.
        deckName = this.getTitleFromHTML(contents) || deckName;
      } else if (isMarkdownFile(file.name) || isPlainText(file.name)) {
        const items = this.getMarkdownBulletLists(contents);
        if (!items) {
          continue;
        }
        const found = new PlainTextParser().parse(items.join('\n'));
        cards = this.mapCardsToNotes(found);
        deckName = this.getTitleMarkdown(contents);
      }

      decks.push(
        new Deck(
          deckName,
          Deck.CleanCards(cards),
          '', // skip cover image
          '', // skip style
          Deck.GenerateId(),
          settings
        )
      );
    }

    return decks;
  }
}

export default FallbackParser;
/**
 * Turns plain text into flashcards.
 *
 * Each entry has the form `question - answer`. A question containing
 * underscores is treated as a cloze sentence whose blanks are filled with the
 * comma-separated answers; anything else becomes a basic front/back card.
 */
export class PlainTextParser {
  /**
   * Splits a comma-separated answer string (`"a, b"`) into its parts.
   * A string without `", "` yields a single-element array.
   */
  getOneOrMoreAnswers(answers: string): string[] {
    return answers.split(', ');
  }

  /**
   * Replaces the blanks (runs of `_`) in `sentence`, left to right, with
   * Anki cloze markers `{{cN::answer}}`.
   *
   * Surplus answers are ignored; blanks without an answer are left as-is.
   */
  fillInTheBlanks(sentence: string, answers: string): ClozeCard {
    const answerList = this.getOneOrMoreAnswers(answers);
    let clozeSentence = sentence;

    answerList.forEach((answer, index) => {
      // A replacer function keeps `$&`, `$$`, ... inside an answer literal;
      // a replacement *string* would expand them.
      clozeSentence = clozeSentence.replace(
        /_+/,
        () => `{{c${index + 1}::${answer}}}`
      );
    });

    return {
      front: clozeSentence,
      isCloze: true,
    };
  }

  /**
   * Builds a basic card from `front - back`.
   *
   * Text without a separator yields an empty back (never `undefined`), and
   * any further ` - ` occurrences stay part of the back instead of being
   * dropped.
   */
  getBasicFlashcard(flashcardText: string): BasicCard {
    const [front, ...rest] = flashcardText.split(' - ');

    return {
      front,
      back: rest.join(' - '),
    };
  }

  /**
   * Parses the input into flashcards.
   *
   * Entries are separated by a blank line or by a newline followed by `- `.
   * An entry whose question contains a blank but has no answer part is kept
   * as a basic card rather than raising a TypeError.
   */
  parse(input: string): Flashcard[] {
    const flashcards: Flashcard[] = [];
    const bulletPoints = input.split(/\n\n|\n- /);

    for (const bulletPoint of bulletPoints) {
      const [question, ...answerParts] = bulletPoint.split(' - ');

      if (answerParts.length > 0 && isPossiblyClozeFlashcard(question)) {
        flashcards.push(
          this.fillInTheBlanks(question, answerParts.join(' - '))
        );
        continue;
      }

      flashcards.push(this.getBasicFlashcard(bulletPoint));
    }

    return flashcards;
  }
}
/** A fill-in-the-blank card; `front` carries the `{{cN::...}}` markers. */
export interface ClozeCard {
  isCloze: boolean;
  front: string;
  extra?: string;
}

/** A classic question/answer card. */
export interface BasicCard {
  front: string;
  back: string;
  tags?: string;
}

export type Flashcard = ClozeCard | BasicCard;

/** Narrows to ClozeCard when the card has `isCloze` set to exactly `true`. */
export const isClozeFlashcard = (
  flashcard: Flashcard
): flashcard is ClozeCard =>
  'isCloze' in flashcard && flashcard.isCloze === true;

/** Narrows to BasicCard when the card has a defined `back`. */
export const isBasicFlashcard = (
  flashcard: Flashcard
): flashcard is BasicCard =>
  'back' in flashcard && flashcard.back !== undefined;

/**
 * Heuristic: a question is a cloze candidate when it contains at least one
 * underscore (a blank).
 *
 * The earlier `&& question.split('-')` operand was always truthy and only
 * turned the result into `false | string[]`; the truthiness of the result is
 * unchanged, it is now a real boolean.
 */
export const isPossiblyClozeFlashcard = (question: string): boolean => {
  return question.includes('_');
};
-import { getPackagesFromZip } from '../lib/getPackagesFromZip'; -import ErrorHandler, { - UNSUPPORTED_FORMAT_MD, - NO_PACKAGE_ERROR, -} from '../lib/misc/ErrorHandler'; -import { PrepareDeck } from '../lib/parser/DeckParser'; -import Package from '../lib/parser/Package'; -import Settings from '../lib/parser/Settings'; -import { - hasMarkdownFileName, - isHTMLFile, - isZIPFile, -} from '../lib/storage/checks'; +import GeneratePackagesUseCase from '../usecases/uploads/GeneratePackagesUseCase'; import { toText } from './NotionService/BlockHandler/helpers/deckNameToText'; class UploadService { @@ -38,21 +26,6 @@ class UploadService { await s.delete(key); } - registerUploadSize(file: UploadedFile, owner?: number) { - const { originalname, key, size } = file; - - if (!owner) { - return; - } - - return this.uploadRepository.update( - owner, - originalname, - key, - BytesToMegaBytes(size) - ); - } - getUploadHandler(res: express.Response, storage: StorageHandler) { return multer({ limits: getUploadLimits(res.locals.patreon), @@ -80,42 +53,16 @@ class UploadService { res: express.Response ) { try { - const files = req.files as UploadedFile[]; - let packages: Package[] = []; - let hasMarkdown: boolean = hasMarkdownFileName( - files.map((file) => file.originalname) - ); - for (const file of files) { - const filename = file.originalname; - const settings = new Settings(req.body || {}); - - await this.registerUploadSize(file, getOwner(res)); - const key = file.key; - const fileContents = await storage.getFileContents(key); - - if (isHTMLFile(filename)) { - const d = await PrepareDeck( - filename, - [{ name: filename, contents: fileContents.Body }], - settings - ); - if (d) { - const pkg = new Package(d.name, d.apkg); - packages = packages.concat(pkg); - } - } else if (isZIPFile(filename) || isZIPFile(key)) { - const { packages: extraPackages, containsMarkdown } = - await getPackagesFromZip( - fileContents.Body, - res.locals.patreon, - settings - ); - packages = 
packages.concat(extraPackages); - hasMarkdown = containsMarkdown; - } - } let payload; let plen; + const settings = new Settings(req.body || {}); + + const useCase = new GeneratePackagesUseCase(storage); + const { packages } = await useCase.execute( + res.locals.patreon, + req.files as UploadedFile[], + settings + ); const first = packages[0]; if (packages.length === 1) { @@ -141,11 +88,7 @@ class UploadService { await sendBundle(packages, res); console.info('Sent bundle with %d packages', packages.length); } else { - if (hasMarkdown) { - ErrorHandler(res, UNSUPPORTED_FORMAT_MD); - } else { - ErrorHandler(res, NO_PACKAGE_ERROR); - } + ErrorHandler(res, NO_PACKAGE_ERROR); } } catch (err) { sendError(err); diff --git a/src/usecases/parsers/ParsePlainTextUseCase.test.ts b/src/usecases/parsers/ParsePlainTextUseCase.test.ts new file mode 100644 index 000000000..c7c942b8a --- /dev/null +++ b/src/usecases/parsers/ParsePlainTextUseCase.test.ts @@ -0,0 +1,39 @@ +import { PlainTextParser } from '../../lib/parser/experimental/PlainTextParser/PlainTextParser'; +import { ParsePlainTextUseCase } from './ParsePlainTextUseCase'; + +describe('Parse plaintext use case', () => { + it('should find one cloze flashcard', () => { + const input = 'The capital of __ is Paris. - France'; + expect( + new ParsePlainTextUseCase(new PlainTextParser()).execute(input) + ).toEqual([ + { front: 'The capital of {{c1::France}} is Paris.', isCloze: true }, + ]); + }); + + it('should find two cloze flashcards', () => { + const input = + 'There tends to be a lot of ice on ____ & ____. - bridges, overpasses'; + expect( + new ParsePlainTextUseCase(new PlainTextParser()).execute(input) + ).toEqual([ + { + front: + 'There tends to be a lot of ice on {{c1::bridges}} & {{c2::overpasses}}.', + isCloze: true, + }, + ]); + }); + + it('should find basic blashcard', () => { + const input = 'What is the capital of France? 
/**
 * Use case that converts plain text into flashcards by delegating to the
 * injected PlainTextParser.
 */
export class ParsePlainTextUseCase {
  private readonly parser: PlainTextParser;

  constructor(parser: PlainTextParser) {
    this.parser = parser;
  }

  /** Parses `text` and returns the flashcards produced by the parser. */
  execute(text: string) {
    const flashcards = this.parser.parse(text);
    return flashcards;
  }
}
export interface PackageResult {
  packages: Package[];
}

/**
 * Truthy when the file name is HTML, Markdown or plain text.
 *
 * Uses `||` rather than `??` so the chain also falls through if one of the
 * checkers reports "no" as `false` instead of `null`.
 */
export const isFileSupported = (filename: string) =>
  isHTMLFile(filename) || isMarkdownFile(filename) || isPlainText(filename);

/**
 * Builds one package per supported file inside a ZIP archive.
 *
 * @param fileContents raw archive bytes; `undefined` yields no packages
 * @param isPatreon forwarded to `ZipHandler.build`
 * @param settings parser settings applied to every deck
 */
const getPackagesFromZip = async (
  fileContents: Body | undefined,
  isPatreon: boolean,
  settings: Settings
): Promise<PackageResult> => {
  const zipHandler = new ZipHandler();
  const packages: Package[] = [];

  if (!fileContents) {
    return { packages };
  }

  zipHandler.build(fileContents as Uint8Array, isPatreon);

  for (const fileName of zipHandler.getFileNames()) {
    if (!isFileSupported(fileName)) {
      continue;
    }

    // NOTE(review): every call receives *all* files of the archive, and a
    // failure for one file rejects the whole archive — confirm both are
    // intended.
    const deck = await PrepareDeck(fileName, zipHandler.files, settings);
    if (deck) {
      packages.push(new Package(deck.name, deck.apkg));
    }
  }

  return { packages };
};

/**
 * Turns uploaded files into Anki packages: supported documents are parsed
 * directly, ZIP archives are unpacked and each supported entry is parsed.
 */
class GeneratePackagesUseCase {
  constructor(private readonly storage: StorageHandler) {}

  /**
   * @param isPatreon forwarded to the ZIP handler
   * @param files uploaded files; contents are fetched from storage by key
   * @param settings parser settings applied to every deck
   * @returns all generated packages, in upload order
   */
  async execute(
    isPatreon: boolean,
    files: UploadedFile[],
    settings: Settings
  ): Promise<PackageResult> {
    const packages: Package[] = [];

    for (const file of files) {
      const { key, originalname: filename } = file;
      const fileContents = await this.storage.getFileContents(key);

      if (isFileSupported(filename)) {
        const deck = await PrepareDeck(
          filename,
          [{ name: filename, contents: fileContents.Body }],
          settings
        );
        if (deck) {
          packages.push(new Package(deck.name, deck.apkg));
        }
      } else if (isZIPFile(filename) || isZIPFile(key)) {
        // Some uploads only carry the .zip extension on the storage key.
        const { packages: extraPackages } = await getPackagesFromZip(
          fileContents.Body,
          isPatreon,
          settings
        );
        packages.push(...extraPackages);
      }
    }

    return { packages };
  }
}

export default GeneratePackagesUseCase;
*/, + "target": "es2015" /* Specify ECMAScript target version: 'ES3' (default), 'ES5', 'ES2015', 'ES2016', 'ES2017', 'ES2018', 'ES2019', 'ES2020', or 'ESNEXT'. */, "module": "commonjs", "lib": [ "es6",