Skip to content

Commit

Permalink
feat: adding fallbacks for bullet lists (#1180)
Browse files Browse the repository at this point in the history
The code is ugly as hell but this adds a new fallback parser. For power
users who already have an established workflow, this will not introduce
any issues but only be triggered when their upload would not create any
flashcards.

It relies heavily on the format suggested here
#1178:

The fallback parser takes a bullet list as input and generates a
flashcard for each bullet point. Bullet points are separated by a blank
line (\n\n) or by a new line that starts with a dash (\n- ). Within a
bullet point, the question and answer are separated by a dash surrounded
by spaces ( - ). If there is more than one answer, e.g. in the cloze
deletion case, the answers follow the dash and are separated by a comma
and a space (answer1, answer2, etc.).

## Examples

### Cloze card

```
The capital of France is _____. - Paris
```

That generates the following flashcard:

```
Front of the card: The capital of France is {{c1::Paris}}.
```

### Basic card

Input:
```
What is the capital of Kosovo? - Pristina
```

Output:
```
Front of the card: What is the capital of Kosovo?
Back of the card: Pristina
```

## Supported formats

Note this does not work in the Notion integration but is a fallback
mechanism added for people uploading files that are not handled by the
current DeckParser. So I am hoping people see the error message about no
cards less often with this change. This is a fallback so things like
styling and images do not work.

Markdown, HTML, and ZIP uploads work with the fallback parser. Note,
however, that there may be some unexpected issues: styling is not
applied, and there are cases where the front of a card gets duplicated content.

Enjoy!
  • Loading branch information
aalemayhu authored Jul 14, 2023
1 parent 10518f0 commit 6c422fb
Show file tree
Hide file tree
Showing 14 changed files with 434 additions and 134 deletions.
40 changes: 0 additions & 40 deletions src/lib/getPackagesFromZip.ts
Original file line number Diff line number Diff line change
@@ -1,40 +0,0 @@
import { Body } from 'aws-sdk/clients/s3';
import { ZipHandler } from './anki/zip';
import { PrepareDeck } from './parser/DeckParser';
import Package from './parser/Package';
import { isHTMLFile, hasMarkdownFileName } from './storage/checks';
import Settings from './parser/Settings';

/** Outcome of extracting Anki packages from an uploaded ZIP archive. */
export interface PackageResult {
/** Packages built from the archive's HTML files (one per file that produced a deck). */
packages: Package[];
/** Value reported by hasMarkdownFileName for the archive's file names. */
containsMarkdown: boolean;
}

/**
 * Builds an Anki package for every HTML file found in an uploaded ZIP archive.
 *
 * @param fileContents raw archive contents; a falsy value yields an empty result
 * @param isPatreon forwarded unchanged to ZipHandler.build
 * @param settings forwarded unchanged to PrepareDeck for each HTML file
 * @returns the generated packages together with the result of
 *          hasMarkdownFileName over the archive's file names
 */
export const getPackagesFromZip = async (
  fileContents: Body | undefined,
  isPatreon: boolean,
  settings: Settings
): Promise<PackageResult> => {
  const archive = new ZipHandler();

  // Nothing was uploaded: report "no packages, no markdown".
  if (!fileContents) {
    return { packages: [], containsMarkdown: false };
  }

  archive.build(fileContents as Uint8Array, isPatreon);
  const names = archive.getFileNames();

  const packages = [];
  for (const name of names) {
    if (!isHTMLFile(name)) {
      continue;
    }

    // Decks are prepared one at a time, in archive order.
    const prepared = await PrepareDeck(name, archive.files, settings);
    if (prepared) {
      packages.push(new Package(prepared.name, prepared.apkg));
    }
  }

  const containsMarkdown = hasMarkdownFileName(names);
  return { packages, containsMarkdown };
};
15 changes: 1 addition & 14 deletions src/lib/misc/ErrorHandler.tsx
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { renderToStaticMarkup } from 'react-dom/server';
import express from 'express';
import { renderToStaticMarkup } from 'react-dom/server';
import { sendError } from '../error/sendError';

const NEW_GITHUB_ISSUE = 'https://github.com/2anki/server/issues/new/choose';
Expand All @@ -14,19 +14,6 @@ export const NO_PACKAGE_ERROR = new Error(
)
);

// Notion help article on exporting content as HTML; linked from the error below.
const NOTION_INFO_LINK =
'https://www.notion.so/help/export-your-content#export-as-html';
/**
 * Error whose message is static HTML markup: it tells the user that Markdown
 * support has been removed and links to NOTION_INFO_LINK (opened in a new tab).
 */
export const UNSUPPORTED_FORMAT_MD = new Error(
renderToStaticMarkup(
<>
Markdown support has been removed, please Export as HTML:{' '}
<a target="_blank" href={NOTION_INFO_LINK}>
{NOTION_INFO_LINK}
</a>
</>
)
);

export default function ErrorHandler(res: express.Response, err: Error) {
sendError(err);
res.set('Content-Type', 'text/plain');
Expand Down
2 changes: 1 addition & 1 deletion src/lib/parser/CustomExporter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class CustomExporter {
);
}

async save() {
async save(): Promise<Buffer> {
const gen = new CardGenerator(this.workspace);
if (process.env.SKIP_CREATE_DECK) {
return fs.readFileSync(this.getPayloadInfoPath());
Expand Down
45 changes: 40 additions & 5 deletions src/lib/parser/DeckParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import getYouTubeEmbedLink from './helpers/getYouTubeEmbedLink';
import getUniqueFileName from '../misc/getUniqueFileName';
import { isValidAudioFile } from '../anki/format';
import { sendError } from '../error/sendError';
import FallbackParser from './experimental/FallbackParser';
import { NO_PACKAGE_ERROR } from '../misc/ErrorHandler';

export class DeckParser {
globalTags: cheerio.Cheerio | null;
Expand Down Expand Up @@ -536,19 +538,52 @@ export class DeckParser {
exporter.configure(this.payload);
return exporter.save();
}

/**
 * Re-parses the uploaded files with the experimental FallbackParser and
 * exports the result.
 *
 * Replaces `this.payload` with the decks produced by the fallback parser,
 * stamps the current settings on the first deck, and hands the payload to
 * the exporter.
 *
 * @returns the value of the exporter's `save()` for the new payload
 * @throws NO_PACKAGE_ERROR when the fallback parser produced no decks at all
 */
tryExperimental() {
  const fallback = new FallbackParser(this.files);
  const ws = new Workspace(true, 'fs');
  const exporter = this.setupExporter(this.payload, ws.location);

  this.payload = fallback.run(this.settings);

  // The fallback parser skips files without contents or bullet lists, so it
  // can legitimately return an empty array. Indexing payload[0] would then
  // fail with an opaque TypeError; report the "no package" error instead.
  if (this.payload.length === 0) {
    throw NO_PACKAGE_ERROR;
  }

  this.payload[0].settings = this.settings;
  exporter.configure(this.payload);

  return exporter.save();
}

/**
 * Sums `cardCount` over every deck in the payload.
 *
 * @returns the total number of cards, or 0 when the payload is empty
 */
totalCardCount() {
  // The explicit initial value keeps reduce() from throwing a TypeError when
  // the payload contains no decks.
  return this.payload.reduce((total, deck) => total + deck.cardCount, 0);
}
}

/** Value resolved by PrepareDeck once a package has been generated. */
interface PrepareDeckResult {
/** File name of the package; PrepareDeck builds it with an `.apkg` suffix. */
name: string;
/** Package contents as returned by the parser's build or fallback export. */
apkg: Buffer;
/** The parser payload (decks) the package was generated from. */
deck: Deck[];
}
export async function PrepareDeck(
fileName: string,
files: File[],
settings: Settings
) {
): Promise<PrepareDeckResult> {
const parser = new DeckParser(fileName, settings, files);
const total = parser.payload.map((p) => p.cardCount).reduce((a, b) => a + b);
if (total === 0) {
return null;

if (parser.totalCardCount() === 0) {
const apkg = await parser.tryExperimental();
if (parser.totalCardCount() === 0) {
throw NO_PACKAGE_ERROR;
}
return {
name: `${parser.name ?? fileName}.apkg`,
apkg,
deck: parser.payload,
};
}

const apkg = await parser.build();
return { name: `${parser.name}.apkg`, apkg, deck: parser.payload };
return {
name: `${parser.name}.apkg`,
apkg,
deck: parser.payload,
};
}
7 changes: 5 additions & 2 deletions src/lib/parser/Note.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,11 @@ export default class Note {
return note;
}

isValidBasicNote() {
return this.name && this.name.trim() && this.back && this.back.trim();
isValidBasicNote(): boolean {
if (!this.name || !this.back) {
return false;
}
return this.name.trim().length > 0 && this.back.trim().length > 0;
}

isValidClozeNote() {
Expand Down
142 changes: 142 additions & 0 deletions src/lib/parser/experimental/FallbackParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import cheerio from 'cheerio';

import { File } from '../../anki/zip';
import { isHTMLFile, isMarkdownFile, isPlainText } from '../../storage/checks';
import Deck from '../Deck';
import Note from '../Note';
import Settings from '../Settings';
import { PlainTextParser } from './PlainTextParser/PlainTextParser';
import { Flashcard, isClozeFlashcard } from './PlainTextParser/types';

/**
 * Experimental parser that builds decks from the bullet lists found in
 * HTML, Markdown, or plain-text files.
 */
class FallbackParser {
  constructor(private readonly files: File[]) {}

  /**
   * Extracts the text of every list in an HTML document.
   *
   * @param html HTML source to scan
   * @returns one string per `ul`/`ol` element; each string holds that list's
   *          `li` texts, every one prefixed with "• " and ended by a newline
   */
  htmlToTextWithNewlines(html: string) {
    const $ = cheerio.load(html);
    const lists: string[] = [];

    $('ul, ol').each((_, list) => {
      let bullets = '';
      $(list)
        .find('li')
        .each((_index, item) => {
          bullets += `• ${$(item).text().trim()}\n`;
        });
      lists.push(bullets);
    });

    return lists;
  }

  /**
   * Reads the trimmed text of the document's `title` element(s).
   *
   * @param html HTML source to scan
   */
  getTitleFromHTML(html: string) {
    return cheerio.load(html)('title').text().trim();
  }

  /**
   * Reads the text of the document's `style` element(s).
   *
   * @param html HTML source to scan
   * @returns the style text, or an empty string when there is no style tag
   */
  getStyleTagFromString(html: string) {
    const styles = cheerio.load(html)('style');
    return styles.length === 0 ? '' : (styles.text() ?? '');
  }

  /**
   * Finds bullet-looking fragments in Markdown: a `-`, `*` or `+` followed by
   * a space and the rest of the line.
   *
   * @param markdown user input markdown
   * @returns every matching fragment, or null when nothing matches
   */
  getMarkdownBulletLists(markdown: string) {
    return markdown.match(/[-*+]( .*)+/g);
  }

  /**
   * Picks the deck title from the Markdown headings.
   *
   * Notion can emit two titles in Markdown files: the first one carries the
   * id of the page, the second one is the page title only. The second
   * heading is therefore preferred, then the first, then 'Default'.
   *
   * @param markdown user input markdown
   * @returns deck title
   */
  getTitleMarkdown(markdown: string) {
    const headings = Array.from(markdown.matchAll(/^(#{1,6})\s+(.*)$/gm));
    const chosen = headings[1] ?? headings[0];
    return chosen ? chosen[2] : 'Default';
  }

  /**
   * Converts parsed flashcards into notes, numbering them in order.
   *
   * Cloze cards are flagged as such. For basic cards without a usable back,
   * a multi-line front is split: its first line stays the front and the
   * remaining lines become the back.
   *
   * @param cards flashcards to convert; falsy entries are dropped first
   */
  mapCardsToNotes(cards: Flashcard[]): Note[] {
    const present = cards.filter(Boolean);

    return present.map((card, position) => {
      const note = new Note(card.front, '');
      note.number = position;

      if (isClozeFlashcard(card)) {
        note.cloze = true;
        return note;
      }

      note.back = card.back;
      const hasBack = Boolean(note.back) && note.back.trim().length > 0;
      if (hasBack) {
        return note;
      }

      const [firstLine, ...remaining] = note.name.split('\n');
      if (remaining.length > 0) {
        note.name = firstLine;
        note.back = remaining.join('\n');
      }
      return note;
    });
  }

  /**
   * Builds one deck per file that has contents.
   *
   * HTML files contribute their list items; Markdown and plain-text files
   * contribute their bullet fragments and are skipped when they have none.
   * Any other file type still yields a deck, named 'Untitled' and built from
   * an empty card list.
   *
   * @param settings settings passed to every created deck
   * @returns the decks, in file order
   */
  run(settings: Settings) {
    const decks = [];

    for (const file of this.files) {
      const contents = file.contents?.toString();
      if (!contents) {
        continue;
      }

      let deckName = 'Untitled';
      let cards: Note[] = [];

      if (isHTMLFile(file.name)) {
        const listText = this.htmlToTextWithNewlines(contents).join('\n');
        const flashcards = new PlainTextParser().parse(listText);
        cards = this.mapCardsToNotes(flashcards);
        deckName = this.getTitleFromHTML(contents);
      } else if (isMarkdownFile(file.name) || isPlainText(file.name)) {
        const bullets = this.getMarkdownBulletLists(contents);
        if (!bullets) {
          continue;
        }
        const flashcards = new PlainTextParser().parse(bullets.join('\n'));
        cards = this.mapCardsToNotes(flashcards);
        deckName = this.getTitleMarkdown(contents);
      }

      const deck = new Deck(
        deckName,
        Deck.CleanCards(cards),
        '', // skip cover image
        '', // skip style
        Deck.GenerateId(),
        settings
      );
      decks.push(deck);
    }

    return decks;
  }
}

export default FallbackParser;
63 changes: 63 additions & 0 deletions src/lib/parser/experimental/PlainTextParser/PlainTextParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import {
BasicCard,
ClozeCard,
Flashcard,
isPossiblyClozeFlashcard,
} from './types';

/**
 * Turns plain-text bullet points into flashcards.
 *
 * A bullet point has the form `question - answer`. When the question contains
 * a blank (one or more underscores) the bullet becomes a cloze card, with the
 * comma-separated answers filled into the blanks; otherwise it becomes a
 * basic front/back card.
 */
export class PlainTextParser {
  /**
   * Splits an answer string on ", " into individual answers.
   *
   * @param answers the text that followed the question/answer separator
   * @returns the answers in order; a string without ", " yields one entry
   */
  getOneOrMoreAnswers(answers: string): string[] {
    return answers.split(', ');
  }

  /**
   * Builds a cloze card by filling the blanks of a sentence, left to right,
   * with the given answers (`{{c1::…}}`, `{{c2::…}}`, …).
   *
   * Blanks without a matching answer are left untouched, and surplus answers
   * are ignored.
   *
   * @param sentence text containing one or more blanks (runs of `_`)
   * @param answers answers separated by ", "
   */
  fillInTheBlanks(sentence: string, answers: string): ClozeCard {
    const answerList = this.getOneOrMoreAnswers(answers);
    let used = 0;

    // A single pass with a replacer function has two benefits over repeated
    // string replacements: underscores inside an already inserted answer are
    // never mistaken for the next blank, and `$` sequences in an answer are
    // inserted literally instead of being read as replacement patterns.
    const clozeSentence = sentence.replace(/_+/g, (blank) => {
      if (used >= answerList.length) {
        return blank;
      }
      const answer = answerList[used];
      used += 1;
      return `{{c${used}::${answer}}}`;
    });

    return {
      front: clozeSentence,
      isCloze: true,
    };
  }

  /**
   * Builds a basic card from `front - back` text.
   *
   * Only the first " - " separates front from back, so an answer may itself
   * contain " - ". Without a separator the whole text becomes the front and
   * `back` is left undefined.
   *
   * @param flashcardText a single bullet point
   */
  getBasicFlashcard(flashcardText: string): BasicCard {
    const [front, back] = this.splitOnFirstSeparator(flashcardText);

    return {
      front: front,
      back: back,
    };
  }

  /**
   * Parses a block of text into flashcards, one per bullet point.
   *
   * Bullet points are separated by a blank line or by a new line starting
   * with "- ".
   *
   * @param input text holding one or more bullet points
   * @returns the flashcards in input order
   */
  parse(input: string): Flashcard[] {
    const flashcards: Flashcard[] = [];

    for (const bulletPoint of input.split(/\n\n|\n- /)) {
      const [question, answers] = this.splitOnFirstSeparator(bulletPoint);

      // A cloze card needs answers to fill in. A bullet that merely contains
      // an underscore (e.g. "snake_case") but has no separator is handled as
      // a basic card rather than crashing on the missing answers.
      if (answers !== undefined && isPossiblyClozeFlashcard(question)) {
        flashcards.push(this.fillInTheBlanks(question, answers));
        continue;
      }

      flashcards.push(this.getBasicFlashcard(bulletPoint));
    }

    return flashcards;
  }

  /**
   * Splits text at the first " - ": returns `[before, after]`, or `[text]`
   * when the separator does not occur.
   */
  private splitOnFirstSeparator(text: string): string[] {
    const separator = ' - ';
    const at = text.indexOf(separator);
    if (at === -1) {
      return [text];
    }
    return [text.slice(0, at), text.slice(at + separator.length)];
  }
}
27 changes: 27 additions & 0 deletions src/lib/parser/experimental/PlainTextParser/types.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/** A cloze-deletion flashcard: the answers are embedded in the front text. */
export interface ClozeCard {
/** Marks the card as cloze; isClozeFlashcard requires this to be exactly true. */
isCloze: boolean;
/** Card text, including any `{{cN::answer}}` markers. */
front: string;
/** Optional additional text — NOTE(review): no code in this module reads it; confirm intended use. */
extra?: string;
}

/** A basic question/answer flashcard. */
export interface BasicCard {
/** Question side of the card. */
front: string;
/** Answer side of the card. */
back: string;
/** Optional tags — NOTE(review): no code in this module reads it; confirm the expected format. */
tags?: string;
}

/** Any flashcard the plain-text parser can produce. */
export type Flashcard = ClozeCard | BasicCard;

/**
 * Type guard for cloze cards.
 *
 * @param flashcard card to inspect
 * @returns true only when the card has an `isCloze` property that is exactly `true`
 */
export const isClozeFlashcard = (
  flashcard: Flashcard
): flashcard is ClozeCard => {
  if (!('isCloze' in flashcard)) {
    return false;
  }
  return flashcard.isCloze === true;
};

/**
 * Type guard for basic cards.
 *
 * @param flashcard card to inspect
 * @returns true only when the card has a `back` property that is not `undefined`
 */
export const isBasicFlashcard = (
  flashcard: Flashcard
): flashcard is BasicCard => {
  if (!('back' in flashcard)) {
    return false;
  }
  return flashcard.back !== undefined;
};

/**
 * Reports whether a question contains a blank (an underscore) and is
 * therefore a candidate for a cloze card.
 *
 * The previous `&& question.split('-')` operand was always truthy (split
 * returns a non-empty array), so it never affected the decision but made the
 * function return `false | string[]`. A real boolean is returned now, with
 * the same truthiness for every input.
 *
 * @param question the question part of a bullet point
 * @returns true when the question contains at least one `_`
 */
export const isPossiblyClozeFlashcard = (question: string): boolean =>
  question.includes('_');
Loading

0 comments on commit 6c422fb

Please sign in to comment.