SethClydesdale · SethClydesdale · Nov 9, 2020 · Oct 28, 2020 · Nov 7, 2020 · Nov 7, 2020
diff --git a/resources/javascript/homepage.js b/resources/javascript/homepage.js
@@ -34,6 +34,10 @@
         content : 'Looking for more self-study resources? Visit the official <a href="http://genki.japantimes.co.jp/self_en">self-study room</a> for Genki or check out some of the resources in the <a href="https://github.com/SethClydesdale/genki-study-resources#resources-for-studying-japanese">readme</a> on GitHub. If you use Anki to study vocab, you can find decks for the vocab on Genki Study Resources <a href="' + getPaths() + 'help/anki-decks/">here</a>!'
       },
 
+      {
+        content : 'You can now find xlsx vocabulary lists for Genki in our <a href="https://github.com/SethClydesdale/genki-study-resources/tree/master/resources/tools/wordlist_E-J">Github</a>!'
+      },
+
       {
         content : 'Have a question about the site? Check out the <a href="' + getPaths() + 'help/">FAQ</a>! If you can\'t find an answer to your question, feel free to contact us via <a href="https://github.com/SethClydesdale/genki-study-resources/issues">GitHub\'s issues</a> and we\'ll try to answer your question in a timely manner.'
       },

diff --git a/resources/tools/README.md b/resources/tools/README.md
@@ -88,4 +88,23 @@ python3 anki_decks_maker.py ../../lessons
 
 You can also generate decks by executing `anki_decks_maker-run.bat` and typing either `2nd` or `3rd` to generate a deck for that edition.
 
-All of the decks created are currently available under the [decks](decks/) folder
+All of the decks created are currently available under the [decks](decks/) folder
+
+
+### wordlist_E-J.py
+ * Requires python 3.6+.
+
+ Creates an xlsx word list of the vocabulary and its English meanings, to help with memorising vocabulary.
+
+
+ ```shell script
+python3 wordlist_E-J.py <path_to_lessons_folder>
+
+# For example: 
+python3 wordlist_E-J.py ../../lessons-3rd
+python3 wordlist_E-J.py ../../lessons
+```
+
+You can also generate the word lists by executing `wordlist_E-J-run.bat` and typing either `2nd` or `3rd` to generate the lists for that edition.
+
+All of the lists created are currently available under the [wordlist_E-J](wordlist_E-J/) folder.
diff --git a/resources/tools/wordlist_E-J-run.bat b/resources/tools/wordlist_E-J-run.bat
@@ -0,0 +1,9 @@
+@echo off
+rem Asks for a Genki edition and runs wordlist_E-J.py on the matching lessons folder.
+title xlsx wordlist Maker for Genki Study Resources
+color 1F
+
+rem Clear any inherited value so that pressing ENTER alone counts as "no edition".
+set "id="
+set /p id="Type 2nd or 3rd, then press ENTER to generate xlsx word lists for that edition. "
+
+rem Both sides are quoted: an empty or multi-word answer would otherwise make IF fail with a syntax error.
+if "%id%"=="2nd" (python wordlist_E-J.py ../../lessons) else if "%id%"=="3rd" (python wordlist_E-J.py ../../lessons-3rd) else (echo No edition selected, please press any key to terminate the program.)
+
+pause
diff --git a/resources/tools/wordlist_E-J.py b/resources/tools/wordlist_E-J.py
@@ -0,0 +1,75 @@
+import re
+import ast
+import sys
+from pathlib import Path
+from itertools import chain
+
+import openpyxl
+
+# Folder that is searched for lesson* sub-folders; taken from the first command-line argument.
+lessons_folder = Path(sys.argv[1])
+# Captures two colon-separated parts of a page <title>, ending at "- Lesson".
+title_regex = re.compile(r'<title>(.*):(.*)- Lesson')
+# Captures the text after "quizlet : " up to and including the first closing brace (may span lines).
+quizlet_regex = re.compile(r'quizlet : (.*?})', flags=re.S)
+# Matches markers of pages that main() skips: listed exercise formats/types and specific page titles.
+filter_regex = re.compile(r"format : 'kanji'|format : 'practice'|format : 'hirakata'|type : 'fill'|type : 'drawing'|type : 'stroke'|type : 'multi'|type : 'writing'|<title>Review:|Kanji Practice: Match the Readings|Kanji Practice: Match the Sentences|Kanji Practice: Match the Verbs|Katakana Practice: Countries and Capitals", flags=re.S)
+# Destination for the workbooks: <current working directory>/wordlist_E-J/<name of lessons_folder>.
+output_folder = Path('.').absolute().joinpath('wordlist_E-J').joinpath(lessons_folder.name)
+
+
+def get_tags(html):
+    """
+    <title>Useful Expressions: Time (Minutes 11-30) - Lesson 1 | Genki ...</title>
+    Useful_Expressions , Time_(Minutes_11-30)
+    """
+    match = title_regex.search(html)
+    return match.group(1).strip().replace(' ', '_'), match.group(2).strip().replace(' ', '_')
+
+
+def get_vocab(html):
+    return ast.literal_eval(quizlet_regex.search(html).group(1).replace(r'//', '#'))
+
+
+def main():
+    try :
+        print('Creating folder for xlsx...')
+        output_folder.mkdir(parents=True, exist_ok=False)
+    except Exception:
+        print('Folder already exists, skipping this step.')
+
+    workbooks = list()
+
+    for lesson_folder in lessons_folder.glob('lesson*'):
+        lesson_number = lesson_folder.name.split('-')[-1]
+
+        print(f'Getting vocab for Lesson {lesson_number}...')
+
+        wb = openpyxl.Workbook()
+        sheet = wb.active
+        sheet.cell(1, 1).value = "English"
+        sheet.cell(1, 2).value = "Japanese"
+        row_num = 2
+
+        for vocab_folder in chain(lesson_folder.glob('vocab*'), lesson_folder.glob('literacy*')):
+            with open(vocab_folder.joinpath('index.html'), 'r', encoding='UTF8') as f:
+                html = f.read()
+                if filter_regex.search(html) == None: # Filter out exercise types that are NOT vocab
+                    try:
+                        vocab = get_vocab(html)
+                    except Exception:
+                        print(f'Failed parsing of lesson-{lesson_number}, vocab file {vocab_folder}')
+                        continue
+                    for jp, eng in vocab.items():
+                        eng = re.sub(r"\<(.*?)\>", '', eng)
+                        sheet.cell(row_num, 1).value = eng
+                        sheet.cell(row_num, 2).value = jp
+
+                        row_num += 1
+
+        workbooks.append((wb, lesson_folder.name))
+
+    for wb, name in workbooks:
+        print(f'Creating deck for {name}...');
+        wb.save(output_folder.joinpath(f'{name}.xlsx'))
+
+    print('All xlsx list for the selected edition have been generated!')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-0.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-0.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-1.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-1.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-2.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-2.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-3.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-3.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-4.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-4.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-5.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-5.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-6.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-6.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-7.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-7.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-8.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-8.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons-3rd/lesson-9.xlsx b/resources/tools/wordlist_E-J/lessons-3rd/lesson-9.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-0.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-0.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-1.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-1.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-10.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-10.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-11.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-11.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-12.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-12.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-13.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-13.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-14.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-14.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-15.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-15.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-16.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-16.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-17.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-17.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-18.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-18.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-19.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-19.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-2.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-2.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-20.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-20.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-21.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-21.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-22.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-22.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-23.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-23.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-3.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-3.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-4.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-4.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-5.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-5.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-6.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-6.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-7.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-7.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-8.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-8.xlsx
diff --git a/resources/tools/wordlist_E-J/lessons/lesson-9.xlsx b/resources/tools/wordlist_E-J/lessons/lesson-9.xlsx
-Original file line number
+Diff line change
@@ Expand Up / @@ -34,6 +34,10 @@ @@
             content : 'Looking for more self-study resources? Visit the official <a href="http://genki.japantimes.co.jp/self_en">self-study room</a> for Genki or check out some of the resources in the <a href="https://github.com/SethClydesdale/genki-study-resources#resources-for-studying-japanese">readme</a> on GitHub. If you use Anki to study vocab, you can find decks for the vocab on Genki Study Resources <a href="' + getPaths() + 'help/anki-decks/">here</a>!'
           },
+          {
+            content : 'You can now find xlsx vocabulary lists for Genki in our <a href="https://github.com/SethClydesdale/genki-study-resources/tree/master/resources/tools/wordlist_E-J">Github</a>!'
+          },
           {
             content : 'Have a question about the site? Check out the <a href="' + getPaths() + 'help/">FAQ</a>! If you can\'t find an answer to your question, feel free to contact us via <a href="https://github.com/SethClydesdale/genki-study-resources/issues">GitHub\'s issues</a> and we\'ll try to answer your question in a timely manner.'
           },
@@ Expand Down @@