Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
jiru committed Mar 13, 2023
0 parents commit ed51594
Show file tree
Hide file tree
Showing 8 changed files with 4,337 additions and 0 deletions.
4,037 changes: 4,037 additions & 0 deletions ccc.tsv

Large diffs are not rendered by default.

168 changes: 168 additions & 0 deletions colorize_hanzi.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
// The following code changes the text color
// of hanzi according to pinyin tones.
// You can change the colors in the CSS

/**
 * Colors the hanzi inside an element according to the tones of its pinyin.
 *
 * Every hanzi (or ASCII digit) found in the element's text is wrapped in
 * `<span class="tone tone-N">…</span>`, where N is the tone of the pinyin
 * syllable at the same position: 1-4 for the four tones, 5 for a neutral
 * (unmarked) syllable, 0 when no tone could be determined.
 *
 * Only the part of `pinyin` before the first '/' is used. A '/' in the hanzi
 * text restarts the syllable counter, so every '/'-separated hanzi variant is
 * colored with the same tone sequence. Characters that run past the end of
 * the tone list reuse the tone last assigned to the same character, if any.
 *
 * @param {{innerHTML: string}} hanziElem - element whose `innerHTML` is
 *   read and then overwritten with the colored markup.
 * @param {string} pinyin - pinyin with tone diacritics or trailing tone
 *   numbers (e.g. "nǐ hǎo" or "ni3 hao3").
 */
function colorHanzi(hanziElem, pinyin) {
  /**
   * Splits pinyin into syllables and returns one tone number per syllable.
   */
  function getTones(pinyin) {
    /**
     * Maps a captured vowel (or erhua "r") to its tone number;
     * returns 0 for a character that is in none of the tables.
     */
    function getToneNumber(diac) {
      const allDiacs = ['āēīōūǖ', 'áéíóúǘ', 'ǎěǐǒǔǚ', 'àèìòùǜ', 'aeiouür'];
      // findIndex yields -1 when nothing matches, which becomes tone 0.
      return allDiacs.findIndex(function (set) { return set.includes(diac); }) + 1;
    }

    // Each vowel class is a capturing group: the first group that takes
    // part in a match is the vowel that carries the tone mark.
    const a = '([aāáǎà])';
    const e = '([eēéěè])';
    const ae = '([aāáǎàeēéěè])';
    const i = '([iīíǐì])';
    const o = '([oōóǒò])';
    const u = '([uūúǔùüǖǘǚǜ])';
    const eu = '([eēéěèuūúǔù])';
    // NOTE(review): some finals use a lazy `ng??`. With numbered pinyin such
    // as "beng1" the match stops at "ben", so the trailing digit is not seen.
    // Left unchanged here because it also affects syllable segmentation.
    const regex =
      '(?:'
      +'(?:'
      +'mi'+u
      +'|[pm]'+o+'u'
      +'|[bpm](?:'
      +o
      +'|'+e+'(?:i|ng??)?'
      +'|'+a+'(?:ng?|i|o)?'
      +'|i(?:'+e+'|'+a+'[no])'
      +'|'+i+'(?:ng?)?'
      +'|'+u
      +')'
      +')'
      +'|(?:f(?:'+o+'u?|'+ae+'(?:ng?|i)?|'+u+'))'
      +'|(?:'
      +'d(?:'+e+'(?:i|ng?)|i(?:'+a+'[on]?|'+u+'))'
      +'|[dt](?:'
      +a+'(?:i|ng?|o)?'
      +'|'+e+'(?:i|ng)?'
      +'|i(?:'+a+'[on]?|'+eu+')'
      +'|'+i+'(?:ng)?'
      +'|'+o+'(?:ng?|u)'
      +'|u(?:'+o+'|'+i+'|'+a+'n?)'
      +'|'+u+'n?'
      +')'
      +')'
      +'|(?:'
      +'n'+e+'ng?'
      +'|[ln](?:'
      +a+'(?:i|ng?|o)?'
      +'|'+e+'(?:i|ng)?'
      +'|i(?:'+a+'ng|'+a+'[on]?|'+e+'|'+u+')'
      +'|'+i+'(?:ng?)?'
      +'|'+o+'(?:ng?|u)'
      +'|u(?:'+o+'|'+a+'n?)'
      +'|ü'+e+'?'
      +'|'+u+'n?'
      +')'
      +')'
      +'|(?:[ghk](?:'+a+'(?:i|ng??|o)?|'+e+'(?:i|ng?)?|'+o+'(?:u|ng)|u(?:'+a+'(?:i|ng??)??|'+i+'|'+o+')|'+u+'n?))'
      +'|(?:zh?'+e+'i|[cz]h?(?:'+e+'(?:ng?)?|'+o+'(?:ng?|u)?|'+a+'o|u?'+a+'(?:i|ng?)?|u?(?:'+o+'|'+i+')|'+u+'n?))'
      +'|(?:'
      +'s'+o+'ng'
      +'|shu'+a+'(?:i|ng?)?'
      +'|sh'+e+'i'
      +'|sh?(?:'
      +a+'(?:i|ng?|o)?'
      +'|'+e+'n?g?'
      +'|'+o+'u'
      +'|u(?:'+a+'n|'+o+'|'+i+')'
      +'|'+u+'n?'
      +'|'+i
      +')'
      +')'
      +'|(?:'
      +'r(?:'
      +ae+'ng?'
      +'|'+i
      +'|'+e
      +'|'+a+'o'
      +'|'+o+'u'
      +'|'+o+'ng'
      +'|u(?:'+o+'|'+i+')'
      +'|'+u+'n?'
      +'|u'+a+'n?'
      +')'
      +'|(r)'
      +')'
      // The final after "ia" is non-capturing: a captured "o"/"n" here used
      // to be mistaken for the tone-number slot.
      +'|(?:[jqx](?:i(?:'+a+'(?:o|ng??)?|(?:'+e+'|'+u+')|'+o+'ng)|'+i+'(?:ng?)??|u(?:'+e+'|'+a+'n)|'+u+'n??))'
      +'|(?:'
      +'(?:'
      +a+'(?:i|o|ng?)?'
      +'|'+o+'u?'
      +'|'+e+'(?:i|ng?|r)?'
      +')'
      +')'
      +'|(?:w(?:'+a+'(?:i|ng??)?|'+o+'|'+e+'(?:i|ng?)?|'+u+'))'
      +'|y(?:'+a+'(?:o|ng??)?|'+e+'|'+i+'(?:ng?)?|'+o+'(?:u|ng)?|u(?:'+e+'|'+a+'n)|'+u+'n??)'
      +')'
      +'([12345])?';

    // Normalize so that combining diacritics become the precomposed
    // characters the vowel classes above are written with.
    const firstReading = pinyin.normalize().split('/')[0].trim();
    const syllableRe = new RegExp(regex, 'g');

    return Array.from(firstReading.matchAll(syllableRe), function (match) {
      const groups = match.slice(1);
      // The tone-number group is the last group of the pattern, so read it
      // by position instead of guessing from the number of captures.
      const toneDigit = groups[groups.length - 1];
      if (toneDigit !== undefined) {
        return Number.parseInt(toneDigit, 10); // tone as pinyin number
      }
      const vowel = groups.find(function (g) { return g !== undefined; });
      return vowel === undefined ? 0 : getToneNumber(vowel); // tone as diacritic
    });
  }

  const tones = getTones(pinyin.toLowerCase());

  // \p{Han} according to ftp://ftp.unicode.org/Public/UNIDATA/Scripts.txt (only codepoints lower than U+FFFF)
  const Han = '\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303A\u303B\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFA6D\uFA70-\uFAD9';
  // The first alternative swallows whole tags so that digits and slashes in
  // markup (attributes, closing tags) are never treated as card text.
  const tokenRe = new RegExp('<[^>]*>|[/0-9' + Han + ']', 'gu');
  const memory = new Map();
  let n = 0;

  hanziElem.innerHTML = hanziElem.innerHTML.replace(tokenRe, function (match) {
    if (match.startsWith('<')) {
      return match; // markup: copy through untouched
    }
    if (match === '/') {
      n = 0; // next hanzi variant: start over with the first syllable
      return match;
    }
    let tone = tones[n++];
    if (tone === undefined && memory.has(match)) {
      tone = memory.get(match);
    }
    if (tone === undefined) {
      return match;
    }
    memory.set(match, tone);
    return '<span class="tone tone-' + tone + '">' + match + '</span>';
  });
}

/**
 * Looks up the card's pinyin and hanzi elements by id and, when both are
 * present, colors the hanzi using the pinyin element's text.
 * Does nothing if either element is missing.
 */
function colorFlashCard() {
  const pinyinElem = document.getElementById("ddzw-pinyin");
  // Only look for the hanzi element once the pinyin element is known to exist.
  const target = pinyinElem ? document.getElementById("ddzw-hanzi") : null;

  if (pinyinElem && target) {
    colorHanzi(target, pinyinElem.innerText);
  }
}

// Colorize the card as soon as this script is evaluated.
colorFlashCard();
71 changes: 71 additions & 0 deletions generate_deck.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python

import genanki
import csv
from sys import argv

# Pseudorandom IDs generated using
# import random; random.randrange(1 << 30, 1 << 31)
# deck_id is passed to genanki.Deck and model_id to genanki.Model below.
deck_id = 1727536177
model_id = 1858604882

class CCCNote(genanki.Note):
    """Note whose GUID is built from two of its fields instead of all of them."""

    @property
    def guid(self):
        # Fields 0 and 3 are "Hanzi" and "Part of speech" in the model
        # defined by gen_model(); together they identify a note.
        hanzi, part_of_speech = self.fields[0], self.fields[3]
        return genanki.guid_for(hanzi, part_of_speech)

def read_file(file):
    """Return the whole content of a text file as a string.

    The file is decoded as UTF-8 explicitly: the templates and the script
    read through this helper contain non-ASCII characters, and the platform
    default encoding is not UTF-8 everywhere.

    :param file: path of the file to read.
    :returns: the decoded file content.
    :raises OSError: if the file cannot be opened.
    :raises UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(file, 'r', encoding='utf-8') as f:
        return f.read()

def add_notes(deck, model, tsv_file):
    """Add one CCCNote to ``deck`` for every row of a tab-separated file.

    Columns 0-4 of a row become the note fields and column 5 is a
    whitespace-separated list of tags.

    :param deck: deck object; notes are added through ``deck.add_note``.
    :param model: model passed to every created ``CCCNote``.
    :param tsv_file: path of the tab-separated input file (read as UTF-8).
    :raises IndexError: if a non-empty row has fewer than six columns.
    """
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires; the encoding is fixed because the data is Chinese.
    with open(tsv_file, encoding='utf-8', newline='') as ccc:
        deckreader = csv.reader(ccc, delimiter='\t')
        for row in deckreader:
            if not row:
                # Blank line (e.g. a trailing newline at the end of the file).
                continue
            note = CCCNote(
                model=model,
                fields=row[0:5],
                # split() without an argument drops the empty strings that
                # split(' ') produced for an empty or double-spaced tag column.
                tags=row[5].split(),
            )
            deck.add_note(note)

def gen_model():
    """Build the 'Chinese vocab' genanki model.

    The model has five Arial fields and two card templates (中->英 and
    英->中). The content of colorize_hanzi.js is appended, wrapped in a
    <script> element, to the answer side of both templates.
    """
    colorizer = read_file('colorize_hanzi.js')
    script = f'\n<script>\n{colorizer}\n</script>'

    field_names = ("Hanzi", "Pinyin", "English", "Part of speech", "Lesson")
    fields = [{"font": "Arial", "name": field_name} for field_name in field_names]

    # Hanzi on the front, pinyin and English on the back.
    to_english = {
        'name': '中->英',
        'qfmt': read_file('tmpl.eng.qfmt.html'),
        'afmt': read_file('tmpl.eng.afmt.html') + script,
    }
    # Pinyin and English on the front, hanzi on the back.
    to_chinese = {
        'name': '英->中',
        'qfmt': read_file('tmpl.cmn.qfmt.html'),
        'afmt': read_file('tmpl.cmn.afmt.html') + script,
    }
    stylesheet = read_file('tmpl.css')

    return genanki.Model(
        model_id,
        'Chinese vocab',
        fields=fields,
        templates=[to_english, to_chinese],
        css=stylesheet,
    )

def compile_deck(output_file):
    """Create the deck from ccc.tsv and write it as a package to ``output_file``."""
    deck = genanki.Deck(deck_id, '當代中文課程')
    add_notes(deck, gen_model(), 'ccc.tsv')
    package = genanki.Package(deck)
    package.write_to_file(output_file)

# Check the command line before building anything. The previous
# try/except IndexError wrapped the whole of compile_deck(), so an
# IndexError raised while building the deck (for example a TSV row with
# too few columns) was swallowed and reported as a usage error.
if len(argv) > 1:
    compile_deck(argv[1])
else:
    print(f"Usage: {argv[0]} <output_deck.apkg>")
7 changes: 7 additions & 0 deletions tmpl.cmn.afmt.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!-- Answer side of the 英->中 card: the question followed by the hanzi and the lesson. generate_deck.py appends the colorize_hanzi.js script after this template. -->
{{FrontSide}}

<hr id=answer>

<span lang="zh-tw" id="ddzw-hanzi">{{Hanzi}}</span>
<br><br>
<span lang="en"><small>{{Lesson}}</small></span>
7 changes: 7 additions & 0 deletions tmpl.cmn.qfmt.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<!-- Question side of the 英->中 card: optional part of speech, then pinyin and English. -->
{{#Part of speech}}
<span lang="en">({{Part of speech}})</span>
{{/Part of speech}}

<span lang="en" id="ddzw-pinyin">{{Pinyin}}</span>
<br><br>
{{English}}
36 changes: 36 additions & 0 deletions tmpl.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* Tone colors, light/day theme. The tone-N classes are the ones
   colorize_hanzi.js puts on each colored character. */
.tone {
}
.tone-1 {
  color: #e30000;
}
.tone-2 {
  color: #02b31c;
}
.tone-3 {
  color: #1510f0;
}
.tone-4 {
  color: #8900bf;
}
.tone-5 {
  color: #777777;
}

/* Tone colors, dark/night theme. */
.card.nightMode .tone {
}
.card.nightMode .tone-1 {
  color: #ff8080;
}
.card.nightMode .tone-2 {
  color: #80ff80;
}
.card.nightMode .tone-3 {
  color: #8080ff;
}
.card.nightMode .tone-4 {
  color: #df80ff;
}
.card.nightMode .tone-5 {
  color: #c6c6c6;
}

/* Base look of every card. */
.card {
  font-family: arial;
  font-size: 20px;
  text-align: center;
  color: black;
  background-color: white;
}

div > span {
  vertical-align: middle;
}

/* The hanzi is the focus of the card, so it is shown much larger. */
#ddzw-hanzi {
  font-size: 50px;
}

6 changes: 6 additions & 0 deletions tmpl.eng.afmt.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<!-- Answer side of the 中->英 card: the question followed by pinyin, English and the lesson. generate_deck.py appends the colorize_hanzi.js script after this template. -->
{{FrontSide}}

<hr id=answer>

<span lang="en"><span id="ddzw-pinyin">{{Pinyin}}</span><br><br>{{English}}</span><br><br>
<span lang="en"><small>{{Lesson}}</small></span>
5 changes: 5 additions & 0 deletions tmpl.eng.qfmt.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<!-- Question side of the 中->英 card: optional part of speech, then the hanzi. -->
{{#Part of speech}}
<span lang="en">({{Part of speech}})</span>
{{/Part of speech}}

<span lang="zh-tw" id="ddzw-hanzi">{{Hanzi}}</span>

0 comments on commit ed51594

Please sign in to comment.