Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ko] Correct Korean deinflection rules #1266

Merged
merged 4 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 54 additions & 20 deletions ext/js/language/ko/korean-transforms.js
Original file line number Diff line number Diff line change
Expand Up @@ -3444,18 +3444,18 @@ export const koreanTransforms = {
suffixInflection('ㅇㅓㄹㅏ', '', [], ['p', 'f']),
],
},
'-아/아서': {
name: '-아/아서',
'-아/어서': {
name: '-아/어서',
rules: [
suffixInflection('ㅏㅅㅓ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅅㅓ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅏㅅㅓ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅅㅓ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅅㅓ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅏㅅㅓ', 'ㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅅㅓ', 'ㅓㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅅㅓ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅅㅓ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅕㅅㅓ', 'ㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅅㅓ', 'ㅇㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅅㅓ', 'ㅇㅣㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅎㅐㅅㅓ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅎㅏㅇㅕㅅㅓ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅗㅏㅅㅓ', 'ㅂㄷㅏ', [], ['v', 'adj']),
Expand Down Expand Up @@ -3485,12 +3485,12 @@ export const koreanTransforms = {
suffixInflection('ㅏㅇㅑ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅇㅑ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅏㅇㅑ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅏㅇㅑ', 'ㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅇㅑ', 'ㅓㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅑ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅇㅑ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅕㅇㅑ', 'ㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑ', 'ㅇㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑ', 'ㅇㅣㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅎㅐㅇㅑ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅎㅏㅇㅕㅇㅑ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅗㅏㅇㅑ', 'ㅂㄷㅏ', [], ['v', 'adj']),
Expand Down Expand Up @@ -3520,12 +3520,12 @@ export const koreanTransforms = {
suffixInflection('ㅏㅇㅑㄱㅔㅆ', 'ㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅓㅇㅑㄱㅔㅆ', 'ㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅇㅏㅇㅑㄱㅔㅆ', 'ㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑㄱㅔㅆ', 'ㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑㄱㅔㅆ', 'ㄷㅏ', ['f'], ['v', 'adj', 'ida']),
suffixInflection('ㅏㅇㅑㄱㅔㅆ', 'ㅏㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅓㅇㅑㄱㅔㅆ', 'ㅓㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅇㅑㄱㅔㅆ', 'ㄷㅏ', ['f'], ['v', 'adj', 'ida']),
suffixInflection('ㅇㅑㄱㅔㅆ', 'ㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅕㅇㅑㄱㅔㅆ', 'ㅣㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑㄱㅔㅆ', 'ㅇㅣㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑㄱㅔㅆ', 'ㅇㅣㄷㅏ', ['f'], ['v', 'adj', 'ida']),
suffixInflection('ㅎㅐㅇㅑㄱㅔㅆ', 'ㅎㅏㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅎㅏㅇㅕㅇㅑㄱㅔㅆ', 'ㅎㅏㄷㅏ', ['f'], ['v', 'adj']),
suffixInflection('ㅇㅗㅏㅇㅑㄱㅔㅆ', 'ㅂㄷㅏ', ['f'], ['v', 'adj']),
Expand Down Expand Up @@ -3554,12 +3554,12 @@ export const koreanTransforms = {
suffixInflection('ㅏㅇㅑㅁㅏㄴ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅇㅑㅁㅏㄴ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅏㅇㅑㅁㅏㄴ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑㅁㅏㄴ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑㅁㅏㄴ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅏㅇㅑㅁㅏㄴ', 'ㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅇㅑㅁㅏㄴ', 'ㅓㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅑㅁㅏㄴ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅇㅑㅁㅏㄴ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅕㅇㅑㅁㅏㄴ', 'ㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑㅁㅏㄴ', 'ㅇㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑㅁㅏㄴ', 'ㅇㅣㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅎㅐㅇㅑㅁㅏㄴ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅎㅏㅇㅕㅇㅑㅁㅏㄴ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅗㅏㅇㅑㅁㅏㄴ', 'ㅂㄷㅏ', [], ['v', 'adj']),
Expand Down Expand Up @@ -3589,12 +3589,12 @@ export const koreanTransforms = {
suffixInflection('ㅏㅇㅑㅈㅣ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅇㅑㅈㅣ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅏㅇㅑㅈㅣ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑㅈㅣ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅑㅈㅣ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅏㅇㅑㅈㅣ', 'ㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅇㅑㅈㅣ', 'ㅓㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅑㅈㅣ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅕㅇㅑㅈㅣ', 'ㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑㅈㅣ', 'ㅇㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅑㅈㅣ', 'ㅇㅣㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅎㅐㅇㅑㅈㅣ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅎㅏㅇㅕㅇㅑㅈㅣ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅗㅏㅇㅑㅈㅣ', 'ㅂㄷㅏ', [], ['v', 'adj']),
Expand Down Expand Up @@ -3626,10 +3626,10 @@ export const koreanTransforms = {
suffixInflection('ㅏㅇㅛ', 'ㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅇㅛ', 'ㅓㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅏㅇㅛ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅛ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅇㅛ', 'ㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅇㅛ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅕㅇㅛ', 'ㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅛ', 'ㅇㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅇㅛ', 'ㅇㅣㄷㅏ', [], ['v', 'adj', 'ida']),
suffixInflection('ㅎㅐㅇㅛ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅎㅏㅇㅕㅇㅛ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅗㅏㅇㅛ', 'ㅂㄷㅏ', [], ['v', 'adj']),
Expand Down Expand Up @@ -3688,6 +3688,40 @@ export const koreanTransforms = {
suffixInflection('ㅇㅓㅈㅣㅇㅣㄷㅏ', '', [], ['p', 'f']),
],
},
'-아/어지다': {
name: '-아/어지다',
rules: [
suffixInflection('ㅏㅈㅣㄷㅏ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅈㅣㄷㅏ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅏㅈㅣㄷㅏ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅈㅣㄷㅏ', 'ㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅏㅈㅣㄷㅏ', 'ㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅈㅣㄷㅏ', 'ㅓㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅕㅈㅣㄷㅏ', 'ㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅕㅈㅣㄷㅏ', 'ㅇㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅎㅐㅈㅣㄷㅏ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅎㅏㅇㅕㅈㅣㄷㅏ', 'ㅎㅏㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅗㅏㅈㅣㄷㅏ', 'ㅂㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅜㅓㅈㅣㄷㅏ', 'ㅂㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅏㅈㅣㄷㅏ', 'ㅅㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅈㅣㄷㅏ', 'ㅅㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅐㅈㅣㄷㅏ', 'ㅣㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅐㅈㅣㄷㅏ', 'ㅏㅎㄷㅏ', [], ['adj']),
suffixInflection('ㅐㅈㅣㄷㅏ', 'ㅓㅎㄷㅏ', [], ['adj']),
suffixInflection('ㅖㅈㅣㄷㅏ', 'ㅕㅎㄷㅏ', [], ['adj']),
suffixInflection('ㅒㅈㅣㄷㅏ', 'ㅑㅎㄷㅏ', [], ['adj']),
suffixInflection('ㄹㅇㅏㅈㅣㄷㅏ', 'ㄷㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㄹㅇㅓㅈㅣㄷㅏ', 'ㄷㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㄹㄹㅏㅈㅣㄷㅏ', 'ㄹㅡㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㄹㄹㅓㅈㅣㄷㅏ', 'ㄹㅡㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㄹㅡㄹㅓㅈㅣㄷㅏ', 'ㄹㅡㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅏㅈㅣㄷㅏ', 'ㅡㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅈㅣㄷㅏ', 'ㅡㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅏㅈㅣㄷㅏ', 'ㅡㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅓㅈㅣㄷㅏ', 'ㅡㄷㅏ', [], ['v', 'adj']),
suffixInflection('ㅇㅓㅈㅣㄷㅏ', '', [], ['p', 'f']),
],
},
'-았/었': {
name: '-았/었',
rules: [
Expand Down Expand Up @@ -3796,8 +3830,8 @@ export const koreanTransforms = {
'-야': {
name: '-야',
rules: [
suffixInflection('ㅇㅏㄴㅣㅇㅑ', '아니다', [], ['adj']),
suffixInflection('ㅇㅣㅇㅑ', '이다', [], ['ida']),
suffixInflection('ㅇㅏㄴㅣㅇㅑ', 'ㅇㅏㄴㅣㄷㅏ', [], ['adj']),
suffixInflection('ㅇㅣㅇㅑ', 'ㅇㅣㄷㅏ', [], ['ida']),
],
},
'-언마는': {
Expand Down
30 changes: 30 additions & 0 deletions test/language/korean-transforms.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -690,11 +690,41 @@ const tests = [
{term: '돕다', source: '돕자', rule: 'v', reasons: ['-자']},
],
},
// Valid deinflections of the copula 이다 (condition 'ida').
// Each entry states that `source` deinflects to `term` through exactly the listed transforms.
{
category: '이다',
valid: true,
tests: [
// Dedicated '-야' transform.
{term: '이다', source: '이야', rule: 'ida', reasons: ['-야']},


// Uncontracted forms: the stem 이 followed by an 어-initial ending.
{term: '이다', source: '이어서', rule: 'ida', reasons: ['-아/어서']},
{term: '이다', source: '이어야', rule: 'ida', reasons: ['-아/어야']},
{term: '이다', source: '이어야겠', rule: 'ida', reasons: ['-아/어야겠']},
{term: '이다', source: '이어야만', rule: 'ida', reasons: ['-아/어야만']},
{term: '이다', source: '이어야지', rule: 'ida', reasons: ['-아/어야지']},
{term: '이다', source: '이어요', rule: 'ida', reasons: ['-아/어요']},

// Contracted forms: 이 + 어 written as 여.
{term: '이다', source: '여서', rule: 'ida', reasons: ['-아/어서']},
{term: '이다', source: '여야', rule: 'ida', reasons: ['-아/어야']},
{term: '이다', source: '여야겠', rule: 'ida', reasons: ['-아/어야겠']},
{term: '이다', source: '여야만', rule: 'ida', reasons: ['-아/어야만']},
{term: '이다', source: '여야지', rule: 'ida', reasons: ['-아/어야지']},
{term: '이다', source: '여요', rule: 'ida', reasons: ['-아/어요']},
],
},
{
category: 'invalid deinflections',
valid: false,
tests: [
{term: '보다', source: '보십시다', rule: 'v', reasons: ['-(으)시', '-(으)ㅂ시다']},

// ida
{term: '이다', source: '이서', rule: 'ida', reasons: ['아/어서']},
{term: '이다', source: '이야', rule: 'ida', reasons: ['아/어야']},
{term: '이다', source: '이야겠', rule: 'ida', reasons: ['아/어야겠']},
{term: '이다', source: '이야만', rule: 'ida', reasons: ['아/어야만']},
{term: '이다', source: '이야지', rule: 'ida', reasons: ['아/어야지']},
{term: '이다', source: '이요', rule: 'ida', reasons: ['아/어요']},
],
},
];
Expand Down