Skip to content

Commit

Permalink
Merge pull request #373 from JeroenVdb/master
Browse files Browse the repository at this point in the history
Fixed and extend NL locale + add test for all
  • Loading branch information
wanasit authored Feb 15, 2021
2 parents 911588c + 269fa37 commit fbbd996
Show file tree
Hide file tree
Showing 21 changed files with 1,898 additions and 64 deletions.
10 changes: 5 additions & 5 deletions src/common/parsers/ISOFormatParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ import { AbstractParserWithWordBoundaryChecking } from "./AbstractParserWithWord
// - TZD = (Z or +hh:mm or -hh:mm)
const PATTERN = new RegExp(
"([0-9]{4})\\-([0-9]{1,2})\\-([0-9]{1,2})" +
"(?:T" + //..
"([0-9]{1,2}):([0-9]{1,2})" + // hh:mm
"(?::([0-9]{1,2})(?:\\.(\\d{1,4}))?)?" + // :ss.s
"(?:Z|([+-]\\d{2}):?(\\d{2})?)?" + // TZD (Z or ±hh:mm or ±hhmm or ±hh)
")?" + //..
"(?:T" + //..
"([0-9]{1,2}):([0-9]{1,2})" + // hh:mm
"(?::([0-9]{1,2})(?:\\.(\\d{1,4}))?)?" + // :ss.s
"(?:Z|([+-]\\d{2}):?(\\d{2})?)?" + // TZD (Z or ±hh:mm or ±hhmm or ±hh)
")?" + //..
"(?=\\W|$)",
"i"
);
Expand Down
2 changes: 1 addition & 1 deletion src/locales/nl/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ const SINGLE_TIME_UNIT_REGEX = new RegExp(SINGLE_TIME_UNIT_PATTERN, "i");
const SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE = SINGLE_TIME_UNIT_PATTERN.replace(/\((?!\?)/g, "(?:");

export const TIME_UNITS_PATTERN =
`(?:(?:about|around)\\s*)?` +
`(?:(?:binnen|in)\\s*)?` +
`${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE}\\s*(?:,?\\s*${SINGLE_TIME_UNIT_PATTERN_NO_CAPTURE})*`;

export function parseTimeUnits(timeunitText): TimeUnits {
Expand Down
10 changes: 8 additions & 2 deletions src/locales/nl/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ import NLWeekdayParser from "./parsers/NLWeekdayParser";
import NLMonthNameMiddleEndianParser from "./parsers/NLMonthNameMiddleEndianParser";
import NLMonthNameParser from "./parsers/NLMonthNameParser";
import NLSlashMonthFormatParser from "./parsers/NLSlashMonthFormatParser";
import NLTimeExpressionParser from "./parsers/NLTimeExpressionParser";
import NLCasualYearMonthDayParser from "./parsers/NLCasualYearMonthDayParser";
import NLCasualDateTimeParser from "./parsers/NLCasualDateTimeParser";

// Shortcuts
export const casual = new Chrono(createCasualConfiguration());
Expand All @@ -28,6 +31,7 @@ export function createCasualConfiguration(littleEndian = true): Configuration {
const option = createConfiguration(false, littleEndian);
option.parsers.unshift(new NLCasualDateParser());
option.parsers.unshift(new NLCasualTimeParser());
option.parsers.unshift(new NLCasualDateTimeParser());
return option;
}

Expand All @@ -36,11 +40,13 @@ export function createConfiguration(strictMode = true, littleEndian = true): Con
{
parsers: [
new SlashDateFormatParser(littleEndian),
new NLTimeUnitWithinFormatParser(),
new NLWeekdayParser(),
new NLMonthNameMiddleEndianParser(),
new NLMonthNameParser(),
new NLTimeExpressionParser(),
new NLTimeUnitWithinFormatParser(),
new NLSlashMonthFormatParser(),
new NLWeekdayParser(),
new NLCasualYearMonthDayParser(),
],
refiners: [new NLMergeDateTimeRefiner(), new NLMergeDateRangeRefiner()],
},
Expand Down
7 changes: 2 additions & 5 deletions src/locales/nl/parsers/NLCasualDateParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ import { ParsingComponents, ParsingResult } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import * as references from "../../../common/casualReferences";

export default class ENCasualDateParser extends AbstractParserWithWordBoundaryChecking {
export default class NLCasualDateParser extends AbstractParserWithWordBoundaryChecking {
innerPattern(context: ParsingContext): RegExp {
return /(nu|vandaag|vanacht|morgen|morgend|gisteren)(?=\W|$)/i;
return /(nu|vandaag|morgen|morgend|gisteren)(?=\W|$)/i;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | ParsingResult {
Expand All @@ -25,9 +25,6 @@ export default class ENCasualDateParser extends AbstractParserWithWordBoundaryCh

case "gisteren":
return references.yesterday(context.refDate);

case "vanacht":
return references.tonight(context.refDate);
}

return component;
Expand Down
75 changes: 75 additions & 0 deletions src/locales/nl/parsers/NLCasualDateTimeParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { ParsingContext } from "../../../chrono";
import { ParsingComponents, ParsingResult } from "../../../results";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";
import { Meridiem } from "../../../index";
import { assignSimilarDate, assignTheNextDay } from "../../../utils/dayjs";
import dayjs from "dayjs";

/*
* Find combined words
* - morgenochtend
* - morgenmiddag
* - morgennamiddag
* - morgenavond
* - morgennacht
* - vanochtend
* - vanmiddag
* - vannamiddag
* - vanavond
* - vannacht
* - gisterenochtend
* - gisterenmiddag
* - gisterennamiddag
* - gisterenavond
* - gisterennacht
* */

// Capture-group indexes of the pattern returned by innerPattern().
const DATE_GROUP = 1;
const TIME_OF_DAY_GROUP = 2;

/**
 * Parses Dutch compound words that fuse a relative day with a part of the day,
 * e.g. "morgenochtend", "vanavond" or "gisterennacht".
 *
 * The day prefix ("gisteren", "van", "morgen") decides which date helper is
 * applied to the reference date; the suffix implies a meridiem and an hour.
 */
export default class NLCasualDateTimeParser extends AbstractParserWithWordBoundaryChecking {
    /**
     * @param context Parsing context (unused here).
     * @returns Case-insensitive pattern capturing the day prefix (group 1) and
     *          the part of the day (group 2); the match must be followed by a
     *          non-word character or the end of the input.
     */
    innerPattern(context: ParsingContext): RegExp {
        return /(gisteren|morgen|van)(ochtend|middag|namiddag|avond|nacht)(?=\W|$)/i;
    }

    /**
     * Builds the parsing components for one match of innerPattern().
     *
     * @param context Parsing context providing the reference date.
     * @param match Regex match holding the day prefix and the part of the day.
     * @returns Components with the date taken from the prefix and an implied
     *          meridiem/hour taken from the suffix.
     */
    innerExtract(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | ParsingResult {
        const dateText = match[DATE_GROUP].toLowerCase();
        const timeText = match[TIME_OF_DAY_GROUP].toLowerCase();
        const component = context.createParsingComponents();
        const targetDate = dayjs(context.refDate);

        switch (dateText) {
            case "gisteren":
                // Yesterday: one day before the reference date.
                assignSimilarDate(component, targetDate.add(-1, "day"));
                break;
            case "van":
                // "van…" refers to the reference day itself.
                assignSimilarDate(component, targetDate);
                break;
            case "morgen":
                assignTheNextDay(component, targetDate);
                break;
        }

        switch (timeText) {
            case "ochtend":
                component.imply("meridiem", Meridiem.AM);
                component.imply("hour", 6);
                break;
            case "middag":
                // NOTE(review): noon is tagged AM here; confirm this is the intended convention.
                component.imply("meridiem", Meridiem.AM);
                component.imply("hour", 12);
                break;
            case "namiddag":
                component.imply("meridiem", Meridiem.PM);
                component.imply("hour", 15);
                break;
            case "avond":
                component.imply("meridiem", Meridiem.PM);
                component.imply("hour", 20);
                break;
            case "nacht":
                // The pattern accepts "nacht", so a time must be implied for it too;
                // without this case "vannacht" & co. carried a date but no time of day.
                // NOTE(review): 22h/PM is a late-evening assumption — confirm the desired hour.
                component.imply("meridiem", Meridiem.PM);
                component.imply("hour", 22);
                break;
        }

        return component;
    }
}
15 changes: 12 additions & 3 deletions src/locales/nl/parsers/NLCasualTimeParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,25 @@ import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/
import dayjs from "dayjs";
import { assignTheNextDay } from "../../../utils/dayjs";

export default class ENCasualTimeParser extends AbstractParserWithWordBoundaryChecking {
const DAY_GROUP = 1;
const MOMENT_GROUP = 2;

export default class NLCasualTimeParser extends AbstractParserWithWordBoundaryChecking {
innerPattern() {
return /(?:this)?\s*(namiddag|avond|middernacht|ochtend|middag|'s middags|'s avonds|'s ochtends)(?=\W|$)/i;
return /(deze)?\s*(namiddag|avond|middernacht|ochtend|middag|'s middags|'s avonds|'s ochtends)(?=\W|$)/i;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
const targetDate = dayjs(context.refDate);
const component = context.createParsingComponents();

switch (match[1].toLowerCase()) {
if (match[DAY_GROUP] === "deze") {
component.assign("day", context.refDate.getDate());
component.assign("month", context.refDate.getMonth() + 1);
component.assign("year", context.refDate.getFullYear());
}

switch (match[MOMENT_GROUP].toLowerCase()) {
case "namiddag":
case "'s namiddags":
component.imply("meridiem", Meridiem.PM);
Expand Down
49 changes: 49 additions & 0 deletions src/locales/nl/parsers/NLCasualYearMonthDayParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { ParsingContext } from "../../../chrono";
import { MONTH_DICTIONARY } from "../constants";
import { matchAnyPattern } from "../../../utils/pattern";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

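/*
Date format with a separator between the numbers, like ENSlashDateFormatParser,
but this parser expects the year before the month and the day.
The separator may be ".", "/" or whitespace; the month may be a number or a month name.
- YYYY/MM/DD
- YYYY.MM.DD
- YYYY MM DD
Note: "-" is not part of the separator class in the pattern below, so YYYY-MM-DD is not matched by this parser.
*/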
// Year-first date: a 4-digit year, a month (a name or a 1-2 digit number) and a
// 1-2 digit day, each pair separated by exactly one ".", "/" or whitespace character.
// A hyphen is not an accepted separator in this pattern.
const PATTERN = new RegExp(
`([0-9]{4})[\\.\\/\\s]` + // group 1: year, then one separator
// group 2: month name (presumably an alternation of MONTH_DICTIONARY keys — verify matchAnyPattern),
// or group 3: numeric month; then one separator
`(?:(${matchAnyPattern(MONTH_DICTIONARY)})|([0-9]{1,2}))[\\.\\/\\s]` +
`([0-9]{1,2})` + // group 4: day of month
"(?=\\W|$)", // must be followed by a non-word character or the end of input
"i" // case-insensitive
);

// Capture-group indexes within PATTERN. Only one of the two month groups is set per match.
const YEAR_NUMBER_GROUP = 1;
const MONTH_NAME_GROUP = 2;
const MONTH_NUMBER_GROUP = 3;
const DATE_NUMBER_GROUP = 4;

/**
 * Parses year-first dates where the month is either a number or a month name
 * found in MONTH_DICTIONARY.
 */
export default class NLCasualYearMonthDayParser extends AbstractParserWithWordBoundaryChecking {
    /** @returns The module-level year/month/day pattern. */
    innerPattern(): RegExp {
        return PATTERN;
    }

    /**
     * Converts a PATTERN match into day/month/year values.
     *
     * @param context Parsing context (unused here).
     * @param match Regex match produced by PATTERN.
     * @returns The extracted `{ day, month, year }`, or `null` when the month is
     *          not within 1-12 or the day is not within 1-31.
     */
    innerExtract(context: ParsingContext, match: RegExpMatchArray) {
        // Prefer the numeric month; otherwise resolve the month name via the dictionary.
        const month = match[MONTH_NUMBER_GROUP]
            ? parseInt(match[MONTH_NUMBER_GROUP], 10)
            : MONTH_DICTIONARY[match[MONTH_NAME_GROUP].toLowerCase()];

        // Negated range test so an unresolved (undefined/NaN) month is rejected as well.
        if (!(month >= 1 && month <= 12)) {
            return null;
        }

        const year = parseInt(match[YEAR_NUMBER_GROUP], 10);
        const day = parseInt(match[DATE_NUMBER_GROUP], 10);

        // The pattern allows any 1-2 digit day (e.g. "00" or "45"); reject impossible values.
        if (!(day >= 1 && day <= 31)) {
            return null;
        }

        return {
            day: day,
            month: month,
            year: year,
        };
    }
}
58 changes: 34 additions & 24 deletions src/locales/nl/parsers/NLMonthNameMiddleEndianParser.ts
Original file line number Diff line number Diff line change
@@ -1,43 +1,51 @@
import { ParsingContext } from "../../../chrono";
import { findYearClosestToRef } from "../../../calculation/years";
import { MONTH_DICTIONARY } from "../../en/constants";
import { ORDINAL_NUMBER_PATTERN, parseOrdinalNumberPattern } from "../../en/constants";
import { YEAR_PATTERN, parseYear } from "../../en/constants";
import { MONTH_DICTIONARY } from "../constants";
import { ORDINAL_NUMBER_PATTERN, parseOrdinalNumberPattern } from "../constants";
import { YEAR_PATTERN, parseYear } from "../constants";
import { matchAnyPattern } from "../../../utils/pattern";
import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/AbstractParserWithWordBoundary";

const PATTERN = new RegExp(
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
"(?:-|/|\\s*,?\\s*)" +
`(${ORDINAL_NUMBER_PATTERN})(?!\\s*)\\s*` +
"(?:" +
"(?:to|\\-)\\s*" +
`(${ORDINAL_NUMBER_PATTERN})\\s*` +
"(?:on\\s*?)?" +
`(${ORDINAL_NUMBER_PATTERN})` +
"(?:\\s*" +
"(?:tot|\\-|\\–|until|through|till|\\s)\\s*" +
`(${ORDINAL_NUMBER_PATTERN})` +
")?" +
"(?:-|/|\\s*(?:of)?\\s*)" +
"(" +
matchAnyPattern(MONTH_DICTIONARY) +
")" +
"(?:" +
"(?:-|/|\\s*,?\\s*)" +
`(${YEAR_PATTERN})` +
"(?:-|/|,?\\s*)" +
`(${YEAR_PATTERN}(?![^\\s]\\d))` +
")?" +
"(?=\\W|$)(?!\\:\\d)",
"(?=\\W|$)",
"i"
);

const MONTH_NAME_GROUP = 1;
const DATE_GROUP = 2;
const DATE_TO_GROUP = 3;
const MONTH_NAME_GROUP = 3;
const DATE_GROUP = 1;
const DATE_TO_GROUP = 2;
const YEAR_GROUP = 4;

/**
* The parser for parsing US's date format that begin with month's name.
* - January 13
* - January 13, 2012
* - January 13 - 15, 2012
* Note: Watch out for:
* - January 12:00
* - January 12.44
* - January 1222344
* The parser for parsing BE/NL date format with month's name in full writing
* - 1 januari 2019
* - 01 januari 2019
* - 10 januari 2019
* - 13 januari
* - 10 - 25 maart
* - 10 - 25 maart 2019
* - 1 aug 2019
* - 1 september 200 voor Christus
* - 1 september 2002 na Christus
* - 19 januari 87
* - 12de juli 2013
* - 1ste november 2013
*/
export default class ENMonthNameMiddleEndianParser extends AbstractParserWithWordBoundaryChecking {
export default class NLMonthNameMiddleEndianParser extends AbstractParserWithWordBoundaryChecking {
innerPattern(): RegExp {
return PATTERN;
}
Expand All @@ -46,6 +54,8 @@ export default class ENMonthNameMiddleEndianParser extends AbstractParserWithWor
const month = MONTH_DICTIONARY[match[MONTH_NAME_GROUP].toLowerCase()];
const day = parseOrdinalNumberPattern(match[DATE_GROUP]);
if (day > 31) {
// e.g. "[96 Aug]" => "9[6 Aug]", we need to shift away from the next number
match.index = match.index + match[DATE_GROUP].length;
return null;
}

Expand Down
16 changes: 6 additions & 10 deletions src/locales/nl/parsers/NLMonthNameParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ import { AbstractParserWithWordBoundaryChecking } from "../../../common/parsers/

const PATTERN = new RegExp(
`(${matchAnyPattern(MONTH_DICTIONARY)})` +
"\\s*" +
"(?:" +
`\\s*` +
`(?:` +
`[,-]?\\s*(${YEAR_PATTERN})?` +
")?" +
"(?=[^\\s\\w]|\\s+[^0-9]|\\s+$|$)",
Expand All @@ -20,20 +20,16 @@ const YEAR_GROUP = 2;

/**
* The parser for parsing month name and year.
* - January, 2012
* - January 2012
* - January
* - januari, 2012
* - januari 2012
* - januari
*/
export default class ENMonthNameParser extends AbstractParserWithWordBoundaryChecking {
export default class NLMonthNameParser extends AbstractParserWithWordBoundaryChecking {
innerPattern(): RegExp {
return PATTERN;
}

innerExtract(context: ParsingContext, match: RegExpMatchArray) {
if (match[0].length <= 3) {
return null;
}

const components = context.createParsingComponents();
components.imply("day", 1);

Expand Down
2 changes: 1 addition & 1 deletion src/locales/nl/parsers/NLSlashMonthFormatParser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const YEAR_GROUP = 2;
* - 11/05
* - 06/2005
*/
export default class ENSlashMonthFormatParser extends AbstractParserWithWordBoundaryChecking {
export default class NLSlashMonthFormatParser extends AbstractParserWithWordBoundaryChecking {
innerPattern(): RegExp {
return PATTERN;
}
Expand Down
22 changes: 22 additions & 0 deletions src/locales/nl/parsers/NLTimeExpressionParser.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import { AbstractTimeExpressionParser } from "../../../common/parsers/AbstractTimeExpressionParser";
import { ParsingComponents } from "../../../results";
import { ParsingContext } from "../../../chrono";

/**
 * Dutch time-expression parser. It supplies the locale-specific pattern
 * fragments to AbstractTimeExpressionParser and discards matches that consist
 * solely of four digits.
 */
export default class NLTimeExpressionParser extends AbstractTimeExpressionParser {
    /** @returns Pattern fragment for an optional leading "om" before the primary time. */
    primaryPrefix(): string {
        return "(?:(?:om)\\s*)?";
    }

    /** @returns Pattern fragment for the connector placed before a following time. */
    followingPhase(): string {
        return "\\s*(?:\\-|\\–|\\~|\\〜|om|\\?)\\s*";
    }

    /**
     * Delegates to the base implementation unless the whole match is just four
     * digits (optionally padded with whitespace), which reads like a year such
     * as "2020" rather than a time.
     *
     * @param context Parsing context forwarded to the base class.
     * @param match Regex match for the primary time expression.
     * @returns The base class result, or `null` for a bare four-digit match.
     */
    extractPrimaryTimeComponents(context: ParsingContext, match: RegExpMatchArray): ParsingComponents | null {
        const looksLikeYear = /^\s*\d{4}\s*$/.test(match[0]);
        return looksLikeYear ? null : super.extractPrimaryTimeComponents(context, match);
    }
}
Loading

0 comments on commit fbbd996

Please sign in to comment.