From 1d094f2e9e18c5247002271b2eefed74e2eaece7 Mon Sep 17 00:00:00 2001 From: Paul O'Leary McCann Date: Tue, 1 Aug 2017 23:00:33 +0900 Subject: [PATCH] Fix handling of node-format when output-format-type is set Current behavior is that if output-format-type is set, node-format is ignored. This includes the case where output-format-type is set in a config file, so if your config file sets a default format, it's impossible to override it on the command line. One solution is to avoid setting a default output format in config files. While that works, it's worth noting unidic-mecab's default config file includes a default output format. If someone tries to use node-format without being aware of this, they might just think it doesn't work. This change makes it so that if node-format is explicitly set, output-format-type is ignored. If output-format-type isn't used, behavior is unchanged; this only changes things when output-format-type and node-format are both set. -POLM --- mecab/src/tagger.cpp | 4 ++-- mecab/src/writer.cpp | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/mecab/src/tagger.cpp b/mecab/src/tagger.cpp index 2780dbb..8aa4fef 100644 --- a/mecab/src/tagger.cpp +++ b/mecab/src/tagger.cpp @@ -49,9 +49,9 @@ const MeCab::Option long_options[] = { "output marginal probability (default false)" }, { "max-grouping-size", 'M', "24", "INT", "maximum grouping size for unknown words (default 24)" }, - { "node-format", 'F', "%m\\t%H\\n", "STR", + { "node-format", 'F', "", "STR", "use STR as the user-defined node format" }, - { "unk-format", 'U', "%m\\t%H\\n", "STR", + { "unk-format", 'U', "", "STR", "use STR as the user-defined unknown node format" }, { "bos-format", 'B', "", "STR", "use STR as the user-defined beginning-of-sentence format" }, diff --git a/mecab/src/writer.cpp b/mecab/src/writer.cpp index ed68767..592294a 100644 --- a/mecab/src/writer.cpp +++ b/mecab/src/writer.cpp @@ -23,7 +23,8 @@ void Writer::close() { } bool Writer::open(const 
Param &param) { - const std::string ostyle = param.get("output-format-type"); + const std::string node_format_option = param.get("node-format"); + const std::string ostyle = node_format_option.empty() ? param.get("output-format-type") : ""; write_ = &Writer::writeLattice; if (ostyle == "wakati") { @@ -77,7 +78,8 @@ bool Writer::open(const Param &param) { if (node_format != node_format2 || bos_format != bos_format2 || eos_format != eos_format2 || unk_format != unk_format2) { write_ = &Writer::writeUser; - if (node_format != node_format2) { + + if (!node_format2.empty() && node_format != node_format2) { node_format = node_format2; } if (bos_format != bos_format2) { @@ -86,9 +88,9 @@ bool Writer::open(const Param &param) { if (eos_format != eos_format2) { eos_format = eos_format2; } - if (unk_format != unk_format2) { + if (!unk_format2.empty() && unk_format != unk_format2) { unk_format = unk_format2; - } else if (node_format != node_format2) { + } else if (!node_format2.empty() && node_format != node_format2) { unk_format = node_format2; } else { unk_format = node_format;