def detect_database_format(database_file, sample_size=1024):
    """Automatically detect the database format.

    Automatically detect the format ("dialect") using the CSV library's sniffer
    class. For example, comma-delimited, semicolon-delimited or tab-delimited.
    Default to StrictExcel if automatic detection fails.

    database_file is a seekable text file object open for reading. Up to
    sample_size characters are read from it for detection, then it is rewound
    to the start so the caller can parse it from the header row.

    Returns a csv.Dialect subclass with strict syntax checking and doubled
    quote handling enabled.

    """
    class StrictExcel(csv.excel):
        # Our helper class is really simple
        # pylint: disable=too-few-public-methods, missing-class-docstring
        strict = True

    # Read a sample from database, then rewind for the caller
    sample = database_file.read(sample_size)
    database_file.seek(0)

    # A full-size sample most likely stops in the middle of a row. Drop the
    # partial row so the sniffer only sees complete records. A sample with
    # no newline at all (one very long row) is kept as is.
    if len(sample) == sample_size:
        complete_rows, newline, _ = sample.rpartition("\n")
        if newline:
            sample = complete_rows + newline

    # Attempt automatic format detection, fall back on StrictExcel default
    try:
        csvdialect = csv.Sniffer().sniff(sample, delimiters=",;\t")
    except csv.Error:
        return StrictExcel

    # The sniffer only turns on doublequote when the sample itself contains
    # a doubled quote. Without this, a later row such as "say ""hi""" is
    # rejected in strict mode even though the excel dialect accepts it.
    csvdialect.doublequote = True
    csvdialect.strict = True
    return csvdialect
def test_database_tsv(tmpdir):
    """Automatically detect TSV database format."""
    # NOTE(review): spacing inside the string literals below was recovered
    # from a whitespace-collapsed patch; confirm against real CLI output.

    # Simple template
    template_path = Path(tmpdir/"mailmerge_template.txt")
    template_path.write_text(textwrap.dedent("""\
        TO: {{email}}
        FROM: My Self

        Hello {{name}}
    """), encoding="utf8")

    # Tab-separated format database
    database_path = Path(tmpdir/"mailmerge_database.csv")
    database_path.write_text(textwrap.dedent("""\
        email\tname
        to@test.com\tMy Name
    """), encoding="utf8")

    # Simple unsecure server config
    config_path = Path(tmpdir/"mailmerge_server.conf")
    config_path.write_text(textwrap.dedent("""\
        [smtp_server]
        host = open-smtp.example.com
        port = 25
    """), encoding="utf8")

    # Run mailmerge from the temporary directory so it finds the input
    # files under their default names
    runner = click.testing.CliRunner()
    with tmpdir.as_cwd():
        result = runner.invoke(main, ["--output-format", "text"])
    assert not result.exception
    assert result.exit_code == 0

    # Verify output. The Date header changes on every run, so redact it.
    # Flags must be passed by keyword: the fourth positional argument of
    # re.sub() is count, not flags.
    stdout = re.sub(
        r"Date:.+", "Date: REDACTED", result.output, flags=re.MULTILINE
    )
    assert stdout == textwrap.dedent("""\
        >>> message 1
        TO: to@test.com
        FROM: My Self
        MIME-Version: 1.0
        Content-Type: text/plain; charset="us-ascii"
        Content-Transfer-Encoding: 7bit
        Date: REDACTED

        Hello My Name

        >>> message 1 sent
        >>> Limit was 1 message. To remove the limit, use the --no-limit option.
        >>> This was a dry run. To send messages, use the --no-dry-run option.
    """)  # noqa: E501
def test_database_semicolon(tmpdir):
    """Automatically detect semicolon-delimited database format."""
    # NOTE(review): spacing inside the string literals below was recovered
    # from a whitespace-collapsed patch; confirm against real CLI output.

    # Simple template
    template_path = Path(tmpdir/"mailmerge_template.txt")
    template_path.write_text(textwrap.dedent("""\
        TO: {{email}}
        FROM: My Self

        Hello {{name}}
    """), encoding="utf8")

    # Semicolon-separated format database
    database_path = Path(tmpdir/"mailmerge_database.csv")
    database_path.write_text(textwrap.dedent("""\
        email;name
        to@test.com;My Name
    """), encoding="utf8")

    # Simple unsecure server config
    config_path = Path(tmpdir/"mailmerge_server.conf")
    config_path.write_text(textwrap.dedent("""\
        [smtp_server]
        host = open-smtp.example.com
        port = 25
    """), encoding="utf8")

    # Run mailmerge from the temporary directory so it finds the input
    # files under their default names
    runner = click.testing.CliRunner()
    with tmpdir.as_cwd():
        result = runner.invoke(main, ["--output-format", "text"])
    assert not result.exception
    assert result.exit_code == 0

    # Verify output. The Date header changes on every run, so redact it.
    # Flags must be passed by keyword: the fourth positional argument of
    # re.sub() is count, not flags.
    stdout = re.sub(
        r"Date:.+", "Date: REDACTED", result.output, flags=re.MULTILINE
    )
    assert stdout == textwrap.dedent("""\
        >>> message 1
        TO: to@test.com
        FROM: My Self
        MIME-Version: 1.0
        Content-Type: text/plain; charset="us-ascii"
        Content-Transfer-Encoding: 7bit
        Date: REDACTED

        Hello My Name

        >>> message 1 sent
        >>> Limit was 1 message. To remove the limit, use the --no-limit option.
        >>> This was a dry run. To send messages, use the --no-dry-run option.
    """)  # noqa: E501