Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds support for tabs and semicolons as delimiters #147

Merged
merged 7 commits into from
Feb 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 30 additions & 8 deletions mailmerge/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,25 +255,47 @@ def create_sample_input_files(template_path, database_path, config_path):
"""))


def detect_database_format(database_file):
    """Automatically detect the database format.

    Automatically detect the format ("dialect") using the CSV library's sniffer
    class.  For example, comma-delimited, tab-delimited, etc.  Default to
    StrictExcel if automatic detection fails.

    The returned dialect always has strict syntax checking enabled, whether
    it was sniffed from the data or is the StrictExcel fallback.

    Args:
        database_file: Open, seekable, text-mode file object.  A sample is
            read from it and the file is then rewound to position 0.

    Returns:
        A csv.Dialect subclass with ``strict = True``, suitable for the
        ``dialect`` argument of csv.reader / csv.DictReader.

    """
    class StrictExcel(csv.excel):
        # Our helper class is really simple
        # pylint: disable=too-few-public-methods, missing-class-docstring
        strict = True

    # Read a sample from database, then rewind so that the caller's reader
    # still sees the file from its first line (including the header row)
    sample = database_file.read(1024)
    database_file.seek(0)

    # Attempt automatic format detection, fall back on StrictExcel default.
    # Only comma, semicolon and tab are considered as candidate delimiters.
    try:
        csvdialect = csv.Sniffer().sniff(sample, delimiters=",;\t")
    except csv.Error:
        return StrictExcel

    # The sniffer creates a new dialect class on every call, so enabling
    # strict mode here does not affect any shared or built-in dialect.
    csvdialect.strict = True
    return csvdialect


def read_csv_database(database_path):
"""Read database CSV file, providing one line at a time.

We'll use a class to modify the csv library's default dialect ('excel') to
enable strict syntax checking. This will trigger errors for things like
Use strict syntax checking, which will trigger errors for things like
unclosed quotes.

We open the file with the utf-8-sig encoding, which skips a byte order mark
(BOM), if any. Sometimes Excel will save CSV files with a BOM. See Issue
#93 https://github.com/awdeorio/mailmerge/issues/93

"""
class StrictExcel(csv.excel):
# Our helper class is really simple
# pylint: disable=too-few-public-methods, missing-class-docstring
strict = True

with database_path.open(encoding="utf-8-sig") as database_file:
reader = csv.DictReader(database_file, dialect=StrictExcel)
csvdialect = detect_database_format(database_file)
csvdialect.strict = True
reader = csv.DictReader(database_file, dialect=csvdialect)
try:
for row in reader:
yield row
Expand Down
106 changes: 106 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,3 +859,109 @@ def test_database_bom(tmpdir):
>>> Limit was 1 message. To remove the limit, use the --no-limit option.
>>> This was a dry run. To send messages, use the --no-dry-run option.
""") # noqa: E501


def test_database_tsv(tmpdir):
    """Automatically detect TSV database format.

    Write a template, a tab-delimited database and a server config using the
    default file names into tmpdir, run mailmerge from that directory, and
    verify that the tab-separated columns are rendered into the message.
    """
    # Simple template
    template_path = Path(tmpdir/"mailmerge_template.txt")
    template_path.write_text(textwrap.dedent("""\
        TO: {{email}}
        FROM: My Self <myself@mydomain.com>

        Hello {{name}}
    """), encoding="utf8")

    # Tab-separated format database
    database_path = Path(tmpdir/"mailmerge_database.csv")
    database_path.write_text(textwrap.dedent("""\
        email\tname
        to@test.com\tMy Name
    """), encoding="utf8")

    # Simple unsecure server config
    config_path = Path(tmpdir/"mailmerge_server.conf")
    config_path.write_text(textwrap.dedent("""\
        [smtp_server]
        host = open-smtp.example.com
        port = 25
    """), encoding="utf8")

    # Run mailmerge
    runner = click.testing.CliRunner()
    with tmpdir.as_cwd():
        result = runner.invoke(main, ["--output-format", "text"])
    assert not result.exception
    assert result.exit_code == 0

    # Verify output.  The Date header changes on every run, so redact it.
    # MULTILINE must be passed by keyword: the fourth positional argument of
    # re.sub() is the replacement count, not the flags.
    stdout = re.sub(
        r"Date:.+", "Date: REDACTED", result.output, flags=re.MULTILINE,
    )
    assert stdout == textwrap.dedent("""\
        >>> message 1
        TO: to@test.com
        FROM: My Self <myself@mydomain.com>
        MIME-Version: 1.0
        Content-Type: text/plain; charset="us-ascii"
        Content-Transfer-Encoding: 7bit
        Date: REDACTED

        Hello My Name

        >>> message 1 sent
        >>> Limit was 1 message. To remove the limit, use the --no-limit option.
        >>> This was a dry run. To send messages, use the --no-dry-run option.
    """)  # noqa: E501


def test_database_semicolon(tmpdir):
    """Automatically detect semicolon-delimited database format.

    Write a template, a semicolon-delimited database and a server config
    using the default file names into tmpdir, run mailmerge from that
    directory, and verify that the semicolon-separated columns are rendered
    into the message.
    """
    # Simple template
    template_path = Path(tmpdir/"mailmerge_template.txt")
    template_path.write_text(textwrap.dedent("""\
        TO: {{email}}
        FROM: My Self <myself@mydomain.com>

        Hello {{name}}
    """), encoding="utf8")

    # Semicolon-separated format database
    database_path = Path(tmpdir/"mailmerge_database.csv")
    database_path.write_text(textwrap.dedent("""\
        email;name
        to@test.com;My Name
    """), encoding="utf8")

    # Simple unsecure server config
    config_path = Path(tmpdir/"mailmerge_server.conf")
    config_path.write_text(textwrap.dedent("""\
        [smtp_server]
        host = open-smtp.example.com
        port = 25
    """), encoding="utf8")

    # Run mailmerge
    runner = click.testing.CliRunner()
    with tmpdir.as_cwd():
        result = runner.invoke(main, ["--output-format", "text"])
    assert not result.exception
    assert result.exit_code == 0

    # Verify output.  The Date header changes on every run, so redact it.
    # MULTILINE must be passed by keyword: the fourth positional argument of
    # re.sub() is the replacement count, not the flags.
    stdout = re.sub(
        r"Date:.+", "Date: REDACTED", result.output, flags=re.MULTILINE,
    )
    assert stdout == textwrap.dedent("""\
        >>> message 1
        TO: to@test.com
        FROM: My Self <myself@mydomain.com>
        MIME-Version: 1.0
        Content-Type: text/plain; charset="us-ascii"
        Content-Transfer-Encoding: 7bit
        Date: REDACTED

        Hello My Name

        >>> message 1 sent
        >>> Limit was 1 message. To remove the limit, use the --no-limit option.
        >>> This was a dry run. To send messages, use the --no-dry-run option.
    """)  # noqa: E501