Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix for issue/217-Specifying-asterisk-as-query-causes-error #250

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ Options

* ``awslogs groups``: List existing groups
* ``awslogs streams GROUP``: List existing streams within ``GROUP``
* ``awslogs get GROUP [STREAM_EXPRESSION]``: Get logs matching ``STREAM_EXPRESSION`` in ``GROUP``.
* ``awslogs get GROUP [STREAM_EXPRESSION]``: Get logs from streams with names matching ``STREAM_EXPRESSION`` in log group ``GROUP``.

- Expressions can be regular expressions or the wildcard ``ALL`` if you want any and don't want to type ``.*``.
- STREAM_EXPRESSION is a Python regular expression accepted by ``re.compile()`` `described here <https://docs.python.org/3/library/re.html#regular-expression-syntax>`_. Expression ``ALL`` is reserved and is the same as ``'.*'``. Remember to quote/escape shell special characters so they are not gobbled up by shell filename (glob) expansion. E.g. ``'2014-04.*'`` instead of ``2014-04.*``
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't mention how the regex is used. It is used in .match(…), rather than, say, .fullmatch(…) (which I might prefer but have no strong opinion about†), or .search(…) (which, it sounds like, is how people imagine it being used).

† changing from .match(…) would be a breaking change (dropping the ^ but still using .match(…) is not as it acts exactly the same) so should be a major version change+in release notes


**Note:** You need to provide to all these options a valid AWS region using ``--aws-region`` or ``AWS_REGION`` env variable.

Expand Down
55 changes: 49 additions & 6 deletions awslogs/bin.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,62 @@
import os
import re
import sys
import locale
import codecs
import argparse
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.tz import tzutc

import boto3
from botocore.client import ClientError
from botocore.compat import total_seconds
from termcolor import colored

from . import exceptions
from .core import AWSLogs
from ._version import __version__


def regex_str(s):
    """Validate that ``s`` is a valid Python regular expression.

    Intended for use as an ``argparse`` ``type=`` callable: the string is
    returned unchanged when it compiles, so downstream code keeps receiving
    the raw pattern text.

    :param s: candidate regular expression pattern.
    :returns: ``s`` unchanged.
    :raises exceptions.InvalidPythonRegularExpressionError: if ``s`` does
        not compile.
    """
    try:
        re.compile(s)
    # re.error is the only exception re.compile raises for a bad pattern;
    # catching Exception would also swallow unrelated bugs.
    except re.error as err:
        raise exceptions.InvalidPythonRegularExpressionError(
            'Log stream name pattern', s) from err

    return s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually the return value/what is passed around after handling the CLI would be the compiled regex - it may also speed up the code if it the code using it is called more than once, as the cache use isn't free.



def seconds_since_epoch(datetime_text):
    """Parse ``datetime_text`` into *milliseconds* since the Unix epoch.

    Despite the function's name, the return value is in milliseconds
    (epoch seconds * 1000), matching what the CloudWatch Logs API expects.

    ``datetime_text`` may be a relative expression such as ``'5m'`` or
    ``'2 hours ago'``, or any absolute date/time string understood by
    ``dateutil.parser.parse``.

    :returns: integer epoch milliseconds, or ``None`` for empty input.
    :raises exceptions.UnknownDateError: if the text cannot be parsed.
    """
    if not datetime_text:
        return None

    # Relative form: "<amount><unit>[ ago]"; only the unit's first letter
    # (m/h/d/w) is significant below.
    ago_regexp = r'(\d+)\s?(m|minute|minutes|h|hour|hours|d|day|days|w|week|weeks)(?: ago)?'
    ago_match = re.match(ago_regexp, datetime_text)

    if ago_match:
        amount, unit = ago_match.groups()
        amount = int(amount)
        unit = {'m': 60, 'h': 3600, 'd': 86400, 'w': 604800}[unit[0]]
        date = datetime.utcnow() + timedelta(seconds=unit * amount * -1)
    else:
        try:
            date = parse(datetime_text)
        except ValueError as err:
            raise exceptions.UnknownDateError(datetime_text) from err

    if date.tzinfo:
        # Normalize timezone-aware datetimes to naive UTC before the epoch
        # arithmetic below.  NOTE: the original compared the *bound method*
        # (``date.utcoffset != 0``, always true); calling it is the fix, and
        # skipping astimezone() for a zero offset yields the same result.
        if date.utcoffset():
            date = date.astimezone(tzutc())
        date = date.replace(tzinfo=None)

    # timedelta.total_seconds() replaces the private botocore.compat helper.
    return int((date - datetime(1970, 1, 1)).total_seconds()) * 1000


def main(argv=None):

if sys.version_info < (3, 0):
Expand Down Expand Up @@ -57,13 +101,13 @@ def add_common_arguments(parser):

def add_date_range_arguments(parser, default_start='5m'):
parser.add_argument("-s", "--start",
type=str,
type=seconds_since_epoch,
dest='start',
default=default_start,
help="Start time (default %(default)s)")

parser.add_argument("-e", "--end",
type=str,
type=seconds_since_epoch,
dest='end',
help="End time")

Expand All @@ -81,7 +125,7 @@ def add_date_range_arguments(parser, default_start='5m'):
help="log group name")

get_parser.add_argument("log_stream_name",
type=str,
type=regex_str,
default="ALL",
nargs='?',
help="log stream name")
Expand Down Expand Up @@ -167,10 +211,9 @@ def add_date_range_arguments(parser, default_start='5m'):
type=str,
help="log group name")

# Parse input
options, args = parser.parse_known_args(argv)

try:
# Parse input
options, args = parser.parse_known_args(argv)
logs = AWSLogs(**vars(options))
if not hasattr(options, 'func'):
parser.print_help()
Expand Down
48 changes: 13 additions & 35 deletions awslogs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,26 @@
import os
import time
import errno
from datetime import datetime, timedelta
import logging
from datetime import datetime
from collections import deque

import boto3
import botocore
from botocore.compat import json, six, total_seconds
from botocore.compat import json, six

import jmespath

from termcolor import colored
from dateutil.parser import parse
from dateutil.tz import tzutc

from . import exceptions

logger = logging.getLogger('awslogs')
FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(format=FORMAT, filename="/tmp/awslogs.log")
# setLevel to logging.DEBUG to enable logging.
logger.setLevel(logging.CRITICAL)


COLOR_ENABLED = {
'always': True,
Expand Down Expand Up @@ -59,6 +64,7 @@ class AWSLogs(object):
ALL_WILDCARD = 'ALL'

def __init__(self, **kwargs):
logger.debug('AWSLogs(): kwargs: %s', kwargs)
self.aws_region = kwargs.get('aws_region')
self.aws_access_key_id = kwargs.get('aws_access_key_id')
self.aws_secret_access_key = kwargs.get('aws_secret_access_key')
Expand All @@ -75,8 +81,8 @@ def __init__(self, **kwargs):
self.output_timestamp_enabled = kwargs.get('output_timestamp_enabled')
self.output_ingestion_time_enabled = kwargs.get(
'output_ingestion_time_enabled')
self.start = self.parse_datetime(kwargs.get('start'))
self.end = self.parse_datetime(kwargs.get('end'))
self.start = kwargs.get('start')
self.end = kwargs.get('end')
self.query = kwargs.get('query')
if self.query is not None:
self.query_expression = jmespath.compile(self.query)
Expand All @@ -92,7 +98,7 @@ def __init__(self, **kwargs):
def _get_streams_from_pattern(self, group, pattern):
    """Yield stream names in ``group`` matching ``pattern``.

    ``pattern`` is applied with ``re`` *match* semantics, i.e. anchored at
    the start of the stream name (per PR #250 the explicit ``'^'`` prefix
    was dropped; ``.match`` anchors there anyway).  The reserved value
    ``ALL`` matches every stream.
    """
    pattern = '.*' if pattern == self.ALL_WILDCARD else pattern
    # compile the raw pattern directly; '{0}'.format(pattern) was a no-op
    reg = re.compile(pattern)
    for stream in self.get_streams(group):
        if reg.match(stream):
            yield stream
Expand Down Expand Up @@ -250,7 +256,6 @@ def get_streams(self, log_group_name=None):
kwargs = {'logGroupName': log_group_name or self.log_group_name}
window_start = self.start or 0
window_end = self.end or sys.float_info.max

paginator = self.client.get_paginator('describe_log_streams')
for page in paginator.paginate(**kwargs):
for stream in page.get('logStreams', []):
Expand All @@ -268,30 +273,3 @@ def color(self, text, color):
if self.color_enabled:
return colored(text, color)
return text

def parse_datetime(self, datetime_text):
    """Parse ``datetime_text`` into *milliseconds* since the Unix epoch.

    Accepts a relative expression such as ``'5m'`` or ``'2 hours ago'``,
    or any absolute date/time string understood by
    ``dateutil.parser.parse``.

    :returns: integer epoch milliseconds, or ``None`` for empty input.
    :raises exceptions.UnknownDateError: if the text cannot be parsed.
    """
    if not datetime_text:
        return None

    # Relative form: "<amount><unit>[ ago]"; only the unit's first letter
    # (m/h/d/w) is significant below.
    ago_regexp = r'(\d+)\s?(m|minute|minutes|h|hour|hours|d|day|days|w|week|weeks)(?: ago)?'
    ago_match = re.match(ago_regexp, datetime_text)

    if ago_match:
        amount, unit = ago_match.groups()
        amount = int(amount)
        unit = {'m': 60, 'h': 3600, 'd': 86400, 'w': 604800}[unit[0]]
        date = datetime.utcnow() + timedelta(seconds=unit * amount * -1)
    else:
        try:
            date = parse(datetime_text)
        except ValueError as err:
            raise exceptions.UnknownDateError(datetime_text) from err

    if date.tzinfo:
        # Normalize timezone-aware datetimes to naive UTC.  Bug fix:
        # ``date.utcoffset != 0`` compared the bound method to 0 (always
        # true); calling it preserves behavior (astimezone on a zero
        # offset is a no-op) while expressing the intended check.
        if date.utcoffset():
            date = date.astimezone(tzutc())
        date = date.replace(tzinfo=None)

    # timedelta.total_seconds() replaces the private botocore.compat helper.
    return int((date - datetime(1970, 1, 1)).total_seconds()) * 1000
10 changes: 9 additions & 1 deletion awslogs/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,12 @@ class NoStreamsFilteredError(BaseAWSLogsException):
code = 7

def hint(self):
    """Return the user-facing message; ``self.args[0]`` is the pattern."""
    return "No streams match your pattern '{0}' for the given time period.".format(*self.args)


class InvalidPythonRegularExpressionError(BaseAWSLogsException):
    """Raised when a user-supplied pattern fails to compile as a regex."""

    # CLI exit status reported for this error.
    code = 8

    def hint(self):
        """Return the user-facing message; args are (label, pattern)."""
        template = "{0} '{1}' is not a valid Python regular expression."
        return template.format(*self.args)
54 changes: 43 additions & 11 deletions tests/test_it.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,14 @@

from awslogs import AWSLogs
from awslogs.exceptions import UnknownDateError
from awslogs.bin import main
from awslogs.bin import main, seconds_since_epoch

import logging
logger = logging.getLogger('awslogs')
FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(format=FORMAT, filename="/tmp/awslogs.log")
# setLevel to logging.DEBUG to enable logging.
logger.setLevel(logging.CRITICAL)


def mapkeys(keys, rec_lst):
Expand All @@ -30,9 +37,10 @@ def mapkeys(keys, rec_lst):


class TestAWSLogsDatetimeParse(unittest.TestCase):

@patch('awslogs.bin.datetime')
@patch('awslogs.core.boto3_client')
@patch('awslogs.core.datetime')
def test_parse_datetime(self, datetime_mock, botoclient):
def test_seconds_since_epoch(self, botoclient, datetime_mock):

awslogs = AWSLogs()
datetime_mock.utcnow.return_value = datetime(2015, 1, 1, 3, 0, 0, 0)
Expand All @@ -42,8 +50,8 @@ def iso2epoch(iso_str):
dt = datetime.strptime(iso_str, "%Y-%m-%d %H:%M:%S")
return int(total_seconds(dt - datetime(1970, 1, 1)) * 1000)

self.assertEqual(awslogs.parse_datetime(''), None)
self.assertEqual(awslogs.parse_datetime(None), None)
self.assertEqual(seconds_since_epoch(''), None)
self.assertEqual(seconds_since_epoch(None), None)
plan = (('2015-01-01 02:59:00', '1m'),
('2015-01-01 02:59:00', '1m ago'),
('2015-01-01 02:59:00', '1minute'),
Expand Down Expand Up @@ -81,10 +89,10 @@ def iso2epoch(iso_str):
)

for expected_iso, dateutil_time in plan:
self.assertEqual(awslogs.parse_datetime(dateutil_time),
self.assertEqual(seconds_since_epoch(dateutil_time),
iso2epoch(expected_iso))

self.assertRaises(UnknownDateError, awslogs.parse_datetime, '???')
self.assertRaises(UnknownDateError, seconds_since_epoch, '???')


class TestAWSLogs(unittest.TestCase):
Expand Down Expand Up @@ -230,8 +238,8 @@ def test_get_streams(self, botoclient):
['A', 'B', 'C', 'D', 'E', 'F', 'G'])

@patch('awslogs.core.boto3_client')
@patch('awslogs.core.AWSLogs.parse_datetime')
def test_get_streams_filtered_by_date(self, parse_datetime, botoclient):
@patch('awslogs.bin.seconds_since_epoch')
def test_get_streams_filtered_by_date(self, seconds_since_epoch_mock, botoclient):
client = Mock()
botoclient.return_value = client
client.get_paginator.return_value.paginate.return_value = [
Expand All @@ -243,8 +251,8 @@ def test_get_streams_filtered_by_date(self, parse_datetime, botoclient):
],
}
]
parse_datetime.side_effect = [5, 7]
awslogs = AWSLogs(log_group_name='group', start='5', end='7')
seconds_since_epoch_mock.side_effect = [5, 7]
awslogs = AWSLogs(log_group_name='group', start=5, end=7)
self.assertEqual([g for g in awslogs.get_streams()], ['B', 'C', 'E'])

@patch('awslogs.core.boto3_client')
Expand Down Expand Up @@ -630,3 +638,27 @@ def test_boto3_client_creation(self, mock_core_session):

awslogs = AWSLogs()
self.assertEqual(client, awslogs.client)

@patch('awslogs.core.boto3_client')
@patch('sys.stderr', new_callable=StringIO)
def test_invalid_stream_regex(self, mock_stderr, botoclient):
    # A bare '*' is not a valid regex: the CLI must print the
    # InvalidPythonRegularExpressionError hint in red and exit with code 8.
    self.maxDiff = None
    botoclient.return_value = None
    exit_code = main("awslogs get LG_NAME *".split())
    expected = colored(
        "Log stream name pattern '*' is not a valid Python regular expression.\n",
        "red")
    self.assertEqual(mock_stderr.getvalue(), expected)
    self.assertEqual(exit_code, 8)

@patch('awslogs.core.boto3_client')
@patch('sys.stdout', new_callable=StringIO)
def test_valid_stream_regex(self, mock_stdout, botoclient):
    # '.*' must be accepted as a pattern and list every matching stream.
    logger.debug('botoclient: %s', botoclient)
    self.maxDiff = None
    self.set_ABCDE_logs(botoclient)
    exit_code = main("awslogs streams LG_NAME .*".split())
    expected = "DDD\n" "EEE\n"
    self.assertEqual(mock_stdout.getvalue(), expected)
    self.assertEqual(exit_code, 0)