added cache control to attachments feature
caronc committed Nov 16, 2019
1 parent cb9b4fd commit 44a3527
Showing 9 changed files with 244 additions and 50 deletions.
48 changes: 44 additions & 4 deletions apprise/AppriseAttachment.py
@@ -38,18 +38,35 @@ class AppriseAttachment(object):
"""

def __init__(self, paths=None, asset=None, **kwargs):
def __init__(self, paths=None, asset=None, cache=True, **kwargs):
"""
Loads all of the paths/urls specified (if any).
The path can either be a single string identifying one explicit
location, otherwise you can pass in a series of locations to scan
via a list.
By default we cache our responses so that subsequent calls do not
cause the content to be retrieved again. For local file references
this makes no difference at all, but for remote content, disabling
the cache means more than one call can be made to retrieve the
(same) data, which can be inefficient. Only disable caching if you
understand the consequences.
You can alternatively set the cache value to an int identifying the
number of seconds the previously retrieved content can exist for
before it should be considered expired.
It's also worth noting that the cache value is only applied to
elements that are not already a subclass of AttachBase()
"""

# Initialize our attachment listings
self.attachments = list()

# Set our cache flag
self.cache = cache

# Prepare our Asset Object
self.asset = \
asset if isinstance(asset, AppriseAsset) else AppriseAsset()
@@ -61,14 +78,30 @@ def __init__(self, paths=None, asset=None, **kwargs):
# Parse Source domain based on from_addr
raise TypeError("One or more attachments could not be added.")

def add(self, attachments, asset=None, db=None):
def add(self, attachments, asset=None, cache=None):
"""
Adds one or more attachments into our list.
By default we cache our responses so that subsequent calls do not
cause the content to be retrieved again. For local file references
this makes no difference at all, but for remote content, disabling
the cache means more than one call can be made to retrieve the
(same) data, which can be inefficient. Only disable caching if you
understand the consequences.
You can alternatively set the cache value to an int identifying the
number of seconds the previously retrieved content can exist for
before it should be considered expired.
It's also worth noting that the cache value is only applied to
elements that are not already a subclass of AttachBase()
"""
# Initialize our return status
return_status = True

# Initialize our default cache value
cache = cache if cache is not None else self.cache

if isinstance(asset, AppriseAsset):
# prepare default asset
asset = self.asset
@@ -107,7 +140,8 @@ def add(self, attachments, asset=None, db=None):

# Instantiate ourselves an object, this function throws or
# returns None if it fails
instance = AppriseAttachment.instantiate(_attachment, asset=asset)
instance = AppriseAttachment.instantiate(
_attachment, asset=asset, cache=cache)
if not isinstance(instance, attachment.AttachBase):
return_status = False
continue
@@ -119,12 +153,14 @@ def add(self, attachments, asset=None, db=None):
return return_status

@staticmethod
def instantiate(url, asset=None, suppress_exceptions=True):
def instantiate(url, asset=None, cache=None, suppress_exceptions=True):
"""
Returns the instance of an instantiated attachment plugin based on
the provided Attachment URL. If the url fails to be parsed, then None
is returned.
A specified cache value will override anything otherwise set
"""
# Attempt to acquire the schema at the very least to allow our
# attachment based urls.
@@ -156,6 +192,10 @@ def instantiate(url, asset=None, suppress_exceptions=True):
results['asset'] = \
asset if isinstance(asset, AppriseAsset) else AppriseAsset()

if cache is not None:
# Force an override of the cache value to what we have specified
results['cache'] = cache

if suppress_exceptions:
try:
# Attempt to create an instance of our plugin using the parsed
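
As a rough usage sketch of the new cache argument at the AppriseAttachment level (the attachment URL and timeout value below are illustrative, not part of this commit):

from apprise import AppriseAttachment

# Cache remote attachments for up to 300 seconds before they are
# considered expired; True (the default) caches indefinitely and
# False disables caching entirely
attachments = AppriseAttachment(cache=300)

# A per-call cache value overrides the instance default for the
# entries added in that call
attachments.add('https://example.com/image.png', cache=False)
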
102 changes: 91 additions & 11 deletions apprise/attachment/AttachBase.py
@@ -24,8 +24,10 @@
# THE SOFTWARE.

import os
import time
import mimetypes
from ..URLBase import URLBase
from ..utils import parse_bool


class AttachBase(URLBase):
@@ -59,7 +61,7 @@ class AttachBase(URLBase):
# 5 MB = 5242880 bytes
max_file_size = 5242880

def __init__(self, name=None, mimetype=None, **kwargs):
def __init__(self, name=None, mimetype=None, cache=True, **kwargs):
"""
Initialize some general logging and common server arguments that will
keep things consistent when working with the configurations that
@@ -70,6 +72,17 @@ def __init__(self, name=None, mimetype=None, **kwargs):
The mime-type is automatically detected, but you can override this by
explicitly stating what it should be.
By default we cache our responses so that subsequent calls do not
cause the content to be retrieved again. For local file references
this makes no difference at all, but for remote content, disabling
the cache means more than one call can be made to retrieve the
(same) data, which can be inefficient. Only disable caching if you
understand the consequences.
You can alternatively set the cache value to an int identifying the
number of seconds the previously retrieved content can exist for
before it should be considered expired.
"""

super(AttachBase, self).__init__(**kwargs)
@@ -96,6 +109,18 @@ def __init__(self, name=None, mimetype=None, **kwargs):
# Absolute path to attachment
self.download_path = None

# Set our cache flag
# it can be True, or an integer
try:
self.cache = cache if isinstance(cache, bool) else int(cache)
if self.cache < 0:
raise ValueError()

except (ValueError, TypeError):
err = 'An invalid cache value ({}) was specified.'.format(cache)
self.logger.warning(err)
raise TypeError(err)

# Validate mimetype if specified
if self._mimetype:
if next((t for t in mimetypes.types_map.values()
@@ -110,13 +135,13 @@
@property
def path(self):
"""
Returns the absolute path to the filename
Returns the absolute path to the filename. If this is not known, or
is known but has been considered expired (due to the cache setting),
then the content is re-retrieved prior to returning.
"""
if self.download_path:
# return our fixed content
return self.download_path

if not self.download():
if not self.exists():
# we could not obtain our path
return None

return self.download_path
@@ -130,7 +155,7 @@ def name(self):
# return our fixed content
return self._name

if not self.detected_name and not self.download():
if not self.exists():
# we could not obtain our name
return None

@@ -157,8 +182,8 @@ def mimetype(self):
# return our pre-calculated cached content
return self._mimetype

if not self.detected_mimetype and not self.download():
# we could not obtain our name
if not self.exists():
# we could not obtain our attachment
return None

if not self.detected_mimetype:
@@ -179,14 +204,58 @@ def mimetype(self):
return self.detected_mimetype \
if self.detected_mimetype else self.unknown_mimetype

def exists(self):
"""
Simply returns true if the object has downloaded and stored the
attachment AND the attachment has not expired.
"""
if self.download_path and os.path.isfile(self.download_path) \
and self.cache:

# We have enough reason to look further into our cached value
if self.cache is True:
# return our fixed content as is; we will always cache it
return True

# Verify our cache time to determine whether we will get our
# content again.
try:
age_in_sec = time.time() - os.stat(self.download_path).st_mtime
if age_in_sec <= self.cache:
return True

except (OSError, IOError):
# The file is not present
pass

return self.download()

def invalidate(self):
"""
Release any temporary data that may be open by child classes.
Externally fetched content should be automatically cleaned up when
this function is called.
This function should also reset the following entries to None:
- detected_name: Should identify a human readable filename
- download_path: Must contain an absolute path to content
- detected_mimetype: Should identify mimetype of content
"""
self.detected_name = None
self.download_path = None
self.detected_mimetype = None
return

def download(self):
"""
This function must be overridden by inheriting classes.
Inherited classes should populate:
- detected_name : Should identify a human readable filename
Inherited classes MUST populate:
- detected_name: Should identify a human readable filename
- download_path: Must contain an absolute path to content
- detected_mimetype: Should identify mimetype of content
If a download fails, you should ensure these values are set to None.
"""
raise NotImplementedError(
"download() is implimented by the child class.")
@@ -226,6 +295,17 @@ def parse_url(url, verify_host=True, mimetype_db=None):
results['name'] = results['qsd'].get('name', '') \
.strip().lower()

# Our cache value
if 'cache' in results['qsd']:
# First try to get its integer value
try:
results['cache'] = int(results['qsd']['cache'])

except (ValueError, TypeError):
# No problem, it just isn't an integer; now treat it as a bool
# instead:
results['cache'] = parse_bool(results['qsd']['cache'])

return results

def __len__(self):
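
A standalone sketch of the expiry test that exists() performs above, shown outside of the class for clarity (the helper name is not part of the library):

import os
import time

def is_cache_fresh(download_path, cache):
    # Mirrors exists(): a missing file or a disabled cache always
    # forces a fresh download()
    if not (download_path and os.path.isfile(download_path) and cache):
        return False

    if cache is True:
        # Boolean True caches the content indefinitely
        return True

    # An integer cache is a maximum age (in seconds) measured against
    # the downloaded file's modification time
    return (time.time() - os.stat(download_path).st_mtime) <= cache
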
6 changes: 6 additions & 0 deletions apprise/attachment/AttachFile.py
@@ -81,6 +81,9 @@ def download(self, **kwargs):
validate it.
"""

# Ensure any existing content set has been invalidated
self.invalidate()

if not os.path.isfile(self.dirty_path):
return False

@@ -100,6 +103,9 @@
# a call to download() before returning a success
self.download_path = self.dirty_path
self.detected_name = os.path.basename(self.download_path)

# We don't need to set our self.detected_mimetype as it can be
# pulled at the time it's needed based on the detected_name
return True

@staticmethod
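
For local attachments the cache value is still parsed from the URL form but has no practical effect, since download() above simply validates the file on disk; a small hypothetical example (the path is illustrative):

from apprise import AppriseAttachment

# Local file references resolve immediately; '?cache=no' is accepted
# but does not change behaviour for file:// content
attachment = AppriseAttachment.instantiate('file:///tmp/report.pdf?cache=no')
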
48 changes: 25 additions & 23 deletions apprise/attachment/AttachHTTP.py
@@ -85,10 +85,8 @@ def download(self, **kwargs):
Perform retrieval of the configuration based on the specified request
"""

if self._temp_file is not None:
# There is nothing to do; we're already pointing at our downloaded
# content
return True
# Ensure any existing content set has been invalidated
self.invalidate()

# prepare header
headers = {
@@ -188,13 +186,8 @@
int(self.max_file_size / 1024),
self.url(privacy=True)))

# Reset our temporary object
self._temp_file = None

# Ensure our detected name and mimetype are
# reset
self.detected_name = None
self.detected_mimetype = None
# Invalidate any variables previously set
self.invalidate()

# Return False (signifying a failure)
return False
@@ -220,12 +213,8 @@ def download(self, **kwargs):
'configuration from %s.' % self.host)
self.logger.debug('Socket Exception: %s' % str(e))

# Reset our temporary object
self._temp_file = None

# Ensure our detected name and mimetype are reset
self.detected_name = None
self.detected_mimetype = None
# Invalidate any variables previously set
self.invalidate()

# Return False (signifying a failure)
return False
@@ -239,27 +228,40 @@ def download(self, **kwargs):
'Could not write attachment to disk: {}'.format(
self.url(privacy=True)))

# Reset our temporary object
self._temp_file = None

# Ensure our detected name and mimetype are reset
self.detected_name = None
self.detected_mimetype = None
# Invalidate any variables previously set
self.invalidate()

# Return False (signifying a failure)
return False

# Return our success
return True

def invalidate(self):
"""
Close our temporary file
"""
if self._temp_file:
self._temp_file.close()
self._temp_file = None

super(AttachHTTP, self).invalidate()

def url(self, privacy=False, *args, **kwargs):
"""
Returns the URL built dynamically based on specified arguments.
"""

# Prepare our cache value
if isinstance(self.cache, bool) or not self.cache:
cache = 'yes' if self.cache else 'no'
else:
cache = int(self.cache)

# Define any arguments set
args = {
'verify': 'yes' if self.verify_certificate else 'no',
'cache': cache,
}

if self._mimetype:
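
Because url() above writes the cache value back out as a query argument, an HTTP attachment's expiry setting survives a round trip through the URL form; a small sketch (the host, path, and value are illustrative):

from apprise import AppriseAttachment

# 'cache=600' is parsed by AttachBase.parse_url() and then echoed
# back by AttachHTTP.url(), so the setting survives serialization
attachment = AppriseAttachment.instantiate(
    'https://example.com/logo.png?cache=600')
print(attachment.url())
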