Merge pull request #5 from smoke-trees/dev

Updated Version
smoke-trees · Jun 24, 2020 · fa1366c · fa1366c
2 parents d91bb07 + 2e22ab9
commit fa1366c
Show file tree

Hide file tree

Showing 4 changed files with 58 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -24,11 +24,22 @@ This package will be useful for carrying out all the utilities of the SmokeTrees
         pip install -e .
     ```
 
-## Example Usage
+## Example Usage 
+
+- Load Model using Tensorflow
 
 ``` Python
     from forest_utils import export_keras
 
     model = export_keras.ModelFromH5().load_model()
 ```
+
+- Load Dataset
+
+``` Python
+    from forest_utils import datasets
+
+    tweets = datasets.Datasets().get_emo_tweets()
+```
+
 After pulling down the model, use it for predictions and other evaluation functionalities.
diff --git a/VERSION.txt b/VERSION.txt
@@ -1 +1 @@
-__version__ == '0.0.4'
+__version__ == '0.0.5'
diff --git a/forest_utils/datasets.py b/forest_utils/datasets.py
@@ -0,0 +1,43 @@
+import gdown
+import pickle
+
+import pandas as pd
+
+
+class Datasets(object):
+    """
+    A class for managing downloads and loading of SmokeTrees' datasets
+    """
+
+    def __init__(self, output = 'datasets.pickle', config = 'result.json'):
+        super().__init__()
+
+        self.base_url = 'https://drive.google.com/uc?id='
+        self.url_id = self.get_complete_url('https://drive.google.com/file/d/10G-d7rdIHsQ9s8XE1mgs6t-hfQjVD-KA/view?usp=sharing')
+        self.output = output
+
+    def get_complete_url(self, url):
+        """
+        method (used internally inside class) to get complete link (including base_url) from the given url
+
+        Parameters
+        ----------
+        url : str
+            url to split and make complete url from
+        
+        Returns
+        -------
+        link : str
+            complete download url (base_url followed by the file id)
+
+        """
+        split_url = url.split('/')
+        return self.base_url + split_url[5]
+
+    def get_emo_tweets(self):
+        try:
+            gdown.download(self.url_id, self.output, quiet = False)
+            gdown.download(self.get_complete_url(pickle.load(open(self.output, 'rb'))['emotion_tweets']['link']), 'tweets.csv', quiet = False)
+            return pd.read_csv('tweets.csv')
+        except:
+            print("[ERROR]:Error in loading dataset, please check downloaded file")
diff --git a/setup.py b/setup.py
@@ -1,11 +1,9 @@
-
-import codecs
 import os
-import re
+import codecs
 
 from setuptools import setup, find_packages
 
-REQUIREMENTS = ['gdown==3.11.1', 'requests==2.24.0', 'tensorflow', 'spacy']
+REQUIREMENTS = ['gdown==3.11.1', 'requests==2.24.0', 'tensorflow', 'spacy', 'pandas']
 
 def read(rel_path):
     here = os.path.abspath(os.path.dirname(__file__))