This repository has been archived by the owner on Jun 30, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
55 lines (46 loc) · 1.84 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import mimetypes
import pandas as pd
from sql_export import SQLExport
class Parser:
def __init__(self) -> None:
self.df = pd.DataFrame()
self.mimetype_input = ""
self.mimetype_output = ""
print("Started converting data.")
# Ingests Data (Import from CSV / JSON)
def ingest_data(self, filename):
self.mimetype_input = mimetypes.guess_type(filename)[0]
match self.mimetype_input:
case 'application/json':
self.df = pd.read_json(filename)
case 'text/csv':
self.df = pd.read_csv(filename, delimiter=";")
case other:
raise Exception("Invalid input file type.")
# Extracts columns in place
def extract_columns(self, columns):
_temp_df = self.df
self.df = _temp_df.filter(items=columns)
# Combine columns into one
def combine_columns(self, column_one, column_two, final_column):
self.df[final_column] = self.df[column_one].astype('str') + self.df[column_two].astype('str').str.zfill(2)
# Bottles Data (Export to CSV / JSON)
# Just provide correct filename
def bottle_data(self, filename):
self.mimetype_output = mimetypes.guess_type(filename)[0]
match self.mimetype_output:
case 'application/json':
self.df.to_json(filename, orient="records")
case 'text/csv':
self.df.to_csv(filename, index=False)
case 'application/sql':
self.to_sql(filename)
case other:
raise Exception("Invalid output file type.")
def to_sql(self, filename):
data = self.df.to_dict(orient='records')
sql_export = SQLExport(data)
sql_export.export()
sql_export.save(filename)
def __del__(self):
print("Finished with conversion and filtering.")