add "create table" sql generation function

new-village · Dec 6, 2024 · 833906a · 833906a
1 parent 80a220c
commit 833906a
Show file tree

Hide file tree

Showing 5 changed files with 123 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -110,6 +110,16 @@ for race_id in race_ids:
     # Process the data as needed
 ```
 
+### Create table query generation for SQLite
+
+The `create_table_sql` function generates an SQL query string for creating a table in an SQLite database. The table structure is dynamically defined based on the configuration file corresponding to the provided `data_type` like `race`, `entry`, `result` and etc. This function ensures that the table is created only if it does not already exist and assigns a primary key to the first column.
+
+```python
+>>> import keibascraper
+>>> query = keibascraper.create_table_sql("entry")
+>>> print(query)
+CREATE TABLE IF NOT EXISTS entry (bracket text, ... weight_diff integer);
+```
 
 ## API Reference
 

diff --git a/keibascraper/__init__.py b/keibascraper/__init__.py
@@ -4,3 +4,4 @@
 keibascraper is a simple scraping library for netkeiba.com
 """
 from keibascraper.load import load, race_list
+from keibascraper.helper import create_table_sql, create_index_sql
diff --git a/keibascraper/helper.py b/keibascraper/helper.py
@@ -245,3 +245,44 @@ def load_config(data_type):
     config_path = os.path.join(base_dir, 'config', f'{data_type}.json')
     with open(config_path, 'r', encoding='utf-8') as f:
         return json.load(f)
+
+
+def create_table_sql(data_type=None):
+    """ The function generate create table SQL strings based on SQLite3 by config file.
+    :param data_type: Data Type is identifier of data types such as ENTRY, ODDS, RACE and RESULT.
+    """
+    # Validating Arguments
+    if data_type is None:
+        raise SystemExit("There is no race_id in HTML")
+
+    # load config file
+    keys = [key["col_name"] for key in load_config(data_type)["columns"]]
+    types = [tp["var_type"] for tp in load_config(data_type)["columns"]]
+    # Create comma separated strings
+    cols = [k + ' ' + v for k, v in zip(keys, types)]
+    # Add PRIMARY KEY string to first column
+    cols[0] = cols[0] + " PRIMARY KEY"
+    cols = ", ".join(cols)
+
+    return f"CREATE TABLE IF NOT EXISTS {data_type} ({cols});"
+
+def create_index_sql(data_type=None):
+    """ The function generate create index SQL strings based on SQLite3 by config file.
+    :param data_type: Data Type is identifier of data types such as ENTRY, ODDS, RACE and RESULT.
+    """
+    # Validating Arguments
+    if data_type == "entry":
+        sql = "CREATE INDEX IF NOT EXISTS race_id ON ENTRY (race_id); " \
+        "CREATE INDEX IF NOT EXISTS horse_id ON ENTRY (horse_id);"
+    elif data_type == "odds":
+        sql = ""
+    elif data_type == "result":
+        sql = "CREATE INDEX IF NOT EXISTS race_id ON RESULT (race_id);" \
+        "CREATE INDEX IF NOT EXISTS horse_id ON RESULT (horse_id);"
+    elif data_type == "history":
+        sql = "CREATE INDEX IF NOT EXISTS race_id ON RESULT (race_id);" \
+        "CREATE INDEX IF NOT EXISTS horse_id ON RESULT (horse_id);"
+    else:
+        raise ValueError(f"Unexpected data type: {data_type}")
+
+    return sql
diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 
 setup(
     name='keibascraper',
-    version='3.0.0',
+    version='3.1.0',
     author='new-village',
     url='https://github.com/new-village/KeibaScraper',
     description='keibascraper is a simple scraping library for netkeiba.com',

diff --git a/test/test_helper.py b/test/test_helper.py
@@ -0,0 +1,70 @@
+import unittest
+import keibascraper
+from unittest.mock import patch
+
+
+class TestHelperSQL(unittest.TestCase):
+
+    @patch('keibascraper.helper.load_config')
+    def test_create_table_sql(self, mock_load_config):
+        # モックされたJSONデータ
+        mock_load_config.return_value = {
+            "columns": [
+                {"col_name": "race_id", "var_type": "text"},
+                {"col_name": "bracket", "var_type": "integer"},
+                {"col_name": "horse_number", "var_type": "integer"},
+                {"col_name": "horse_name", "var_type": "text"}
+            ]
+        }
+
+        # テスト実行
+        result = keibascraper.create_table_sql("entry")
+
+        # 期待されるSQL
+        expected_sql = (
+            "CREATE TABLE IF NOT EXISTS entry ("
+            "race_id text PRIMARY KEY, "
+            "bracket integer, "
+            "horse_number integer, "
+            "horse_name text);"
+        )
+
+        # 検証
+        self.assertEqual(result, expected_sql)
+
+    def test_create_index_sql_entry(self):
+        # entry データタイプ用のテスト
+        result = keibascraper.create_index_sql("entry")
+
+        # 期待されるSQL
+        expected_sql = (
+            "CREATE INDEX IF NOT EXISTS race_id ON ENTRY (race_id); "
+            "CREATE INDEX IF NOT EXISTS horse_id ON ENTRY (horse_id);"
+        )
+
+        # 検証
+        self.assertEqual(result, expected_sql)
+
+    def test_create_index_sql_result(self):
+        # result データタイプ用のテスト
+        result = keibascraper.create_index_sql("result")
+
+        # 期待されるSQL
+        expected_sql = (
+            "CREATE INDEX IF NOT EXISTS race_id ON RESULT (race_id);"
+            "CREATE INDEX IF NOT EXISTS horse_id ON RESULT (horse_id);"
+        )
+
+        # 検証
+        self.assertEqual(result, expected_sql)
+
+    def test_create_index_sql_invalid_type(self):
+        # 無効なデータタイプのテスト
+        with self.assertRaises(ValueError) as context:
+            keibascraper.create_index_sql("invalid_type")
+
+        # エラーメッセージを確認
+        self.assertEqual(str(context.exception), "Unexpected data type: invalid_type")
+
+if __name__ == "__main__":
+    unittest.main()