diff --git a/projects/vdk-plugins/vdk-smarter/.plugin-ci.yml b/projects/vdk-plugins/vdk-smarter/.plugin-ci.yml new file mode 100644 index 0000000000..59e2649ae9 --- /dev/null +++ b/projects/vdk-plugins/vdk-smarter/.plugin-ci.yml @@ -0,0 +1,25 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 + +gi# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 + +image: "python:3.7" + +.build-vdk-smarter: + variables: + PLUGIN_NAME: vdk-smarter + extends: .build-plugin + +build-py37-vdk-smarter: + extends: .build-vdk-smarter + image: "python:3.7" + +build-py311-vdk-smarter: + extends: .build-vdk-smarter + image: "python:3.11" + +release-vdk-smarter: + variables: + PLUGIN_NAME: vdk-smarter + extends: .release-plugin diff --git a/projects/vdk-plugins/vdk-smarter/README.md b/projects/vdk-plugins/vdk-smarter/README.md new file mode 100644 index 0000000000..f4b7248da7 --- /dev/null +++ b/projects/vdk-plugins/vdk-smarter/README.md @@ -0,0 +1,85 @@ +# VDK Smarter + +Making VDK smarter by employing ML/AI. + + + +## Usage + +``` +pip install vdk-smarter +``` + +### Configuration + +(`vdk config-help` is useful command to browse all config options of your installation of vdk) + + +### Example + +TODO# VDK Smarter + +Making VDK smarter by employing ML/AI. + + + +## Usage + +``` +pip install vdk-smarter +``` + +### Configuration + +(`vdk config-help` is useful command to browse all config options of your installation of vdk) + + +### Example + + + +``` + +``` + +### Build and testing + +``` +pip install -r requirements.txt +pip install -e . +pytest +``` + +In VDK repo [../build-plugin.sh](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/build-plugin.sh) script can be used also. + + +#### Note about the CICD: + +.plugin-ci.yaml is needed only for plugins part of [Versatile Data Kit Plugin repo](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins). + +The CI/CD is separated in two stages, a build stage and a release stage. +The build stage is made up of a few jobs, all which inherit from the same +job configuration and only differ in the Python version they use (3.7, 3.8, 3.9 and 3.10). +They run according to rules, which are ordered in a way such that changes to a +plugin's directory trigger the plugin CI, but changes to a different plugin does not. + +### Build and testing + +``` +pip install -r requirements.txt +pip install -e . +pytest +``` + +In VDK repo [../build-plugin.sh](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins/build-plugin.sh) script can be used also. + + +#### Note about the CICD: + +.plugin-ci.yaml is needed only for plugins part of [Versatile Data Kit Plugin repo](https://github.com/vmware/versatile-data-kit/tree/main/projects/vdk-plugins). + +The CI/CD is separated in two stages, a build stage and a release stage. +The build stage is made up of a few jobs, all which inherit from the same +job configuration and only differ in the Python version they use (3.7, 3.8, 3.9 and 3.10). +They run according to rules, which are ordered in a way such that changes to a +plugin's directory trigger the plugin CI, but changes to a different plugin does not. diff --git a/projects/vdk-plugins/vdk-smarter/requirements.txt b/projects/vdk-plugins/vdk-smarter/requirements.txt new file mode 100644 index 0000000000..4a1b3ecaa5 --- /dev/null +++ b/projects/vdk-plugins/vdk-smarter/requirements.txt @@ -0,0 +1,8 @@ +# this file is used to provide testing requirements +# for requirements (dependencies) needed during and after installation of the plugin see (and update) setup.py install_requires section + +openai + +pytest +vdk-core +vdk-test-utils diff --git a/projects/vdk-plugins/vdk-smarter/setup.py b/projects/vdk-plugins/vdk-smarter/setup.py new file mode 100644 index 0000000000..2ba5f6a891 --- /dev/null +++ b/projects/vdk-plugins/vdk-smarter/setup.py @@ -0,0 +1,35 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +import pathlib + +import setuptools + +""" +Builds a package with the help of setuptools in order for this package to be imported in other projects +""" + +__version__ = "0.1.0" + +setuptools.setup( + name="vdk-smarter", + version=__version__, + url="https://github.com/vmware/versatile-data-kit", + description="Making VDK smarter by employing ML/AI.", + long_description=pathlib.Path("README.md").read_text(), + long_description_content_type="text/markdown", + install_requires=["vdk-core", "openai"], + package_dir={"": "src"}, + packages=setuptools.find_namespace_packages(where="src"), + # This is the only vdk plugin specifc part + # Define entry point called "vdk.plugin.run" with name of plugin and module to act as entry point. + entry_points={"vdk.plugin.run": ["smarter = vdk.plugin.smarter.plugin_entry"]}, + classifiers=[ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + ], +) diff --git a/projects/vdk-plugins/vdk-smarter/src/vdk/plugin/smarter/openai_plugin_entry.py b/projects/vdk-plugins/vdk-smarter/src/vdk/plugin/smarter/openai_plugin_entry.py new file mode 100644 index 0000000000..6a3f2cb27e --- /dev/null +++ b/projects/vdk-plugins/vdk-smarter/src/vdk/plugin/smarter/openai_plugin_entry.py @@ -0,0 +1,108 @@ +# Copyright 2021-2023 VMware, Inc. +# SPDX-License-Identifier: Apache-2.0 +import logging +from collections import OrderedDict +from typing import List + +import openai +from vdk.api.plugin.hook_markers import hookimpl +from vdk.api.plugin.plugin_registry import IPluginRegistry +from vdk.internal.builtin_plugins.connection.decoration_cursor import DecorationCursor +from vdk.internal.builtin_plugins.connection.execution_cursor import ExecutionCursor +from vdk.internal.core.config import ConfigurationBuilder +from vdk.internal.core.context import CoreContext + +log = logging.getLogger(__name__) + + +class OpenAiPlugin: + def __init__(self): + self._review_enabled = False + self._queries = OrderedDict() + self._openai_model = "gpt-3.5-turbo" + + @hookimpl(tryfirst=True) + def vdk_configure(self, config_builder: ConfigurationBuilder): + # TODO: support non open ai models and make it configurable + config_builder.add( + key="openai_api_key", + default_value="", + description=""" + OpenAI API key. You can generete one on your OpenAI account page. + (possibly https://platform.openai.com/account/api-keys) + See best practices for api keys in https://help.openai.com/en/articles/5112595-best-practices-for-api-key-safety + """, + ) + config_builder.add( + key="openai_model", + default_value="gpt-3.5-turbo", + description=""" + OpenAI model to be used. + See more in https://platform.openai.com/docs/models/overview + """, + ) + config_builder.add( + key="openai_review_enabled", + default_value=False, + description="If enabled, it will review each SQL query executed by the job and create summary file at the end.", + ) + + @hookimpl + def vdk_initialize(self, context: CoreContext) -> None: + openai.api_key = context.configuration.get_value("openai_api_key") + self._review_enabled = context.configuration.get_value("openai_review_enabled") + self._openai_model = context.configuration.get_value("openai_model") + + def _review_sql_query(self, sql_query: str): + # Refine the prompt and make configurable + prompt = ( + """Using your extensive knowledge of Impala SQL, analyze the following SQL query and provide a specific feedback. + The feedback should include its efficiency, readability, possible optimization, potential errors, + adherence to best practices, and security vulnerabilities, if any. Provide a score (1 a lot of work needed, 5 - no further changes needed) + Return the answer in format {"score": ?, "review": "?" } Here is the SQL query: + """ + + sql_query + ) + + # Generate the review + # TODO: make configurable most things + response = openai.Completion.create( + engine=self._openai_model, + prompt=prompt, + max_tokens=1000, + n=1, + stop=None, + temperature=0.7, + ) + + # Extract the generated review from the response + review = response.choices[0].text.strip() + self._queries[sql_query] = review + + return review + + @hookimpl + def db_connection_decorate_operation( + self, decoration_cursor: DecorationCursor + ) -> None: + if self._review_enabled: + try: + managed_operation = decoration_cursor.get_managed_operation() + review = self._review_sql_query(managed_operation.get_operation()) + log.info(f"Review\n: {review}\n") + except Exception as e: + log.error(f"Failed to review SQL query: {e}") + + @hookimpl + def vdk_exit(self, context: CoreContext, exit_code: int) -> None: + if self._review_enabled: + with open("queries_reviews.md", "w") as f: + f.write("# SQL Query Reviews\n") + for query, review in self._queries.items(): + f.write(f"## SQL Query\n\n```sql\n{query}\n```\n\n") + f.write(f"### Review\n\n{review}\n\n") + + +@hookimpl +def vdk_start(plugin_registry: IPluginRegistry, command_line_args: List): + plugin_registry.load_plugin_with_hooks_impl(OpenAiPlugin(), "OpenAiPlugin")