From 0e7f24da9a4a4288f3e020a27ede37d726be7f4e Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 24 Apr 2024 19:27:11 -0500 Subject: [PATCH 01/34] merge main from remote Signed-off-by: ravi-kumar-pilla --- demo-project/.version | 0 package.json | 0 package/kedro_viz/server.py | 0 trufflehog-ignore.txt | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 demo-project/.version mode change 100644 => 100755 package.json mode change 100644 => 100755 package/kedro_viz/server.py mode change 100644 => 100755 trufflehog-ignore.txt diff --git a/demo-project/.version b/demo-project/.version old mode 100644 new mode 100755 diff --git a/package.json b/package.json old mode 100644 new mode 100755 diff --git a/package/kedro_viz/server.py b/package/kedro_viz/server.py old mode 100644 new mode 100755 diff --git a/trufflehog-ignore.txt b/trufflehog-ignore.txt old mode 100644 new mode 100755 From 52c2060c03eda1fddb93592b0ce00b9d4017b186 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 26 Jun 2024 23:14:20 -0500 Subject: [PATCH 02/34] partially working parser - WIP Signed-off-by: ravi-kumar-pilla --- .gitignore | 3 + .../integrations/kedro/data_loader.py | 84 ++++--- .../kedro_viz/integrations/kedro/parser.py | 214 ++++++++++++++++++ package/kedro_viz/launchers/cli.py | 7 + package/kedro_viz/server.py | 4 + 5 files changed, 282 insertions(+), 30 deletions(-) create mode 100644 package/kedro_viz/integrations/kedro/parser.py diff --git a/.gitignore b/.gitignore index 39a763068..d9017face 100644 --- a/.gitignore +++ b/.gitignore @@ -65,3 +65,6 @@ coverage.xml # Kedro *.log + +# testing +spaceflights/* \ No newline at end of file diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 1ac1521e6..78cbb4cba 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -9,16 +9,15 @@ import logging from pathlib import Path from typing import Any, Dict, Optional, Tuple - -from kedro import __version__ -from kedro.framework.project import configure_project, pipelines -from kedro.framework.session import KedroSession +from kedro.config.omegaconf_config import OmegaConfigLoader +from kedro.framework.context.context import KedroContext from kedro.framework.session.store import BaseSessionStore -from kedro.framework.startup import bootstrap_project +from kedro import __version__ from kedro.io import DataCatalog from kedro.pipeline import Pipeline from kedro_viz.constants import VIZ_METADATA_ARGS +from kedro_viz.integrations.kedro.parser import parse_project logger = logging.getLogger(__name__) @@ -75,6 +74,7 @@ def load_data( include_hooks: bool = False, package_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, + is_lite: bool = False ) -> Tuple[DataCatalog, Dict[str, Pipeline], BaseSessionStore, Dict]: """Load data from a Kedro project. Args: @@ -91,30 +91,54 @@ def load_data( A tuple containing the data catalog and the pipeline dictionary and the session store. 
""" - if package_name: - configure_project(package_name) - else: - # bootstrap project when viz is run in dev mode - bootstrap_project(project_path) - - with KedroSession.create( + if is_lite: + # [TODO: Confirm on the context creation] + context = KedroContext( + package_name="{{ cookiecutter.python_package }}", project_path=project_path, - env=env, - save_on_close=False, - extra_params=extra_params, - ) as session: - # check for --include-hooks option - if not include_hooks: - session._hook_manager = _VizNullPluginManager() # type: ignore - - context = session.load_context() - session_store = session._store - catalog = context.catalog - - # Pipelines is a lazy dict-like object, so we force it to populate here - # in case user doesn't have an active session down the line when it's first accessed. - # Useful for users who have `get_current_session` in their `register_pipelines()`. - pipelines_dict = dict(pipelines) + config_loader=OmegaConfigLoader(conf_source=str(project_path)), + hook_manager = _VizNullPluginManager(), + env=env) + + # [TODO: Confirm on the session store creation] + session_store = None + + # [TODO: Confirm on the DataCatalog creation] + catalog = DataCatalog() + stats_dict = _get_dataset_stats(project_path) - - return catalog, pipelines_dict, session_store, stats_dict + pipelines_dict = parse_project(project_path) + + return catalog, pipelines_dict, session_store, stats_dict + else: + from kedro.framework.project import configure_project, pipelines + from kedro.framework.session import KedroSession + from kedro.framework.startup import bootstrap_project + + if package_name: + configure_project(package_name) + else: + # bootstrap project when viz is run in dev mode + bootstrap_project(project_path) + + with KedroSession.create( + project_path=project_path, + env=env, + save_on_close=False, + extra_params=extra_params, + ) as session: + # check for --include-hooks option + if not include_hooks: + session._hook_manager = _VizNullPluginManager() # type: ignore + + context = session.load_context() + session_store = session._store + catalog = context.catalog + + # Pipelines is a lazy dict-like object, so we force it to populate here + # in case user doesn't have an active session down the line when it's first accessed. + # Useful for users who have `get_current_session` in their `register_pipelines()`. 
+ pipelines_dict = dict(pipelines) + stats_dict = _get_dataset_stats(project_path) + + return catalog, pipelines_dict, session_store, stats_dict diff --git a/package/kedro_viz/integrations/kedro/parser.py b/package/kedro_viz/integrations/kedro/parser.py new file mode 100644 index 000000000..c4b52a6fb --- /dev/null +++ b/package/kedro_viz/integrations/kedro/parser.py @@ -0,0 +1,214 @@ +from collections import defaultdict +from pathlib import Path +import ast +from typing import Dict, List +from kedro.pipeline.modular_pipeline import pipeline as ModularPipeline +from kedro.pipeline.pipeline import Pipeline, Node + + +# WIP +class KedroPipelineLocator(ast.NodeVisitor): + def __init__(self): + self.pipeline = None + + def visit_FunctionDef(self, node): + if node.name == "create_pipeline": + kedro_node_extractor = KedroNodeExtractor() + kedro_node_extractor.visit(node) + self.pipeline = Pipeline(nodes=kedro_node_extractor.nodes) + + try: + # modular pipeline + if kedro_node_extractor.namespace: + print("Namespace is here", kedro_node_extractor.namespace) + self.pipeline = ModularPipeline( + self.pipeline, + inputs=kedro_node_extractor.inputs, + outputs=kedro_node_extractor.outputs, + parameters=set(), + tags=kedro_node_extractor.tags, + namespace=kedro_node_extractor.namespace, + ) + except Exception as exc: + # [TODO: Error with modular pipeline] + print("error") + print(exc) + self.pipeline = Pipeline(nodes=kedro_node_extractor.nodes) + + self.generic_visit(node) + + +class KedroNodeExtractor(ast.NodeVisitor): + def __init__(self): + self.nodes: List[Node] = [] + self.inputs = set() + self.outputs = set() + self.namespace = None + self.parameters = set() + self.tags = set() + + def visit_Call(self, node): + if isinstance(node.func, ast.Name) and node.func.id == "pipeline": + nodes = [] + inputs = set() + outputs = set() + namespace = None + parameters = set() + tags = set() + for keyword in node.keywords: + # print(keyword.arg) + if keyword.arg == "namespace": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + namespace = keyword.value.value + elif keyword.arg == "inputs": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + inputs = {keyword.value.value} + elif isinstance(keyword.value, ast.Set): + inputs = {elt.value for elt in keyword.value.elts} + elif isinstance(keyword.value, ast.Dict): + inputs = {elt.value for elt in keyword.value.keys} + elif keyword.arg == "outputs": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + outputs = {keyword.value.value} + if isinstance(keyword.value, ast.Set): + outputs = {elt.value for elt in keyword.value.elts} + elif isinstance(keyword.value, ast.Dict): + outputs = {elt.value for elt in keyword.value.keys} + elif keyword.arg == "parameters": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + parameters = {keyword.value.value} + if isinstance(keyword.value, ast.Set): + parameters = {elt.value for elt in keyword.value.elts} + elif isinstance(keyword.value, ast.Dict): + parameters = {elt.value for elt in keyword.value.keys} + elif keyword.arg == "tags": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + tags = {keyword.value.value} + if isinstance(keyword.value, ast.Set): + tags = {elt.value for elt in keyword.value.elts} + elif isinstance(keyword.value, ast.Dict): + tags = {elt.value for elt in keyword.value.keys} + + # exploring nodes + for arg in node.args: + if 
isinstance(arg, ast.List): + for elt in arg.elts: + if ( + isinstance(elt, ast.Call) + and isinstance(elt.func, ast.Name) + and elt.func.id == "node" + ): + func = None + inputs = set() + outputs = set() + name = None + tags = set() + namespace = None + for keyword in elt.keywords: + if keyword.arg == "func": + func = ( + keyword.value.id + if isinstance(keyword.value, ast.Name) + else "" + ) + elif keyword.arg == "inputs": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + inputs = {keyword.value.value} + elif isinstance(keyword.value, ast.List): + inputs = { + elt.value for elt in keyword.value.elts + } + elif isinstance(keyword.value, ast.Dict): + inputs = { + elt.value for elt in keyword.value.keys + } + elif keyword.arg == "outputs": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + outputs = {keyword.value.value} + elif isinstance(keyword.value, ast.List): + outputs = { + elt.value for elt in keyword.value.elts + } + elif isinstance(keyword.value, ast.Dict): + outputs = { + elt.value for elt in keyword.value.keys + } + elif keyword.arg == "name": + name = keyword.value.value + elif keyword.arg == "tags": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + tags = {keyword.value.value} + elif isinstance(keyword.value, ast.List): + tags = {elt.value for elt in keyword.value.elts} + elif isinstance(keyword.value, ast.Dict): + tags = {elt.value for elt in keyword.value.keys} + elif keyword.arg == "namespace": + if isinstance(keyword.value, ast.Constant): + if not keyword.value.value: + continue + namespace = keyword.value.value + + # Create Node + # [TODO: think of func=lambda *args: sum(args)] + kedro_node = Node( + func=lambda *args: sum(args), + inputs=list(inputs), + outputs=list(outputs), + name=name, + tags=tags, + namespace=namespace, + ) + + nodes.append(kedro_node) + + self.nodes.extend(nodes) + self.inputs |= inputs + self.outputs |= outputs + self.namespace = namespace + self.parameters |= parameters + self.tags |= tags + + self.generic_visit(node) + + +def parse_project(project_path: Path) -> Dict[str, Pipeline]: + pipelines: Dict[str, Pipeline] = defaultdict(dict) + for filepath in project_path.rglob("*.py"): + with open(filepath, "r") as file: + file_content = file.read() + + parsed_content_ast_node = ast.parse(file_content) + pipeline_name = filepath.relative_to(project_path).parent.name + + # Locate pipelines (assumes only 1 create_pipeline per pipeline file) + kedro_pipeline_locator = KedroPipelineLocator() + kedro_pipeline_locator.visit(parsed_content_ast_node) + located_pipeline = kedro_pipeline_locator.pipeline + # print(located_pipeline) + if located_pipeline: + pipelines[pipeline_name] = located_pipeline + + # creating a default pipeline + pipelines["__default__"] = sum(pipelines.values()) + # dealing with pipeline level namespace + # pipelines["data_processing"] = pipeline( + # pipelines["data_engineering"], namespace="data_processing" + # ) + print(pipelines) + return pipelines diff --git a/package/kedro_viz/launchers/cli.py b/package/kedro_viz/launchers/cli.py index daad1e2b8..b23a1d6cb 100644 --- a/package/kedro_viz/launchers/cli.py +++ b/package/kedro_viz/launchers/cli.py @@ -115,6 +115,11 @@ def viz(ctx): # pylint: disable=unused-argument help=PARAMS_ARG_HELP, callback=_split_params, ) +@click.option( + "--lite", + is_flag=True, + help="A flag to load an experimental light-weight Kedro Viz", +) # pylint: disable=import-outside-toplevel, 
too-many-locals def run( host, @@ -127,6 +132,7 @@ def run( autoreload, include_hooks, params, + lite ): """Launch local Kedro Viz instance""" from kedro_viz.server import run_server @@ -170,6 +176,7 @@ def run( "include_hooks": include_hooks, "package_name": PACKAGE_NAME, "extra_params": params, + "is_lite": lite } if autoreload: run_process_kwargs = { diff --git a/package/kedro_viz/server.py b/package/kedro_viz/server.py index 384a3545d..7562aa7e9 100755 --- a/package/kedro_viz/server.py +++ b/package/kedro_viz/server.py @@ -56,6 +56,7 @@ def load_and_populate_data( package_name: Optional[str] = None, pipeline_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, + is_lite: bool = False ): """Loads underlying Kedro project data and populates Kedro Viz Repositories""" @@ -66,6 +67,7 @@ def load_and_populate_data( include_hooks, package_name, extra_params, + is_lite ) pipelines = ( @@ -90,6 +92,7 @@ def run_server( include_hooks: bool = False, package_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, + is_lite: bool = False ): # pylint: disable=redefined-outer-name """Run a uvicorn server with a FastAPI app that either launches API response data from a file or from reading data from a real Kedro project. @@ -124,6 +127,7 @@ def run_server( package_name, pipeline_name, extra_params, + is_lite ) # [TODO: As we can do this with `kedro viz build`, # we need to shift this feature outside of kedro viz run] From cfd99a79bedfac604e8eabaa26aecc644f3291ab Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Fri, 28 Jun 2024 20:37:06 -0500 Subject: [PATCH 03/34] partial working commit Signed-off-by: ravi-kumar-pilla --- .../integrations/kedro/data_loader.py | 22 +- .../kedro_viz/integrations/kedro/parser.py | 289 +++++++++--------- package/kedro_viz/launchers/cli.py | 4 +- package/kedro_viz/server.py | 19 +- 4 files changed, 161 insertions(+), 173 deletions(-) mode change 100644 => 100755 package/kedro_viz/integrations/kedro/parser.py diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 78cbb4cba..a036aef0e 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -74,7 +74,7 @@ def load_data( include_hooks: bool = False, package_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, - is_lite: bool = False + is_lite: bool = False, ) -> Tuple[DataCatalog, Dict[str, Pipeline], BaseSessionStore, Dict]: """Load data from a Kedro project. 
Args: @@ -94,21 +94,22 @@ def load_data( if is_lite: # [TODO: Confirm on the context creation] context = KedroContext( - package_name="{{ cookiecutter.python_package }}", - project_path=project_path, - config_loader=OmegaConfigLoader(conf_source=str(project_path)), - hook_manager = _VizNullPluginManager(), - env=env) - + package_name="{{ cookiecutter.python_package }}", + project_path=project_path, + config_loader=OmegaConfigLoader(conf_source=str(project_path)), + hook_manager=_VizNullPluginManager(), + env=env, + ) + # [TODO: Confirm on the session store creation] session_store = None # [TODO: Confirm on the DataCatalog creation] catalog = DataCatalog() - + stats_dict = _get_dataset_stats(project_path) - pipelines_dict = parse_project(project_path) - + pipelines_dict = dict(parse_project(project_path)) + # print(pipelines_dict) return catalog, pipelines_dict, session_store, stats_dict else: from kedro.framework.project import configure_project, pipelines @@ -139,6 +140,7 @@ def load_data( # in case user doesn't have an active session down the line when it's first accessed. # Useful for users who have `get_current_session` in their `register_pipelines()`. pipelines_dict = dict(pipelines) + # print(pipelines_dict) stats_dict = _get_dataset_stats(project_path) return catalog, pipelines_dict, session_store, stats_dict diff --git a/package/kedro_viz/integrations/kedro/parser.py b/package/kedro_viz/integrations/kedro/parser.py old mode 100644 new mode 100755 index c4b52a6fb..5c7c56b6d --- a/package/kedro_viz/integrations/kedro/parser.py +++ b/package/kedro_viz/integrations/kedro/parser.py @@ -1,103 +1,87 @@ from collections import defaultdict from pathlib import Path import ast -from typing import Dict, List +from typing import Dict, Iterable, List from kedro.pipeline.modular_pipeline import pipeline as ModularPipeline from kedro.pipeline.pipeline import Pipeline, Node - -# WIP class KedroPipelineLocator(ast.NodeVisitor): def __init__(self): self.pipeline = None def visit_FunctionDef(self, node): - if node.name == "create_pipeline": - kedro_node_extractor = KedroNodeExtractor() - kedro_node_extractor.visit(node) - self.pipeline = Pipeline(nodes=kedro_node_extractor.nodes) - - try: - # modular pipeline - if kedro_node_extractor.namespace: - print("Namespace is here", kedro_node_extractor.namespace) - self.pipeline = ModularPipeline( - self.pipeline, - inputs=kedro_node_extractor.inputs, - outputs=kedro_node_extractor.outputs, - parameters=set(), - tags=kedro_node_extractor.tags, - namespace=kedro_node_extractor.namespace, + try: + if node.name == "create_pipeline": + kedro_pipeline_explorer = KedroPipelineExplorer() + kedro_pipeline_explorer.visit(node) + try: + # modular pipeline + if kedro_pipeline_explorer.namespace: + self.pipeline = ModularPipeline( + pipe=kedro_pipeline_explorer.nodes, + inputs=kedro_pipeline_explorer.inputs, + outputs=kedro_pipeline_explorer.outputs, + parameters=kedro_pipeline_explorer.parameters, + tags=kedro_pipeline_explorer.tags, + namespace=kedro_pipeline_explorer.namespace, + ) + else: + # kedro pipeline + self.pipeline = Pipeline( + nodes=kedro_pipeline_explorer.nodes, + tags=kedro_pipeline_explorer.tags, + ) + except Exception as exc: + # [TODO: Error with modular pipeline, try creating regular pipeline] + print(exc) + self.pipeline = Pipeline( + nodes=kedro_pipeline_explorer.nodes, + tags=kedro_pipeline_explorer.tags, ) - except Exception as exc: - # [TODO: Error with modular pipeline] - print("error") - print(exc) - self.pipeline = 
Pipeline(nodes=kedro_node_extractor.nodes) - - self.generic_visit(node) + self.generic_visit(node) + + except Exception as exc: + # [TODO: Error with parsing the file, dump the visiting node] + print(exc) + print(ast.dump(node, indent=2)) -class KedroNodeExtractor(ast.NodeVisitor): +class KedroPipelineExplorer(ast.NodeVisitor): + # [TODO: Current explorer only serves for 1 pipeline() function within a create_pipeline def] def __init__(self): + # keeping these here for future use-case + # when dealing with multiple pipeline() functions + # within a create_pipeline def self.nodes: List[Node] = [] - self.inputs = set() - self.outputs = set() + self.inputs = None + self.outputs = None self.namespace = None - self.parameters = set() - self.tags = set() + self.parameters = None + self.tags = None def visit_Call(self, node): if isinstance(node.func, ast.Name) and node.func.id == "pipeline": - nodes = [] - inputs = set() - outputs = set() - namespace = None - parameters = set() - tags = set() + # for a modular pipeline + # [TODO: pipe to be explored later] + # pipe: Iterable[Node | Pipeline] | Pipeline + + pipeline_inputs: str | set[str] | dict[str, str] | None = None + pipeline_outputs: str | set[str] | dict[str, str] | None = None + pipeline_namespace: str | None = None + pipeline_parameters: str | set[str] | dict[str, str] | None = None + pipeline_tags: str | Iterable[str] | None = None + for keyword in node.keywords: - # print(keyword.arg) if keyword.arg == "namespace": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - namespace = keyword.value.value + pipeline_namespace = parse_value(keyword.value) elif keyword.arg == "inputs": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - inputs = {keyword.value.value} - elif isinstance(keyword.value, ast.Set): - inputs = {elt.value for elt in keyword.value.elts} - elif isinstance(keyword.value, ast.Dict): - inputs = {elt.value for elt in keyword.value.keys} + pipeline_inputs = parse_value(keyword.value) elif keyword.arg == "outputs": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - outputs = {keyword.value.value} - if isinstance(keyword.value, ast.Set): - outputs = {elt.value for elt in keyword.value.elts} - elif isinstance(keyword.value, ast.Dict): - outputs = {elt.value for elt in keyword.value.keys} + pipeline_outputs = parse_value(keyword.value) elif keyword.arg == "parameters": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - parameters = {keyword.value.value} - if isinstance(keyword.value, ast.Set): - parameters = {elt.value for elt in keyword.value.elts} - elif isinstance(keyword.value, ast.Dict): - parameters = {elt.value for elt in keyword.value.keys} + pipeline_parameters = parse_value(keyword.value) elif keyword.arg == "tags": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - tags = {keyword.value.value} - if isinstance(keyword.value, ast.Set): - tags = {elt.value for elt in keyword.value.elts} - elif isinstance(keyword.value, ast.Dict): - tags = {elt.value for elt in keyword.value.keys} + pipeline_tags = parse_value(keyword.value) # exploring nodes for arg in node.args: @@ -108,107 +92,120 @@ def visit_Call(self, node): and isinstance(elt.func, ast.Name) and elt.func.id == "node" ): - func = None - inputs = set() - outputs = set() - name = None - tags = set() - namespace = None + node_func = None + node_inputs: str | list[str] | dict[str, str] | 
None = None + node_outputs: str | list[str] | dict[str, str] | None = None + node_name: str | None = None + node_tags: str | Iterable[str] | None = None + node_confirms: str | list[str] | None = None + node_namespace: str | None = None + for keyword in elt.keywords: + # [TODO: func is WIP. Need to create a Callable] if keyword.arg == "func": - func = ( - keyword.value.id - if isinstance(keyword.value, ast.Name) - else "" - ) + node_func = lambda *args, **kwargs: None elif keyword.arg == "inputs": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - inputs = {keyword.value.value} - elif isinstance(keyword.value, ast.List): - inputs = { - elt.value for elt in keyword.value.elts - } - elif isinstance(keyword.value, ast.Dict): - inputs = { - elt.value for elt in keyword.value.keys - } + node_inputs = parse_value(keyword.value) elif keyword.arg == "outputs": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - outputs = {keyword.value.value} - elif isinstance(keyword.value, ast.List): - outputs = { - elt.value for elt in keyword.value.elts - } - elif isinstance(keyword.value, ast.Dict): - outputs = { - elt.value for elt in keyword.value.keys - } + node_outputs = parse_value(keyword.value) elif keyword.arg == "name": - name = keyword.value.value + node_name = parse_value(keyword.value) elif keyword.arg == "tags": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - tags = {keyword.value.value} - elif isinstance(keyword.value, ast.List): - tags = {elt.value for elt in keyword.value.elts} - elif isinstance(keyword.value, ast.Dict): - tags = {elt.value for elt in keyword.value.keys} + node_tags = parse_value(keyword.value) + elif keyword.arg == "confirms": + node_confirms = parse_value(keyword.value) elif keyword.arg == "namespace": - if isinstance(keyword.value, ast.Constant): - if not keyword.value.value: - continue - namespace = keyword.value.value - + node_namespace = parse_value(keyword.value) + # Create Node - # [TODO: think of func=lambda *args: sum(args)] kedro_node = Node( - func=lambda *args: sum(args), - inputs=list(inputs), - outputs=list(outputs), - name=name, - tags=tags, - namespace=namespace, + func=node_func, + inputs=node_inputs, + outputs=node_outputs, + name=node_name, + tags=node_tags, + confirms=node_confirms, + namespace=node_namespace, ) - nodes.append(kedro_node) + self.nodes.append(kedro_node) - self.nodes.extend(nodes) - self.inputs |= inputs - self.outputs |= outputs - self.namespace = namespace - self.parameters |= parameters - self.tags |= tags + # These will be used for modular pipeline creation + self.inputs = pipeline_inputs + self.outputs = pipeline_outputs + self.namespace = pipeline_namespace + self.parameters = pipeline_parameters + self.tags = pipeline_tags self.generic_visit(node) +# Helper functions +def parse_value(keyword_value): + if isinstance(keyword_value, ast.Constant): + if not keyword_value.value: + return None + return str(keyword_value.value) + elif isinstance(keyword_value, (ast.List, ast.Set)): + return [parse_value(elt) for elt in keyword_value.elts] + elif isinstance(keyword_value, ast.Dict): + return { + parse_value(k): parse_value(v) for k, v in zip(keyword_value.keys, keyword_value.values) + } + elif isinstance(keyword_value, ast.ListComp): + # [TODO: For list comprehensions, complex case handling] + # [Example can be found under demo_project/pipelines/modelling] + return f"ListComp({ast.dump(keyword_value)})" + elif 
isinstance(keyword_value, ast.DictComp):
+        # [TODO: For dict comprehensions, complex case handling]
+        # [Example can be found under demo_project/pipelines/modelling]
+        return f"DictComp({ast.dump(keyword_value)})"
+    elif isinstance(keyword_value, ast.FormattedValue):
+        # [TODO: For formatted strings, complex case handling]
+        # [Example can be found under demo_project/pipelines/modelling]
+        return f"FormattedValue({ast.dump(keyword_value)})"
+    elif isinstance(keyword_value, ast.JoinedStr):
+        # [TODO: For joined strings, complex case handling]
+        # [Example can be found under demo_project/pipelines/modelling]
+        return f"JoinedStr({ast.dump(keyword_value)})"
+    else:
+        # [TODO: For any other complex case handling]
+        return f"Unsupported({ast.dump(keyword_value)})"
+
+
+# [WIP: Naive parsing and exploring pipelines. Not sure of any better way for now]
 def parse_project(project_path: Path) -> Dict[str, Pipeline]:
+    # Result
     pipelines: Dict[str, Pipeline] = defaultdict(dict)
+
+    # Loop through all the .py files in the kedro project
+    # and start locating create_pipeline
     for filepath in project_path.rglob("*.py"):
         with open(filepath, "r") as file:
             file_content = file.read()
 
+        # parse file content using ast
         parsed_content_ast_node = ast.parse(file_content)
+
+        # extract pipeline name from file path
         pipeline_name = filepath.relative_to(project_path).parent.name
 
-        # Locate pipelines (assumes only 1 create_pipeline per pipeline file)
+        # Locate pipelines (tested for only 1 create_pipeline per pipeline file)
+        # [TODO: confirm with Kedro team if more than 1 create_pipeline existence]
         kedro_pipeline_locator = KedroPipelineLocator()
         kedro_pipeline_locator.visit(parsed_content_ast_node)
         located_pipeline = kedro_pipeline_locator.pipeline
-        # print(located_pipeline)
+
+        # add to the result if a pipeline is located
        if located_pipeline:
             pipelines[pipeline_name] = located_pipeline
 
-    # creating a default pipeline
-    pipelines["__default__"] = sum(pipelines.values())
-    # dealing with pipeline level namespace
-    # pipelines["data_processing"] = pipeline(
-    #     pipelines["data_engineering"], namespace="data_processing"
-    # )
-    print(pipelines)
+    # foolproof to have at least 1 pipeline
+    # so the UI won't break
+    if len(pipelines.values()):
+        # creating a default pipeline
+        pipelines["__default__"] = sum(pipelines.values())
+    else:
+        pipelines["__default__"] = Pipeline(nodes=[])
+
     return pipelines
diff --git a/package/kedro_viz/launchers/cli.py b/package/kedro_viz/launchers/cli.py
index b23a1d6cb..4205181a8 100644
--- a/package/kedro_viz/launchers/cli.py
+++ b/package/kedro_viz/launchers/cli.py
@@ -132,7 +132,7 @@ def run(
     autoreload,
     include_hooks,
     params,
-    lite
+    lite,
 ):
     """Launch local Kedro Viz instance"""
     from kedro_viz.server import run_server
@@ -176,7 +176,7 @@ def run(
         "include_hooks": include_hooks,
         "package_name": PACKAGE_NAME,
         "extra_params": params,
-        "is_lite": lite
+        "is_lite": lite,
     }
     if autoreload:
         run_process_kwargs = {
diff --git a/package/kedro_viz/server.py b/package/kedro_viz/server.py
index 7562aa7e9..e63060783 100755
--- a/package/kedro_viz/server.py
+++ b/package/kedro_viz/server.py
@@ -56,18 +56,13 @@ def load_and_populate_data(
     package_name: Optional[str] = None,
     pipeline_name: Optional[str] = None,
     extra_params: Optional[Dict[str, Any]] = None,
-    is_lite: bool = False
+    is_lite: bool = False,
 ):
     """Loads underlying Kedro project data and populates Kedro Viz Repositories"""
 
     # Loads data from underlying Kedro Project
     catalog, pipelines, session_store, stats_dict = kedro_data_loader.load_data(
- path, - env, - include_hooks, - package_name, - extra_params, - is_lite + path, env, include_hooks, package_name, extra_params, is_lite ) pipelines = ( @@ -92,7 +87,7 @@ def run_server( include_hooks: bool = False, package_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, - is_lite: bool = False + is_lite: bool = False, ): # pylint: disable=redefined-outer-name """Run a uvicorn server with a FastAPI app that either launches API response data from a file or from reading data from a real Kedro project. @@ -121,13 +116,7 @@ def run_server( if load_file is None: load_and_populate_data( - path, - env, - include_hooks, - package_name, - pipeline_name, - extra_params, - is_lite + path, env, include_hooks, package_name, pipeline_name, extra_params, is_lite ) # [TODO: As we can do this with `kedro viz build`, # we need to shift this feature outside of kedro viz run] From 712592716d8031ea7d9fb68421c758529d687efc Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 3 Jul 2024 01:46:20 -0500 Subject: [PATCH 04/34] testing show code Signed-off-by: ravi-kumar-pilla --- .../kedro_viz/integrations/kedro/parser.py | 7 ++++++- package/kedro_viz/models/flowchart.py | 21 ++++++++++++------- package/kedro_viz/utils.py | 18 ++++++++++++++++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/parser.py b/package/kedro_viz/integrations/kedro/parser.py index 5c7c56b6d..ceefe71c6 100755 --- a/package/kedro_viz/integrations/kedro/parser.py +++ b/package/kedro_viz/integrations/kedro/parser.py @@ -103,7 +103,12 @@ def visit_Call(self, node): for keyword in elt.keywords: # [TODO: func is WIP. Need to create a Callable] if keyword.arg == "func": - node_func = lambda *args, **kwargs: None + func_name = keyword.value.id + exec( + f"def {func_name}(*args, **kwargs): pass", + globals(), + ) + node_func = globals()[func_name] elif keyword.arg == "inputs": node_inputs = parse_value(keyword.value) elif keyword.arg == "outputs": diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py index ed55fcfa6..8747ac914 100644 --- a/package/kedro_viz/models/flowchart.py +++ b/package/kedro_viz/models/flowchart.py @@ -20,7 +20,11 @@ ) from kedro_viz.models.utils import get_dataset_type -from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding +from kedro_viz.utils import ( + TRANSCODING_SEPARATOR, + _strip_transcoding, + get_function_source_code, +) try: # kedro 0.18.11 onwards @@ -416,12 +420,15 @@ def set_task_and_kedro_node(cls, task_node): @field_validator("code") @classmethod def set_code(cls, code): - # this is required to handle partial, curry functions - if inspect.isfunction(cls.kedro_node.func): - code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) - return code - - return None + try: + # this is required to handle partial, curry functions + if inspect.isfunction(cls.kedro_node.func): + code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) + return code + return None + except OSError as exc: + logger.error(exc) + return get_function_source_code(cls.kedro_node.func.__name__) @field_validator("filepath") @classmethod diff --git a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py index a0a4a5abc..b1cd8f45f 100644 --- a/package/kedro_viz/utils.py +++ b/package/kedro_viz/utils.py @@ -1,8 +1,12 @@ """Transcoding related utility functions.""" +import ast import hashlib +from pathlib import Path from typing import Tuple +from kedro_viz.launchers.utils import 
_find_kedro_project + TRANSCODING_SEPARATOR = "@" @@ -57,3 +61,17 @@ def _strip_transcoding(element: str) -> str: def is_dataset_param(dataset_name: str) -> bool: """Return whether a dataset is a parameter""" return dataset_name.lower().startswith("params:") or dataset_name == "parameters" + + +# Helper to get the source code of a function +def get_function_source_code(func_name: str): + project_dir = _find_kedro_project(Path.cwd()) + if project_dir: + for filepath in project_dir.rglob("*.py"): + with open(filepath, "r") as file: + file_content = file.read() + parsed_content = ast.parse(file_content) + for node in ast.walk(parsed_content): + if isinstance(node, ast.FunctionDef) and node.name == func_name: + return ast.unparse(node) + return None From bff5a4c89387aac19c17f216b6010a5a5810fd6a Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 3 Jul 2024 13:13:03 -0500 Subject: [PATCH 05/34] adjust file permissions Signed-off-by: ravi-kumar-pilla --- demo-project/.version | 0 package.json | 0 package/kedro_viz/server.py | 0 trufflehog-ignore.txt | 0 4 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 demo-project/.version mode change 100755 => 100644 package.json mode change 100755 => 100644 package/kedro_viz/server.py mode change 100755 => 100644 trufflehog-ignore.txt diff --git a/demo-project/.version b/demo-project/.version old mode 100755 new mode 100644 diff --git a/package.json b/package.json old mode 100755 new mode 100644 diff --git a/package/kedro_viz/server.py b/package/kedro_viz/server.py old mode 100755 new mode 100644 diff --git a/trufflehog-ignore.txt b/trufflehog-ignore.txt old mode 100755 new mode 100644 From 3038afdca7b297455b838c38b71d8f7caafe1789 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 3 Jul 2024 13:22:46 -0500 Subject: [PATCH 06/34] update comments and rename parser file Signed-off-by: ravi-kumar-pilla --- package/kedro_viz/integrations/kedro/data_loader.py | 4 +--- .../integrations/kedro/{parser.py => lite_parser.py} | 0 package/kedro_viz/utils.py | 2 +- 3 files changed, 2 insertions(+), 4 deletions(-) rename package/kedro_viz/integrations/kedro/{parser.py => lite_parser.py} (100%) diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index a036aef0e..673c55947 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -17,7 +17,7 @@ from kedro.pipeline import Pipeline from kedro_viz.constants import VIZ_METADATA_ARGS -from kedro_viz.integrations.kedro.parser import parse_project +from kedro_viz.integrations.kedro.lite_parser import parse_project logger = logging.getLogger(__name__) @@ -109,7 +109,6 @@ def load_data( stats_dict = _get_dataset_stats(project_path) pipelines_dict = dict(parse_project(project_path)) - # print(pipelines_dict) return catalog, pipelines_dict, session_store, stats_dict else: from kedro.framework.project import configure_project, pipelines @@ -140,7 +139,6 @@ def load_data( # in case user doesn't have an active session down the line when it's first accessed. # Useful for users who have `get_current_session` in their `register_pipelines()`. 
pipelines_dict = dict(pipelines) - # print(pipelines_dict) stats_dict = _get_dataset_stats(project_path) return catalog, pipelines_dict, session_store, stats_dict diff --git a/package/kedro_viz/integrations/kedro/parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py similarity index 100% rename from package/kedro_viz/integrations/kedro/parser.py rename to package/kedro_viz/integrations/kedro/lite_parser.py diff --git a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py index b1cd8f45f..6aa4bf009 100644 --- a/package/kedro_viz/utils.py +++ b/package/kedro_viz/utils.py @@ -62,7 +62,7 @@ def is_dataset_param(dataset_name: str) -> bool: """Return whether a dataset is a parameter""" return dataset_name.lower().startswith("params:") or dataset_name == "parameters" - +# [NOTE: Experimentation] # Helper to get the source code of a function def get_function_source_code(func_name: str): project_dir = _find_kedro_project(Path.cwd()) From 0e91504f2e10de962ca30cfcc17c2f04f3431b1b Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 3 Jul 2024 13:24:20 -0500 Subject: [PATCH 07/34] remove gitignore Signed-off-by: ravi-kumar-pilla --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index d9017face..39a763068 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,3 @@ coverage.xml # Kedro *.log - -# testing -spaceflights/* \ No newline at end of file From a4b3b1a0275426ceef6f1053383b26a09c8f501a Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 3 Jul 2024 17:12:42 -0500 Subject: [PATCH 08/34] handle func lambda case Signed-off-by: ravi-kumar-pilla --- .../integrations/kedro/data_loader.py | 3 +- .../integrations/kedro/lite_parser.py | 76 +++++++++++++------ package/kedro_viz/utils.py | 1 + 3 files changed, 54 insertions(+), 26 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 673c55947..eb5825837 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -9,10 +9,11 @@ import logging from pathlib import Path from typing import Any, Dict, Optional, Tuple + +from kedro import __version__ from kedro.config.omegaconf_config import OmegaConfigLoader from kedro.framework.context.context import KedroContext from kedro.framework.session.store import BaseSessionStore -from kedro import __version__ from kedro.io import DataCatalog from kedro.pipeline import Pipeline diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index ceefe71c6..4ae400ac7 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -1,17 +1,30 @@ +import ast +import logging from collections import defaultdict from pathlib import Path -import ast from typing import Dict, Iterable, List + from kedro.pipeline.modular_pipeline import pipeline as ModularPipeline -from kedro.pipeline.pipeline import Pipeline, Node +from kedro.pipeline.pipeline import Node, Pipeline + +logger = logging.getLogger(__name__) + class KedroPipelineLocator(ast.NodeVisitor): + """ + Represents a pipeline that is located when parsing + the Kedro project's `create_pipeline` function + + """ + def __init__(self): self.pipeline = None def visit_FunctionDef(self, node): try: if node.name == "create_pipeline": + # Explore the located pipeline for nodes + # and other keyword args kedro_pipeline_explorer = KedroPipelineExplorer() 
kedro_pipeline_explorer.visit(node) try: @@ -33,24 +46,26 @@ def visit_FunctionDef(self, node): ) except Exception as exc: # [TODO: Error with modular pipeline, try creating regular pipeline] - print(exc) + logger.error(exc) self.pipeline = Pipeline( nodes=kedro_pipeline_explorer.nodes, tags=kedro_pipeline_explorer.tags, ) self.generic_visit(node) - + except Exception as exc: - # [TODO: Error with parsing the file, dump the visiting node] - print(exc) - print(ast.dump(node, indent=2)) + # [TODO: Error with parsing the file, + # dump the visiting node for debugging] + logger.error(exc) + logger.info(ast.dump(node, indent=2)) + class KedroPipelineExplorer(ast.NodeVisitor): # [TODO: Current explorer only serves for 1 pipeline() function within a create_pipeline def] def __init__(self): - # keeping these here for future use-case - # when dealing with multiple pipeline() functions + # keeping these here for future use-case + # when dealing with multiple `pipeline()` functions # within a create_pipeline def self.nodes: List[Node] = [] self.inputs = None @@ -64,7 +79,7 @@ def visit_Call(self, node): # for a modular pipeline # [TODO: pipe to be explored later] # pipe: Iterable[Node | Pipeline] | Pipeline - + pipeline_inputs: str | set[str] | dict[str, str] | None = None pipeline_outputs: str | set[str] | dict[str, str] | None = None pipeline_namespace: str | None = None @@ -103,12 +118,15 @@ def visit_Call(self, node): for keyword in elt.keywords: # [TODO: func is WIP. Need to create a Callable] if keyword.arg == "func": - func_name = keyword.value.id - exec( - f"def {func_name}(*args, **kwargs): pass", - globals(), - ) - node_func = globals()[func_name] + if isinstance(keyword.value, ast.Name): + func_name = keyword.value.id + exec( + f"def {func_name}(*args, **kwargs): pass", + globals(), + ) + node_func = globals()[func_name] + else: + node_func = lambda *args, **kwargs: None elif keyword.arg == "inputs": node_inputs = parse_value(keyword.value) elif keyword.arg == "outputs": @@ -121,7 +139,7 @@ def visit_Call(self, node): node_confirms = parse_value(keyword.value) elif keyword.arg == "namespace": node_namespace = parse_value(keyword.value) - + # Create Node kedro_node = Node( func=node_func, @@ -147,6 +165,7 @@ def visit_Call(self, node): # Helper functions def parse_value(keyword_value): + """Helper to parse values assigned to node/pipeline properties""" if isinstance(keyword_value, ast.Constant): if not keyword_value.value: return None @@ -155,7 +174,8 @@ def parse_value(keyword_value): return [parse_value(elt) for elt in keyword_value.elts] elif isinstance(keyword_value, ast.Dict): return { - parse_value(k): parse_value(v) for k, v in zip(keyword_value.keys, keyword_value.values) + parse_value(k): parse_value(v) + for k, v in zip(keyword_value.keys, keyword_value.values) } elif isinstance(keyword_value, ast.ListComp): # [TODO: For list comprehensions, complex case handling] @@ -166,13 +186,19 @@ def parse_value(keyword_value): # [Example can be found under demo_project/pipelines/modelling] return f"DictComp({ast.dump(keyword_value)})" elif isinstance(keyword_value, ast.FormattedValue): - # [TODO: For formatted strings, complex case handling] + # [TODO: For formatted strings i.e., single formatted fields, + # complex case handling] # [Example can be found under demo_project/pipelines/modelling] return f"FormattedValue({ast.dump(keyword_value)})" elif isinstance(keyword_value, ast.JoinedStr): - # [TODO: For joined strings, complex case handling] + # [TODO: For joined strings i.e., multiple 
formatted fields,
+        # complex case handling]
         # [Example can be found under demo_project/pipelines/modelling]
         return f"JoinedStr({ast.dump(keyword_value)})"
+    elif isinstance(keyword_value, ast.Name):
+        # [TODO: For variable references, complex case handling]
+        # [Example can be found under demo_project/pipelines/modelling]
+        return f"Variable({ast.dump(keyword_value)})"
     else:
         # [TODO: For any other complex case handling]
         return f"Unsupported({ast.dump(keyword_value)})"
@@ -182,8 +208,8 @@ def parse_value(keyword_value):
 # [WIP: Naive parsing and exploring pipelines. Not sure of any better way for now]
 def parse_project(project_path: Path) -> Dict[str, Pipeline]:
     # Result
     pipelines: Dict[str, Pipeline] = defaultdict(dict)
-
-    # Loop through all the .py files in the kedro project
+
+    # Loop through all the .py files in the kedro project
     # and start locating create_pipeline
     for filepath in project_path.rglob("*.py"):
@@ -191,7 +217,7 @@ def parse_project(project_path: Path) -> Dict[str, Pipeline]:
         # parse file content using ast
         parsed_content_ast_node = ast.parse(file_content)
-
+
         # extract pipeline name from file path
         pipeline_name = filepath.relative_to(project_path).parent.name
@@ -200,14 +226,14 @@ def parse_project(project_path: Path) -> Dict[str, Pipeline]:
         kedro_pipeline_locator = KedroPipelineLocator()
         kedro_pipeline_locator.visit(parsed_content_ast_node)
         located_pipeline = kedro_pipeline_locator.pipeline
-
+
         # add to the result if a pipeline is located
         if located_pipeline:
             pipelines[pipeline_name] = located_pipeline
 
     # foolproof to have at least 1 pipeline
     # so the UI won't break
-    if len(pipelines.values()):
+    if len(pipelines.keys()):
         # creating a default pipeline
         pipelines["__default__"] = sum(pipelines.values())
     else:
         pipelines["__default__"] = Pipeline(nodes=[])
 
     return pipelines
diff --git a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py
index 6aa4bf009..54f1b84d4 100644
--- a/package/kedro_viz/utils.py
+++ b/package/kedro_viz/utils.py
@@ -62,6 +62,7 @@ def is_dataset_param(dataset_name: str) -> bool:
     """Return whether a dataset is a parameter"""
     return dataset_name.lower().startswith("params:") or dataset_name == "parameters"
 
+
 # [NOTE: Experimentation]
 # Helper to get the source code of a function

From 0a80f6c3bf0a3030ed5fa1f1bc667751ea0445c5 Mon Sep 17 00:00:00 2001
From: ravi-kumar-pilla
Date: Fri, 12 Jul 2024 17:04:44 -0500
Subject: [PATCH 09/34] mocking working draft proposal

---
 .../integrations/kedro/data_loader.py         |  43 ++++---
 .../integrations/kedro/lite_parser_mocking.py | 108 ++++++++++++++++++
 2 files changed, 137 insertions(+), 14 deletions(-)
 create mode 100644 package/kedro_viz/integrations/kedro/lite_parser_mocking.py

diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py
index eb5825837..3f21b2919 100644
--- a/package/kedro_viz/integrations/kedro/data_loader.py
+++ b/package/kedro_viz/integrations/kedro/data_loader.py
@@ -8,8 +8,10 @@
 import json
 import logging
 from pathlib import Path
+import sys
 from typing import Any, Dict, Optional, Tuple
-
+from kedro.framework.project import configure_project, pipelines
+from kedro.framework.startup import bootstrap_project
 from kedro import __version__
 from kedro.config.omegaconf_config import OmegaConfigLoader
 from kedro.framework.context.context import KedroContext
@@ -18,7 +20,7 @@
 from kedro.pipeline import Pipeline
 
 from kedro_viz.constants import VIZ_METADATA_ARGS
-from kedro_viz.integrations.kedro.lite_parser import parse_project
+from kedro_viz.integrations.kedro.lite_parser_mocking import get_mocked_modules
logger = logging.getLogger(__name__) @@ -68,6 +70,9 @@ def _get_dataset_stats(project_path: Path) -> Dict: ) return {} +def _update_sys_modules(mock_modules): + for module_name, mock in mock_modules.items(): + sys.modules[module_name] = mock def load_data( project_path: Path, @@ -92,36 +97,46 @@ def load_data( A tuple containing the data catalog and the pipeline dictionary and the session store. """ + + if package_name: + configure_project(package_name) + else: + # bootstrap project when viz is run in dev mode + bootstrap_project(project_path) + if is_lite: # [TODO: Confirm on the context creation] context = KedroContext( package_name="{{ cookiecutter.python_package }}", project_path=project_path, - config_loader=OmegaConfigLoader(conf_source=str(project_path)), + config_loader=OmegaConfigLoader(conf_source="conf", base_env="base", default_run_env="local"), hook_manager=_VizNullPluginManager(), env=env, ) - # [TODO: Confirm on the session store creation] + # Lite version will not support experiment tracking for now session_store = None # [TODO: Confirm on the DataCatalog creation] catalog = DataCatalog() stats_dict = _get_dataset_stats(project_path) - pipelines_dict = dict(parse_project(project_path)) + mocked_modules = get_mocked_modules(project_path) + + # Temporarily clear and reload sys.modules to force use of mock_modules + original_sys_modules = sys.modules.copy() + try: + _update_sys_modules(mocked_modules) + pipelines_dict = dict(pipelines) + finally: + sys.modules.clear() + sys.modules.update(original_sys_modules) + + print(pipelines_dict) + return catalog, pipelines_dict, session_store, stats_dict else: - from kedro.framework.project import configure_project, pipelines from kedro.framework.session import KedroSession - from kedro.framework.startup import bootstrap_project - - if package_name: - configure_project(package_name) - else: - # bootstrap project when viz is run in dev mode - bootstrap_project(project_path) - with KedroSession.create( project_path=project_path, env=env, diff --git a/package/kedro_viz/integrations/kedro/lite_parser_mocking.py b/package/kedro_viz/integrations/kedro/lite_parser_mocking.py new file mode 100644 index 000000000..98bf0131a --- /dev/null +++ b/package/kedro_viz/integrations/kedro/lite_parser_mocking.py @@ -0,0 +1,108 @@ +import ast +import logging +from pathlib import Path +import importlib.util +from unittest.mock import MagicMock + +logger = logging.getLogger(__name__) + + +def _get_import_statements_from_ast(parsed_content_ast_node): + import_statements = [] + + for node in ast.walk(parsed_content_ast_node): + if isinstance(node, ast.Import): + for alias in node.names: + import_statements.append(f"import {alias.name}") + elif isinstance(node, ast.ImportFrom): + module = node.module if node.module else "" + for alias in node.names: + import_statements.append(f"from {module} import {alias.name}") + + return import_statements + + +def _is_module_importable(module_name): + try: + importlib.import_module(module_name) + return True + except ImportError: + return False + + +def _is_relative_import_resolvable(module_name, file_path): + base_dir = file_path.parent + relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py") + return relative_path.exists() + + +def _get_unresolvable_imports(import_statements, file_path): + unresolvable_imports = [] + + for statement in import_statements: + if statement.startswith("import "): + module_name = statement.split(" ")[1].split(".")[0] + if not _is_module_importable(module_name): + 
unresolvable_imports.append(statement) + elif statement.startswith("from "): + parts = statement.split(" ") + module_name = parts[1] + + if _is_relative_import_resolvable(module_name, file_path): + continue + + module_name = module_name.split(".")[0] + + if not _is_module_importable(module_name): + unresolvable_imports.append(statement) + + return unresolvable_imports + + +def _parse_project_for_imports(project_path: Path): + all_imports = {} + for filepath in project_path.rglob("*.py"): + with open(filepath, "r") as file: + file_content = file.read() + + # parse file content using ast + parsed_content_ast_node = ast.parse(file_content) + import_statements = _get_import_statements_from_ast(parsed_content_ast_node) + all_imports[filepath] = import_statements + return all_imports + + +def _create_mock_imports(unresolvable_imports, mock_modules): + for statement in unresolvable_imports: + if statement.startswith("import "): + module_name = statement.split(" ")[1] + elif statement.startswith("from "): + module_name = statement.split(" ")[1] + + parts = module_name.split(".") + full_name = "" + for i, part in enumerate(parts): + full_name = part if i == 0 else f"{full_name}.{part}" + if full_name not in mock_modules: + mock_modules[full_name] = MagicMock() + if i < len(parts) - 1: + parent_module = mock_modules[full_name] + if not hasattr(parent_module, part): + setattr(parent_module, part, MagicMock()) + + +def get_mocked_modules(project_path: Path): + all_imports = _parse_project_for_imports(project_path) + mock_modules = {} + + for file_path, imports in all_imports.items(): + unresolvable_imports = _get_unresolvable_imports(imports, file_path) + + print(f"File Path: {file_path}, Import Errors: {unresolvable_imports}") + + # Create mock imports + _create_mock_imports(unresolvable_imports, mock_modules) + + print(f"Mocked modules: {mock_modules}") + + return mock_modules From e31242f2343ad5a23095540f0c114fe98a439aaa Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 15 Jul 2024 16:46:13 -0500 Subject: [PATCH 10/34] reuse session with mock modules --- .../integrations/kedro/data_loader.py | 106 +++--- .../integrations/kedro/lite_parser.py | 345 ++++++------------ .../integrations/kedro/lite_parser_mocking.py | 108 ------ package/kedro_viz/models/flowchart.py | 21 +- package/kedro_viz/utils.py | 19 - package/tests/test_launchers/test_cli.py | 23 ++ 6 files changed, 194 insertions(+), 428 deletions(-) mode change 100755 => 100644 package/kedro_viz/integrations/kedro/lite_parser.py delete mode 100644 package/kedro_viz/integrations/kedro/lite_parser_mocking.py diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 3f21b2919..2b64f7cc3 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -7,20 +7,20 @@ import json import logging -from pathlib import Path import sys +from pathlib import Path from typing import Any, Dict, Optional, Tuple -from kedro.framework.project import configure_project, pipelines -from kedro.framework.startup import bootstrap_project + from kedro import __version__ -from kedro.config.omegaconf_config import OmegaConfigLoader -from kedro.framework.context.context import KedroContext +from kedro.framework.project import configure_project, pipelines +from kedro.framework.session import KedroSession from kedro.framework.session.store import BaseSessionStore +from kedro.framework.startup import bootstrap_project from kedro.io import 
DataCatalog from kedro.pipeline import Pipeline from kedro_viz.constants import VIZ_METADATA_ARGS -from kedro_viz.integrations.kedro.lite_parser_mocking import get_mocked_modules +from kedro_viz.integrations.kedro.lite_parser import get_mocked_modules logger = logging.getLogger(__name__) @@ -70,10 +70,47 @@ def _get_dataset_stats(project_path: Path) -> Dict: ) return {} + def _update_sys_modules(mock_modules): for module_name, mock in mock_modules.items(): sys.modules[module_name] = mock + +def _load_data_helper( + project_path: Path, + env: Optional[str] = None, + include_hooks: bool = False, + package_name: Optional[str] = None, + extra_params: Optional[Dict[str, Any]] = None, +): + if package_name: + configure_project(package_name) + else: + # bootstrap project when viz is run in dev mode + bootstrap_project(project_path) + + with KedroSession.create( + project_path=project_path, + env=env, + save_on_close=False, + extra_params=extra_params, + ) as session: + # check for --include-hooks option + if not include_hooks: + session._hook_manager = _VizNullPluginManager() # type: ignore + + context = session.load_context() + session_store = session._store + catalog = context.catalog + + # Pipelines is a lazy dict-like object, so we force it to populate here + # in case user doesn't have an active session down the line when it's first accessed. + # Useful for users who have `get_current_session` in their `register_pipelines()`. + pipelines_dict = dict(pipelines) + stats_dict = _get_dataset_stats(project_path) + return catalog, pipelines_dict, session_store, stats_dict + + def load_data( project_path: Path, env: Optional[str] = None, @@ -97,64 +134,19 @@ def load_data( A tuple containing the data catalog and the pipeline dictionary and the session store. """ - - if package_name: - configure_project(package_name) - else: - # bootstrap project when viz is run in dev mode - bootstrap_project(project_path) - if is_lite: - # [TODO: Confirm on the context creation] - context = KedroContext( - package_name="{{ cookiecutter.python_package }}", - project_path=project_path, - config_loader=OmegaConfigLoader(conf_source="conf", base_env="base", default_run_env="local"), - hook_manager=_VizNullPluginManager(), - env=env, - ) - - # Lite version will not support experiment tracking for now - session_store = None - - # [TODO: Confirm on the DataCatalog creation] - catalog = DataCatalog() - - stats_dict = _get_dataset_stats(project_path) mocked_modules = get_mocked_modules(project_path) - # Temporarily clear and reload sys.modules to force use of mock_modules original_sys_modules = sys.modules.copy() try: _update_sys_modules(mocked_modules) - pipelines_dict = dict(pipelines) + return _load_data_helper( + project_path, env, include_hooks, package_name, extra_params + ) finally: sys.modules.clear() sys.modules.update(original_sys_modules) - - print(pipelines_dict) - - return catalog, pipelines_dict, session_store, stats_dict else: - from kedro.framework.session import KedroSession - with KedroSession.create( - project_path=project_path, - env=env, - save_on_close=False, - extra_params=extra_params, - ) as session: - # check for --include-hooks option - if not include_hooks: - session._hook_manager = _VizNullPluginManager() # type: ignore - - context = session.load_context() - session_store = session._store - catalog = context.catalog - - # Pipelines is a lazy dict-like object, so we force it to populate here - # in case user doesn't have an active session down the line when it's first accessed. 
- # Useful for users who have `get_current_session` in their `register_pipelines()`. - pipelines_dict = dict(pipelines) - stats_dict = _get_dataset_stats(project_path) - - return catalog, pipelines_dict, session_store, stats_dict + return _load_data_helper( + project_path, env, include_hooks, package_name, extra_params + ) diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py old mode 100755 new mode 100644 index 4ae400ac7..ddec3fa56 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -1,242 +1,127 @@ import ast +import importlib.util import logging -from collections import defaultdict from pathlib import Path -from typing import Dict, Iterable, List - -from kedro.pipeline.modular_pipeline import pipeline as ModularPipeline -from kedro.pipeline.pipeline import Node, Pipeline +from typing import Any, Dict, List +from unittest.mock import MagicMock logger = logging.getLogger(__name__) -class KedroPipelineLocator(ast.NodeVisitor): - """ - Represents a pipeline that is located when parsing - the Kedro project's `create_pipeline` function - - """ - - def __init__(self): - self.pipeline = None - - def visit_FunctionDef(self, node): - try: - if node.name == "create_pipeline": - # Explore the located pipeline for nodes - # and other keyword args - kedro_pipeline_explorer = KedroPipelineExplorer() - kedro_pipeline_explorer.visit(node) - try: - # modular pipeline - if kedro_pipeline_explorer.namespace: - self.pipeline = ModularPipeline( - pipe=kedro_pipeline_explorer.nodes, - inputs=kedro_pipeline_explorer.inputs, - outputs=kedro_pipeline_explorer.outputs, - parameters=kedro_pipeline_explorer.parameters, - tags=kedro_pipeline_explorer.tags, - namespace=kedro_pipeline_explorer.namespace, - ) - else: - # kedro pipeline - self.pipeline = Pipeline( - nodes=kedro_pipeline_explorer.nodes, - tags=kedro_pipeline_explorer.tags, - ) - except Exception as exc: - # [TODO: Error with modular pipeline, try creating regular pipeline] - logger.error(exc) - self.pipeline = Pipeline( - nodes=kedro_pipeline_explorer.nodes, - tags=kedro_pipeline_explorer.tags, - ) - - self.generic_visit(node) - - except Exception as exc: - # [TODO: Error with parsing the file, - # dump the visiting node for debugging] - logger.error(exc) - logger.info(ast.dump(node, indent=2)) - - -class KedroPipelineExplorer(ast.NodeVisitor): - # [TODO: Current explorer only serves for 1 pipeline() function within a create_pipeline def] - def __init__(self): - # keeping these here for future use-case - # when dealing with multiple `pipeline()` functions - # within a create_pipeline def - self.nodes: List[Node] = [] - self.inputs = None - self.outputs = None - self.namespace = None - self.parameters = None - self.tags = None - - def visit_Call(self, node): - if isinstance(node.func, ast.Name) and node.func.id == "pipeline": - # for a modular pipeline - # [TODO: pipe to be explored later] - # pipe: Iterable[Node | Pipeline] | Pipeline - - pipeline_inputs: str | set[str] | dict[str, str] | None = None - pipeline_outputs: str | set[str] | dict[str, str] | None = None - pipeline_namespace: str | None = None - pipeline_parameters: str | set[str] | dict[str, str] | None = None - pipeline_tags: str | Iterable[str] | None = None - - for keyword in node.keywords: - if keyword.arg == "namespace": - pipeline_namespace = parse_value(keyword.value) - elif keyword.arg == "inputs": - pipeline_inputs = parse_value(keyword.value) - elif 
keyword.arg == "outputs": - pipeline_outputs = parse_value(keyword.value) - elif keyword.arg == "parameters": - pipeline_parameters = parse_value(keyword.value) - elif keyword.arg == "tags": - pipeline_tags = parse_value(keyword.value) - - # exploring nodes - for arg in node.args: - if isinstance(arg, ast.List): - for elt in arg.elts: - if ( - isinstance(elt, ast.Call) - and isinstance(elt.func, ast.Name) - and elt.func.id == "node" - ): - node_func = None - node_inputs: str | list[str] | dict[str, str] | None = None - node_outputs: str | list[str] | dict[str, str] | None = None - node_name: str | None = None - node_tags: str | Iterable[str] | None = None - node_confirms: str | list[str] | None = None - node_namespace: str | None = None - - for keyword in elt.keywords: - # [TODO: func is WIP. Need to create a Callable] - if keyword.arg == "func": - if isinstance(keyword.value, ast.Name): - func_name = keyword.value.id - exec( - f"def {func_name}(*args, **kwargs): pass", - globals(), - ) - node_func = globals()[func_name] - else: - node_func = lambda *args, **kwargs: None - elif keyword.arg == "inputs": - node_inputs = parse_value(keyword.value) - elif keyword.arg == "outputs": - node_outputs = parse_value(keyword.value) - elif keyword.arg == "name": - node_name = parse_value(keyword.value) - elif keyword.arg == "tags": - node_tags = parse_value(keyword.value) - elif keyword.arg == "confirms": - node_confirms = parse_value(keyword.value) - elif keyword.arg == "namespace": - node_namespace = parse_value(keyword.value) - - # Create Node - kedro_node = Node( - func=node_func, - inputs=node_inputs, - outputs=node_outputs, - name=node_name, - tags=node_tags, - confirms=node_confirms, - namespace=node_namespace, - ) - - self.nodes.append(kedro_node) - - # These will be used for modular pipeline creation - self.inputs = pipeline_inputs - self.outputs = pipeline_outputs - self.namespace = pipeline_namespace - self.parameters = pipeline_parameters - self.tags = pipeline_tags - - self.generic_visit(node) - - -# Helper functions -def parse_value(keyword_value): - """Helper to parse values assigned to node/pipeline properties""" - if isinstance(keyword_value, ast.Constant): - if not keyword_value.value: - return None - return str(keyword_value.value) - elif isinstance(keyword_value, (ast.List, ast.Set)): - return [parse_value(elt) for elt in keyword_value.elts] - elif isinstance(keyword_value, ast.Dict): - return { - parse_value(k): parse_value(v) - for k, v in zip(keyword_value.keys, keyword_value.values) - } - elif isinstance(keyword_value, ast.ListComp): - # [TODO: For list comprehensions, complex case handling] - # [Example can be found under demo_project/pipelines/modelling] - return f"ListComp({ast.dump(keyword_value)})" - elif isinstance(keyword_value, ast.DictComp): - # [TODO: For dict comprehensions, complex case handling] - # [Example can be found under demo_project/pipelines/modelling] - return f"DictComp({ast.dump(keyword_value)})" - elif isinstance(keyword_value, ast.FormattedValue): - # [TODO: For formatted strings i.e., single formatted fields, - # complex case handling] - # [Example can be found under demo_project/pipelines/modelling] - return f"FormattedValue({ast.dump(keyword_value)})" - elif isinstance(keyword_value, ast.JoinedStr): - # [TODO: For joined strings i.e., multiple formatted fields, - # complex case handling] - # [Example can be found under demo_project/pipelines/modelling] - return f"JoinedStr({ast.dump(keyword_value)})" - elif isinstance(keyword_value, ast.Name): - # 
[TODO: For variable references, complex case handling] - # [Example can be found under demo_project/pipelines/modelling] - return f"Variable({ast.dump(keyword_value)})" - else: - # [TODO: For any other complex case handling] - return f"Unsupported({ast.dump(keyword_value)})" - - -# [WIP: Naive parsing and exploring pipelines. Not sure of any better way for now] -def parse_project(project_path: Path) -> Dict[str, Pipeline]: - # Result - pipelines: Dict[str, Pipeline] = defaultdict(dict) - - # Loop through all the .py files in the kedro project - # and start locating create_pipeline +def _get_import_statements_from_ast(parsed_content_ast_node: ast.Module) -> List[str]: + import_statements: List[str] = [] + + for node in ast.walk(parsed_content_ast_node): + if isinstance(node, ast.Import): + for alias in node.names: + import_statements.append(f"import {alias.name}") + elif isinstance(node, ast.ImportFrom): + module = node.module if node.module else "" + for alias in node.names: + import_statements.append(f"from {module} import {alias.name}") + + return import_statements + + +def _is_module_importable(module_name: str) -> bool: + try: + importlib.import_module(module_name) + return True + except ImportError: + return False + + +def _is_relative_import_resolvable(file_path: Path, module_name: str) -> bool: + base_dir = file_path.parent + relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py") + return relative_path.exists() + + +def _is_valid_import_stmt(statement: Any) -> bool: + if not isinstance(statement, str) or not statement.strip(): + return False + + # Split the statement by spaces + parts = statement.split() + + # Ensure that the statement has at least two parts + if len(parts) < 2: + return False + + return True + + +def _get_unresolvable_imports( + file_path: Path, import_statements: List[str] +) -> List[str]: + unresolvable_imports: List[str] = [] + + for statement in import_statements: + if _is_valid_import_stmt(statement): + if statement.startswith("import "): + module_name = statement.split(" ")[1].split(".")[0] + + if not _is_module_importable(module_name): + unresolvable_imports.append(statement) + + elif statement.startswith("from "): + parts = statement.split(" ") + module_name = parts[1] + + if _is_relative_import_resolvable(file_path, module_name): + continue + + # only checking for parent module + module_name = module_name.split(".")[0] + + if not _is_module_importable(module_name): + unresolvable_imports.append(statement) + + return unresolvable_imports + + +def _parse_project_for_imports(project_path: Path) -> Dict[Path, List[str]]: + all_imports: Dict[Path, List[str]] = {} + for filepath in project_path.rglob("*.py"): with open(filepath, "r") as file: file_content = file.read() # parse file content using ast - parsed_content_ast_node = ast.parse(file_content) - - # extract pipeline name from file path - pipeline_name = filepath.relative_to(project_path).parent.name - - # Locate pipelines (tested for only 1 create_pipeline per pipeline file) - # [TODO: confirm with Kedro team if more than 1 create_pipeline existence] - kedro_pipeline_locator = KedroPipelineLocator() - kedro_pipeline_locator.visit(parsed_content_ast_node) - located_pipeline = kedro_pipeline_locator.pipeline - - # add to the result if a pipeline is located - if located_pipeline: - pipelines[pipeline_name] = located_pipeline - - # foolproof to have atleast 1 pipeline - # so the UI won't break - if len(pipelines.keys()): - # creating a default pipeline - pipelines["__default__"] = 
sum(pipelines.values()) - else: - pipelines["__default__"] = Pipeline(nodes=[]) - - return pipelines + parsed_content_ast_node: ast.Module = ast.parse(file_content) + import_statements = _get_import_statements_from_ast(parsed_content_ast_node) + all_imports[filepath] = import_statements + return all_imports + + +def _create_mock_imports( + unresolvable_imports: List[str], mock_modules: Dict[str, MagicMock] +) -> None: + for statement in unresolvable_imports: + # needs error handling + module_name = statement.split(" ")[1] + module_parts = module_name.split(".") + full_module_name = "" + for idx, sub_module_name in enumerate(module_parts): + full_module_name = ( + sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" + ) + if full_module_name not in mock_modules: + mock_modules[full_module_name] = MagicMock() + + +def get_mocked_modules(project_path: Path) -> Dict[str, MagicMock]: + all_imports: Dict[Path, List[str]] = _parse_project_for_imports(project_path) + mock_modules: Dict[str, MagicMock] = {} + + for file_path, imports in all_imports.items(): + unresolvable_imports: List[str] = _get_unresolvable_imports(file_path, imports) + + print(f"File Path: {file_path}, Import Errors: {unresolvable_imports}") + + # Create mock imports + _create_mock_imports(unresolvable_imports, mock_modules) + + print(f"Mocked modules: {mock_modules}") + + return mock_modules diff --git a/package/kedro_viz/integrations/kedro/lite_parser_mocking.py b/package/kedro_viz/integrations/kedro/lite_parser_mocking.py deleted file mode 100644 index 98bf0131a..000000000 --- a/package/kedro_viz/integrations/kedro/lite_parser_mocking.py +++ /dev/null @@ -1,108 +0,0 @@ -import ast -import logging -from pathlib import Path -import importlib.util -from unittest.mock import MagicMock - -logger = logging.getLogger(__name__) - - -def _get_import_statements_from_ast(parsed_content_ast_node): - import_statements = [] - - for node in ast.walk(parsed_content_ast_node): - if isinstance(node, ast.Import): - for alias in node.names: - import_statements.append(f"import {alias.name}") - elif isinstance(node, ast.ImportFrom): - module = node.module if node.module else "" - for alias in node.names: - import_statements.append(f"from {module} import {alias.name}") - - return import_statements - - -def _is_module_importable(module_name): - try: - importlib.import_module(module_name) - return True - except ImportError: - return False - - -def _is_relative_import_resolvable(module_name, file_path): - base_dir = file_path.parent - relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py") - return relative_path.exists() - - -def _get_unresolvable_imports(import_statements, file_path): - unresolvable_imports = [] - - for statement in import_statements: - if statement.startswith("import "): - module_name = statement.split(" ")[1].split(".")[0] - if not _is_module_importable(module_name): - unresolvable_imports.append(statement) - elif statement.startswith("from "): - parts = statement.split(" ") - module_name = parts[1] - - if _is_relative_import_resolvable(module_name, file_path): - continue - - module_name = module_name.split(".")[0] - - if not _is_module_importable(module_name): - unresolvable_imports.append(statement) - - return unresolvable_imports - - -def _parse_project_for_imports(project_path: Path): - all_imports = {} - for filepath in project_path.rglob("*.py"): - with open(filepath, "r") as file: - file_content = file.read() - - # parse file content using ast - parsed_content_ast_node = 
ast.parse(file_content) - import_statements = _get_import_statements_from_ast(parsed_content_ast_node) - all_imports[filepath] = import_statements - return all_imports - - -def _create_mock_imports(unresolvable_imports, mock_modules): - for statement in unresolvable_imports: - if statement.startswith("import "): - module_name = statement.split(" ")[1] - elif statement.startswith("from "): - module_name = statement.split(" ")[1] - - parts = module_name.split(".") - full_name = "" - for i, part in enumerate(parts): - full_name = part if i == 0 else f"{full_name}.{part}" - if full_name not in mock_modules: - mock_modules[full_name] = MagicMock() - if i < len(parts) - 1: - parent_module = mock_modules[full_name] - if not hasattr(parent_module, part): - setattr(parent_module, part, MagicMock()) - - -def get_mocked_modules(project_path: Path): - all_imports = _parse_project_for_imports(project_path) - mock_modules = {} - - for file_path, imports in all_imports.items(): - unresolvable_imports = _get_unresolvable_imports(imports, file_path) - - print(f"File Path: {file_path}, Import Errors: {unresolvable_imports}") - - # Create mock imports - _create_mock_imports(unresolvable_imports, mock_modules) - - print(f"Mocked modules: {mock_modules}") - - return mock_modules diff --git a/package/kedro_viz/models/flowchart.py b/package/kedro_viz/models/flowchart.py index 8747ac914..ed55fcfa6 100644 --- a/package/kedro_viz/models/flowchart.py +++ b/package/kedro_viz/models/flowchart.py @@ -20,11 +20,7 @@ ) from kedro_viz.models.utils import get_dataset_type -from kedro_viz.utils import ( - TRANSCODING_SEPARATOR, - _strip_transcoding, - get_function_source_code, -) +from kedro_viz.utils import TRANSCODING_SEPARATOR, _strip_transcoding try: # kedro 0.18.11 onwards @@ -420,15 +416,12 @@ def set_task_and_kedro_node(cls, task_node): @field_validator("code") @classmethod def set_code(cls, code): - try: - # this is required to handle partial, curry functions - if inspect.isfunction(cls.kedro_node.func): - code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) - return code - return None - except OSError as exc: - logger.error(exc) - return get_function_source_code(cls.kedro_node.func.__name__) + # this is required to handle partial, curry functions + if inspect.isfunction(cls.kedro_node.func): + code = inspect.getsource(_extract_wrapped_func(cls.kedro_node.func)) + return code + + return None @field_validator("filepath") @classmethod diff --git a/package/kedro_viz/utils.py b/package/kedro_viz/utils.py index 54f1b84d4..a0a4a5abc 100644 --- a/package/kedro_viz/utils.py +++ b/package/kedro_viz/utils.py @@ -1,12 +1,8 @@ """Transcoding related utility functions.""" -import ast import hashlib -from pathlib import Path from typing import Tuple -from kedro_viz.launchers.utils import _find_kedro_project - TRANSCODING_SEPARATOR = "@" @@ -61,18 +57,3 @@ def _strip_transcoding(element: str) -> str: def is_dataset_param(dataset_name: str) -> bool: """Return whether a dataset is a parameter""" return dataset_name.lower().startswith("params:") or dataset_name == "parameters" - - -# [NOTE: Experimentation] -# Helper to get the source code of a function -def get_function_source_code(func_name: str): - project_dir = _find_kedro_project(Path.cwd()) - if project_dir: - for filepath in project_dir.rglob("*.py"): - with open(filepath, "r") as file: - file_content = file.read() - parsed_content = ast.parse(file_content) - for node in ast.walk(parsed_content): - if isinstance(node, ast.FunctionDef) and node.name == 
func_name: - return ast.unparse(node) - return None diff --git a/package/tests/test_launchers/test_cli.py b/package/tests/test_launchers/test_cli.py index d30e651bb..ed32a8be5 100755 --- a/package/tests/test_launchers/test_cli.py +++ b/package/tests/test_launchers/test_cli.py @@ -91,6 +91,7 @@ def mock_project_path(mocker): "include_hooks": False, "package_name": None, "extra_params": {}, + "is_lite": False, }, ), ( @@ -107,6 +108,7 @@ def mock_project_path(mocker): "include_hooks": False, "package_name": None, "extra_params": {}, + "is_lite": False, }, ), ( @@ -128,6 +130,7 @@ def mock_project_path(mocker): "include_hooks": False, "package_name": None, "extra_params": {}, + "is_lite": False, }, ), ( @@ -160,6 +163,7 @@ def mock_project_path(mocker): "include_hooks": False, "package_name": None, "extra_params": {"extra_param": "param"}, + "is_lite": False, }, ), ( @@ -176,6 +180,24 @@ def mock_project_path(mocker): "include_hooks": True, "package_name": None, "extra_params": {}, + "is_lite": False, + }, + ), + ( + ["viz", "run", "--lite"], + { + "host": "127.0.0.1", + "port": 4141, + "load_file": None, + "save_file": None, + "pipeline_name": None, + "env": None, + "project_path": "testPath", + "autoreload": False, + "include_hooks": False, + "package_name": None, + "extra_params": {}, + "is_lite": True, }, ), ], @@ -340,6 +362,7 @@ def test_kedro_viz_command_with_autoreload( "include_hooks": False, "package_name": None, "extra_params": {}, + "is_lite": False }, "watcher_cls": RegExpWatcher, "watcher_kwargs": {"re_files": "^.*(\\.yml|\\.yaml|\\.py|\\.json)$"}, From 8b8e33796f4468020b8edc2a4f034f611329f44b Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Tue, 16 Jul 2024 20:04:50 -0500 Subject: [PATCH 11/34] wip integration tests --- .../test_integrations/test_data_loader.py | 93 ++++++++++++ .../test_integrations/test_lite_parser.py | 140 ++++++++++++++++++ package/tests/test_launchers/test_cli.py | 2 +- 3 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 package/tests/test_integrations/test_data_loader.py create mode 100644 package/tests/test_integrations/test_lite_parser.py diff --git a/package/tests/test_integrations/test_data_loader.py b/package/tests/test_integrations/test_data_loader.py new file mode 100644 index 000000000..2a7d79097 --- /dev/null +++ b/package/tests/test_integrations/test_data_loader.py @@ -0,0 +1,93 @@ +import json +import shutil +import subprocess +import sys +import tempfile +from pathlib import Path +from unittest.mock import patch + +import pytest + +from kedro_viz.integrations.kedro.data_loader import load_data +from kedro_viz.integrations.kedro.lite_parser import get_mocked_modules + + +@pytest.fixture +def kedro_project_path(): + # Setup a temporary directory + tmpdir = Path(tempfile.mkdtemp()) + try: + subprocess.run( + ["kedro", "new", "--name=spaceflights", "--tools=viz", "--example=y"], + cwd=tmpdir, + check=True, + ) + project_dir = next(tmpdir.glob("*/"), None) + if project_dir is None: + raise FileNotFoundError("Kedro project was not created successfully.") + yield project_dir + finally: + shutil.rmtree(tmpdir) + + +def test_load_data_with_dependencies(kedro_project_path): + # Install the project's dependencies + subprocess.run( + ["pip", "install", "-r", kedro_project_path / "requirements.txt"], check=True + ) + + # Load data with all dependencies installed + data_catalog, pipelines, session_store, context = load_data(kedro_project_path) + + assert data_catalog is not None + assert pipelines is not None + assert session_store is 
not None + assert context is not None + + +# [TODO: WIP, need help] +def test_load_data_without_dependencies(kedro_project_path): + try: + # # Create a new conda environment + # subprocess.run(["conda", "create", "--name", "mytestenv", "python=3.9", "--yes"], check=True) + + # # Activate the conda environment and run subsequent commands within it + # activate_cmd = ["conda", "activate", "mytestenv"] + + # # Run the combined command in shell + # subprocess.run(activate_cmd, shell=True, check=True) + + mocked_modules_with_deps = get_mocked_modules(kedro_project_path) + _, pipelines_dict_with_deps, _, _ = load_data(kedro_project_path, is_lite=False) + + assert mocked_modules_with_deps == {} + + subprocess.run( + ["pip", "uninstall", "-r", kedro_project_path / "requirements.txt", "-y"], + check=True, + ) + + # Install necessary dependencies using pip within the conda environment + subprocess.run(["pip", "install", "../kedro", "./package"], check=True) + + mocked_modules_without_deps = get_mocked_modules(kedro_project_path) + assert len(mocked_modules_without_deps.keys()) > 0 + + _, pipelines_dict_without_deps, _, _ = load_data( + kedro_project_path, is_lite=True + ) + + assert pipelines_dict_with_deps == pipelines_dict_without_deps + + finally: + # Deactivate the conda environment + # deactivate_cmd = ["conda", "deactivate"] + # subprocess.run(deactivate_cmd, shell=True, check=True) + + # # Delete the conda environment + # remove_cmd = ["conda", "remove", "--name", "mytestenv", "--all", "--yes"] + # subprocess.run(remove_cmd, shell=True, check=True) + subprocess.run( + ["pip", "install", "-r", kedro_project_path / "requirements.txt"], + check=True, + ) diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py new file mode 100644 index 000000000..a5ecc7fff --- /dev/null +++ b/package/tests/test_integrations/test_lite_parser.py @@ -0,0 +1,140 @@ +import ast +from pathlib import Path +from unittest.mock import MagicMock + +import pytest + +from kedro_viz.integrations.kedro.lite_parser import ( + _create_mock_imports, + _get_import_statements_from_ast, + _get_unresolvable_imports, + _is_module_importable, + _is_relative_import_resolvable, + _is_valid_import_stmt, + _parse_project_for_imports, + get_mocked_modules, +) + + +def test_get_import_statements_from_ast(): + content = ( + "import os\n" + "import sys\n" + "from pathlib import Path\n" + "from collections import namedtuple\n" + "# import test" + ) + parsed_content_ast_node = ast.parse(content) + expected_imports = [ + "import os", + "import sys", + "from pathlib import Path", + "from collections import namedtuple", + ] + assert _get_import_statements_from_ast(parsed_content_ast_node) == expected_imports + + +def test_is_module_importable(): + assert _is_module_importable("os") is True + assert _is_module_importable("non_existent_module") is False + + +def test_is_relative_import_resolvable(tmp_path): + file_path = tmp_path / "test.py" + file_path.touch() + (tmp_path / "module.py").touch() + assert _is_relative_import_resolvable(file_path, "module") is True + assert _is_relative_import_resolvable(file_path, "non_existent_module") is False + + +@pytest.mark.parametrize( + "statement,expected", + [ + ("import os", True), + ("from os import path", True), + ("", False), + ("import", False), + (123, False), + ], +) +def test_is_valid_import_stmt(statement, expected): + assert _is_valid_import_stmt(statement) == expected + + +@pytest.mark.parametrize( + "is_module_importable, 
is_relative_import_resolvable, expected_unresolvable", + [ + (True, True, []), + (True, False, []), + (False, True, ["import os", "import non_existent_module"]), + ( + False, + False, + [ + "import os", + "from sys import path", + "import non_existent_module", + "from non_existent_module import path", + ], + ), + ], +) +def test_get_unresolvable_imports( + is_module_importable, is_relative_import_resolvable, expected_unresolvable, mocker +): + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser._is_module_importable", + return_value=is_module_importable, + ) + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser._is_relative_import_resolvable", + return_value=is_relative_import_resolvable, + ) + file_path = Path("/fake/path") + import_statements = [ + "import os", + "from sys import path", + "import non_existent_module", + "from non_existent_module import path", + ] + assert ( + _get_unresolvable_imports(file_path, import_statements) == expected_unresolvable + ) + + +def test_parse_project_for_imports(tmp_path): + file1 = tmp_path / "file1.py" + file2 = tmp_path / "file2.py" + file1.write_text("import os\nfrom sys import path") + file2.write_text("import ast\nfrom collections import namedtuple") + expected_imports = { + file1: ["import os", "from sys import path"], + file2: ["import ast", "from collections import namedtuple"], + } + assert _parse_project_for_imports(tmp_path) == expected_imports + + +def test_create_mock_imports(): + unresolvable_imports = [ + "import non_existent_module", + "from non_existent_module import path", + ] + mock_modules = {} + _create_mock_imports(unresolvable_imports, mock_modules) + assert "non_existent_module" in mock_modules + assert isinstance(mock_modules["non_existent_module"], MagicMock) + + +def test_get_mocked_modules(tmp_path, mocker): + file1 = tmp_path / "file1.py" + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser._parse_project_for_imports", + return_value={file1: ["import os", "from sys import path"]}, + ) + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser._get_unresolvable_imports", + return_value=["from sys import path"], + ) + mocked_modules = get_mocked_modules(tmp_path) + assert "sys" in mocked_modules + assert isinstance(mocked_modules["sys"], MagicMock) diff --git a/package/tests/test_launchers/test_cli.py b/package/tests/test_launchers/test_cli.py index ed32a8be5..e79e5e197 100755 --- a/package/tests/test_launchers/test_cli.py +++ b/package/tests/test_launchers/test_cli.py @@ -362,7 +362,7 @@ def test_kedro_viz_command_with_autoreload( "include_hooks": False, "package_name": None, "extra_params": {}, - "is_lite": False + "is_lite": False, }, "watcher_cls": RegExpWatcher, "watcher_kwargs": {"re_files": "^.*(\\.yml|\\.yaml|\\.py|\\.json)$"}, From 8e0ae73c5b1e03f295a315951646c0b6344afe14 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 17 Jul 2024 19:58:25 -0500 Subject: [PATCH 12/34] sporadic working needs testing --- .../integrations/kedro/data_catalog_lite.py | 81 +++++++++++++++++++ .../integrations/kedro/data_loader.py | 11 ++- 2 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 package/kedro_viz/integrations/kedro/data_catalog_lite.py diff --git a/package/kedro_viz/integrations/kedro/data_catalog_lite.py b/package/kedro_viz/integrations/kedro/data_catalog_lite.py new file mode 100644 index 000000000..905cd871e --- /dev/null +++ b/package/kedro_viz/integrations/kedro/data_catalog_lite.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +import copy +from typing import Any + 
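# A minimal sketch of the fallback behaviour this new file implements, using an
# illustrative helper name (`_dataset_or_memory` is not part of the patch): any
# catalog entry whose dataset class cannot be instantiated is replaced with a
# MemoryDataset stub, so graph construction can proceed without the underlying
# dependency being installed.
from kedro.io import MemoryDataset
from kedro.io.core import AbstractDataset, DatasetError

def _dataset_or_memory(ds_name, ds_config):
    try:
        return AbstractDataset.from_config(ds_name, ds_config)
    except DatasetError:
        # e.g. a missing dataset plugin or driver; keep a placeholder instead
        return MemoryDataset()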
+from kedro.io.core import ( + AbstractDataset, + DatasetError, + DatasetNotFoundError, + generate_timestamp, +) +from kedro.io.data_catalog import DataCatalog, _resolve_credentials +from kedro.io.memory_dataset import MemoryDataset + + +class DataCatalogLite(DataCatalog): + @classmethod + def from_config( + cls, + catalog: dict[str, dict[str, Any]] | None, + credentials: dict[str, dict[str, Any]] | None = None, + load_versions: dict[str, str] | None = None, + save_version: str | None = None, + ) -> DataCatalog: + + datasets = {} + dataset_patterns = {} + catalog = copy.deepcopy(catalog) or {} + credentials = copy.deepcopy(credentials) or {} + save_version = save_version or generate_timestamp() + load_versions = copy.deepcopy(load_versions) or {} + user_default = {} + + for ds_name, ds_config in catalog.items(): + if not isinstance(ds_config, dict): + raise DatasetError( + f"Catalog entry '{ds_name}' is not a valid dataset configuration. " + "\nHint: If this catalog entry is intended for variable interpolation, " + "make sure that the key is preceded by an underscore." + ) + + ds_config = _resolve_credentials( # noqa: PLW2901 + ds_config, credentials + ) + if cls._is_pattern(ds_name): + # Add each factory to the dataset_patterns dict. + dataset_patterns[ds_name] = ds_config + + else: + try: + datasets[ds_name] = AbstractDataset.from_config( + ds_name, ds_config, load_versions.get(ds_name), save_version + ) + except DatasetError: + datasets[ds_name] = MemoryDataset() + + sorted_patterns = cls._sort_patterns(dataset_patterns) + if sorted_patterns: + # If the last pattern is a catch-all pattern, pop it and set it as the default + if cls._specificity(list(sorted_patterns.keys())[-1]) == 0: + last_pattern = sorted_patterns.popitem() + user_default = {last_pattern[0]: last_pattern[1]} + + missing_keys = [ + key + for key in load_versions.keys() + if not (key in catalog or cls._match_pattern(sorted_patterns, key)) + ] + if missing_keys: + raise DatasetNotFoundError( + f"'load_versions' keys [{', '.join(sorted(missing_keys))}] " + f"are not found in the catalog." 
+ ) + + return DataCatalog( + datasets=datasets, + dataset_patterns=sorted_patterns, + load_versions=load_versions, + save_version=save_version, + default_pattern=user_default, + ) \ No newline at end of file diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 2b64f7cc3..f4d926df5 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -20,6 +20,7 @@ from kedro.pipeline import Pipeline from kedro_viz.constants import VIZ_METADATA_ARGS +# from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite from kedro_viz.integrations.kedro.lite_parser import get_mocked_modules logger = logging.getLogger(__name__) @@ -82,6 +83,7 @@ def _load_data_helper( include_hooks: bool = False, package_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, + is_lite: bool = False, ): if package_name: configure_project(package_name) @@ -101,6 +103,11 @@ def _load_data_helper( context = session.load_context() session_store = session._store + + # if is_lite: + # project_settings = _ProjectSettings() + # project_settings._DATA_CATALOG_CLASS = DataCatalogLite + catalog = context.catalog # Pipelines is a lazy dict-like object, so we force it to populate here @@ -141,12 +148,12 @@ def load_data( try: _update_sys_modules(mocked_modules) return _load_data_helper( - project_path, env, include_hooks, package_name, extra_params + project_path, env, include_hooks, package_name, extra_params, is_lite ) finally: sys.modules.clear() sys.modules.update(original_sys_modules) else: return _load_data_helper( - project_path, env, include_hooks, package_name, extra_params + project_path, env, include_hooks, package_name, extra_params, is_lite ) From 38782e313ecefc1311bfdd5b8be0a6f4ae47a9f3 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Thu, 18 Jul 2024 11:27:11 -0500 Subject: [PATCH 13/34] update sys modules with patch --- .../integrations/kedro/data_catalog_lite.py | 33 ++- .../integrations/kedro/data_loader.py | 42 ++-- .../integrations/kedro/lite_parser.py | 193 +++++++++--------- 3 files changed, 145 insertions(+), 123 deletions(-) mode change 100644 => 100755 package/kedro_viz/integrations/kedro/data_catalog_lite.py diff --git a/package/kedro_viz/integrations/kedro/data_catalog_lite.py b/package/kedro_viz/integrations/kedro/data_catalog_lite.py old mode 100644 new mode 100755 index 905cd871e..9aa4b60a2 --- a/package/kedro_viz/integrations/kedro/data_catalog_lite.py +++ b/package/kedro_viz/integrations/kedro/data_catalog_lite.py @@ -1,7 +1,5 @@ -from __future__ import annotations - import copy -from typing import Any +from typing import Any, Dict from kedro.io.core import ( AbstractDataset, @@ -9,11 +7,29 @@ DatasetNotFoundError, generate_timestamp, ) -from kedro.io.data_catalog import DataCatalog, _resolve_credentials +from kedro.io.data_catalog import DataCatalog, Patterns, _resolve_credentials from kedro.io.memory_dataset import MemoryDataset class DataCatalogLite(DataCatalog): + def __init__( + self, + datasets: dict[str, AbstractDataset] | None = None, + feed_dict: dict[str, Any] | None = None, + dataset_patterns: Dict[str, Dict[str, Any]] | None = None, + load_versions: dict[str, str] | None = None, + save_version: str | None = None, + default_pattern: Dict[str, Dict[str, Any]] | None = None, + ) -> None: + super().__init__( + datasets, + feed_dict, + dataset_patterns, + load_versions, + save_version, + default_pattern, + ) + @classmethod def 
from_config( cls, @@ -22,7 +38,6 @@ def from_config( load_versions: dict[str, str] | None = None, save_version: str | None = None, ) -> DataCatalog: - datasets = {} dataset_patterns = {} catalog = copy.deepcopy(catalog) or {} @@ -39,9 +54,7 @@ def from_config( "make sure that the key is preceded by an underscore." ) - ds_config = _resolve_credentials( # noqa: PLW2901 - ds_config, credentials - ) + ds_config = _resolve_credentials(ds_config, credentials) # noqa: PLW2901 if cls._is_pattern(ds_name): # Add each factory to the dataset_patterns dict. dataset_patterns[ds_name] = ds_config @@ -72,10 +85,10 @@ def from_config( f"are not found in the catalog." ) - return DataCatalog( + return cls( datasets=datasets, dataset_patterns=sorted_patterns, load_versions=load_versions, save_version=save_version, default_pattern=user_default, - ) \ No newline at end of file + ) diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index f4d926df5..5585d4232 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -10,9 +10,10 @@ import sys from pathlib import Path from typing import Any, Dict, Optional, Tuple +from unittest.mock import patch from kedro import __version__ -from kedro.framework.project import configure_project, pipelines +from kedro.framework.project import configure_project, pipelines, settings from kedro.framework.session import KedroSession from kedro.framework.session.store import BaseSessionStore from kedro.framework.startup import bootstrap_project @@ -20,8 +21,8 @@ from kedro.pipeline import Pipeline from kedro_viz.constants import VIZ_METADATA_ARGS -# from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite -from kedro_viz.integrations.kedro.lite_parser import get_mocked_modules +from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite +from kedro_viz.integrations.kedro.lite_parser import LiteParser logger = logging.getLogger(__name__) @@ -72,11 +73,6 @@ def _get_dataset_stats(project_path: Path) -> Dict: return {} -def _update_sys_modules(mock_modules): - for module_name, mock in mock_modules.items(): - sys.modules[module_name] = mock - - def _load_data_helper( project_path: Path, env: Optional[str] = None, @@ -85,6 +81,8 @@ def _load_data_helper( extra_params: Optional[Dict[str, Any]] = None, is_lite: bool = False, ): + """Helper to load data from a Kedro project.""" + if package_name: configure_project(package_name) else: @@ -103,11 +101,13 @@ def _load_data_helper( context = session.load_context() session_store = session._store - - # if is_lite: - # project_settings = _ProjectSettings() - # project_settings._DATA_CATALOG_CLASS = DataCatalogLite - + + # Update the DataCatalog class for a custom implementation + # to handle kedro.io.core.DatasetError from + # `settings.DATA_CATALOG_CLASS.from_config` + if is_lite: + settings.DATA_CATALOG_CLASS = DataCatalogLite + catalog = context.catalog # Pipelines is a lazy dict-like object, so we force it to populate here @@ -142,17 +142,17 @@ def load_data( and the session store. 
""" if is_lite: - mocked_modules = get_mocked_modules(project_path) - # Temporarily clear and reload sys.modules to force use of mock_modules - original_sys_modules = sys.modules.copy() - try: - _update_sys_modules(mocked_modules) + lite_parser = LiteParser(project_path) + mocked_modules = lite_parser.get_mocked_modules() + + sys_modules_patch = sys.modules.copy() + sys_modules_patch.update(mocked_modules) + + # Patch actual sys modules + with patch.dict("sys.modules", sys_modules_patch): return _load_data_helper( project_path, env, include_hooks, package_name, extra_params, is_lite ) - finally: - sys.modules.clear() - sys.modules.update(original_sys_modules) else: return _load_data_helper( project_path, env, include_hooks, package_name, extra_params, is_lite diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index ddec3fa56..25f152366 100644 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -1,3 +1,5 @@ +"""`kedro_viz.integrations.kedro.lite_parser` defines a Kedro parser using AST.""" + import ast import importlib.util import logging @@ -8,120 +10,127 @@ logger = logging.getLogger(__name__) -def _get_import_statements_from_ast(parsed_content_ast_node: ast.Module) -> List[str]: - import_statements: List[str] = [] - - for node in ast.walk(parsed_content_ast_node): - if isinstance(node, ast.Import): - for alias in node.names: - import_statements.append(f"import {alias.name}") - elif isinstance(node, ast.ImportFrom): - module = node.module if node.module else "" - for alias in node.names: - import_statements.append(f"from {module} import {alias.name}") - - return import_statements - - -def _is_module_importable(module_name: str) -> bool: - try: - importlib.import_module(module_name) - return True - except ImportError: - return False - +class LiteParser: + def __init__(self, project_path: Path) -> None: + self.project_path = project_path -def _is_relative_import_resolvable(file_path: Path, module_name: str) -> bool: - base_dir = file_path.parent - relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py") - return relative_path.exists() + def _get_import_statements_from_ast( + self, parsed_content_ast_node: ast.Module + ) -> List[str]: + import_statements: List[str] = [] + for node in ast.walk(parsed_content_ast_node): + if isinstance(node, ast.Import): + for alias in node.names: + import_statements.append(f"import {alias.name}") + elif isinstance(node, ast.ImportFrom): + module = node.module if node.module else "" + for alias in node.names: + import_statements.append(f"from {module} import {alias.name}") -def _is_valid_import_stmt(statement: Any) -> bool: - if not isinstance(statement, str) or not statement.strip(): - return False + return import_statements - # Split the statement by spaces - parts = statement.split() + def _is_module_importable(self, module_name: str) -> bool: + try: + importlib.import_module(module_name) + return True + except ImportError: + return False - # Ensure that the statement has at least two parts - if len(parts) < 2: - return False + def _is_relative_import_resolvable(self, file_path: Path, module_name: str) -> bool: + base_dir = file_path.parent + relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py") + return relative_path.exists() - return True + def _is_valid_import_stmt(self, statement: Any) -> bool: + if not isinstance(statement, str) or not statement.strip(): + return False + # 
Split the statement by spaces + parts = statement.split() -def _get_unresolvable_imports( - file_path: Path, import_statements: List[str] -) -> List[str]: - unresolvable_imports: List[str] = [] + # Ensure that the statement has at least two parts + if len(parts) < 2: + return False - for statement in import_statements: - if _is_valid_import_stmt(statement): - if statement.startswith("import "): - module_name = statement.split(" ")[1].split(".")[0] - - if not _is_module_importable(module_name): - unresolvable_imports.append(statement) + return True - elif statement.startswith("from "): - parts = statement.split(" ") - module_name = parts[1] + def _get_unresolvable_imports( + self, file_path: Path, import_statements: List[str] + ) -> List[str]: + unresolvable_imports: List[str] = [] - if _is_relative_import_resolvable(file_path, module_name): - continue + for statement in import_statements: + if self._is_valid_import_stmt(statement): + if statement.startswith("import "): + module_name = statement.split(" ")[1].split(".")[0] - # only checking for parent module - module_name = module_name.split(".")[0] + if not self._is_module_importable(module_name): + unresolvable_imports.append(statement) - if not _is_module_importable(module_name): - unresolvable_imports.append(statement) + elif statement.startswith("from "): + parts = statement.split(" ") + module_name = parts[1] - return unresolvable_imports + if self._is_relative_import_resolvable(file_path, module_name): + continue + # only checking for parent module + module_name = module_name.split(".")[0] -def _parse_project_for_imports(project_path: Path) -> Dict[Path, List[str]]: - all_imports: Dict[Path, List[str]] = {} + if not self._is_module_importable(module_name): + unresolvable_imports.append(statement) - for filepath in project_path.rglob("*.py"): - with open(filepath, "r") as file: - file_content = file.read() + return unresolvable_imports - # parse file content using ast - parsed_content_ast_node: ast.Module = ast.parse(file_content) - import_statements = _get_import_statements_from_ast(parsed_content_ast_node) - all_imports[filepath] = import_statements - return all_imports + def _parse_project_for_imports(self, project_path: Path) -> Dict[Path, List[str]]: + all_imports: Dict[Path, List[str]] = {} + for filepath in project_path.rglob("*.py"): + with open(filepath, "r") as file: + file_content = file.read() -def _create_mock_imports( - unresolvable_imports: List[str], mock_modules: Dict[str, MagicMock] -) -> None: - for statement in unresolvable_imports: - # needs error handling - module_name = statement.split(" ")[1] - module_parts = module_name.split(".") - full_module_name = "" - for idx, sub_module_name in enumerate(module_parts): - full_module_name = ( - sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" + # parse file content using ast + parsed_content_ast_node: ast.Module = ast.parse(file_content) + import_statements = self._get_import_statements_from_ast( + parsed_content_ast_node + ) + all_imports[filepath] = import_statements + return all_imports + + def _create_mock_imports( + self, unresolvable_imports: List[str], mock_modules: Dict[str, MagicMock] + ) -> None: + for statement in unresolvable_imports: + # needs error handling + module_name = statement.split(" ")[1] + module_parts = module_name.split(".") + full_module_name = "" + for idx, sub_module_name in enumerate(module_parts): + full_module_name = ( + sub_module_name + if idx == 0 + else f"{full_module_name}.{sub_module_name}" + ) + if 
full_module_name not in mock_modules: + mock_modules[full_module_name] = MagicMock() + + def get_mocked_modules(self) -> Dict[str, MagicMock]: + all_imports: Dict[Path, List[str]] = self._parse_project_for_imports( + self.project_path + ) + mocked_modules: Dict[str, MagicMock] = {} + + for file_path, imports in all_imports.items(): + unresolvable_imports: List[str] = self._get_unresolvable_imports( + file_path, imports ) - if full_module_name not in mock_modules: - mock_modules[full_module_name] = MagicMock() - - -def get_mocked_modules(project_path: Path) -> Dict[str, MagicMock]: - all_imports: Dict[Path, List[str]] = _parse_project_for_imports(project_path) - mock_modules: Dict[str, MagicMock] = {} - - for file_path, imports in all_imports.items(): - unresolvable_imports: List[str] = _get_unresolvable_imports(file_path, imports) - print(f"File Path: {file_path}, Import Errors: {unresolvable_imports}") + print(f"File Path: {file_path}, Unresolved imports: {unresolvable_imports}") - # Create mock imports - _create_mock_imports(unresolvable_imports, mock_modules) + # Create mock imports + self._create_mock_imports(unresolvable_imports, mocked_modules) - print(f"Mocked modules: {mock_modules}") + print(f"Mocked modules: {mocked_modules}") - return mock_modules + return mocked_modules From 1fc1faf7ac5d9181895c88902ad5850c737e82fd Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Thu, 18 Jul 2024 16:43:09 -0500 Subject: [PATCH 14/34] fix lint and pytests --- .../integrations/kedro/data_catalog_lite.py | 75 ++--- .../integrations/kedro/data_loader.py | 1 + .../integrations/kedro/lite_parser.py | 67 ++-- package/kedro_viz/server.py | 1 + .../test_data_catalog_lite.py | 287 ++++++++++++++++++ .../test_integrations/test_data_loader.py | 93 ------ .../test_integrations/test_lite_parser.py | 265 ++++++++-------- 7 files changed, 491 insertions(+), 298 deletions(-) create mode 100644 package/tests/test_integrations/test_data_catalog_lite.py delete mode 100644 package/tests/test_integrations/test_data_loader.py diff --git a/package/kedro_viz/integrations/kedro/data_catalog_lite.py b/package/kedro_viz/integrations/kedro/data_catalog_lite.py index 9aa4b60a2..97440e502 100755 --- a/package/kedro_viz/integrations/kedro/data_catalog_lite.py +++ b/package/kedro_viz/integrations/kedro/data_catalog_lite.py @@ -1,5 +1,9 @@ +"""``DataCatalogLite`` is a custom implementation of Kedro's ``DataCatalog`` +to provide a MemoryDataset instance when running Kedro-Viz in lite mode. 
+""" + import copy -from typing import Any, Dict +from typing import Any, Optional from kedro.io.core import ( AbstractDataset, @@ -7,36 +11,23 @@ DatasetNotFoundError, generate_timestamp, ) -from kedro.io.data_catalog import DataCatalog, Patterns, _resolve_credentials +from kedro.io.data_catalog import DataCatalog, _resolve_credentials from kedro.io.memory_dataset import MemoryDataset class DataCatalogLite(DataCatalog): - def __init__( - self, - datasets: dict[str, AbstractDataset] | None = None, - feed_dict: dict[str, Any] | None = None, - dataset_patterns: Dict[str, Dict[str, Any]] | None = None, - load_versions: dict[str, str] | None = None, - save_version: str | None = None, - default_pattern: Dict[str, Dict[str, Any]] | None = None, - ) -> None: - super().__init__( - datasets, - feed_dict, - dataset_patterns, - load_versions, - save_version, - default_pattern, - ) + """``DataCatalogLite`` is a custom implementation of Kedro's ``DataCatalog`` + to provide a MemoryDataset instance by overriding ``from_config`` of ``DataCatalog`` + when running Kedro-Viz in lite mode. + """ @classmethod def from_config( cls, - catalog: dict[str, dict[str, Any]] | None, - credentials: dict[str, dict[str, Any]] | None = None, - load_versions: dict[str, str] | None = None, - save_version: str | None = None, + catalog: Optional[dict[str, dict[str, Any]]], + credentials: Optional[dict[str, dict[str, Any]]] = None, + load_versions: Optional[dict[str, str]] = None, + save_version: Optional[str] = None, ) -> DataCatalog: datasets = {} dataset_patterns = {} @@ -54,18 +45,23 @@ def from_config( "make sure that the key is preceded by an underscore." ) - ds_config = _resolve_credentials(ds_config, credentials) # noqa: PLW2901 - if cls._is_pattern(ds_name): - # Add each factory to the dataset_patterns dict. - dataset_patterns[ds_name] = ds_config + try: + ds_config = _resolve_credentials( + ds_config, credentials + ) # noqa: PLW2901 + if cls._is_pattern(ds_name): + # Add each factory to the dataset_patterns dict. + dataset_patterns[ds_name] = ds_config - else: - try: - datasets[ds_name] = AbstractDataset.from_config( - ds_name, ds_config, load_versions.get(ds_name), save_version - ) - except DatasetError: - datasets[ds_name] = MemoryDataset() + else: + try: + datasets[ds_name] = AbstractDataset.from_config( + ds_name, ds_config, load_versions.get(ds_name), save_version + ) + except DatasetError: + datasets[ds_name] = MemoryDataset() + except KeyError: + datasets[ds_name] = MemoryDataset() sorted_patterns = cls._sort_patterns(dataset_patterns) if sorted_patterns: @@ -74,17 +70,6 @@ def from_config( last_pattern = sorted_patterns.popitem() user_default = {last_pattern[0]: last_pattern[1]} - missing_keys = [ - key - for key in load_versions.keys() - if not (key in catalog or cls._match_pattern(sorted_patterns, key)) - ] - if missing_keys: - raise DatasetNotFoundError( - f"'load_versions' keys [{', '.join(sorted(missing_keys))}] " - f"are not found in the catalog." - ) - return cls( datasets=datasets, dataset_patterns=sorted_patterns, diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 5585d4232..c467c7f9e 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -137,6 +137,7 @@ def load_data( for underlying KedroContext. If specified, will update (and therefore take precedence over) the parameters retrieved from the project configuration. 
+ is_lite: A flag to run Kedro-Viz in lite mode. Returns: A tuple containing the data catalog and the pipeline dictionary and the session store. diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index 25f152366..b031499c9 100644 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -11,11 +11,18 @@ class LiteParser: + """Represents a Kedro Parser which uses AST + + Args: + project_path (Path): the path where the Kedro project is located. + """ + def __init__(self, project_path: Path) -> None: - self.project_path = project_path + self._project_path = project_path + @staticmethod def _get_import_statements_from_ast( - self, parsed_content_ast_node: ast.Module + parsed_content_ast_node: ast.Module, ) -> List[str]: import_statements: List[str] = [] @@ -30,19 +37,22 @@ def _get_import_statements_from_ast( return import_statements - def _is_module_importable(self, module_name: str) -> bool: + @staticmethod + def _is_module_importable(module_name: str) -> bool: try: importlib.import_module(module_name) return True except ImportError: return False - def _is_relative_import_resolvable(self, file_path: Path, module_name: str) -> bool: + @staticmethod + def _is_relative_import_resolvable(file_path: Path, module_name: str) -> bool: base_dir = file_path.parent relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py") return relative_path.exists() - def _is_valid_import_stmt(self, statement: Any) -> bool: + @staticmethod + def _is_valid_import_stmt(statement: Any) -> bool: if not isinstance(statement, str) or not statement.strip(): return False @@ -55,6 +65,23 @@ def _is_valid_import_stmt(self, statement: Any) -> bool: return True + @staticmethod + def _create_mock_imports( + unresolvable_imports: List[str], mock_modules: Dict[str, MagicMock] + ) -> None: + for statement in unresolvable_imports: + module_name = statement.split(" ")[1] + module_parts = module_name.split(".") + full_module_name = "" + for idx, sub_module_name in enumerate(module_parts): + full_module_name = ( + sub_module_name + if idx == 0 + else f"{full_module_name}.{sub_module_name}" + ) + if full_module_name not in mock_modules: + mock_modules[full_module_name] = MagicMock() + def _get_unresolvable_imports( self, file_path: Path, import_statements: List[str] ) -> List[str]: @@ -83,11 +110,11 @@ def _get_unresolvable_imports( return unresolvable_imports - def _parse_project_for_imports(self, project_path: Path) -> Dict[Path, List[str]]: + def _parse_project_for_imports(self) -> Dict[Path, List[str]]: all_imports: Dict[Path, List[str]] = {} - for filepath in project_path.rglob("*.py"): - with open(filepath, "r") as file: + for filepath in self._project_path.rglob("*.py"): + with open(filepath, "r", encoding="utf-8") as file: file_content = file.read() # parse file content using ast @@ -98,27 +125,11 @@ def _parse_project_for_imports(self, project_path: Path) -> Dict[Path, List[str] all_imports[filepath] = import_statements return all_imports - def _create_mock_imports( - self, unresolvable_imports: List[str], mock_modules: Dict[str, MagicMock] - ) -> None: - for statement in unresolvable_imports: - # needs error handling - module_name = statement.split(" ")[1] - module_parts = module_name.split(".") - full_module_name = "" - for idx, sub_module_name in enumerate(module_parts): - full_module_name = ( - sub_module_name - if idx == 0 - else f"{full_module_name}.{sub_module_name}" - ) - if 
full_module_name not in mock_modules:
-                mock_modules[full_module_name] = MagicMock()
-
     def get_mocked_modules(self) -> Dict[str, MagicMock]:
-        all_imports: Dict[Path, List[str]] = self._parse_project_for_imports(
-            self.project_path
-        )
+        """Returns a dictionary of mocked modules for all the
+        unresolvable imports found across the files in your Kedro project
+        """
+        all_imports: Dict[Path, List[str]] = self._parse_project_for_imports()
         mocked_modules: Dict[str, MagicMock] = {}
 
         for file_path, imports in all_imports.items():
diff --git a/package/kedro_viz/server.py b/package/kedro_viz/server.py
index e63060783..165463dbb 100644
--- a/package/kedro_viz/server.py
+++ b/package/kedro_viz/server.py
@@ -110,6 +110,7 @@ def run_server(
             for underlying KedroContext. If specified, will update (and therefore
             take precedence over) the parameters retrieved from the project
             configuration.
+        is_lite: A flag to run Kedro-Viz in lite mode.
     """
 
     path = Path(project_path) if project_path else Path.cwd()
diff --git a/package/tests/test_integrations/test_data_catalog_lite.py b/package/tests/test_integrations/test_data_catalog_lite.py
new file mode 100644
index 000000000..b0ae6fb87
--- /dev/null
+++ b/package/tests/test_integrations/test_data_catalog_lite.py
@@ -0,0 +1,287 @@
+import logging
+import re
+import sys
+from copy import deepcopy
+
+import pandas as pd
+import pytest
+from kedro.io import DatasetError
+from pandas.testing import assert_frame_equal
+
+from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite
+
+
+@pytest.fixture
+def filepath(tmp_path):
+    return (tmp_path / "some" / "dir" / "test.csv").as_posix()
+
+
+@pytest.fixture
+def dummy_dataframe():
+    return pd.DataFrame({"col1": [1, 2], "col2": [4, 5], "col3": [5, 6]})
+
+
+@pytest.fixture
+def sane_config(filepath):
+    return {
+        "catalog": {
+            "boats": {"type": "pandas.CSVDataset", "filepath": filepath},
+            "cars": {
+                "type": "pandas.CSVDataset",
+                "filepath": "s3://test_bucket/test_file.csv",
+                "credentials": "s3_credentials",
+            },
+        },
+        "credentials": {
+            "s3_credentials": {"key": "FAKE_ACCESS_KEY", "secret": "FAKE_SECRET_KEY"}
+        },
+    }
+
+
+@pytest.fixture
+def sane_config_with_nested_creds(sane_config):
+    sane_config["catalog"]["cars"]["credentials"] = {
+        "client_kwargs": {"credentials": "other_credentials"},
+        "key": "secret",
+    }
+    sane_config["credentials"]["other_credentials"] = {
+        "client_kwargs": {
+            "aws_access_key_id": "OTHER_FAKE_ACCESS_KEY",
+            "aws_secret_access_key": "OTHER_FAKE_SECRET_KEY",
+        }
+    }
+    return sane_config
+
+
+@pytest.fixture
+def bad_config(filepath):
+    return {
+        "bad": {"type": "tests.io.test_data_catalog.BadDataset", "filepath": filepath}
+    }
+
+
+@pytest.fixture
+def data_catalog_lite_from_config(sane_config):
+    return DataCatalogLite.from_config(**sane_config)
+
+
+class TestDataCatalogLiteFromConfig:
+    def test_from_sane_config(self, data_catalog_lite_from_config, dummy_dataframe):
+        """Test populating the data catalog from config"""
+        data_catalog_lite_from_config.save("boats", dummy_dataframe)
+        reloaded_df = data_catalog_lite_from_config.load("boats")
+        assert_frame_equal(reloaded_df, dummy_dataframe)
+
+    def test_config_missing_type(self, sane_config):
+        """Check for no error if the type attribute is missing for some datasets
+        in the config"""
+        del sane_config["catalog"]["boats"]["type"]
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_config_invalid_module(self, sane_config):
+        """Check for no error if the type points to a nonexistent module"""
+
+        sane_config["catalog"]["boats"][
+            "type"
+        ] = "kedro.invalid_module_name.io.CSVDataset"
+
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_config_relative_import(self, sane_config):
+        """Check for no error if the type points to a relative import"""
+        sane_config["catalog"]["boats"]["type"] = ".CSVDatasetInvalid"
+
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_config_missing_class(self, sane_config):
+        """Check for no error if the type points to a nonexistent class"""
+        sane_config["catalog"]["boats"]["type"] = "kedro.io.CSVDatasetInvalid"
+
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    @pytest.mark.skipif(
+        sys.version_info < (3, 9),
+        reason="for python 3.8 kedro-datasets version 1.8 is used which has the old spelling",
+    )
+    def test_config_incorrect_spelling(self, sane_config):
+        """Check for no error if the type uses the old DataSet spelling"""
+        sane_config["catalog"]["boats"]["type"] = "pandas.CSVDataSet"
+
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_config_invalid_dataset(self, sane_config):
+        """Check for no error if the type points to an invalid class"""
+        sane_config["catalog"]["boats"]["type"] = "DataCatalogLite"
+
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_config_invalid_arguments(self, sane_config):
+        """Check for no error if the dataset config contains invalid arguments"""
+        sane_config["catalog"]["boats"]["save_and_load_args"] = False
+
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_config_invalid_dataset_config(self, sane_config):
+        """Check for an error if the catalog entry is not a valid dataset config"""
+        sane_config["catalog"]["invalid_entry"] = "some string"
+        pattern = (
+            "Catalog entry 'invalid_entry' is not a valid dataset configuration. "
+            "\nHint: If this catalog entry is intended for variable interpolation, "
+            "make sure that the key is preceded by an underscore."
+        )
+        with pytest.raises(DatasetError, match=pattern):
+            DataCatalogLite.from_config(**sane_config)
+
+    def test_empty_config(self):
+        """Test empty config"""
+        assert DataCatalogLite.from_config(None)
+
+    def test_missing_credentials(self, sane_config):
+        """Check for no error if credentials can't be located"""
+        sane_config["catalog"]["cars"]["credentials"] = "missing"
+
+        try:
+            # DataCatalogLite should not raise KeyError
+            DataCatalogLite.from_config(**sane_config)
+        except KeyError:
+            pytest.fail("DataCatalogLite.from_config raised KeyError unexpectedly")
+
+    def test_link_credentials(self, sane_config, mocker):
+        """Test credentials being linked to the relevant dataset"""
+        mock_client = mocker.patch("kedro_datasets.pandas.csv_dataset.fsspec")
+        config = deepcopy(sane_config)
+        del config["catalog"]["boats"]
+
+        DataCatalogLite.from_config(**config)
+
+        expected_client_kwargs = sane_config["credentials"]["s3_credentials"]
+        mock_client.filesystem.assert_called_with("s3", **expected_client_kwargs)
+
+    def test_nested_credentials(self, sane_config_with_nested_creds, mocker):
+        mock_client = mocker.patch("kedro_datasets.pandas.csv_dataset.fsspec")
+        config = deepcopy(sane_config_with_nested_creds)
+        del config["catalog"]["boats"]
+        DataCatalogLite.from_config(**config)
+
+        expected_client_kwargs = {
+            "client_kwargs": {
+                "credentials": {
+                    "client_kwargs": {
+                        "aws_access_key_id": "OTHER_FAKE_ACCESS_KEY",
+                        "aws_secret_access_key": "OTHER_FAKE_SECRET_KEY",
+                    }
+                }
+            },
+            "key": "secret",
+        }
+        mock_client.filesystem.assert_called_once_with("s3", **expected_client_kwargs)
+
+    def test_missing_nested_credentials(self, sane_config_with_nested_creds):
+        """Check for no error if credentials are missing from nested credentials"""
+        del sane_config_with_nested_creds["credentials"]["other_credentials"]
+
+        try:
+            # DataCatalogLite should not raise KeyError
+            DataCatalogLite.from_config(**sane_config_with_nested_creds)
+        except KeyError:
+            pytest.fail("DataCatalogLite.from_config raised KeyError unexpectedly")
+
+    def test_missing_dependency(self, sane_config, mocker):
+        """Test that no error is thrown when a dependency is missing."""
+        pattern = "dependency issue"
+
+        def dummy_load(obj_path, *args, **kwargs):
+            if obj_path == "kedro_datasets.pandas.CSVDataset":
+                raise AttributeError(pattern)
+            if obj_path == "kedro_datasets.pandas.__all__":
+                return ["CSVDataset"]
+            return None
+
+        mocker.patch("kedro.io.core.load_obj", side_effect=dummy_load)
+
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(**sane_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_idempotent_catalog(self, sane_config):
+        """Test that data catalog instantiations are idempotent"""
+        _ = DataCatalogLite.from_config(**sane_config)
+        catalog = DataCatalogLite.from_config(**sane_config)
+        assert catalog
+
+    def test_error_dataset_init(self, bad_config):
+        """Check for no error when trying to instantiate an erroneous dataset"""
+        try:
+            # DataCatalogLite should not raise DatasetError
+            DataCatalogLite.from_config(bad_config)
+        except DatasetError:
+            pytest.fail("DataCatalogLite.from_config raised DatasetError unexpectedly")
+
+    def test_confirm(self, tmp_path, caplog, mocker):
+        """Test confirming a dataset"""
+        with caplog.at_level(logging.INFO):
+            mock_confirm = mocker.patch(
+                "kedro_datasets.partitions.incremental_dataset.IncrementalDataset.confirm"
+            )
+            catalog = {
+                "ds_to_confirm": {
+                    "type": 
"kedro_datasets.partitions.incremental_dataset.IncrementalDataset", + "dataset": "pandas.CSVDataset", + "path": str(tmp_path), + } + } + data_catalog = DataCatalogLite.from_config(catalog=catalog) + data_catalog.confirm("ds_to_confirm") + assert caplog.record_tuples == [ + ( + "kedro.io.data_catalog", + logging.INFO, + "Confirming dataset 'ds_to_confirm'", + ) + ] + mock_confirm.assert_called_once_with() + + @pytest.mark.parametrize( + "dataset_name,pattern", + [ + ("missing", "Dataset 'missing' not found in the catalog"), + ("boats", "Dataset 'boats' does not have 'confirm' method"), + ], + ) + def test_bad_confirm(self, sane_config, dataset_name, pattern): + """Test confirming non existent dataset or the one that + does not have `confirm` method""" + data_catalog_lite = DataCatalogLite.from_config(**sane_config) + + with pytest.raises(DatasetError, match=re.escape(pattern)): + data_catalog_lite.confirm(dataset_name) diff --git a/package/tests/test_integrations/test_data_loader.py b/package/tests/test_integrations/test_data_loader.py deleted file mode 100644 index 2a7d79097..000000000 --- a/package/tests/test_integrations/test_data_loader.py +++ /dev/null @@ -1,93 +0,0 @@ -import json -import shutil -import subprocess -import sys -import tempfile -from pathlib import Path -from unittest.mock import patch - -import pytest - -from kedro_viz.integrations.kedro.data_loader import load_data -from kedro_viz.integrations.kedro.lite_parser import get_mocked_modules - - -@pytest.fixture -def kedro_project_path(): - # Setup a temporary directory - tmpdir = Path(tempfile.mkdtemp()) - try: - subprocess.run( - ["kedro", "new", "--name=spaceflights", "--tools=viz", "--example=y"], - cwd=tmpdir, - check=True, - ) - project_dir = next(tmpdir.glob("*/"), None) - if project_dir is None: - raise FileNotFoundError("Kedro project was not created successfully.") - yield project_dir - finally: - shutil.rmtree(tmpdir) - - -def test_load_data_with_dependencies(kedro_project_path): - # Install the project's dependencies - subprocess.run( - ["pip", "install", "-r", kedro_project_path / "requirements.txt"], check=True - ) - - # Load data with all dependencies installed - data_catalog, pipelines, session_store, context = load_data(kedro_project_path) - - assert data_catalog is not None - assert pipelines is not None - assert session_store is not None - assert context is not None - - -# [TODO: WIP, need help] -def test_load_data_without_dependencies(kedro_project_path): - try: - # # Create a new conda environment - # subprocess.run(["conda", "create", "--name", "mytestenv", "python=3.9", "--yes"], check=True) - - # # Activate the conda environment and run subsequent commands within it - # activate_cmd = ["conda", "activate", "mytestenv"] - - # # Run the combined command in shell - # subprocess.run(activate_cmd, shell=True, check=True) - - mocked_modules_with_deps = get_mocked_modules(kedro_project_path) - _, pipelines_dict_with_deps, _, _ = load_data(kedro_project_path, is_lite=False) - - assert mocked_modules_with_deps == {} - - subprocess.run( - ["pip", "uninstall", "-r", kedro_project_path / "requirements.txt", "-y"], - check=True, - ) - - # Install necessary dependencies using pip within the conda environment - subprocess.run(["pip", "install", "../kedro", "./package"], check=True) - - mocked_modules_without_deps = get_mocked_modules(kedro_project_path) - assert len(mocked_modules_without_deps.keys()) > 0 - - _, pipelines_dict_without_deps, _, _ = load_data( - kedro_project_path, is_lite=True - ) - - assert 
pipelines_dict_with_deps == pipelines_dict_without_deps - - finally: - # Deactivate the conda environment - # deactivate_cmd = ["conda", "deactivate"] - # subprocess.run(deactivate_cmd, shell=True, check=True) - - # # Delete the conda environment - # remove_cmd = ["conda", "remove", "--name", "mytestenv", "--all", "--yes"] - # subprocess.run(remove_cmd, shell=True, check=True) - subprocess.run( - ["pip", "install", "-r", kedro_project_path / "requirements.txt"], - check=True, - ) diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py index a5ecc7fff..d7dd860c6 100644 --- a/package/tests/test_integrations/test_lite_parser.py +++ b/package/tests/test_integrations/test_lite_parser.py @@ -4,137 +4,138 @@ import pytest -from kedro_viz.integrations.kedro.lite_parser import ( - _create_mock_imports, - _get_import_statements_from_ast, - _get_unresolvable_imports, - _is_module_importable, - _is_relative_import_resolvable, - _is_valid_import_stmt, - _parse_project_for_imports, - get_mocked_modules, -) - - -def test_get_import_statements_from_ast(): - content = ( - "import os\n" - "import sys\n" - "from pathlib import Path\n" - "from collections import namedtuple\n" - "# import test" - ) - parsed_content_ast_node = ast.parse(content) - expected_imports = [ - "import os", - "import sys", - "from pathlib import Path", - "from collections import namedtuple", - ] - assert _get_import_statements_from_ast(parsed_content_ast_node) == expected_imports - - -def test_is_module_importable(): - assert _is_module_importable("os") is True - assert _is_module_importable("non_existent_module") is False - - -def test_is_relative_import_resolvable(tmp_path): - file_path = tmp_path / "test.py" - file_path.touch() - (tmp_path / "module.py").touch() - assert _is_relative_import_resolvable(file_path, "module") is True - assert _is_relative_import_resolvable(file_path, "non_existent_module") is False - - -@pytest.mark.parametrize( - "statement,expected", - [ - ("import os", True), - ("from os import path", True), - ("", False), - ("import", False), - (123, False), - ], -) -def test_is_valid_import_stmt(statement, expected): - assert _is_valid_import_stmt(statement) == expected - - -@pytest.mark.parametrize( - "is_module_importable, is_relative_import_resolvable, expected_unresolvable", - [ - (True, True, []), - (True, False, []), - (False, True, ["import os", "import non_existent_module"]), - ( - False, - False, - [ - "import os", - "from sys import path", - "import non_existent_module", - "from non_existent_module import path", - ], - ), - ], -) -def test_get_unresolvable_imports( - is_module_importable, is_relative_import_resolvable, expected_unresolvable, mocker -): - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser._is_module_importable", - return_value=is_module_importable, - ) - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser._is_relative_import_resolvable", - return_value=is_relative_import_resolvable, - ) - file_path = Path("/fake/path") - import_statements = [ - "import os", - "from sys import path", - "import non_existent_module", - "from non_existent_module import path", - ] - assert ( - _get_unresolvable_imports(file_path, import_statements) == expected_unresolvable - ) - - -def test_parse_project_for_imports(tmp_path): - file1 = tmp_path / "file1.py" - file2 = tmp_path / "file2.py" - file1.write_text("import os\nfrom sys import path") - file2.write_text("import ast\nfrom collections import namedtuple") - expected_imports = { - 
file1: ["import os", "from sys import path"], - file2: ["import ast", "from collections import namedtuple"], - } - assert _parse_project_for_imports(tmp_path) == expected_imports - - -def test_create_mock_imports(): - unresolvable_imports = [ - "import non_existent_module", - "from non_existent_module import path", - ] - mock_modules = {} - _create_mock_imports(unresolvable_imports, mock_modules) - assert "non_existent_module" in mock_modules - assert isinstance(mock_modules["non_existent_module"], MagicMock) - - -def test_get_mocked_modules(tmp_path, mocker): - file1 = tmp_path / "file1.py" - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser._parse_project_for_imports", - return_value={file1: ["import os", "from sys import path"]}, +from kedro_viz.integrations.kedro.lite_parser import LiteParser + + +class TestLiteParser: + def test_get_import_statements_from_ast(self): + content = ( + "import os\n" + "import sys\n" + "from pathlib import Path\n" + "from collections import namedtuple\n" + "# import test" + ) + parsed_content_ast_node = ast.parse(content) + expected_imports = [ + "import os", + "import sys", + "from pathlib import Path", + "from collections import namedtuple", + ] + assert ( + LiteParser._get_import_statements_from_ast(parsed_content_ast_node) + == expected_imports + ) + + def test_is_module_importable(self): + assert LiteParser._is_module_importable("os") is True + assert LiteParser._is_module_importable("non_existent_module") is False + + def test_is_relative_import_resolvable(self, tmp_path): + file_path = tmp_path / "test.py" + file_path.touch() + (tmp_path / "module.py").touch() + assert LiteParser._is_relative_import_resolvable(file_path, "module") is True + assert ( + LiteParser._is_relative_import_resolvable(file_path, "non_existent_module") + is False + ) + + @pytest.mark.parametrize( + "statement,expected", + [ + ("import os", True), + ("from os import path", True), + ("", False), + ("import", False), + (123, False), + ], ) - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser._get_unresolvable_imports", - return_value=["from sys import path"], + def test_is_valid_import_stmt(self, statement, expected): + assert LiteParser._is_valid_import_stmt(statement) == expected + + @pytest.mark.parametrize( + "is_module_importable, is_relative_import_resolvable, expected_unresolvable", + [ + (True, True, []), + (True, False, []), + (False, True, ["import os", "import non_existent_module"]), + ( + False, + False, + [ + "import os", + "from sys import path", + "import non_existent_module", + "from non_existent_module import path", + ], + ), + ], ) - mocked_modules = get_mocked_modules(tmp_path) - assert "sys" in mocked_modules - assert isinstance(mocked_modules["sys"], MagicMock) + def test_get_unresolvable_imports( + self, + is_module_importable, + is_relative_import_resolvable, + expected_unresolvable, + mocker, + ): + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser.LiteParser._is_module_importable", + return_value=is_module_importable, + ) + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser.LiteParser._is_relative_import_resolvable", + return_value=is_relative_import_resolvable, + ) + file_path = Path("/fake/path") + import_statements = [ + "import os", + "from sys import path", + "import non_existent_module", + "from non_existent_module import path", + ] + lite_parser_obj = LiteParser(file_path) + assert ( + lite_parser_obj._get_unresolvable_imports(file_path, import_statements) + == expected_unresolvable + ) + + def 
test_parse_project_for_imports(self, tmp_path): + file1 = tmp_path / "file1.py" + file2 = tmp_path / "file2.py" + file1.write_text("import os\nfrom sys import path") + file2.write_text("import ast\nfrom collections import namedtuple") + expected_imports = { + file1: ["import os", "from sys import path"], + file2: ["import ast", "from collections import namedtuple"], + } + lite_parser_obj = LiteParser(tmp_path) + assert lite_parser_obj._parse_project_for_imports() == expected_imports + + def test_create_mock_imports(self): + unresolvable_imports = [ + "import non_existent_module", + "from non_existent_module import path", + ] + mock_modules = {} + LiteParser._create_mock_imports(unresolvable_imports, mock_modules) + assert "non_existent_module" in mock_modules + assert isinstance(mock_modules["non_existent_module"], MagicMock) + + def test_get_mocked_modules(self, tmp_path, mocker): + file1 = tmp_path / "file1.py" + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser.LiteParser._parse_project_for_imports", + return_value={file1: ["import os", "from sys import path"]}, + ) + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser.LiteParser._get_unresolvable_imports", + return_value=["from sys import path"], + ) + + lite_parser_obj = LiteParser(tmp_path) + mocked_modules = lite_parser_obj.get_mocked_modules() + + assert "sys" in mocked_modules + assert isinstance(mocked_modules["sys"], MagicMock) From 98361e335f98f96e53f93ec6e48bc2000d39c0b0 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Sun, 21 Jul 2024 22:28:05 -0500 Subject: [PATCH 15/34] add dataset factories test --- .../integrations/kedro/data_catalog_lite.py | 7 +--- .../test_data_catalog_lite.py | 36 +++++++++++++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/data_catalog_lite.py b/package/kedro_viz/integrations/kedro/data_catalog_lite.py index 97440e502..8cea521bc 100755 --- a/package/kedro_viz/integrations/kedro/data_catalog_lite.py +++ b/package/kedro_viz/integrations/kedro/data_catalog_lite.py @@ -5,12 +5,7 @@ import copy from typing import Any, Optional -from kedro.io.core import ( - AbstractDataset, - DatasetError, - DatasetNotFoundError, - generate_timestamp, -) +from kedro.io.core import AbstractDataset, DatasetError, generate_timestamp from kedro.io.data_catalog import DataCatalog, _resolve_credentials from kedro.io.memory_dataset import MemoryDataset diff --git a/package/tests/test_integrations/test_data_catalog_lite.py b/package/tests/test_integrations/test_data_catalog_lite.py index b0ae6fb87..e68bc1bd6 100644 --- a/package/tests/test_integrations/test_data_catalog_lite.py +++ b/package/tests/test_integrations/test_data_catalog_lite.py @@ -6,6 +6,7 @@ import pandas as pd import pytest from kedro.io import DatasetError +from kedro_datasets.pandas import CSVDataset from pandas.testing import assert_frame_equal from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite @@ -53,6 +54,30 @@ def sane_config_with_nested_creds(sane_config): return sane_config +@pytest.fixture +def config_with_dataset_factories(): + return { + "catalog": { + "{brand}_cars": { + "type": "pandas.CSVDataset", + "filepath": "data/01_raw/{brand}_cars.csv", + }, + "audi_cars": { + "type": "pandas.ParquetDataset", + "filepath": "data/01_raw/audi_cars.pq", + }, + "{type}_boats": { + "type": "pandas.CSVDataset", + "filepath": "data/01_raw/{type}_boats.csv", + }, + "{default1}": { + "type": "pandas.CSVDataset", + "filepath": "data/01_raw/{default1}.csv", + }, + }, + } 
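
A note on the fixture above: dataset factory patterns such as "{brand}_cars" are
resolved lazily, by matching a requested dataset name against the pattern and
substituting the captured values into the catalog entry. A minimal sketch of that
resolution, assuming the `parse` library (which Kedro itself uses for this
matching); the names below are illustrative, not Kedro's implementation:

    from parse import parse

    pattern = "{brand}_cars"
    config_template = {
        "type": "pandas.CSVDataset",
        "filepath": "data/01_raw/{brand}_cars.csv",
    }

    # "tesla_cars" matches the pattern and captures {"brand": "tesla"}
    match = parse(pattern, "tesla_cars")
    resolved = {
        key: value.format(**match.named) for key, value in config_template.items()
    }
    # resolved["filepath"] is now "data/01_raw/tesla_cars.csv"

The test added at the end of this patch relies on exactly this behaviour: the
pattern entry stays in `_dataset_patterns` until a matching dataset name is fetched.
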
+ + @pytest.fixture def bad_config(filepath): return { @@ -285,3 +310,14 @@ def test_bad_confirm(self, sane_config, dataset_name, pattern): with pytest.raises(DatasetError, match=re.escape(pattern)): data_catalog_lite.confirm(dataset_name) + + def test_match_added_to_datasets_on_get(self, config_with_dataset_factories): + """Check that the datasets that match patterns are only added when fetched""" + catalog = DataCatalogLite.from_config(**config_with_dataset_factories) + assert "{brand}_cars" not in catalog._datasets + assert "tesla_cars" not in catalog._datasets + assert "{brand}_cars" in catalog._dataset_patterns + + tesla_cars = catalog._get_dataset("tesla_cars") + assert isinstance(tesla_cars, CSVDataset) + assert "tesla_cars" in catalog._datasets From e120ccc6aa891f8653606ce0f746a1ca70fa753a Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Sun, 21 Jul 2024 23:12:55 -0500 Subject: [PATCH 16/34] add e2e test --- package/features/steps/cli_steps.py | 10 ++++++++++ package/features/viz.feature | 6 ++++++ package/kedro_viz/integrations/kedro/lite_parser.py | 5 ----- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/package/features/steps/cli_steps.py b/package/features/steps/cli_steps.py index c45221752..dfd32bc41 100644 --- a/package/features/steps/cli_steps.py +++ b/package/features/steps/cli_steps.py @@ -147,6 +147,16 @@ def exec_viz_command(context): ) +@when("I execute the kedro viz run command with lite option") +def exec_viz_lite_command(context): + """Execute Kedro-Viz command.""" + context.result = ChildTerminatingPopen( + [context.kedro, "viz", "run", "--lite", "--no-browser"], + env=context.env, + cwd=str(context.root_project_dir), + ) + + @then("kedro-viz should start successfully") def check_kedroviz_up(context): """Check that Kedro-Viz is up and responding to requests.""" diff --git a/package/features/viz.feature b/package/features/viz.feature index 75c7b65fe..083496aca 100644 --- a/package/features/viz.feature +++ b/package/features/viz.feature @@ -24,3 +24,9 @@ Feature: Viz plugin in new project When I execute the kedro viz run command Then kedro-viz should start successfully + Scenario: Execute viz lite with latest Kedro + Given I have installed kedro version "latest" + And I have run a non-interactive kedro new with spaceflights-pandas starter + When I execute the kedro viz run command with lite option + Then kedro-viz should start successfully + diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index b031499c9..84931a2e7 100644 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -136,12 +136,7 @@ def get_mocked_modules(self) -> Dict[str, MagicMock]: unresolvable_imports: List[str] = self._get_unresolvable_imports( file_path, imports ) - - print(f"File Path: {file_path}, Unresolved imports: {unresolvable_imports}") - # Create mock imports self._create_mock_imports(unresolvable_imports, mocked_modules) - print(f"Mocked modules: {mocked_modules}") - return mocked_modules From b7a1862c28149c326bb0cd30c14d7019b81c2ed9 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 22 Jul 2024 08:26:07 -0500 Subject: [PATCH 17/34] fix CI --- package/tests/test_launchers/test_cli.py | 1 + 1 file changed, 1 insertion(+) diff --git a/package/tests/test_launchers/test_cli.py b/package/tests/test_launchers/test_cli.py index ac87a1226..9990a71a9 100755 --- a/package/tests/test_launchers/test_cli.py +++ 
b/package/tests/test_launchers/test_cli.py @@ -196,6 +196,7 @@ def mock_project_path(mocker): "include_hooks": False, "package_name": None, "extra_params": {"extra_param": "param"}, + "is_lite": False, }, ), ( From 06e35bffc98f766c185efe8f481554b29a4fe71e Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 22 Jul 2024 22:39:16 -0500 Subject: [PATCH 18/34] dataset factory pattern support in lite mode --- package/kedro_viz/data_access/managers.py | 12 +++++++++- .../tests/test_data_access/test_managers.py | 24 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/package/kedro_viz/data_access/managers.py b/package/kedro_viz/data_access/managers.py index 4eb3e7213..875448a57 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -7,6 +7,8 @@ import networkx as nx from kedro.io import DataCatalog +from kedro.io.core import DatasetError +from kedro.io.memory_dataset import MemoryDataset from kedro.pipeline import Pipeline as KedroPipeline from kedro.pipeline.node import Node as KedroNode from sqlalchemy.orm import sessionmaker @@ -316,7 +318,15 @@ def add_dataset( Returns: The GraphNode instance representing the dataset that was added to the NodesRepository. """ - obj = self.catalog.get_dataset(dataset_name) + try: + obj = self.catalog.get_dataset(dataset_name) + except DatasetError: + # This is to handle dataset factory patterns when running + # Kedro Viz in lite mode. The `get_dataset` function + # of DataCatalog calls AbstractDataset.from_config + # which tries to create a Dataset instance from the pattern + obj = MemoryDataset() + layer = self.catalog.get_layer_for_dataset(dataset_name) graph_node: Union[DataNode, TranscodedDataNode, ParametersNode] ( diff --git a/package/tests/test_data_access/test_managers.py b/package/tests/test_data_access/test_managers.py index af94785cb..6f55331c2 100644 --- a/package/tests/test_data_access/test_managers.py +++ b/package/tests/test_data_access/test_managers.py @@ -3,6 +3,7 @@ import networkx as nx import pytest from kedro.io import DataCatalog, MemoryDataset +from kedro.io.core import DatasetError from kedro.pipeline import Pipeline, node from kedro.pipeline.modular_pipeline import pipeline from kedro_datasets.pandas import CSVDataset @@ -378,6 +379,29 @@ def test_add_dataset_with_modular_pipeline( "uk.data_science", } + def test_add_dataset_with_unresolved_pattern( + self, + data_access_manager: DataAccessManager, + example_pipelines: Dict[str, Pipeline], + example_modular_pipelines_repo_obj, + mocker, + ): + dataset = CSVDataset(filepath="dataset.csv") + dataset_name = "companies#csv" + catalog = DataCatalog(datasets={dataset_name: dataset}) + data_access_manager.add_catalog(catalog, example_pipelines) + + with mocker.patch.object( + data_access_manager.catalog, + "get_dataset", + side_effect=DatasetError("Dataset not found"), + ): + dataset_obj = data_access_manager.add_dataset( + "my_pipeline", dataset_name, example_modular_pipelines_repo_obj + ) + + assert isinstance(dataset_obj.kedro_obj, MemoryDataset) + def test_add_all_parameters( self, data_access_manager: DataAccessManager, From 78cd4135dafe5c97f12499c9d7a1793fa4139699 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Tue, 23 Jul 2024 00:36:34 -0500 Subject: [PATCH 19/34] add doc strings --- .../integrations/kedro/lite_parser.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py 
index 84931a2e7..5542dd1e2 100644 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -24,6 +24,14 @@ def __init__(self, project_path: Path) -> None: def _get_import_statements_from_ast( parsed_content_ast_node: ast.Module, ) -> List[str]: + """Get all the import statements from an AST Node. + + Args: + parsed_content_ast_node (ast.Module): The AST node to + extract import statements + Returns: + A list of import statements as strings + """ import_statements: List[str] = [] for node in ast.walk(parsed_content_ast_node): @@ -39,6 +47,14 @@ def _get_import_statements_from_ast( @staticmethod def _is_module_importable(module_name: str) -> bool: + """Checks if a module is importable + + Args: + module_name (str): The name of the module to check + importability + Returns: + Whether the module can be imported + """ try: importlib.import_module(module_name) return True @@ -47,12 +63,30 @@ def _is_module_importable(module_name: str) -> bool: @staticmethod def _is_relative_import_resolvable(file_path: Path, module_name: str) -> bool: + """Checks if a relative module is importable + + Args: + file_path (Path): The file path where the module is mentioned + as an import statement + module_name (str): The name of the module to check + importability + Returns: + Whether the module can be imported + """ base_dir = file_path.parent relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py") return relative_path.exists() @staticmethod def _is_valid_import_stmt(statement: Any) -> bool: + """Checks for a valid import statement + + Args: + statement (Any): The import statement to validate + + Returns: + Whether the statement is a valid import string + """ if not isinstance(statement, str) or not statement.strip(): return False @@ -69,6 +103,15 @@ def _is_valid_import_stmt(statement: Any) -> bool: def _create_mock_imports( unresolvable_imports: List[str], mock_modules: Dict[str, MagicMock] ) -> None: + """Creates mock modules for the unresolvable imports and adds them to the + dictionary of mock_modules + + Args: + unresolvable_imports (List[str]): A list of import statements + that are not resolved + mock_modules (Dict[str, MagicMock]): A dictionary of mocked imports + + """ for statement in unresolvable_imports: module_name = statement.split(" ")[1] module_parts = module_name.split(".") @@ -85,6 +128,15 @@ def _create_mock_imports( def _get_unresolvable_imports( self, file_path: Path, import_statements: List[str] ) -> List[str]: + """Retrieves all the unresolved import statements from a file + + Args: + file_path (Path): The file path where the import statements are mentioned + import_statements (List[str]): A list of all the import statements mentioned in + the file + Returns: + A list of import statements that are not resolved + """ unresolvable_imports: List[str] = [] for statement in import_statements: @@ -111,6 +163,13 @@ def _get_unresolvable_imports( return unresolvable_imports def _parse_project_for_imports(self) -> Dict[Path, List[str]]: + """Loops through all the python files, parses each file using + AST and creates a map containing the file path and the extracted + import statements + + Returns: + A dictionary of file path and corresponding import statements + """ all_imports: Dict[Path, List[str]] = {} for filepath in self._project_path.rglob("*.py"): From f2dda9308558dc0c511c135418a4f59726d7aa4b Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Tue, 23 Jul 2024 19:12:04 -0500 Subject: [PATCH 20/34] add e2e 
test and clear unused func --- package/features/steps/cli_steps.py | 23 +++++++++++++++++++ package/features/viz.feature | 8 +++++++ .../tests/test_data_access/test_managers.py | 19 --------------- 3 files changed, 31 insertions(+), 19 deletions(-) diff --git a/package/features/steps/cli_steps.py b/package/features/steps/cli_steps.py index dfd32bc41..769cb08d6 100644 --- a/package/features/steps/cli_steps.py +++ b/package/features/steps/cli_steps.py @@ -179,3 +179,26 @@ def check_kedroviz_up(context): ) finally: context.result.terminate() + + +@then("I store the response from main endpoint") +def get_main_api_response(context): + max_duration = 30 # 30 seconds + end_by = time() + max_duration + + while time() < end_by: + try: + response = requests.get("http://localhost:4141/api/main") + context.response = response.json() + assert response.status_code == 200 + except Exception: + sleep(2.0) + continue + else: + break + + +@then("I compare the responses in regular and lite mode") +def compare_main_api_responses(context): + regular_mode_response = requests.get("http://localhost:4141/api/main").json() + assert context.response == regular_mode_response diff --git a/package/features/viz.feature b/package/features/viz.feature index 083496aca..d3c01e2f7 100644 --- a/package/features/viz.feature +++ b/package/features/viz.feature @@ -30,3 +30,11 @@ Feature: Viz plugin in new project When I execute the kedro viz run command with lite option Then kedro-viz should start successfully + Scenario: Compare viz responses in regular and lite mode + Given I have installed kedro version "latest" + And I have run a non-interactive kedro new with spaceflights-pandas starter + When I execute the kedro viz run command with lite option + Then I store the response from main endpoint + Given I have installed the project's requirements + When I execute the kedro viz run command + Then I compare the responses in regular and lite mode diff --git a/package/tests/test_data_access/test_managers.py b/package/tests/test_data_access/test_managers.py index 6f55331c2..d1c43162b 100644 --- a/package/tests/test_data_access/test_managers.py +++ b/package/tests/test_data_access/test_managers.py @@ -28,25 +28,6 @@ def identity(x): return x -def assert_expected_modular_pipeline_values_for_edge_cases( - expected_modular_pipeline_tree_obj, - modular_pipeline_node_id, - data_access_manager, - modular_pipeline_tree_values, - expected_key, -): - """This asserts an `expected_key` value present in modular_pipeline_tree - that is constructed in the edge cases with the expected_modular_pipeline_tree""" - assert sorted( - list(expected_modular_pipeline_tree_obj[modular_pipeline_node_id][expected_key]) - ) == sorted( - list( - data_access_manager.nodes.get_node_by_id(node_id).name - for node_id in modular_pipeline_tree_values - ) - ) - - class TestAddCatalog: def test_add_catalog( self, From bc4aea2dc26ae3640e76c81be0d11029cb856695 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 12 Aug 2024 23:15:07 -0500 Subject: [PATCH 21/34] testing relative to absolute imports --- package/kedro_viz/data_access/managers.py | 4 +- .../integrations/kedro/data_catalog_lite.py | 6 +- .../integrations/kedro/lite_parser.py | 72 +++-- .../integrations/kedro/lite_parser_test.py | 246 ++++++++++++++++++ .../test_integrations/test_lite_parser.py | 108 ++++++-- 5 files changed, 396 insertions(+), 40 deletions(-) create mode 100644 package/kedro_viz/integrations/kedro/lite_parser_test.py diff --git a/package/kedro_viz/data_access/managers.py 
b/package/kedro_viz/data_access/managers.py index 875448a57..afeba9358 100644 --- a/package/kedro_viz/data_access/managers.py +++ b/package/kedro_viz/data_access/managers.py @@ -325,7 +325,9 @@ def add_dataset( # Kedro Viz in lite mode. The `get_dataset` function # of DataCatalog calls AbstractDataset.from_config # which tries to create a Dataset instance from the pattern - obj = MemoryDataset() + + # pylint: disable=abstract-class-instantiated + obj = MemoryDataset() # type: ignore[abstract] layer = self.catalog.get_layer_for_dataset(dataset_name) graph_node: Union[DataNode, TranscodedDataNode, ParametersNode] diff --git a/package/kedro_viz/integrations/kedro/data_catalog_lite.py b/package/kedro_viz/integrations/kedro/data_catalog_lite.py index 8cea521bc..11c179cdb 100755 --- a/package/kedro_viz/integrations/kedro/data_catalog_lite.py +++ b/package/kedro_viz/integrations/kedro/data_catalog_lite.py @@ -54,9 +54,11 @@ def from_config( ds_name, ds_config, load_versions.get(ds_name), save_version ) except DatasetError: - datasets[ds_name] = MemoryDataset() + # pylint: disable=abstract-class-instantiated + datasets[ds_name] = MemoryDataset() # type: ignore[abstract] except KeyError: - datasets[ds_name] = MemoryDataset() + # pylint: disable=abstract-class-instantiated + datasets[ds_name] = MemoryDataset() # type: ignore[abstract] sorted_patterns = cls._sort_patterns(dataset_patterns) if sorted_patterns: diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index 5542dd1e2..23b108774 100644 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -3,6 +3,7 @@ import ast import importlib.util import logging +import re from pathlib import Path from typing import Any, Dict, List from unittest.mock import MagicMock @@ -39,9 +40,13 @@ def _get_import_statements_from_ast( for alias in node.names: import_statements.append(f"import {alias.name}") elif isinstance(node, ast.ImportFrom): - module = node.module if node.module else "" + module_name = node.module if node.module else "" + level = node.level for alias in node.names: - import_statements.append(f"from {module} import {alias.name}") + relative_module_name = "." 
* level + module_name
+                    import_statements.append(
+                        f"from {relative_module_name} import {alias.name}"
+                    )
 
         return import_statements
 
@@ -56,13 +61,16 @@ def _is_module_importable(module_name: str) -> bool:
             Whether the module can be imported
         """
         try:
-            importlib.import_module(module_name)
+            if importlib.util.find_spec(module_name) is None:
+                return False
             return True
-        except ImportError:
+        except (ImportError, ModuleNotFoundError, ValueError):
             return False
 
     @staticmethod
-    def _is_relative_import_resolvable(file_path: Path, module_name: str) -> bool:
+    def _is_relative_import_resolvable(
+        file_path: Path, module_name: str, dot_count: int
+    ) -> bool:
         """Checks if a relative module is importable
 
         Args:
@@ -70,12 +78,32 @@ def _is_relative_import_resolvable(file_path: Path, module_name: str) -> bool:
             as an import statement
             module_name (str): The name of the module to check
             importability
+            dot_count (int): The number of leading dots in the relative import
         Returns:
             Whether the module can be imported
         """
-        base_dir = file_path.parent
-        relative_path = (base_dir / module_name.replace(".", "/")).with_suffix(".py")
-        return relative_path.exists()
+        # Get the current directory of the file
+        current_dir = file_path.parent
+
+        # Navigate up the directory tree based on the dot count
+        target_dir = current_dir
+        for _ in range(dot_count - 1):
+            if not target_dir:
+                return False
+            target_dir = target_dir.parent
+
+        # Combine the target directory with module_name
+        if module_name:
+            module_parts = module_name.split(".")
+            module_path = target_dir.joinpath(*module_parts)
+        else:
+            module_path = target_dir
+
+        if module_path.is_dir():
+            # Check if it's a package by looking for __init__.py
+            init_file = module_path / "__init__.py"
+            return init_file.exists()
+        return module_path.with_suffix(".py").exists()
 
     @staticmethod
     def _is_valid_import_stmt(statement: Any) -> bool:
@@ -90,11 +118,10 @@ def _is_valid_import_stmt(statement: Any) -> bool:
         if not isinstance(statement, str) or not statement.strip():
             return False
 
-        # Split the statement by spaces
-        parts = statement.split()
+        # Regex to match different import statements
+        import_match = re.match(r"(from|import)\s+(\.+)?([a-zA-Z0-9_.]+)", statement)
 
-        # Ensure that the statement has at least two parts
-        if len(parts) < 2:
+        if not import_match:
             return False
 
         return True
@@ -142,19 +169,26 @@ def _get_unresolvable_imports(
         for statement in import_statements:
             if self._is_valid_import_stmt(statement):
                 if statement.startswith("import "):
+                    # absolute `import x` statements, only considering the root module
                    module_name = statement.split(" ")[1].split(".")[0]
 
                     if not self._is_module_importable(module_name):
                         unresolvable_imports.append(statement)
-
-                elif statement.startswith("from "):
-                    parts = statement.split(" ")
-                    module_name = parts[1]
-
-                    if self._is_relative_import_resolvable(file_path, module_name):
+                else:
+                    # `from x import y` statements, which may be relative or absolute
+                    module_name = statement.split(" ")[1]
+
+                    # Get the dot count for relative imports
+                    dot_count = len(module_name) - len(module_name.lstrip("."))
+
+                    if dot_count > 0:
+                        if not self._is_relative_import_resolvable(
+                            file_path, module_name[dot_count:], dot_count
+                        ):
+                            unresolvable_imports.append(statement)
                         continue
 
-                    # only checking for parent module
+                    # absolute from-imports, only considering the root module
                     module_name = module_name.split(".")[0]
 
                     if not self._is_module_importable(module_name):
diff --git a/package/kedro_viz/integrations/kedro/lite_parser_test.py b/package/kedro_viz/integrations/kedro/lite_parser_test.py
new file mode 100644
index 000000000..1b2a5990c
--- /dev/null +++ b/package/kedro_viz/integrations/kedro/lite_parser_test.py @@ -0,0 +1,246 @@ +# """`kedro_viz.integrations.kedro.lite_parser` defines a Kedro parser using AST.""" + +# import ast +# import importlib.util +# import logging +# from pathlib import Path +# from typing import Any, Dict, List, Union +# from unittest.mock import MagicMock +# from kedro.framework.project import PACKAGE_NAME + +# logger = logging.getLogger(__name__) +# PACKAGE_NAME = "demo_project" + +# class LiteParser: +# """Represents a Kedro Parser which uses AST + +# Args: +# project_path (Path): the path where the Kedro project is located. +# """ + +# def __init__(self, project_path: Path) -> None: +# self._project_path = project_path + +# @staticmethod +# def _is_module_importable(module_name: str, package_name: str = None) -> bool: +# """Checks if a module is importable + +# Args: +# module_name (str): The name of the module to check +# importability +# Returns: +# Whether the module can be imported +# """ +# try: +# if importlib.util.find_spec(module_name, package_name) is None: +# return False +# return True +# except ModuleNotFoundError as exc: +# print(exc) +# return False +# except (ImportError, ValueError): +# return False + +# @staticmethod +# def _is_relative_import_resolvable( +# file_path: Path, module_name: str, dot_count: int +# ) -> bool: +# """Checks if a relative module is importable + +# Args: +# file_path (Path): The file path where the module is mentioned +# as an import statement +# module_name (str): The name of the module to check +# importability +# dot_count (int): The length of dots in the module_name +# Returns: +# Whether the module can be imported +# """ + +# # import pdb +# # pdb.set_trace() + +# # Get the current directory of the file +# current_dir = file_path.parent + +# # Navigate up the directory tree based on the dot count +# target_dir = current_dir +# for _ in range(dot_count - 1): +# if not target_dir: +# return False +# target_dir = target_dir.parent + +# # Combine the target directory with module_name +# if module_name: +# module_parts = module_name.split(".") +# module_path = target_dir.joinpath(*module_parts) +# else: +# module_path = target_dir + +# if module_path.is_dir(): +# # Check if it's a package by looking for __init__.py +# init_file = module_path / "__init__.py" +# return init_file.exists() +# return module_path.with_suffix(".py").exists() + +# def _create_absolute_mock_imports( +# self, module_name: str, mocked_modules: Dict[str, MagicMock] +# ) -> None: +# """Creates mock modules for the unresolvable imports and adds them to the +# dictionary of mock_modules + +# Args: +# module_name (str): The module name to be mocked +# mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports + +# """ + +# module_parts = module_name.split(".") +# full_module_name = "" + +# for idx, sub_module_name in enumerate(module_parts): +# full_module_name = ( +# sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" +# ) +# if ( +# not self._is_module_importable(full_module_name) +# and full_module_name not in mocked_modules +# ): +# mocked_modules[full_module_name] = MagicMock() + +# def get_mocked_modules(self) -> Dict[str, MagicMock]: +# """Returns mocked modules for all the dependency errors +# as a dictionary for each file in your Kedro project +# """ +# mocked_modules: Dict[str, MagicMock] = {} + +# for filepath in self._project_path.rglob("*.py"): +# with open(filepath, "r", encoding="utf-8") as file: +# file_content = file.read() + +# # 
parse file content using ast +# parsed_content_ast_node: ast.Module = ast.parse(file_content) +# self._mock_missing_dependencies( +# parsed_content_ast_node, filepath, mocked_modules +# ) + +# return mocked_modules + +# def _mock_missing_dependencies( +# self, +# parsed_content_ast_node: ast.Module, +# file_path: Path, +# mocked_modules: Dict[str, MagicMock], +# ) -> None: +# """Mock missing dependencies + +# Args: +# parsed_content_ast_node (ast.Module): The AST node to +# extract import statements +# file_path (Path): The current file path to check +# for missing dependencies +# mocked_modules: A dictionary of mocked imports +# """ + +# for node in ast.walk(parsed_content_ast_node): +# if isinstance(node, ast.Import): +# for alias in node.names: +# module_name = alias.name +# self._create_absolute_mock_imports(module_name, mocked_modules) + +# elif isinstance(node, ast.ImportFrom): +# module_name = node.module if node.module else "" +# level = node.level + +# for alias in node.names: +# if level == 0: +# # absolute imports (should be from root_dir of the package) +# self._create_absolute_mock_imports(module_name, mocked_modules) +# else: +# # relative imports (starting with dot) +# # if not self._is_relative_import_resolvable( +# # file_path, module_name, level +# # ): +# # self._create_relative_mock_imports( +# # file_path, module_name, level, mocked_modules +# # ) +# if not self._is_module_importable(module_name, PACKAGE_NAME): +# mocked_modules[module_name] = MagicMock() + +# def _create_relative_mock_imports( +# self, +# file_path: Path, +# module_name: str, +# level: int, +# mocked_modules: Dict[str, MagicMock], +# ): + +# # import pdb +# # pdb.set_trace() + +# root = Path(self._project_path).resolve() +# file = Path(file_path).resolve() + +# print("Root Path", root) +# print("File Path", file) + +# # Extract the directory of the file +# file_dir = file.parent + +# # Navigate up the directory tree based on the number of leading dots +# target_dir = file_dir +# for _ in range(level): +# if target_dir == root: +# break +# target_dir = target_dir.parent + +# # Create the absolute import path +# module_path = ("." 
* (level) + module_name).replace('.', '/') + '.py' +# absolute_path = target_dir / module_path +# print(absolute_path.resolve()) +# mocked_modules[".nodes"] = MagicMock() + +# @staticmethod +# def _extract_path_starting_from_package(file_path: Path) -> Union[Path, None]: +# # Convert the file path to a list of parts +# path_parts = file_path.parts + +# try: +# package_index = path_parts.index(PACKAGE_NAME) +# except ValueError: +# return None + +# # Extract the path parts starting from the package name +# sub_path = Path(*path_parts[package_index:]) + +# return sub_path + +# @staticmethod +# def _convert_relative_import_to_absolute(file_path: Path, relative_import: str, level: int) -> Union[Path, None]: +# # Get the current directory of the file +# current_dir = file_path.parent + +# # Navigate up the directory tree based on the dot count +# target_dir = current_dir +# for _ in range(level - 1): +# if target_dir: +# target_dir = target_dir.parent + +# # Combine the target directory with module_name +# if relative_import: +# module_parts = relative_import.split(".") +# module_path = target_dir.joinpath(*module_parts) +# else: +# module_path = target_dir + +# print(module_path) +# module_absolute_path = LiteParser._extract_path_starting_from_package(module_path) +# return module_absolute_path + + +# if __name__ == "__main__": +# # print(LiteParser._extract_path_starting_from_package(Path("/Users/Ravi_Kumar_Pilla/Library/CloudStorage/OneDrive-McKinsey&Company/Documents/Kedro/KedroOrg/kedro-viz/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py"))) + +# # print(importlib.util.find_spec()) +# print(LiteParser._convert_relative_import_to_absolute(Path("/Users/Ravi_Kumar_Pilla/Library/CloudStorage/OneDrive-McKinsey&Company/Documents/Kedro/KedroOrg/kedro-viz/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py"), "data_ingestion.nodes", 0)) + diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py index d7dd860c6..4c1bd70ad 100644 --- a/package/tests/test_integrations/test_lite_parser.py +++ b/package/tests/test_integrations/test_lite_parser.py @@ -32,15 +32,74 @@ def test_is_module_importable(self): assert LiteParser._is_module_importable("os") is True assert LiteParser._is_module_importable("non_existent_module") is False - def test_is_relative_import_resolvable(self, tmp_path): - file_path = tmp_path / "test.py" + def test_valid_relative_import(self, tmp_path): + # Create a directory structure + package_dir = tmp_path / "project" / "subpackage" + package_dir.mkdir(parents=True) + + # Create a valid module file + module_file = package_dir / "module.py" + module_file.touch() + + # Check if the relative import is resolvable + file_path = package_dir / "another_module.py" file_path.touch() - (tmp_path / "module.py").touch() - assert LiteParser._is_relative_import_resolvable(file_path, "module") is True - assert ( - LiteParser._is_relative_import_resolvable(file_path, "non_existent_module") - is False - ) + + assert LiteParser._is_relative_import_resolvable(Path(file_path), "subpackage.module", 1) == True + + def test_valid_relative_import_with_dots(self, tmp_path): + # Create a directory structure + root_dir = tmp_path / "project" + subpackage_dir = root_dir / "subpackage" + subpackage_dir.mkdir(parents=True) + + # Create a valid module file + module_file = root_dir / "module.py" + module_file.touch() + + # Check if the relative import is resolvable (one level up) + file_path = subpackage_dir / 
"another_module.py" + file_path.touch() + + assert LiteParser._is_relative_import_resolvable(file_path, "module", 2) == True + + def test_invalid_relative_import(self, tmp_path): + # Create a directory structure + package_dir = tmp_path / "project" / "subpackage" + package_dir.mkdir(parents=True) + + # Create a file that will simulate an import from a non-existing module + file_path = package_dir / "another_module.py" + file_path.touch() + + assert LiteParser._is_relative_import_resolvable(file_path, "nonexistent.module", 1) == False + + def test_import_of_package(self, tmp_path): + # Create a directory structure with a package + package_dir = tmp_path / "project" / "subpackage" + package_dir.mkdir(parents=True) + + # Create __init__.py to make it a package + init_file = package_dir / "__init__.py" + init_file.touch() + + # Check if the relative import is resolvable for a package + file_path = tmp_path / "project" / "module.py" + file_path.touch() + + assert LiteParser._is_relative_import_resolvable(file_path, "subpackage", 1) == True + + def test_invalid_path_navigation(self, tmp_path): + # Create a directory structure + subpackage_dir = tmp_path / "project" / "subpackage" + subpackage_dir.mkdir(parents=True) + + # Create a file + file_path = subpackage_dir / "module.py" + file_path.touch() + + # Trying to go up too many levels should fail + assert LiteParser._is_relative_import_resolvable(file_path, "module", 5) == False @pytest.mark.parametrize( "statement,expected", @@ -56,11 +115,20 @@ def test_is_valid_import_stmt(self, statement, expected): assert LiteParser._is_valid_import_stmt(statement) == expected @pytest.mark.parametrize( - "is_module_importable, is_relative_import_resolvable, expected_unresolvable", + "is_module_importable, is_relative_import_resolvable, import_statements, expected_unresolvable", [ - (True, True, []), - (True, False, []), - (False, True, ["import os", "import non_existent_module"]), + ( + True, + True, + [ + "import os", + "from sys import path", + "import non_existent_module", + "from non_existent_module import path", + "from ...pipelines.nodes import test_func" + ], + [], + ), ( False, False, @@ -69,14 +137,24 @@ def test_is_valid_import_stmt(self, statement, expected): "from sys import path", "import non_existent_module", "from non_existent_module import path", + "from ...pipelines.nodes import test_func" + ], + [ + "import os", + "from sys import path", + "import non_existent_module", + "from non_existent_module import path", + "from ...pipelines.nodes import test_func" ], ), + (True, False, ["import os", "import non_existent_module"], []), ], ) def test_get_unresolvable_imports( self, is_module_importable, is_relative_import_resolvable, + import_statements, expected_unresolvable, mocker, ): @@ -89,12 +167,6 @@ def test_get_unresolvable_imports( return_value=is_relative_import_resolvable, ) file_path = Path("/fake/path") - import_statements = [ - "import os", - "from sys import path", - "import non_existent_module", - "from non_existent_module import path", - ] lite_parser_obj = LiteParser(file_path) assert ( lite_parser_obj._get_unresolvable_imports(file_path, import_statements) From 81621479992bc0c04eeeb7331c0d827e3cc67fe1 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Fri, 16 Aug 2024 17:01:29 -0500 Subject: [PATCH 22/34] testing relative imports --- .../integrations/kedro/lite_parser_test.py | 464 ++++++++---------- 1 file changed, 218 insertions(+), 246 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/lite_parser_test.py 
b/package/kedro_viz/integrations/kedro/lite_parser_test.py index 1b2a5990c..53af1a8d5 100644 --- a/package/kedro_viz/integrations/kedro/lite_parser_test.py +++ b/package/kedro_viz/integrations/kedro/lite_parser_test.py @@ -1,246 +1,218 @@ -# """`kedro_viz.integrations.kedro.lite_parser` defines a Kedro parser using AST.""" - -# import ast -# import importlib.util -# import logging -# from pathlib import Path -# from typing import Any, Dict, List, Union -# from unittest.mock import MagicMock -# from kedro.framework.project import PACKAGE_NAME - -# logger = logging.getLogger(__name__) -# PACKAGE_NAME = "demo_project" - -# class LiteParser: -# """Represents a Kedro Parser which uses AST - -# Args: -# project_path (Path): the path where the Kedro project is located. -# """ - -# def __init__(self, project_path: Path) -> None: -# self._project_path = project_path - -# @staticmethod -# def _is_module_importable(module_name: str, package_name: str = None) -> bool: -# """Checks if a module is importable - -# Args: -# module_name (str): The name of the module to check -# importability -# Returns: -# Whether the module can be imported -# """ -# try: -# if importlib.util.find_spec(module_name, package_name) is None: -# return False -# return True -# except ModuleNotFoundError as exc: -# print(exc) -# return False -# except (ImportError, ValueError): -# return False - -# @staticmethod -# def _is_relative_import_resolvable( -# file_path: Path, module_name: str, dot_count: int -# ) -> bool: -# """Checks if a relative module is importable - -# Args: -# file_path (Path): The file path where the module is mentioned -# as an import statement -# module_name (str): The name of the module to check -# importability -# dot_count (int): The length of dots in the module_name -# Returns: -# Whether the module can be imported -# """ - -# # import pdb -# # pdb.set_trace() - -# # Get the current directory of the file -# current_dir = file_path.parent - -# # Navigate up the directory tree based on the dot count -# target_dir = current_dir -# for _ in range(dot_count - 1): -# if not target_dir: -# return False -# target_dir = target_dir.parent - -# # Combine the target directory with module_name -# if module_name: -# module_parts = module_name.split(".") -# module_path = target_dir.joinpath(*module_parts) -# else: -# module_path = target_dir - -# if module_path.is_dir(): -# # Check if it's a package by looking for __init__.py -# init_file = module_path / "__init__.py" -# return init_file.exists() -# return module_path.with_suffix(".py").exists() - -# def _create_absolute_mock_imports( -# self, module_name: str, mocked_modules: Dict[str, MagicMock] -# ) -> None: -# """Creates mock modules for the unresolvable imports and adds them to the -# dictionary of mock_modules - -# Args: -# module_name (str): The module name to be mocked -# mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports - -# """ - -# module_parts = module_name.split(".") -# full_module_name = "" - -# for idx, sub_module_name in enumerate(module_parts): -# full_module_name = ( -# sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" -# ) -# if ( -# not self._is_module_importable(full_module_name) -# and full_module_name not in mocked_modules -# ): -# mocked_modules[full_module_name] = MagicMock() - -# def get_mocked_modules(self) -> Dict[str, MagicMock]: -# """Returns mocked modules for all the dependency errors -# as a dictionary for each file in your Kedro project -# """ -# mocked_modules: Dict[str, MagicMock] = {} - -# 
for filepath in self._project_path.rglob("*.py"): -# with open(filepath, "r", encoding="utf-8") as file: -# file_content = file.read() - -# # parse file content using ast -# parsed_content_ast_node: ast.Module = ast.parse(file_content) -# self._mock_missing_dependencies( -# parsed_content_ast_node, filepath, mocked_modules -# ) - -# return mocked_modules - -# def _mock_missing_dependencies( -# self, -# parsed_content_ast_node: ast.Module, -# file_path: Path, -# mocked_modules: Dict[str, MagicMock], -# ) -> None: -# """Mock missing dependencies - -# Args: -# parsed_content_ast_node (ast.Module): The AST node to -# extract import statements -# file_path (Path): The current file path to check -# for missing dependencies -# mocked_modules: A dictionary of mocked imports -# """ - -# for node in ast.walk(parsed_content_ast_node): -# if isinstance(node, ast.Import): -# for alias in node.names: -# module_name = alias.name -# self._create_absolute_mock_imports(module_name, mocked_modules) - -# elif isinstance(node, ast.ImportFrom): -# module_name = node.module if node.module else "" -# level = node.level - -# for alias in node.names: -# if level == 0: -# # absolute imports (should be from root_dir of the package) -# self._create_absolute_mock_imports(module_name, mocked_modules) -# else: -# # relative imports (starting with dot) -# # if not self._is_relative_import_resolvable( -# # file_path, module_name, level -# # ): -# # self._create_relative_mock_imports( -# # file_path, module_name, level, mocked_modules -# # ) -# if not self._is_module_importable(module_name, PACKAGE_NAME): -# mocked_modules[module_name] = MagicMock() - -# def _create_relative_mock_imports( -# self, -# file_path: Path, -# module_name: str, -# level: int, -# mocked_modules: Dict[str, MagicMock], -# ): - -# # import pdb -# # pdb.set_trace() - -# root = Path(self._project_path).resolve() -# file = Path(file_path).resolve() - -# print("Root Path", root) -# print("File Path", file) - -# # Extract the directory of the file -# file_dir = file.parent - -# # Navigate up the directory tree based on the number of leading dots -# target_dir = file_dir -# for _ in range(level): -# if target_dir == root: -# break -# target_dir = target_dir.parent - -# # Create the absolute import path -# module_path = ("." 
* (level) + module_name).replace('.', '/') + '.py' -# absolute_path = target_dir / module_path -# print(absolute_path.resolve()) -# mocked_modules[".nodes"] = MagicMock() - -# @staticmethod -# def _extract_path_starting_from_package(file_path: Path) -> Union[Path, None]: -# # Convert the file path to a list of parts -# path_parts = file_path.parts - -# try: -# package_index = path_parts.index(PACKAGE_NAME) -# except ValueError: -# return None - -# # Extract the path parts starting from the package name -# sub_path = Path(*path_parts[package_index:]) - -# return sub_path - -# @staticmethod -# def _convert_relative_import_to_absolute(file_path: Path, relative_import: str, level: int) -> Union[Path, None]: -# # Get the current directory of the file -# current_dir = file_path.parent - -# # Navigate up the directory tree based on the dot count -# target_dir = current_dir -# for _ in range(level - 1): -# if target_dir: -# target_dir = target_dir.parent - -# # Combine the target directory with module_name -# if relative_import: -# module_parts = relative_import.split(".") -# module_path = target_dir.joinpath(*module_parts) -# else: -# module_path = target_dir - -# print(module_path) -# module_absolute_path = LiteParser._extract_path_starting_from_package(module_path) -# return module_absolute_path - - -# if __name__ == "__main__": -# # print(LiteParser._extract_path_starting_from_package(Path("/Users/Ravi_Kumar_Pilla/Library/CloudStorage/OneDrive-McKinsey&Company/Documents/Kedro/KedroOrg/kedro-viz/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py"))) - -# # print(importlib.util.find_spec()) -# print(LiteParser._convert_relative_import_to_absolute(Path("/Users/Ravi_Kumar_Pilla/Library/CloudStorage/OneDrive-McKinsey&Company/Documents/Kedro/KedroOrg/kedro-viz/demo-project/src/demo_project/pipelines/data_ingestion/pipeline.py"), "data_ingestion.nodes", 0)) - +"""`kedro_viz.integrations.kedro.lite_parser` defines a Kedro parser using AST.""" + +import ast +import importlib.util +import logging +from pathlib import Path +from typing import Any, Dict, List, Union +from unittest.mock import MagicMock +from kedro.framework.project import PACKAGE_NAME + +logger = logging.getLogger(__name__) +PACKAGE_NAME = "demo_project" + + +class LiteParser: + """Represents a Kedro Parser which uses AST + + Args: + project_path (Path): the path where the Kedro project is located. 
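+
+    Example:
+        A minimal sketch of how the parser is meant to be driven (the
+        project path below is hypothetical):
+
+        >>> parser = LiteParser(Path("/path/to/kedro/project"))
+        >>> mocked_modules = parser.get_mocked_modules()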
+ """ + + def __init__(self, project_path: Path) -> None: + self._project_path = project_path.resolve() + + @staticmethod + def _is_module_importable(module_name: str) -> bool: + """Checks if a module is importable + + Args: + module_name (str): The name of the module to check + importability + Returns: + Whether the module can be imported + """ + try: + if importlib.util.find_spec(module_name) is None: + return False + return True + except (ModuleNotFoundError, ImportError, ValueError): + return False + + @staticmethod + def _is_relative_import_resolvable( + file_path: Path, module_name: str, dot_count: int + ) -> bool: + """Checks if a relative module is importable + + Args: + file_path (Path): The file path where the module is mentioned + as an import statement + module_name (str): The name of the module to check + importability + dot_count (int): The length of dots in the module_name + Returns: + Whether the module can be imported + """ + + # Get the current directory of the file + current_dir = file_path.parent + + # Navigate up the directory tree based on the dot count + target_dir = current_dir + for _ in range(dot_count - 1): + if not target_dir: + return False + target_dir = target_dir.parent + + # Combine the target directory with module_name + if module_name: + module_parts = module_name.split(".") + module_path = target_dir.joinpath(*module_parts) + else: + module_path = target_dir + + if module_path.is_dir(): + # Check if it's a package by looking for __init__.py + init_file = module_path / "__init__.py" + return init_file.exists() + return module_path.with_suffix(".py").exists() + + def _create_absolute_mock_imports( + self, module_name: str, mocked_modules: Dict[str, MagicMock] + ) -> None: + """Creates mock modules for the unresolvable imports and adds them to the + dictionary of mock_modules + + Args: + module_name (str): The module name to be mocked + mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports + + """ + + module_parts = module_name.split(".") + full_module_name = "" + + for idx, sub_module_name in enumerate(module_parts): + full_module_name = ( + sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" + ) + if ( + not self._is_module_importable(full_module_name) + and full_module_name not in mocked_modules + ): + mocked_modules[full_module_name] = MagicMock() + + def get_mocked_modules(self) -> Dict[str, MagicMock]: + """Returns mocked modules for all the dependency errors + as a dictionary for each file in your Kedro project + """ + mocked_modules: Dict[str, MagicMock] = {} + + for filepath in self._project_path.rglob("*.py"): + with open(filepath, "r", encoding="utf-8") as file: + file_content = file.read() + + # parse file content using ast + parsed_content_ast_node: ast.Module = ast.parse(file_content) + self._mock_missing_dependencies( + parsed_content_ast_node, filepath, mocked_modules + ) + + return mocked_modules + + def _mock_missing_dependencies( + self, + parsed_content_ast_node: ast.Module, + file_path: Path, + mocked_modules: Dict[str, MagicMock], + ) -> None: + """Mock missing dependencies + + Args: + parsed_content_ast_node (ast.Module): The AST node to + extract import statements + file_path (Path): The current file path to check + for missing dependencies + mocked_modules: A dictionary of mocked imports + """ + for node in ast.walk(parsed_content_ast_node): + if isinstance(node, ast.Import): + for alias in node.names: + module_name = alias.name + self._create_absolute_mock_imports(module_name, mocked_modules) + 
+ elif isinstance(node, ast.ImportFrom): + module_name = node.module if node.module else "" + level = node.level + + if not module_name or module_name == "": + return + + for alias in node.names: + if self._is_module_importable(module_name): + continue + + # find module within the current package + absolute_module_name = self._convert_relative_imports_to_absolute( + file_path, ("." * level + module_name) + ) + self._create_absolute_mock_imports( + absolute_module_name, mocked_modules + ) + + @staticmethod + def _extract_path_starting_from_package(file_path: Path) -> Union[Path, None]: + # Convert the file path to a list of parts + path_parts = file_path.parts + + try: + package_index = path_parts.index(PACKAGE_NAME) + except ValueError: + return None + + # Extract the path parts starting from the package name + sub_path = Path(*path_parts[package_index:]) + + return sub_path + + @staticmethod + def _convert_relative_imports_to_absolute( + file_path: Path, relative_import: str + ) -> str: + file_path = file_path.resolve() + + # Ensure the package name is in the file path + if PACKAGE_NAME not in file_path.parts: + raise ValueError( + f"Package name '{PACKAGE_NAME}' not found in the file path '{file_path}'." + ) + + # Find the package root directory + package_index = file_path.parts.index(PACKAGE_NAME) + package_root = Path(*file_path.parts[: package_index + 1]) + + # Determine the directory of the current file + file_directory = file_path.parent + + # Count the dots in the relative import to determine how many levels to go up + if relative_import.startswith("."): + # Calculate levels to go up based on leading dots + levels_up = relative_import.count(".") - 1 + target_module = relative_import.split(".")[levels_up + 1] + else: + levels_up = 0 + target_module = relative_import.split(".")[-1] + + # Traverse up the directory structure + target_directory = file_directory + for _ in range(levels_up): + target_directory = target_directory.parent + + # Construct the full module path from the package root + relative_parts = target_directory.relative_to(package_root).parts + absolute_import = ".".join( + [PACKAGE_NAME] + list(relative_parts) + [target_module] + ) + + return absolute_import From 840cb9f6dbbb896691ca010b55838b25d364f39b Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Fri, 16 Aug 2024 21:49:02 -0500 Subject: [PATCH 23/34] working draft for relative imports multi-level --- .../integrations/kedro/data_loader.py | 4 +- .../integrations/kedro/lite_parser_test.py | 179 +++++++----------- 2 files changed, 71 insertions(+), 112 deletions(-) mode change 100644 => 100755 package/kedro_viz/integrations/kedro/lite_parser_test.py diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index c467c7f9e..e2ab6377b 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -22,7 +22,7 @@ from kedro_viz.constants import VIZ_METADATA_ARGS from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite -from kedro_viz.integrations.kedro.lite_parser import LiteParser +from kedro_viz.integrations.kedro.lite_parser_test import LiteParser logger = logging.getLogger(__name__) @@ -146,6 +146,8 @@ def load_data( lite_parser = LiteParser(project_path) mocked_modules = lite_parser.get_mocked_modules() + print(mocked_modules) + sys_modules_patch = sys.modules.copy() sys_modules_patch.update(mocked_modules) diff --git 
a/package/kedro_viz/integrations/kedro/lite_parser_test.py b/package/kedro_viz/integrations/kedro/lite_parser_test.py old mode 100644 new mode 100755 index 53af1a8d5..5991eac20 --- a/package/kedro_viz/integrations/kedro/lite_parser_test.py +++ b/package/kedro_viz/integrations/kedro/lite_parser_test.py @@ -40,43 +40,25 @@ def _is_module_importable(module_name: str) -> bool: return False @staticmethod - def _is_relative_import_resolvable( - file_path: Path, module_name: str, dot_count: int - ) -> bool: - """Checks if a relative module is importable - - Args: - file_path (Path): The file path where the module is mentioned - as an import statement - module_name (str): The name of the module to check - importability - dot_count (int): The length of dots in the module_name - Returns: - Whether the module can be imported - """ - - # Get the current directory of the file - current_dir = file_path.parent - - # Navigate up the directory tree based on the dot count - target_dir = current_dir - for _ in range(dot_count - 1): - if not target_dir: - return False - target_dir = target_dir.parent + def _convert_relative_imports_to_absolute( + file_directory_path: Path, + relative_import: str, + level: int, + package_root_path: Path, + ) -> str: + """This handles cases where there is a relative import in the file""" + # Traverse up the directory structure + target_directory = file_directory_path + for _ in range(level - 1): + target_directory = target_directory.parent - # Combine the target directory with module_name - if module_name: - module_parts = module_name.split(".") - module_path = target_dir.joinpath(*module_parts) - else: - module_path = target_dir + # Construct the full module path from the package root + relative_parts = target_directory.relative_to(package_root_path).parts + absolute_import = ".".join( + [PACKAGE_NAME] + list(relative_parts) + [relative_import] + ) - if module_path.is_dir(): - # Check if it's a package by looking for __init__.py - init_file = module_path / "__init__.py" - return init_file.exists() - return module_path.with_suffix(".py").exists() + return absolute_import def _create_absolute_mock_imports( self, module_name: str, mocked_modules: Dict[str, MagicMock] @@ -103,28 +85,11 @@ def _create_absolute_mock_imports( ): mocked_modules[full_module_name] = MagicMock() - def get_mocked_modules(self) -> Dict[str, MagicMock]: - """Returns mocked modules for all the dependency errors - as a dictionary for each file in your Kedro project - """ - mocked_modules: Dict[str, MagicMock] = {} - - for filepath in self._project_path.rglob("*.py"): - with open(filepath, "r", encoding="utf-8") as file: - file_content = file.read() - - # parse file content using ast - parsed_content_ast_node: ast.Module = ast.parse(file_content) - self._mock_missing_dependencies( - parsed_content_ast_node, filepath, mocked_modules - ) - - return mocked_modules - def _mock_missing_dependencies( self, parsed_content_ast_node: ast.Module, - file_path: Path, + file_directory_path: Path, + package_root_path: Path, mocked_modules: Dict[str, MagicMock], ) -> None: """Mock missing dependencies @@ -132,9 +97,10 @@ def _mock_missing_dependencies( Args: parsed_content_ast_node (ast.Module): The AST node to extract import statements - file_path (Path): The current file path to check + file_directory_path (Path): The current file path to check for missing dependencies - mocked_modules: A dictionary of mocked imports + package_root_path (Path): The root package directory path + mocked_modules (Dict[str, MagicMock]): A 
dictionary of mocked imports """ for node in ast.walk(parsed_content_ast_node): if isinstance(node, ast.Import): @@ -150,69 +116,60 @@ def _mock_missing_dependencies( return for alias in node.names: + # absolute modules in the env or within the + # package starting from package root if self._is_module_importable(module_name): continue - # find module within the current package - absolute_module_name = self._convert_relative_imports_to_absolute( - file_path, ("." * level + module_name) - ) - self._create_absolute_mock_imports( - absolute_module_name, mocked_modules - ) + # convert relative imports to absolute imports + # based on leading dots + if level > 0: + absolute_module_name = ( + self._convert_relative_imports_to_absolute( + file_directory_path, + module_name, + level, + package_root_path, + ) + ) + self._create_absolute_mock_imports( + absolute_module_name, mocked_modules + ) + + def get_mocked_modules(self) -> Dict[str, MagicMock]: + """Returns mocked modules for all the dependency errors + as a dictionary for each file in your Kedro project + """ + mocked_modules: Dict[str, MagicMock] = {} + package_root_path = None - @staticmethod - def _extract_path_starting_from_package(file_path: Path) -> Union[Path, None]: - # Convert the file path to a list of parts - path_parts = file_path.parts + for file_path in self._project_path.rglob("*.py"): + with open(file_path, "r", encoding="utf-8") as file: + file_content = file.read() - try: - package_index = path_parts.index(PACKAGE_NAME) - except ValueError: - return None + # parse file content using ast + parsed_content_ast_node: ast.Module = ast.parse(file_content) + file_path = file_path.resolve() - # Extract the path parts starting from the package name - sub_path = Path(*path_parts[package_index:]) + # Ensure the package name is in the file path + if PACKAGE_NAME not in file_path.parts: + # we are only mocking the dependencies + # inside the package + continue - return sub_path + # Find the package root directory + if not package_root_path: + package_index = file_path.parts.index(PACKAGE_NAME) + package_root_path = Path(*file_path.parts[: package_index + 1]) - @staticmethod - def _convert_relative_imports_to_absolute( - file_path: Path, relative_import: str - ) -> str: - file_path = file_path.resolve() + # Determine the directory of the current file + file_directory_path = file_path.parent - # Ensure the package name is in the file path - if PACKAGE_NAME not in file_path.parts: - raise ValueError( - f"Package name '{PACKAGE_NAME}' not found in the file path '{file_path}'." 
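+            # Illustration (hypothetical layout): for a file at
+            # <package_root>/pipelines/reporting/nodes.py containing
+            # "from ..utils import helper" (level == 2), the call below walks
+            # one directory up to <package_root>/pipelines and rebuilds the
+            # import as "demo_project.pipelines.utils" before mocking it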
+ self._mock_missing_dependencies( + parsed_content_ast_node, + file_directory_path, + package_root_path, + mocked_modules, ) - # Find the package root directory - package_index = file_path.parts.index(PACKAGE_NAME) - package_root = Path(*file_path.parts[: package_index + 1]) - - # Determine the directory of the current file - file_directory = file_path.parent - - # Count the dots in the relative import to determine how many levels to go up - if relative_import.startswith("."): - # Calculate levels to go up based on leading dots - levels_up = relative_import.count(".") - 1 - target_module = relative_import.split(".")[levels_up + 1] - else: - levels_up = 0 - target_module = relative_import.split(".")[-1] - - # Traverse up the directory structure - target_directory = file_directory - for _ in range(levels_up): - target_directory = target_directory.parent - - # Construct the full module path from the package root - relative_parts = target_directory.relative_to(package_root).parts - absolute_import = ".".join( - [PACKAGE_NAME] + list(relative_parts) + [target_module] - ) - - return absolute_import + return mocked_modules From 76e3c2b055f71d08c6e7d96f429aae055a6f00ff Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Sun, 18 Aug 2024 21:09:34 -0500 Subject: [PATCH 24/34] remove resolving relative dependencies --- .../integrations/kedro/data_loader.py | 2 +- .../integrations/kedro/lite_parser_test.py | 91 +++++-------------- 2 files changed, 22 insertions(+), 71 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index e2ab6377b..c4fa56464 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -143,7 +143,7 @@ def load_data( and the session store. """ if is_lite: - lite_parser = LiteParser(project_path) + lite_parser = LiteParser(project_path, package_name) mocked_modules = lite_parser.get_mocked_modules() print(mocked_modules) diff --git a/package/kedro_viz/integrations/kedro/lite_parser_test.py b/package/kedro_viz/integrations/kedro/lite_parser_test.py index 5991eac20..93b744908 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser_test.py +++ b/package/kedro_viz/integrations/kedro/lite_parser_test.py @@ -4,12 +4,10 @@ import importlib.util import logging from pathlib import Path -from typing import Any, Dict, List, Union +from typing import Dict from unittest.mock import MagicMock -from kedro.framework.project import PACKAGE_NAME logger = logging.getLogger(__name__) -PACKAGE_NAME = "demo_project" class LiteParser: @@ -19,8 +17,9 @@ class LiteParser: project_path (Path): the path where the Kedro project is located. 
""" - def __init__(self, project_path: Path) -> None: - self._project_path = project_path.resolve() + def __init__(self, project_path: Path, package_name: str) -> None: + self._project_path = project_path + self._package_name = package_name @staticmethod def _is_module_importable(module_name: str) -> bool: @@ -36,31 +35,10 @@ def _is_module_importable(module_name: str) -> bool: if importlib.util.find_spec(module_name) is None: return False return True - except (ModuleNotFoundError, ImportError, ValueError): + except (ImportError, ModuleNotFoundError, ValueError): return False - @staticmethod - def _convert_relative_imports_to_absolute( - file_directory_path: Path, - relative_import: str, - level: int, - package_root_path: Path, - ) -> str: - """This handles cases where there is a relative import in the file""" - # Traverse up the directory structure - target_directory = file_directory_path - for _ in range(level - 1): - target_directory = target_directory.parent - - # Construct the full module path from the package root - relative_parts = target_directory.relative_to(package_root_path).parts - absolute_import = ".".join( - [PACKAGE_NAME] + list(relative_parts) + [relative_import] - ) - - return absolute_import - - def _create_absolute_mock_imports( + def _create_mock_imports( self, module_name: str, mocked_modules: Dict[str, MagicMock] ) -> None: """Creates mock modules for the unresolvable imports and adds them to the @@ -88,60 +66,41 @@ def _create_absolute_mock_imports( def _mock_missing_dependencies( self, parsed_content_ast_node: ast.Module, - file_directory_path: Path, - package_root_path: Path, mocked_modules: Dict[str, MagicMock], ) -> None: - """Mock missing dependencies - + """Mock missing project dependencies + [TODO : Relative imports with dot, even if mocked had issues] Args: parsed_content_ast_node (ast.Module): The AST node to extract import statements - file_directory_path (Path): The current file path to check - for missing dependencies - package_root_path (Path): The root package directory path mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports """ for node in ast.walk(parsed_content_ast_node): if isinstance(node, ast.Import): for alias in node.names: module_name = alias.name - self._create_absolute_mock_imports(module_name, mocked_modules) + self._create_mock_imports(module_name, mocked_modules) elif isinstance(node, ast.ImportFrom): module_name = node.module if node.module else "" level = node.level - if not module_name or module_name == "": + if ( + not module_name + or module_name == "" + or self._package_name in module_name + ): return - for alias in node.names: - # absolute modules in the env or within the - # package starting from package root - if self._is_module_importable(module_name): - continue - - # convert relative imports to absolute imports - # based on leading dots - if level > 0: - absolute_module_name = ( - self._convert_relative_imports_to_absolute( - file_directory_path, - module_name, - level, - package_root_path, - ) - ) - self._create_absolute_mock_imports( - absolute_module_name, mocked_modules - ) - + # absolute modules in the env + if level == 0: + self._create_absolute_mock_imports(module_name, mocked_modules) + def get_mocked_modules(self) -> Dict[str, MagicMock]: """Returns mocked modules for all the dependency errors as a dictionary for each file in your Kedro project """ mocked_modules: Dict[str, MagicMock] = {} - package_root_path = None for file_path in self._project_path.rglob("*.py"): with open(file_path, "r", 
encoding="utf-8") as file: @@ -152,23 +111,15 @@ def get_mocked_modules(self) -> Dict[str, MagicMock]: file_path = file_path.resolve() # Ensure the package name is in the file path - if PACKAGE_NAME not in file_path.parts: + # [TODO: At this moment we are focussing on mocking missing + # dependencies inside the package] + if self._package_name not in file_path.parts: # we are only mocking the dependencies # inside the package continue - # Find the package root directory - if not package_root_path: - package_index = file_path.parts.index(PACKAGE_NAME) - package_root_path = Path(*file_path.parts[: package_index + 1]) - - # Determine the directory of the current file - file_directory_path = file_path.parent - self._mock_missing_dependencies( parsed_content_ast_node, - file_directory_path, - package_root_path, mocked_modules, ) From 2d18e9ae8bae656ae2793a84416c9e273f0d30d2 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 19 Aug 2024 08:55:08 -0500 Subject: [PATCH 25/34] test --- package/kedro_viz/integrations/kedro/lite_parser_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/kedro_viz/integrations/kedro/lite_parser_test.py b/package/kedro_viz/integrations/kedro/lite_parser_test.py index 93b744908..7b37ac966 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser_test.py +++ b/package/kedro_viz/integrations/kedro/lite_parser_test.py @@ -94,7 +94,8 @@ def _mock_missing_dependencies( # absolute modules in the env if level == 0: - self._create_absolute_mock_imports(module_name, mocked_modules) + self._create_mock_imports(module_name, mocked_modules) + def get_mocked_modules(self) -> Dict[str, MagicMock]: """Returns mocked modules for all the dependency errors From 16e1ef59ea4dae94601073b232b6bbebfccb64ba Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 19 Aug 2024 13:15:26 -0500 Subject: [PATCH 26/34] working draft --- .../integrations/kedro/data_loader.py | 4 +- .../integrations/kedro/lite_parser.py | 238 +++++----------- .../integrations/kedro/lite_parser_test.py | 127 --------- .../test_integrations/test_lite_parser.py | 266 +++++------------- 4 files changed, 137 insertions(+), 498 deletions(-) mode change 100644 => 100755 package/kedro_viz/integrations/kedro/lite_parser.py delete mode 100755 package/kedro_viz/integrations/kedro/lite_parser_test.py diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index c4fa56464..195b5870a 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -22,7 +22,7 @@ from kedro_viz.constants import VIZ_METADATA_ARGS from kedro_viz.integrations.kedro.data_catalog_lite import DataCatalogLite -from kedro_viz.integrations.kedro.lite_parser_test import LiteParser +from kedro_viz.integrations.kedro.lite_parser import LiteParser logger = logging.getLogger(__name__) @@ -146,8 +146,6 @@ def load_data( lite_parser = LiteParser(project_path, package_name) mocked_modules = lite_parser.get_mocked_modules() - print(mocked_modules) - sys_modules_patch = sys.modules.copy() sys_modules_patch.update(mocked_modules) diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py old mode 100644 new mode 100755 index 23b108774..5e894b276 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -3,9 +3,8 @@ import ast import importlib.util import logging -import re from 
pathlib import Path -from typing import Any, Dict, List +from typing import Dict, Union from unittest.mock import MagicMock logger = logging.getLogger(__name__) @@ -16,39 +15,14 @@ class LiteParser: Args: project_path (Path): the path where the Kedro project is located. + package_name (Union[str, None]): The name of the current package """ - def __init__(self, project_path: Path) -> None: + def __init__( + self, project_path: Path, package_name: Union[str, None] = None + ) -> None: self._project_path = project_path - - @staticmethod - def _get_import_statements_from_ast( - parsed_content_ast_node: ast.Module, - ) -> List[str]: - """Get all the import statements from an AST Node. - - Args: - parsed_content_ast_node (ast.Module): The AST node to - extract import statements - Returns: - A list of import statements as strings - """ - import_statements: List[str] = [] - - for node in ast.walk(parsed_content_ast_node): - if isinstance(node, ast.Import): - for alias in node.names: - import_statements.append(f"import {alias.name}") - elif isinstance(node, ast.ImportFrom): - module_name = node.module if node.module else "" - level = node.level - for alias in node.names: - relative_module_name = "." * level + module_name - import_statements.append( - f"from {relative_module_name} import {alias.name}" - ) - - return import_statements + self._package_name = package_name @staticmethod def _is_module_importable(module_name: str) -> bool: @@ -67,169 +41,87 @@ def _is_module_importable(module_name: str) -> bool: except (ImportError, ModuleNotFoundError, ValueError): return False - @staticmethod - def _is_relative_import_resolvable( - file_path: Path, module_name: str, dot_count: int - ) -> bool: - """Checks if a relative module is importable - - Args: - file_path (Path): The file path where the module is mentioned - as an import statement - module_name (str): The name of the module to check - importability - dot_count (int): The length of dots in the module_name - Returns: - Whether the module can be imported - """ - # Get the current directory of the file - current_dir = file_path.parent - - # Navigate up the directory tree based on the dot count - target_dir = current_dir - for _ in range(dot_count - 1): - if not target_dir: - return False - target_dir = target_dir.parent - - # Combine the target directory with module_name - if module_name: - module_parts = module_name.split(".") - module_path = target_dir.joinpath(*module_parts) - else: - module_path = target_dir - - if module_path.is_dir(): - # Check if it's a package by looking for __init__.py - init_file = module_path / "__init__.py" - return init_file.exists() - return module_path.with_suffix(".py").exists() - - @staticmethod - def _is_valid_import_stmt(statement: Any) -> bool: - """Checks for a valid import statement + def _create_mock_imports( + self, module_name: str, mocked_modules: Dict[str, MagicMock] + ) -> None: + """Creates mock modules for unresolvable imports and adds them to the + dictionary of mocked_modules Args: - statement (Any): The import statement to validate + module_name (str): The module name to be mocked + mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports - Returns: - Whether the statement is a valid import string """ - if not isinstance(statement, str) or not statement.strip(): - return False - # Regex to match different import statements - import_match = re.match(r"(from|import)\s+(\.+)?([a-zA-Z0-9_.]+)", statement) + module_parts = module_name.split(".") + full_module_name = "" - if not import_match: - 
return False - - return True - - @staticmethod - def _create_mock_imports( - unresolvable_imports: List[str], mock_modules: Dict[str, MagicMock] + for idx, sub_module_name in enumerate(module_parts): + full_module_name = ( + sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" + ) + if ( + not self._is_module_importable(full_module_name) + and full_module_name not in mocked_modules + ): + mocked_modules[full_module_name] = MagicMock() + + def _mock_missing_dependencies( + self, + parsed_content_ast_node: ast.Module, + mocked_modules: Dict[str, MagicMock], ) -> None: - """Creates mock modules for the unresolvable imports and adds them to the - dictionary of mock_modules - - Args: - unresolvable_imports (List[str]): A list of import statements - that are not resolved - mock_modules (Dict[str, MagicMock]): A dictionary of mocked imports - - """ - for statement in unresolvable_imports: - module_name = statement.split(" ")[1] - module_parts = module_name.split(".") - full_module_name = "" - for idx, sub_module_name in enumerate(module_parts): - full_module_name = ( - sub_module_name - if idx == 0 - else f"{full_module_name}.{sub_module_name}" - ) - if full_module_name not in mock_modules: - mock_modules[full_module_name] = MagicMock() - - def _get_unresolvable_imports( - self, file_path: Path, import_statements: List[str] - ) -> List[str]: - """Retrieves all the unresolved import statements from a file + """Mock missing project dependencies Args: - file_path (Path): The file path where the import statements are mentioned - import_statements (List[str]): A list of all the import statements mentioned in - the file - Returns: - A list of import statements that are not resolved + parsed_content_ast_node (ast.Module): The AST node to + extract import statements + mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports """ - unresolvable_imports: List[str] = [] - - for statement in import_statements: - if self._is_valid_import_stmt(statement): - if statement.startswith("import "): - # standard library imports, only considering root module - module_name = statement.split(" ")[1].split(".")[0] - - if not self._is_module_importable(module_name): - unresolvable_imports.append(statement) - else: - # relative imports - module_name = statement.split(" ")[1] - - # Get the dot count for relative imports - dot_count = len(module_name) - len(module_name.lstrip(".")) - - if dot_count > 0: - if not self._is_relative_import_resolvable( - file_path, module_name[dot_count:], dot_count - ): - unresolvable_imports.append(statement) - continue - - # absolute imports, only considering root module - module_name = module_name.split(".")[0] + for node in ast.walk(parsed_content_ast_node): + if isinstance(node, ast.Import): + for alias in node.names: + module_name = alias.name + self._create_mock_imports(module_name, mocked_modules) - if not self._is_module_importable(module_name): - unresolvable_imports.append(statement) + elif isinstance(node, ast.ImportFrom): + module_name = node.module if node.module else "" + level = node.level - return unresolvable_imports + if ( + not module_name + or module_name == "" + or (self._package_name and self._package_name in module_name) + ): + continue - def _parse_project_for_imports(self) -> Dict[Path, List[str]]: - """Loops through all the python files, parses each file using - AST and creates a map containing the file path and the extracted - import statements + # absolute modules in the env + if level == 0: + self._create_mock_imports(module_name, 
mocked_modules) - Returns: - A dictionary of file path and corresponding import statements + def get_mocked_modules(self) -> Dict[str, MagicMock]: + """Returns mocked modules for all the dependency errors + as a dictionary for each file in your Kedro project """ - all_imports: Dict[Path, List[str]] = {} + mocked_modules: Dict[str, MagicMock] = {} - for filepath in self._project_path.rglob("*.py"): - with open(filepath, "r", encoding="utf-8") as file: + for file_path in self._project_path.rglob("*.py"): + with open(file_path, "r", encoding="utf-8") as file: file_content = file.read() # parse file content using ast parsed_content_ast_node: ast.Module = ast.parse(file_content) - import_statements = self._get_import_statements_from_ast( - parsed_content_ast_node - ) - all_imports[filepath] = import_statements - return all_imports + file_path = file_path.resolve() - def get_mocked_modules(self) -> Dict[str, MagicMock]: - """Returns mocked modules for all the dependency errors - as a dictionary for each file in your Kedro project - """ - all_imports: Dict[Path, List[str]] = self._parse_project_for_imports() - mocked_modules: Dict[str, MagicMock] = {} + # Ensure the package name is in the file path + if self._package_name and self._package_name not in file_path.parts: + # we are only mocking the dependencies + # inside the package + continue - for file_path, imports in all_imports.items(): - unresolvable_imports: List[str] = self._get_unresolvable_imports( - file_path, imports + self._mock_missing_dependencies( + parsed_content_ast_node, + mocked_modules, ) - # Create mock imports - self._create_mock_imports(unresolvable_imports, mocked_modules) return mocked_modules diff --git a/package/kedro_viz/integrations/kedro/lite_parser_test.py b/package/kedro_viz/integrations/kedro/lite_parser_test.py deleted file mode 100755 index 7b37ac966..000000000 --- a/package/kedro_viz/integrations/kedro/lite_parser_test.py +++ /dev/null @@ -1,127 +0,0 @@ -"""`kedro_viz.integrations.kedro.lite_parser` defines a Kedro parser using AST.""" - -import ast -import importlib.util -import logging -from pathlib import Path -from typing import Dict -from unittest.mock import MagicMock - -logger = logging.getLogger(__name__) - - -class LiteParser: - """Represents a Kedro Parser which uses AST - - Args: - project_path (Path): the path where the Kedro project is located. 
- """ - - def __init__(self, project_path: Path, package_name: str) -> None: - self._project_path = project_path - self._package_name = package_name - - @staticmethod - def _is_module_importable(module_name: str) -> bool: - """Checks if a module is importable - - Args: - module_name (str): The name of the module to check - importability - Returns: - Whether the module can be imported - """ - try: - if importlib.util.find_spec(module_name) is None: - return False - return True - except (ImportError, ModuleNotFoundError, ValueError): - return False - - def _create_mock_imports( - self, module_name: str, mocked_modules: Dict[str, MagicMock] - ) -> None: - """Creates mock modules for the unresolvable imports and adds them to the - dictionary of mock_modules - - Args: - module_name (str): The module name to be mocked - mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports - - """ - - module_parts = module_name.split(".") - full_module_name = "" - - for idx, sub_module_name in enumerate(module_parts): - full_module_name = ( - sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" - ) - if ( - not self._is_module_importable(full_module_name) - and full_module_name not in mocked_modules - ): - mocked_modules[full_module_name] = MagicMock() - - def _mock_missing_dependencies( - self, - parsed_content_ast_node: ast.Module, - mocked_modules: Dict[str, MagicMock], - ) -> None: - """Mock missing project dependencies - [TODO : Relative imports with dot, even if mocked had issues] - Args: - parsed_content_ast_node (ast.Module): The AST node to - extract import statements - mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports - """ - for node in ast.walk(parsed_content_ast_node): - if isinstance(node, ast.Import): - for alias in node.names: - module_name = alias.name - self._create_mock_imports(module_name, mocked_modules) - - elif isinstance(node, ast.ImportFrom): - module_name = node.module if node.module else "" - level = node.level - - if ( - not module_name - or module_name == "" - or self._package_name in module_name - ): - return - - # absolute modules in the env - if level == 0: - self._create_mock_imports(module_name, mocked_modules) - - - def get_mocked_modules(self) -> Dict[str, MagicMock]: - """Returns mocked modules for all the dependency errors - as a dictionary for each file in your Kedro project - """ - mocked_modules: Dict[str, MagicMock] = {} - - for file_path in self._project_path.rglob("*.py"): - with open(file_path, "r", encoding="utf-8") as file: - file_content = file.read() - - # parse file content using ast - parsed_content_ast_node: ast.Module = ast.parse(file_content) - file_path = file_path.resolve() - - # Ensure the package name is in the file path - # [TODO: At this moment we are focussing on mocking missing - # dependencies inside the package] - if self._package_name not in file_path.parts: - # we are only mocking the dependencies - # inside the package - continue - - self._mock_missing_dependencies( - parsed_content_ast_node, - mocked_modules, - ) - - return mocked_modules diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py index 4c1bd70ad..cc160f466 100644 --- a/package/tests/test_integrations/test_lite_parser.py +++ b/package/tests/test_integrations/test_lite_parser.py @@ -1,213 +1,89 @@ import ast -from pathlib import Path -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch import pytest from 
kedro_viz.integrations.kedro.lite_parser import LiteParser +@pytest.fixture +def sample_project_path(tmp_path): + # Create a sample directory structure + package_dir = tmp_path / "mock_spaceflights" + package_dir.mkdir() + (package_dir / "__init__.py").touch() + (package_dir / "data_processing.py").write_text( + "import os\nimport nonexistentmodule" + ) + return tmp_path + + +@pytest.fixture +def lite_parser(sample_project_path): + return LiteParser( + project_path=sample_project_path, package_name="mock_spaceflights" + ) + + class TestLiteParser: - def test_get_import_statements_from_ast(self): + def test_is_module_importable_existing_module(self, lite_parser): + assert lite_parser._is_module_importable("os") is True + + def test_is_module_importable_nonexistent_module(self, lite_parser): + assert lite_parser._is_module_importable("nonexistentmodule") is False + + def test_is_module_importable_importerror(self, lite_parser): + with patch("importlib.util.find_spec", side_effect=ImportError): + assert lite_parser._is_module_importable("nonexistentmodule") is False + + def test_is_module_importable_modulenotfounderror(self, lite_parser): + with patch("importlib.util.find_spec", side_effect=ModuleNotFoundError): + assert lite_parser._is_module_importable("nonexistentmodule") is False + + def test_is_module_importable_valueerror(self, lite_parser): + with patch("importlib.util.find_spec", side_effect=ValueError): + assert lite_parser._is_module_importable("nonexistentmodule") is False + + def test_create_mock_imports(self, lite_parser): + mocked_modules = {} + lite_parser._create_mock_imports("nonexistentmodule", mocked_modules) + assert "nonexistentmodule" in mocked_modules + assert isinstance(mocked_modules["nonexistentmodule"], MagicMock) + + def test_mock_missing_dependencies(self, lite_parser): + mocked_modules = {} content = ( "import os\n" - "import sys\n" - "from pathlib import Path\n" - "from collections import namedtuple\n" + "import nonexistentmodule\n" + "from math import sqrt\n" + "from mock_spaceflights import data_processing" "# import test" ) + parsed_content_ast_node = ast.parse(content) - expected_imports = [ - "import os", - "import sys", - "from pathlib import Path", - "from collections import namedtuple", - ] - assert ( - LiteParser._get_import_statements_from_ast(parsed_content_ast_node) - == expected_imports - ) + lite_parser._mock_missing_dependencies(parsed_content_ast_node, mocked_modules) - def test_is_module_importable(self): - assert LiteParser._is_module_importable("os") is True - assert LiteParser._is_module_importable("non_existent_module") is False - - def test_valid_relative_import(self, tmp_path): - # Create a directory structure - package_dir = tmp_path / "project" / "subpackage" - package_dir.mkdir(parents=True) - - # Create a valid module file - module_file = package_dir / "module.py" - module_file.touch() - - # Check if the relative import is resolvable - file_path = package_dir / "another_module.py" - file_path.touch() - - assert LiteParser._is_relative_import_resolvable(Path(file_path), "subpackage.module", 1) == True - - def test_valid_relative_import_with_dots(self, tmp_path): - # Create a directory structure - root_dir = tmp_path / "project" - subpackage_dir = root_dir / "subpackage" - subpackage_dir.mkdir(parents=True) - - # Create a valid module file - module_file = root_dir / "module.py" - module_file.touch() - - # Check if the relative import is resolvable (one level up) - file_path = subpackage_dir / "another_module.py" - file_path.touch() - - 
assert LiteParser._is_relative_import_resolvable(file_path, "module", 2) == True - - def test_invalid_relative_import(self, tmp_path): - # Create a directory structure - package_dir = tmp_path / "project" / "subpackage" - package_dir.mkdir(parents=True) - - # Create a file that will simulate an import from a non-existing module - file_path = package_dir / "another_module.py" - file_path.touch() - - assert LiteParser._is_relative_import_resolvable(file_path, "nonexistent.module", 1) == False - - def test_import_of_package(self, tmp_path): - # Create a directory structure with a package - package_dir = tmp_path / "project" / "subpackage" - package_dir.mkdir(parents=True) - - # Create __init__.py to make it a package - init_file = package_dir / "__init__.py" - init_file.touch() - - # Check if the relative import is resolvable for a package - file_path = tmp_path / "project" / "module.py" - file_path.touch() - - assert LiteParser._is_relative_import_resolvable(file_path, "subpackage", 1) == True - - def test_invalid_path_navigation(self, tmp_path): - # Create a directory structure - subpackage_dir = tmp_path / "project" / "subpackage" - subpackage_dir.mkdir(parents=True) - - # Create a file - file_path = subpackage_dir / "module.py" - file_path.touch() - - # Trying to go up too many levels should fail - assert LiteParser._is_relative_import_resolvable(file_path, "module", 5) == False - - @pytest.mark.parametrize( - "statement,expected", - [ - ("import os", True), - ("from os import path", True), - ("", False), - ("import", False), - (123, False), - ], - ) - def test_is_valid_import_stmt(self, statement, expected): - assert LiteParser._is_valid_import_stmt(statement) == expected - - @pytest.mark.parametrize( - "is_module_importable, is_relative_import_resolvable, import_statements, expected_unresolvable", - [ - ( - True, - True, - [ - "import os", - "from sys import path", - "import non_existent_module", - "from non_existent_module import path", - "from ...pipelines.nodes import test_func" - ], - [], - ), - ( - False, - False, - [ - "import os", - "from sys import path", - "import non_existent_module", - "from non_existent_module import path", - "from ...pipelines.nodes import test_func" - ], - [ - "import os", - "from sys import path", - "import non_existent_module", - "from non_existent_module import path", - "from ...pipelines.nodes import test_func" - ], - ), - (True, False, ["import os", "import non_existent_module"], []), - ], - ) - def test_get_unresolvable_imports( - self, - is_module_importable, - is_relative_import_resolvable, - import_statements, - expected_unresolvable, - mocker, - ): - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser.LiteParser._is_module_importable", - return_value=is_module_importable, - ) - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser.LiteParser._is_relative_import_resolvable", - return_value=is_relative_import_resolvable, - ) - file_path = Path("/fake/path") - lite_parser_obj = LiteParser(file_path) - assert ( - lite_parser_obj._get_unresolvable_imports(file_path, import_statements) - == expected_unresolvable - ) + assert "nonexistentmodule" in mocked_modules + assert "os" not in mocked_modules + assert "math" not in mocked_modules - def test_parse_project_for_imports(self, tmp_path): - file1 = tmp_path / "file1.py" - file2 = tmp_path / "file2.py" - file1.write_text("import os\nfrom sys import path") - file2.write_text("import ast\nfrom collections import namedtuple") - expected_imports = { - file1: ["import os", "from sys import path"], - 
file2: ["import ast", "from collections import namedtuple"], - } - lite_parser_obj = LiteParser(tmp_path) - assert lite_parser_obj._parse_project_for_imports() == expected_imports - - def test_create_mock_imports(self): - unresolvable_imports = [ - "import non_existent_module", - "from non_existent_module import path", - ] - mock_modules = {} - LiteParser._create_mock_imports(unresolvable_imports, mock_modules) - assert "non_existent_module" in mock_modules - assert isinstance(mock_modules["non_existent_module"], MagicMock) - - def test_get_mocked_modules(self, tmp_path, mocker): - file1 = tmp_path / "file1.py" - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser.LiteParser._parse_project_for_imports", - return_value={file1: ["import os", "from sys import path"]}, - ) - mocker.patch( - "kedro_viz.integrations.kedro.lite_parser.LiteParser._get_unresolvable_imports", - return_value=["from sys import path"], + def test_get_mocked_modules(self, lite_parser): + mocked_modules = lite_parser.get_mocked_modules() + + assert "nonexistentmodule" in mocked_modules + assert isinstance(mocked_modules["nonexistentmodule"], MagicMock) + assert "os" not in mocked_modules + + def test_get_mocked_modules_for_non_package_path( + self, sample_project_path, lite_parser + ): + other_package_dir = sample_project_path / "mock_aircrafts" + other_package_dir.mkdir() + (other_package_dir / "__init__.py").touch() + (other_package_dir / "data_science.py").write_text( + "import os\nfrom data_processing import datascience_dependency" ) - lite_parser_obj = LiteParser(tmp_path) - mocked_modules = lite_parser_obj.get_mocked_modules() + mocked_modules = lite_parser.get_mocked_modules() - assert "sys" in mocked_modules - assert isinstance(mocked_modules["sys"], MagicMock) + assert "data_processing" not in mocked_modules From 8c6d87872ca1571c004cfcafb1372b418ea6070c Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Mon, 19 Aug 2024 17:31:08 -0500 Subject: [PATCH 27/34] modify test and standalone support for lite --- .../integrations/kedro/data_loader.py | 26 +++++--- .../integrations/kedro/lite_parser.py | 62 ++++++++++++++++--- .../test_integrations/test_lite_parser.py | 27 ++++++-- 3 files changed, 93 insertions(+), 22 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index 195b5870a..f2bff7900 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -77,18 +77,11 @@ def _load_data_helper( project_path: Path, env: Optional[str] = None, include_hooks: bool = False, - package_name: Optional[str] = None, extra_params: Optional[Dict[str, Any]] = None, is_lite: bool = False, ): """Helper to load data from a Kedro project.""" - if package_name: - configure_project(package_name) - else: - # bootstrap project when viz is run in dev mode - bootstrap_project(project_path) - with KedroSession.create( project_path=project_path, env=env, @@ -142,19 +135,34 @@ def load_data( A tuple containing the data catalog and the pipeline dictionary and the session store. 
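+
+    Example:
+        A minimal, illustrative call (the project path below is
+        hypothetical):
+
+        >>> catalog, pipelines, session_store, stats = load_data(
+        ...     Path("/path/to/kedro/project"), is_lite=True
+        ... )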
""" + if package_name: + configure_project(package_name) + else: + # bootstrap project when viz is run in dev mode + bootstrap_project(project_path) + if is_lite: lite_parser = LiteParser(project_path, package_name) mocked_modules = lite_parser.get_mocked_modules() + if len(mocked_modules): + logger.warning( + "Kedro-Viz has mocked the following dependencies for lite-mode.\n" + "%s \n" + "In order to get a complete experience of Viz, " + "please install the missing Kedro project dependencies\n", + list(mocked_modules.keys()), + ) + sys_modules_patch = sys.modules.copy() sys_modules_patch.update(mocked_modules) # Patch actual sys modules with patch.dict("sys.modules", sys_modules_patch): return _load_data_helper( - project_path, env, include_hooks, package_name, extra_params, is_lite + project_path, env, include_hooks, extra_params, is_lite ) else: return _load_data_helper( - project_path, env, include_hooks, package_name, extra_params, is_lite + project_path, env, include_hooks, extra_params, is_lite ) diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index 5e894b276..4d4ae05f7 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -23,6 +23,7 @@ def __init__( ) -> None: self._project_path = project_path self._package_name = package_name + self._project_file_paths = set(self._project_path.rglob("*.py")) @staticmethod def _is_module_importable(module_name: str) -> bool: @@ -38,9 +39,54 @@ def _is_module_importable(module_name: str) -> bool: if importlib.util.find_spec(module_name) is None: return False return True - except (ImportError, ModuleNotFoundError, ValueError): + except ModuleNotFoundError as mnf_exc: + logger.debug( + "ModuleNotFoundError in resolving %s : %s", module_name, mnf_exc + ) + return False + except ImportError as imp_exc: + logger.debug("ImportError in resolving %s : %s", module_name, imp_exc) + return False + except ValueError as val_exc: + logger.debug("ValueError in resolving %s : %s", module_name, val_exc) + return False + # pylint: disable=broad-except + except Exception as exc: # pragma: no cover + logger.debug( + "An exception occurred while resolving %s : %s", module_name, exc + ) return False + def _is_relative_import(self, module_name: str): + """Checks if a module is a relative import. 
This is needed + in dev or standalone mode when the package_name is None and + internal package files have unresolved external dependencies + + Args: + module_name (str): The name of the module to check + importability + + Example: + >>> lite_parser_obj = LiteParser("path/to/kedro/project") + >>> module_name = "kedro_project_package.pipelines.reporting.nodes" + >>> lite_parser_obj._is_relative_import(module_name) + True + + Returns: + Whether the module is a relative import starting + from the root package dir + """ + relative_module_path = module_name.replace(".", "/") + + # Check if the relative_module_path + # is a substring of any Python file path + is_relative_import_path = any( + relative_module_path in str(project_file_path) + for project_file_path in self._project_file_paths + ) + + return is_relative_import_path + def _create_mock_imports( self, module_name: str, mocked_modules: Dict[str, MagicMock] ) -> None: @@ -52,7 +98,6 @@ def _create_mock_imports( mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports """ - module_parts = module_name.split(".") full_module_name = "" @@ -88,10 +133,13 @@ def _mock_missing_dependencies( module_name = node.module if node.module else "" level = node.level - if ( - not module_name - or module_name == "" - or (self._package_name and self._package_name in module_name) + if not module_name or module_name == "": + continue + + if (self._package_name and self._package_name in module_name) or ( + # dev or standalone mode + not self._package_name + and self._is_relative_import(module_name) ): continue @@ -105,7 +153,7 @@ def get_mocked_modules(self) -> Dict[str, MagicMock]: """ mocked_modules: Dict[str, MagicMock] = {} - for file_path in self._project_path.rglob("*.py"): + for file_path in self._project_file_paths: with open(file_path, "r", encoding="utf-8") as file: file_content = file.read() diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py index cc160f466..1bfdb9fc8 100644 --- a/package/tests/test_integrations/test_lite_parser.py +++ b/package/tests/test_integrations/test_lite_parser.py @@ -44,6 +44,17 @@ def test_is_module_importable_valueerror(self, lite_parser): with patch("importlib.util.find_spec", side_effect=ValueError): assert lite_parser._is_module_importable("nonexistentmodule") is False + def test_is_relative_import(self, lite_parser): + assert ( + lite_parser._is_relative_import("mock_spaceflights.data_processing") is True + ) + assert ( + lite_parser._is_relative_import( + "mock_spaceflights.data_processing.random_module" + ) + is False + ) + def test_create_mock_imports(self, lite_parser): mocked_modules = {} lite_parser._create_mock_imports("nonexistentmodule", mocked_modules) @@ -56,7 +67,8 @@ def test_mock_missing_dependencies(self, lite_parser): "import os\n" "import nonexistentmodule\n" "from math import sqrt\n" - "from mock_spaceflights import data_processing" + "from mock_spaceflights import data_processing\n" + "from . 
import some_module\n" "# import test" ) @@ -66,6 +78,7 @@ def test_mock_missing_dependencies(self, lite_parser): assert "nonexistentmodule" in mocked_modules assert "os" not in mocked_modules assert "math" not in mocked_modules + assert None not in mocked_modules def test_get_mocked_modules(self, lite_parser): mocked_modules = lite_parser.get_mocked_modules() @@ -74,16 +87,18 @@ def test_get_mocked_modules(self, lite_parser): assert isinstance(mocked_modules["nonexistentmodule"], MagicMock) assert "os" not in mocked_modules - def test_get_mocked_modules_for_non_package_path( - self, sample_project_path, lite_parser - ): + def test_get_mocked_modules_for_non_package_path(self, sample_project_path): other_package_dir = sample_project_path / "mock_aircrafts" other_package_dir.mkdir() (other_package_dir / "__init__.py").touch() (other_package_dir / "data_science.py").write_text( "import os\nfrom data_processing import datascience_dependency" ) + lite_parser_obj = LiteParser( + project_path=sample_project_path, package_name="mock_spaceflights" + ) + mocked_modules = lite_parser_obj.get_mocked_modules() - mocked_modules = lite_parser.get_mocked_modules() - + # dependencies mocked for only files under the package + # if package name is provided assert "data_processing" not in mocked_modules From db1b416189ce6508e7673179336d292d994d8d36 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Tue, 20 Aug 2024 11:37:51 -0500 Subject: [PATCH 28/34] improve readability --- docs/source/experiment_tracking.md | 2 +- .../integrations/kedro/lite_parser.py | 33 ++++++++++++++++--- .../test_integrations/test_lite_parser.py | 4 +-- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/docs/source/experiment_tracking.md b/docs/source/experiment_tracking.md index 0c3870ecd..75b346092 100644 --- a/docs/source/experiment_tracking.md +++ b/docs/source/experiment_tracking.md @@ -346,7 +346,7 @@ Parallel coordinates displays all metrics on a single graph, with each vertical When in comparison view, comparing runs highlights your selections on the respective chart types, improving readability even in the event there is a multitude of data points. ```{note} -The following graphic is taken from the [Kedro-Viz experiment tracking demo](https://demo.kedro.org/experiment-tracking) (it is not a visualisation from the example code you created above). +The following graphic is taken from the [Kedro-Viz experiment tracking demo](https://docs.kedro.org/en/stable/experiment_tracking/index.html) (it is not a visualisation from the example code you created above). 
 ```
 
 ![](./images/experiment-tracking-metrics-comparison.gif)
 
diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py
index 4d4ae05f7..33c2f7fe8 100755
--- a/package/kedro_viz/integrations/kedro/lite_parser.py
+++ b/package/kedro_viz/integrations/kedro/lite_parser.py
@@ -36,6 +36,9 @@ def _is_module_importable(module_name: str) -> bool:
             Whether the module can be imported
         """
         try:
+            # Check if the module can be imported
+            # In case of a submodule (contains a dot, e.g., sklearn.linear_model),
+            # find_spec imports the parent module
            if importlib.util.find_spec(module_name) is None:
                 return False
             return True
@@ -79,7 +82,7 @@ def _is_relative_import(self, module_name: str):
         relative_module_path = module_name.replace(".", "/")
 
         # Check if the relative_module_path
-        # is a substring of any Python file path
+        # is a substring of current project file path
         is_relative_import_path = any(
             relative_module_path in str(project_file_path)
             for project_file_path in self._project_file_paths
@@ -101,6 +104,9 @@ def _create_mock_imports(
         module_parts = module_name.split(".")
         full_module_name = ""
 
+        # Try to import each sub-module starting from the root module
+        # Example: module_name = sklearn.linear_model
+        # We will try to find spec for sklearn, sklearn.linear_model
         for idx, sub_module_name in enumerate(module_parts):
             full_module_name = (
                 sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}"
@@ -111,12 +117,12 @@ def _create_mock_imports(
             ):
                 mocked_modules[full_module_name] = MagicMock()
 
-    def _mock_missing_dependencies(
+    def _populate_mocked_modules(
         self,
         parsed_content_ast_node: ast.Module,
         mocked_modules: Dict[str, MagicMock],
     ) -> None:
-        """Mock missing project dependencies
+        """Populate mocked_modules with missing external dependencies
 
         Args:
             parsed_content_ast_node (ast.Module): The AST node to
@@ -124,18 +130,34 @@ def _mock_missing_dependencies(
             mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports
         """
         for node in ast.walk(parsed_content_ast_node):
+
+            # Handling dependencies that start with "import "
+            # Example: import logging
+            # Corresponding AST node will be:
+            # Import(names=[alias(name='logging')])
             if isinstance(node, ast.Import):
                 for alias in node.names:
                     module_name = alias.name
                     self._create_mock_imports(module_name, mocked_modules)
 
+            # Handling dependencies that start with "from "
+            # Example: from typing import Dict, Union
+            # Corresponding AST node will be:
+            # ImportFrom(module='typing', names=[alias(name='Dict'),
+            # alias(name='Union')],
+            # level=0)
             elif isinstance(node, ast.ImportFrom):
                 module_name = node.module if node.module else ""
                 level = node.level
 
+                # Ignore relative imports like "from . import a"
                 if not module_name or module_name == "":
                     continue
 
+                # Ignore relative imports within the package
+                # Examples:
+                # "from demo_project.pipelines.reporting import test",
+                # "from ..nodes import func_test"
                 if (self._package_name and self._package_name in module_name) or (
                     # dev or standalone mode
                     not self._package_name
@@ -144,6 +166,9 @@ def _mock_missing_dependencies(
                     continue
 
                 # absolute modules in the env
+                # Examples:
+                # from typing import Dict, Union
+                # from sklearn.linear_model import LinearRegression
                 if level == 0:
                     self._create_mock_imports(module_name, mocked_modules)
 
@@ -167,7 +192,7 @@ def get_mocked_modules(self) -> Dict[str, MagicMock]:
                 # inside the package
                 continue
 
-            self._mock_missing_dependencies(
+            self._populate_mocked_modules(
                 parsed_content_ast_node,
                 mocked_modules,
             )
diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py
index 1bfdb9fc8..ab6aec698 100644
--- a/package/tests/test_integrations/test_lite_parser.py
+++ b/package/tests/test_integrations/test_lite_parser.py
@@ -61,7 +61,7 @@ def test_create_mock_imports(self, lite_parser):
         assert "nonexistentmodule" in mocked_modules
         assert isinstance(mocked_modules["nonexistentmodule"], MagicMock)
 
-    def test_mock_missing_dependencies(self, lite_parser):
+    def test_populate_mocked_modules(self, lite_parser):
         mocked_modules = {}
         content = (
             "import os\n"
@@ -73,7 +73,7 @@ def test_mock_missing_dependencies(self, lite_parser):
         )
 
         parsed_content_ast_node = ast.parse(content)
-        lite_parser._mock_missing_dependencies(parsed_content_ast_node, mocked_modules)
+        lite_parser._populate_mocked_modules(parsed_content_ast_node, mocked_modules)
 
         assert "nonexistentmodule" in mocked_modules
         assert "os" not in mocked_modules
From fe09d2006558802ecb4bb95ff851ad901414f882 Mon Sep 17 00:00:00 2001
From: ravi-kumar-pilla
Date: Tue, 20 Aug 2024 13:23:55 -0500
Subject: [PATCH 29/34] fix lint and pytest

---
 .../integrations/kedro/lite_parser.py     |  3 +--
 .../test_integrations/test_lite_parser.py | 22 +++++++++++++++++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py
index 33c2f7fe8..10a7143ad 100755
--- a/package/kedro_viz/integrations/kedro/lite_parser.py
+++ b/package/kedro_viz/integrations/kedro/lite_parser.py
@@ -130,7 +130,6 @@ def _populate_mocked_modules(
             mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports
         """
         for node in ast.walk(parsed_content_ast_node):
-
             # Handling dependencies that start with "import "
             # Example: import logging
             # Corresponding AST node will be:
@@ -151,7 +150,7 @@ def _populate_mocked_modules(
                 level = node.level
 
                 # Ignore relative imports like "from . 
import a" - if not module_name or module_name == "": + if not module_name: continue # Ignore relative imports within the package diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py index ab6aec698..575db5fd5 100644 --- a/package/tests/test_integrations/test_lite_parser.py +++ b/package/tests/test_integrations/test_lite_parser.py @@ -80,6 +80,28 @@ def test_populate_mocked_modules(self, lite_parser): assert "math" not in mocked_modules assert None not in mocked_modules + def test_populate_mocked_modules_in_standalone(self, sample_project_path): + lite_parser_obj = LiteParser(project_path=sample_project_path) + mocked_modules = {} + content = ( + "import os\n" + "import nonexistentmodule\n" + "from math import sqrt\n" + "from mock_spaceflights import data_processing\n" + "from data_processing import some_module\n" + "# import test" + ) + + parsed_content_ast_node = ast.parse(content) + lite_parser_obj._populate_mocked_modules( + parsed_content_ast_node, mocked_modules + ) + + assert "nonexistentmodule" in mocked_modules + assert "os" not in mocked_modules + assert "math" not in mocked_modules + assert "data_processing" not in mocked_modules + def test_get_mocked_modules(self, lite_parser): mocked_modules = lite_parser.get_mocked_modules() From fefafa69b1582ffd61edec8c6a18edc3d3ec9432 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Tue, 20 Aug 2024 21:26:44 -0500 Subject: [PATCH 30/34] revert link redirect --- docs/source/experiment_tracking.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/experiment_tracking.md b/docs/source/experiment_tracking.md index 75b346092..0c3870ecd 100644 --- a/docs/source/experiment_tracking.md +++ b/docs/source/experiment_tracking.md @@ -346,7 +346,7 @@ Parallel coordinates displays all metrics on a single graph, with each vertical When in comparison view, comparing runs highlights your selections on the respective chart types, improving readability even in the event there is a multitude of data points. ```{note} -The following graphic is taken from the [Kedro-Viz experiment tracking demo](https://docs.kedro.org/en/stable/experiment_tracking/index.html) (it is not a visualisation from the example code you created above). +The following graphic is taken from the [Kedro-Viz experiment tracking demo](https://demo.kedro.org/experiment-tracking) (it is not a visualisation from the example code you created above). 
``` ![](./images/experiment-tracking-metrics-comparison.gif) From ae94f1e9789a9238f1f4e9d32845d4c5382f3a11 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Wed, 21 Aug 2024 13:02:05 -0500 Subject: [PATCH 31/34] remove side effects --- .../integrations/kedro/data_loader.py | 18 +- .../integrations/kedro/lite_parser.py | 194 ++++++++++++------ 2 files changed, 143 insertions(+), 69 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py index f2bff7900..c0805ae88 100644 --- a/package/kedro_viz/integrations/kedro/data_loader.py +++ b/package/kedro_viz/integrations/kedro/data_loader.py @@ -9,8 +9,8 @@ import logging import sys from pathlib import Path -from typing import Any, Dict, Optional, Tuple -from unittest.mock import patch +from typing import Any, Dict, Optional, Set, Tuple +from unittest.mock import MagicMock, patch from kedro import __version__ from kedro.framework.project import configure_project, pipelines, settings @@ -142,10 +142,18 @@ def load_data( bootstrap_project(project_path) if is_lite: - lite_parser = LiteParser(project_path, package_name) - mocked_modules = lite_parser.get_mocked_modules() + lite_parser = LiteParser(package_name) + unresolved_imports = lite_parser.parse(project_path) + mocked_modules: Dict[str, MagicMock] = {} + + if len(unresolved_imports): + modules_to_mock: Set[str] = set() + + for unresolved_module_set in unresolved_imports.values(): + modules_to_mock = modules_to_mock.union(unresolved_module_set) + + mocked_modules = lite_parser.create_mock_modules(modules_to_mock) - if len(mocked_modules): logger.warning( "Kedro-Viz has mocked the following dependencies for lite-mode.\n" "%s \n" diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index 10a7143ad..5091581d0 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -4,7 +4,7 @@ import importlib.util import logging from pathlib import Path -from typing import Dict, Union +from typing import Dict, List, Set, Union from unittest.mock import MagicMock logger = logging.getLogger(__name__) @@ -14,16 +14,11 @@ class LiteParser: """Represents a Kedro Parser which uses AST Args: - project_path (Path): the path where the Kedro project is located. 
package_name (Union[str, None]): The name of the current package """ - def __init__( - self, project_path: Path, package_name: Union[str, None] = None - ) -> None: - self._project_path = project_path + def __init__(self, package_name: Union[str, None] = None) -> None: self._package_name = package_name - self._project_file_paths = set(self._project_path.rglob("*.py")) @staticmethod def _is_module_importable(module_name: str) -> bool: @@ -60,7 +55,34 @@ def _is_module_importable(module_name: str) -> bool: ) return False - def _is_relative_import(self, module_name: str): + @staticmethod + def _get_module_parts(module_name: str) -> List[str]: + """Creates a list of module parts to check for importability + + Args: + module_name (str): The module name to split + + Returns: + A list of module parts + + Example: + >>> LiteParser._get_module_parts("kedro.framework.project") + ["kedro", "kedro.framework", "kedro.framework.project"] + + """ + module_split = module_name.split(".") + full_module_name = "" + module_parts = [] + + for idx, sub_module_name in enumerate(module_split): + full_module_name = ( + sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}" + ) + module_parts.append(full_module_name) + + return module_parts + + def _is_relative_import(self, module_name: str, project_file_paths: Set[Path]): """Checks if a module is a relative import. This is needed in dev or standalone mode when the package_name is None and internal package files have unresolved external dependencies @@ -68,16 +90,18 @@ def _is_relative_import(self, module_name: str): Args: module_name (str): The name of the module to check importability - - Example: - >>> lite_parser_obj = LiteParser("path/to/kedro/project") - >>> module_name = "kedro_project_package.pipelines.reporting.nodes" - >>> lite_parser_obj._is_relative_import(module_name) - True + project_file_paths (Set[Path]): A set of project file paths Returns: Whether the module is a relative import starting from the root package dir + + Example: + >>> lite_parser_obj = LiteParser() + >>> module_name = "kedro_project_package.pipelines.reporting.nodes" + >>> project_file_paths = set([Path("/path/to/relative/file")]) + >>> lite_parser_obj._is_relative_import(module_name, project_file_paths) + True """ relative_module_path = module_name.replace(".", "/") @@ -85,50 +109,60 @@ def _is_relative_import(self, module_name: str): # is a substring of current project file path is_relative_import_path = any( relative_module_path in str(project_file_path) - for project_file_path in self._project_file_paths + for project_file_path in project_file_paths ) return is_relative_import_path - def _create_mock_imports( - self, module_name: str, mocked_modules: Dict[str, MagicMock] + def _populate_missing_dependencies( + self, module_name: str, missing_dependencies: Set[str] ) -> None: - """Creates mock modules for unresolvable imports and adds them to the - dictionary of mocked_modules + """Helper to populate missing dependencies Args: - module_name (str): The module name to be mocked - mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports + module_name (str): The module name to check if it is importable + missing_dependencies (Set[str]): A set of missing dependencies """ - module_parts = module_name.split(".") - full_module_name = "" - - # Try to import each sub-module starting from the root module - # Example: module_name = sklearn.linear_model - # We will try to find spec for sklearn, sklearn.linear_model - for idx, sub_module_name in 
enumerate(module_parts):
-            full_module_name = (
-                sub_module_name if idx == 0 else f"{full_module_name}.{sub_module_name}"
-            )
+        module_name_parts = self._get_module_parts(module_name)
 
+        for module_name_part in module_name_parts:
             if (
-                not self._is_module_importable(full_module_name)
-                and full_module_name not in mocked_modules
+                not self._is_module_importable(module_name_part)
+                and module_name_part not in missing_dependencies
             ):
-                mocked_modules[full_module_name] = MagicMock()
+                missing_dependencies.add(module_name_part)
 
-    def _populate_mocked_modules(
-        self,
-        parsed_content_ast_node: ast.Module,
-        mocked_modules: Dict[str, MagicMock],
-    ) -> None:
-        """Populate mocked_modules with missing external dependencies
+    def _get_unresolved_imports(
+        self, file_path: Path, project_file_paths: Union[Set[Path], None] = None
+    ) -> Set[str]:
+        """Parse the file using AST and return any missing dependencies
+        in the current file
 
         Args:
-            parsed_content_ast_node (ast.Module): The AST node to
-            extract import statements
-            mocked_modules (Dict[str, MagicMock]): A dictionary of mocked imports
+            file_path (Path): The file path to parse
+            project_file_paths (Union[Set[Path], None]): A set of project file paths
+
+        Returns:
+            A set of missing dependencies
         """
+
+        missing_dependencies = set()
+
+        # Read the file
+        with open(file_path, "r", encoding="utf-8") as file:
+            file_content = file.read()
+
+        # parse file content using ast
+        parsed_content_ast_node: ast.Module = ast.parse(file_content)
+        file_path = file_path.resolve()
+
+        # Ensure the package name is in the file path
+        if self._package_name and self._package_name not in file_path.parts:
+            # we are only mocking the dependencies
+            # inside the package
+            return missing_dependencies
+
+        # Explore each node in the AST tree
         for node in ast.walk(parsed_content_ast_node):
             # Handling dependencies that start with "import "
             # Example: import logging
             # Corresponding AST node will be:
@@ -137,7 +171,9 @@ def _populate_mocked_modules(
             if isinstance(node, ast.Import):
                 for alias in node.names:
                     module_name = alias.name
-                    self._create_mock_imports(module_name, mocked_modules)
+                    self._populate_missing_dependencies(
+                        module_name, missing_dependencies
+                    )
 
             # Handling dependencies that start with "from "
             # Example: from typing import Dict, Union
             # Corresponding AST node will be:
@@ -160,7 +196,8 @@ def _populate_mocked_modules(
                 if (self._package_name and self._package_name in module_name) or (
                     # dev or standalone mode
                     not self._package_name
-                    and self._is_relative_import(module_name)
+                    and project_file_paths
+                    and self._is_relative_import(module_name, project_file_paths)
                 ):
                     continue
 
@@ -169,31 +206,60 @@ def _populate_mocked_modules(
                 # from typing import Dict, Union
                 # from sklearn.linear_model import LinearRegression
                 if level == 0:
-                    self._create_mock_imports(module_name, mocked_modules)
+                    self._populate_missing_dependencies(
+                        module_name, missing_dependencies
+                    )
 
-    def get_mocked_modules(self) -> Dict[str, MagicMock]:
-        """Returns mocked modules for all the dependency errors
-        as a dictionary for each file in your Kedro project
+        return missing_dependencies
+
+    def create_mock_modules(self, unresolved_imports: Set[str]) -> Dict[str, MagicMock]:
+        """Creates mock modules for unresolved imports
+
+        Args:
+            unresolved_imports (Set[str]): A set of unresolved imports
+
+        Returns:
+            A dictionary of mocked modules for the unresolved imports
         """
         mocked_modules: Dict[str, MagicMock] = {}
 
-        for file_path in self._project_file_paths:
-            with open(file_path, "r", encoding="utf-8") as file:
-                file_content = file.read()
+        for unresolved_import in unresolved_imports:
+            mocked_modules[unresolved_import] = MagicMock()
 
-            # parse file content using ast
-            parsed_content_ast_node: ast.Module = ast.parse(file_content)
-            file_path = file_path.resolve()
+        return mocked_modules
 
-            # Ensure the package name is in the file path
-            if self._package_name and self._package_name not in file_path.parts:
-                # we are only mocking the dependencies
-                # inside the package
-                continue
+    def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]:
+        """Parses the file(s) under the target path and returns
+        any unresolved imports as a dictionary mapping each file
+        in the target path to a set of missing module names
 
-            self._populate_mocked_modules(
-                parsed_content_ast_node,
-                mocked_modules,
+        Args:
+            target_path (Path): The path to parse file(s)
+
+        Returns:
+            A dictionary mapping file path(s) to sets of missing module names
+        """
+
+        if not target_path.exists():
+            logger.warning("Path `%s` does not exist", str(target_path))
+            return None
+
+        unresolved_imports: Dict[str, Set[str]] = {}
+
+        if target_path.is_file():
+            missing_dependencies = self._get_unresolved_imports(target_path)
+            if len(missing_dependencies) > 0:
+                unresolved_imports[str(target_path)] = missing_dependencies
+            return unresolved_imports
+
+        # handling directories
+        _project_file_paths = set(target_path.rglob("*.py"))
+
+        for file_path in _project_file_paths:
+            missing_dependencies = self._get_unresolved_imports(
+                file_path, _project_file_paths
            )
+            if len(missing_dependencies) > 0:
+                unresolved_imports[str(file_path)] = missing_dependencies
 
-        return mocked_modules
+        return unresolved_imports
From 45da624bbfbef77f49f9892e075df006b188ab1f Mon Sep 17 00:00:00 2001
From: ravi-kumar-pilla
Date: Wed, 21 Aug 2024 23:57:54 -0500
Subject: [PATCH 32/34] pr suggestions addressed

---
 .../integrations/kedro/data_loader.py     |  31 ++-
 .../integrations/kedro/lite_parser.py     |   6 +-
 .../test_integrations/test_lite_parser.py | 216 ++++++++++++------
 3 files changed, 168 insertions(+), 85 deletions(-)

diff --git a/package/kedro_viz/integrations/kedro/data_loader.py b/package/kedro_viz/integrations/kedro/data_loader.py
index c0805ae88..e83c377e9 100644
--- a/package/kedro_viz/integrations/kedro/data_loader.py
+++ b/package/kedro_viz/integrations/kedro/data_loader.py
@@ -10,7 +10,7 @@ import sys
 from pathlib import Path
 from typing import Any, Dict, Optional, Set, Tuple
-from unittest.mock import MagicMock, patch
+from unittest.mock import patch
 
 from kedro import __version__
 from kedro.framework.project import configure_project, pipelines, settings
@@ -80,7 +80,22 @@ def _load_data_helper(
     extra_params: Optional[Dict[str, Any]] = None,
     is_lite: bool = False,
 ):
-    """Helper to load data from a Kedro project."""
+    """Helper to load data from a Kedro project.
+
+    Args:
+        project_path: the path where the Kedro project is located.
+        env: the Kedro environment to load the data. If not provided,
+        it will use Kedro default, which is local.
+        include_hooks: A flag to include all registered hooks in your Kedro Project.
+        extra_params: Optional dictionary containing extra project parameters
+        for underlying KedroContext. If specified, will update (and therefore
+        take precedence over) the parameters retrieved from the project
+        configuration.
+        is_lite: A flag to run Kedro-Viz in lite mode.
+    Returns:
+        A tuple containing the data catalog, pipeline dictionary, session store
+        and dataset stats dictionary. 
+ """ with KedroSession.create( project_path=project_path, @@ -132,8 +147,8 @@ def load_data( configuration. is_lite: A flag to run Kedro-Viz in lite mode. Returns: - A tuple containing the data catalog and the pipeline dictionary - and the session store. + A tuple containing the data catalog, pipeline dictionary, session store + and dataset stats dictionary. """ if package_name: configure_project(package_name) @@ -144,15 +159,16 @@ def load_data( if is_lite: lite_parser = LiteParser(package_name) unresolved_imports = lite_parser.parse(project_path) - mocked_modules: Dict[str, MagicMock] = {} + sys_modules_patch = sys.modules.copy() - if len(unresolved_imports): + if unresolved_imports and len(unresolved_imports) > 0: modules_to_mock: Set[str] = set() for unresolved_module_set in unresolved_imports.values(): modules_to_mock = modules_to_mock.union(unresolved_module_set) mocked_modules = lite_parser.create_mock_modules(modules_to_mock) + sys_modules_patch.update(mocked_modules) logger.warning( "Kedro-Viz has mocked the following dependencies for lite-mode.\n" @@ -162,9 +178,6 @@ def load_data( list(mocked_modules.keys()), ) - sys_modules_patch = sys.modules.copy() - sys_modules_patch.update(mocked_modules) - # Patch actual sys modules with patch.dict("sys.modules", sys_modules_patch): return _load_data_helper( diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index 5091581d0..9ce843fe8 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -146,7 +146,7 @@ def _get_unresolved_imports( A set of missing dependencies """ - missing_dependencies = set() + missing_dependencies: Set[str] = set() # Read the file with open(file_path, "r", encoding="utf-8") as file: @@ -249,7 +249,7 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]: if target_path.is_file(): missing_dependencies = self._get_unresolved_imports(target_path) if len(missing_dependencies) > 0: - unresolved_imports[str(target_path)] = missing_dependencies + unresolved_imports[target_path.name] = missing_dependencies return unresolved_imports # handling directories @@ -260,6 +260,6 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]: file_path, _project_file_paths ) if len(missing_dependencies) > 0: - unresolved_imports[str(file_path)] = missing_dependencies + unresolved_imports[file_path.name] = missing_dependencies return unresolved_imports diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py index 575db5fd5..75a9a4935 100644 --- a/package/tests/test_integrations/test_lite_parser.py +++ b/package/tests/test_integrations/test_lite_parser.py @@ -1,4 +1,4 @@ -import ast +from pathlib import Path from unittest.mock import MagicMock, patch import pytest @@ -12,17 +12,22 @@ def sample_project_path(tmp_path): package_dir = tmp_path / "mock_spaceflights" package_dir.mkdir() (package_dir / "__init__.py").touch() + (package_dir / "__init__.py").write_text( + "from mock_spaceflights import data_processing\n" + "from mock_spaceflights.data_processing import create_metrics" + ) (package_dir / "data_processing.py").write_text( - "import os\nimport nonexistentmodule" + "import os\n" + "import nonexistentmodule\n" + "from . 
import test\n" + "from typing import Dict" ) return tmp_path @pytest.fixture -def lite_parser(sample_project_path): - return LiteParser( - project_path=sample_project_path, package_name="mock_spaceflights" - ) +def lite_parser(): + return LiteParser(package_name="mock_spaceflights") class TestLiteParser: @@ -44,83 +49,148 @@ def test_is_module_importable_valueerror(self, lite_parser): with patch("importlib.util.find_spec", side_effect=ValueError): assert lite_parser._is_module_importable("nonexistentmodule") is False - def test_is_relative_import(self, lite_parser): - assert ( - lite_parser._is_relative_import("mock_spaceflights.data_processing") is True - ) - assert ( - lite_parser._is_relative_import( - "mock_spaceflights.data_processing.random_module" - ) - is False + @pytest.mark.parametrize( + "module_name, expected_module_parts", + [ + ("sklearn", ["sklearn"]), + ( + "demo_project.pipelines.ingestion", + [ + "demo_project", + "demo_project.pipelines", + "demo_project.pipelines.ingestion", + ], + ), + ], + ) + def test_get_module_parts(self, lite_parser, module_name, expected_module_parts): + assert lite_parser._get_module_parts(module_name) == expected_module_parts + + def test_is_relative_import_found(self, lite_parser): + module_name = "kedro_project_package.pipelines.reporting.nodes" + project_file_paths = { + Path("/path/to/kedro_project_package/pipelines/reporting/nodes.py") + } + assert lite_parser._is_relative_import(module_name, project_file_paths) + + def test_relative_import_not_found(self, lite_parser): + module_name = "kedro_project_package.pipelines.reporting.nodes" + project_file_paths = { + Path("/path/to/another_project/pipelines/reporting/nodes.py") + } + assert not lite_parser._is_relative_import(module_name, project_file_paths) + + def test_relative_import_partial_match(self, lite_parser): + module_name = "kedro_project_package.pipelines" + project_file_paths = { + Path("/path/to/kedro_project_package/pipelines/reporting/nodes.py"), + Path("/path/to/kedro_project_package/pipelines/something_else.py"), + } + assert lite_parser._is_relative_import(module_name, project_file_paths) + + def test_relative_import_empty_file_paths(self, lite_parser): + module_name = "kedro_project_package.pipelines.reporting.nodes" + project_file_paths = set() + assert not lite_parser._is_relative_import(module_name, project_file_paths) + + def test_populate_missing_dependencies(self, lite_parser): + module_name = "non_importable.module.part" + missing_dependencies = set() + + lite_parser._populate_missing_dependencies(module_name, missing_dependencies) + + # The test expects the missing dependencies to + # include each part of the module name + expected_missing = { + "non_importable", + "non_importable.module", + "non_importable.module.part", + } + assert missing_dependencies == expected_missing + + def test_no_missing_dependencies(self, lite_parser, mocker): + module_name = "importable_module" + missing_dependencies = set() + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser.LiteParser._is_module_importable", + return_value=True, ) - def test_create_mock_imports(self, lite_parser): - mocked_modules = {} - lite_parser._create_mock_imports("nonexistentmodule", mocked_modules) - assert "nonexistentmodule" in mocked_modules - assert isinstance(mocked_modules["nonexistentmodule"], MagicMock) - - def test_populate_mocked_modules(self, lite_parser): - mocked_modules = {} - content = ( - "import os\n" - "import nonexistentmodule\n" - "from math import sqrt\n" - "from mock_spaceflights 
import data_processing\n" - "from . import some_module\n" - "# import test" - ) + lite_parser._populate_missing_dependencies(module_name, missing_dependencies) - parsed_content_ast_node = ast.parse(content) - lite_parser._populate_mocked_modules(parsed_content_ast_node, mocked_modules) - - assert "nonexistentmodule" in mocked_modules - assert "os" not in mocked_modules - assert "math" not in mocked_modules - assert None not in mocked_modules - - def test_populate_mocked_modules_in_standalone(self, sample_project_path): - lite_parser_obj = LiteParser(project_path=sample_project_path) - mocked_modules = {} - content = ( - "import os\n" - "import nonexistentmodule\n" - "from math import sqrt\n" - "from mock_spaceflights import data_processing\n" - "from data_processing import some_module\n" - "# import test" + # Since the module is importable, + # the set should remain empty + assert not missing_dependencies + + def test_partial_importability(self, lite_parser, mocker): + module_name = "importable_module.non_importable_part" + missing_dependencies = set() + mocker.patch( + "kedro_viz.integrations.kedro.lite_parser.LiteParser._is_module_importable", + side_effect=lambda part: part == "importable_module", ) - parsed_content_ast_node = ast.parse(content) - lite_parser_obj._populate_mocked_modules( - parsed_content_ast_node, mocked_modules + lite_parser._populate_missing_dependencies(module_name, missing_dependencies) + + # Only the non-importable part + # should be added to the set + expected_missing = {"importable_module.non_importable_part"} + assert missing_dependencies == expected_missing + + def test_get_unresolved_imports(self, lite_parser, sample_project_path, mocker): + file_path = Path(sample_project_path / "mock_spaceflights/data_processing.py") + mock_populate = mocker.patch( + "kedro_viz.integrations.kedro.lite_parser.LiteParser._populate_missing_dependencies" ) - assert "nonexistentmodule" in mocked_modules - assert "os" not in mocked_modules - assert "math" not in mocked_modules - assert "data_processing" not in mocked_modules + lite_parser._get_unresolved_imports(file_path) - def test_get_mocked_modules(self, lite_parser): - mocked_modules = lite_parser.get_mocked_modules() + # Ensure _populate_missing_dependencies was called + # with correct module names + mock_populate.assert_any_call("os", set()) + mock_populate.assert_any_call("nonexistentmodule", set()) - assert "nonexistentmodule" in mocked_modules - assert isinstance(mocked_modules["nonexistentmodule"], MagicMock) - assert "os" not in mocked_modules + def test_get_unresolved_relative_imports(self, sample_project_path, mocker): + lite_parser_obj = LiteParser() + file_path = Path(sample_project_path / "mock_spaceflights/__init__.py") - def test_get_mocked_modules_for_non_package_path(self, sample_project_path): - other_package_dir = sample_project_path / "mock_aircrafts" - other_package_dir.mkdir() - (other_package_dir / "__init__.py").touch() - (other_package_dir / "data_science.py").write_text( - "import os\nfrom data_processing import datascience_dependency" + unresolvable_imports = lite_parser_obj._get_unresolved_imports( + file_path, set(sample_project_path.rglob("*.py")) ) - lite_parser_obj = LiteParser( - project_path=sample_project_path, package_name="mock_spaceflights" + + assert len(unresolvable_imports) == 0 + + def test_get_unresolved_imports_for_non_package_paths(self, sample_project_path): + lite_parser_obj = LiteParser("mock_pyspark") + file_path = Path(sample_project_path / 
"mock_spaceflights/data_processing.py") + + unresolvable_imports = lite_parser_obj._get_unresolved_imports( + file_path, set(sample_project_path.rglob("*.py")) ) - mocked_modules = lite_parser_obj.get_mocked_modules() - # dependencies mocked for only files under the package - # if package name is provided - assert "data_processing" not in mocked_modules + # ignore files in other packages if + # LiteParser is instantiated with a package_name + assert len(unresolvable_imports) == 0 + + def test_create_mock_modules(self, lite_parser): + unresolved_imports = {"sklearn", "pyspark.pandas"} + mocked_modules = lite_parser.create_mock_modules(unresolved_imports) + + assert len(mocked_modules) == len(unresolved_imports) + assert "sklearn" in mocked_modules + assert "pyspark.pandas" in mocked_modules + assert isinstance(mocked_modules["sklearn"], MagicMock) + + def test_parse_non_existent_path(self, lite_parser): + assert not lite_parser.parse(Path("non/existent/path")) + assert not lite_parser.parse(Path("non/existent/path/file.py")) + + def test_file_parse(self, lite_parser, sample_project_path): + file_path = Path(sample_project_path / "mock_spaceflights/data_processing.py") + unresolved_imports = lite_parser.parse(file_path) + + assert unresolved_imports == {file_path.name: {"nonexistentmodule"}} + + def test_directory_parse(self, lite_parser, sample_project_path): + unresolved_imports = lite_parser.parse(sample_project_path) + assert unresolved_imports == {"data_processing.py": {"nonexistentmodule"}} From bcdd304bd226fe42c81897b3d0f0ae5ad9946455 Mon Sep 17 00:00:00 2001 From: ravi-kumar-pilla Date: Thu, 22 Aug 2024 00:45:18 -0500 Subject: [PATCH 33/34] fix dict issue --- package/kedro_viz/integrations/kedro/lite_parser.py | 4 ++-- package/tests/test_integrations/test_lite_parser.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py index 9ce843fe8..b02bc92db 100755 --- a/package/kedro_viz/integrations/kedro/lite_parser.py +++ b/package/kedro_viz/integrations/kedro/lite_parser.py @@ -249,7 +249,7 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]: if target_path.is_file(): missing_dependencies = self._get_unresolved_imports(target_path) if len(missing_dependencies) > 0: - unresolved_imports[target_path.name] = missing_dependencies + unresolved_imports[str(target_path)] = missing_dependencies return unresolved_imports # handling directories @@ -260,6 +260,6 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]: file_path, _project_file_paths ) if len(missing_dependencies) > 0: - unresolved_imports[file_path.name] = missing_dependencies + unresolved_imports[str(file_path)] = missing_dependencies return unresolved_imports diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py index 75a9a4935..e363b01c8 100644 --- a/package/tests/test_integrations/test_lite_parser.py +++ b/package/tests/test_integrations/test_lite_parser.py @@ -189,8 +189,11 @@ def test_file_parse(self, lite_parser, sample_project_path): file_path = Path(sample_project_path / "mock_spaceflights/data_processing.py") unresolved_imports = lite_parser.parse(file_path) - assert unresolved_imports == {file_path.name: {"nonexistentmodule"}} + assert unresolved_imports == {str(file_path): {"nonexistentmodule"}} def test_directory_parse(self, lite_parser, sample_project_path): unresolved_imports = 
lite_parser.parse(sample_project_path)
-        assert unresolved_imports == {"data_processing.py": {"nonexistentmodule"}}
+        expected_file_path = Path(
+            sample_project_path / "mock_spaceflights/data_processing.py"
+        )
+        assert unresolved_imports == {str(expected_file_path): {"nonexistentmodule"}}
From 050bff2972738b5b2af86a3b8423ae985e06c87f Mon Sep 17 00:00:00 2001
From: ravi-kumar-pilla
Date: Thu, 22 Aug 2024 18:54:48 -0500
Subject: [PATCH 34/34] moved package check under dirs and add exception block

---
 .../integrations/kedro/lite_parser.py     | 30 ++++++++++++-------
 .../test_integrations/test_lite_parser.py | 20 +++++--------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/package/kedro_viz/integrations/kedro/lite_parser.py b/package/kedro_viz/integrations/kedro/lite_parser.py
index b02bc92db..bd9fcb682 100755
--- a/package/kedro_viz/integrations/kedro/lite_parser.py
+++ b/package/kedro_viz/integrations/kedro/lite_parser.py
@@ -156,12 +156,6 @@ def _get_unresolved_imports(
         parsed_content_ast_node: ast.Module = ast.parse(file_content)
         file_path = file_path.resolve()
 
-        # Ensure the package name is in the file path
-        if self._package_name and self._package_name not in file_path.parts:
-            # we are only mocking the dependencies
-            # inside the package
-            return missing_dependencies
-
         # Explore each node in the AST tree
         for node in ast.walk(parsed_content_ast_node):
             # Handling dependencies that start with "import "
@@ -256,10 +250,24 @@ def parse(self, target_path: Path) -> Union[Dict[str, Set[str]], None]:
         _project_file_paths = set(target_path.rglob("*.py"))
 
         for file_path in _project_file_paths:
-            missing_dependencies = self._get_unresolved_imports(
-                file_path, _project_file_paths
-            )
-            if len(missing_dependencies) > 0:
-                unresolved_imports[str(file_path)] = missing_dependencies
+            try:
+                # Ensure the package name is in the file path
+                if self._package_name and self._package_name not in file_path.parts:
+                    # we are only mocking the dependencies
+                    # inside the package
+                    continue
+
+                missing_dependencies = self._get_unresolved_imports(
+                    file_path, _project_file_paths
+                )
+                if len(missing_dependencies) > 0:
+                    unresolved_imports[str(file_path)] = missing_dependencies
+            # pylint: disable=broad-except
+            except Exception as exc:  # pragma: no cover
+                logger.error(
+                    "An error occurred in LiteParser while mocking dependencies: %s",
+                    exc,
+                )
+                continue
 
         return unresolved_imports
diff --git a/package/tests/test_integrations/test_lite_parser.py b/package/tests/test_integrations/test_lite_parser.py
index e363b01c8..b3ae1eede 100644
--- a/package/tests/test_integrations/test_lite_parser.py
+++ b/package/tests/test_integrations/test_lite_parser.py
@@ -160,18 +160,6 @@ def test_get_unresolved_relative_imports(self, sample_project_path, mocker):
 
         assert len(unresolvable_imports) == 0
 
-    def test_get_unresolved_imports_for_non_package_paths(self, sample_project_path):
-        lite_parser_obj = LiteParser("mock_pyspark")
-        file_path = Path(sample_project_path / "mock_spaceflights/data_processing.py")
-
-        unresolvable_imports = lite_parser_obj._get_unresolved_imports(
-            file_path, set(sample_project_path.rglob("*.py"))
-        )
-
-        # ignore files in other packages if
-        # LiteParser is instantiated with a package_name
-        assert len(unresolvable_imports) == 0
-
     def test_create_mock_modules(self, lite_parser):
         unresolved_imports = {"sklearn", "pyspark.pandas"}
         mocked_modules = lite_parser.create_mock_modules(unresolved_imports)
@@ -197,3 +185,11 @@ def test_directory_parse(self, lite_parser, sample_project_path):
sample_project_path / "mock_spaceflights/data_processing.py" ) assert unresolved_imports == {str(expected_file_path): {"nonexistentmodule"}} + + def test_directory_parse_non_package_path(self, sample_project_path): + lite_parser_obj = LiteParser("mock_pyspark") + unresolvable_imports = lite_parser_obj.parse(sample_project_path) + + # ignore files in other packages if + # LiteParser is instantiated with a package_name + assert len(unresolvable_imports) == 0
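
With patch 34 applied, the lite-mode flow is settled: LiteParser.parse() walks the *.py files under a target path, collects the imports that importlib.util.find_spec cannot resolve (expanding a dotted import such as a.b.c into each non-importable parent part), and returns a {file path: set of missing module names} mapping; LiteParser.create_mock_modules() turns each collected name into a MagicMock; and load_data() in data_loader.py patches those mocks into sys.modules for the duration of project loading. A minimal usage sketch of that flow, mirroring the load_data wiring from patch 32 — the project path and package name below are illustrative placeholders, not values taken from the patches:

    import sys
    from pathlib import Path
    from typing import Set
    from unittest.mock import patch

    from kedro_viz.integrations.kedro.lite_parser import LiteParser

    # package_name scopes mocking to files inside that package; omit it in
    # dev/standalone mode (patch 34 applies this check per file in parse()).
    lite_parser = LiteParser(package_name="demo_project")

    # {file path: {unresolved module names}}, or None if the path is missing.
    unresolved_imports = lite_parser.parse(Path("/path/to/demo-project"))

    modules_to_mock: Set[str] = set()
    for unresolved_module_set in (unresolved_imports or {}).values():
        modules_to_mock = modules_to_mock.union(unresolved_module_set)

    # parse() already expanded dotted imports into their non-importable
    # parent parts, so each collected name maps directly to a MagicMock.
    mocked_modules = lite_parser.create_mock_modules(modules_to_mock)

    sys_modules_patch = sys.modules.copy()
    sys_modules_patch.update(mocked_modules)

    # While the patch is active, imports of the missing dependencies resolve
    # to mocks, so pipeline discovery can run without them being installed.
    with patch.dict("sys.modules", sys_modules_patch):
        ...  # e.g. create the KedroSession and load catalog/pipelines here

This mirrors the warning-and-patch sequence in load_data(): the mocks exist only inside the patch.dict context, so the real sys.modules is restored once loading finishes.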