From 69f96a3e0456c003fcd2afa008a40f32b8973f46 Mon Sep 17 00:00:00 2001 From: dbogunowicz <97082108+dbogunowicz@users.noreply.github.com> Date: Thu, 6 Oct 2022 16:57:01 +0200 Subject: [PATCH] [Cherry Pick] Internal dev README (potentially user-facing README) (#239) * Internal dev README (potentially user-facing README) (#205) * initial commit * merge readmes * just need grammar and consistency review * Apply suggestions from code review * Update README.md Co-authored-by: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> * Update README.md Co-authored-by: bogunowicz@arrival.com Co-authored-by: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> * Internal dev README (potentially user-facing README) (#205) * initial commit * merge readmes * just need grammar and consistency review * Apply suggestions from code review * Update README.md Co-authored-by: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> * Update README.md Co-authored-by: bogunowicz@arrival.com Co-authored-by: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> Co-authored-by: bogunowicz@arrival.com Co-authored-by: Jeannie Finks <74554921+jeanniefinks@users.noreply.github.com> --- README.md | 254 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 239 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index abd43707..346d51d9 100644 --- a/README.md +++ b/README.md @@ -83,35 +83,259 @@ pip install sparsezoo ## Quick Tour -### Python APIs +The SparseZoo Python API enables you to search and download sparsified models. Code examples are given below. +We encourage users to load SparseZoo models by copying a stub directly from a [model page](https://sparsezoo.neuralmagic.com/). -The Python APIs respect this format enabling you to search and download models. Some code examples are given below. -The [SparseZoo UI](https://sparsezoo.neuralmagic.com/) also enables users to load models by copying -a stub directly from a model page. 
+### Introduction to Model Class Object +The `Model` class is the fundamental object that serves as the main interface to the SparseZoo library. +It represents a SparseZoo model, together with all its directories and files. -#### Loading from a Stub +#### Creating a Model Class Object From SparseZoo Stub +```python +from sparsezoo import Model + +stub = "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none" + +model = Model(stub) +print(str(model)) + +>> Model(stub=zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none) +``` + +#### Creating a Model Class Object From Local Model Directory +```python +from sparsezoo import Model + +directory = ".../.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0" + +model = Model(directory) +print(str(model)) + +>> Model(directory=.../.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0) +``` + +#### Manually Specifying the Model Download Path + +Unless specified otherwise, the model created from the SparseZoo stub is saved to the local sparsezoo cache directory. +This can be overridden by passing the optional `download_path` argument to the constructor: + +```python +from sparsezoo import Model + +stub = "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none" +download_directory = "./model_download_directory" + +model = Model(stub, download_path = download_directory) +``` +#### Downloading the Model Files +Once the model is initialized from a stub, it may be downloaded either by calling the `download()` method or by accessing the `path` property. Both pathways are universal for all the files in SparseZoo. Accessing the `path` property will always trigger a file download unless the file has already been downloaded. + +```python +# method 1 +model.download() + +# method 2 +model_path = model.path +``` + +#### Inspecting the Contents of the SparseZoo Model + +We use the `available_files` property to inspect which files are present in the SparseZoo model. 
Then, we select a file by accessing the appropriate attribute: + +```python +model.available_files + +>> {'training': Directory(name=training), +>> 'deployment': Directory(name=deployment), +>> 'sample_inputs': Directory(name=sample_inputs.tar.gz), +>> 'sample_outputs': {'framework': Directory(name=sample_outputs.tar.gz)}, +>> 'sample_labels': Directory(name=sample_labels.tar.gz), +>> 'model_card': File(name=model.md), +>> 'recipes': Directory(name=recipe), +>> 'onnx_model': File(name=model.onnx)} +``` +Then, we might take a closer look at the contents of the SparseZoo model: +```python +model_card = model.model_card +print(model_card) + +>> File(name=model.md) +``` +```python +model_card_path = model.model_card.path +print(model_card_path) + +>> .../.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0/model.md +``` + + +### Model, Directory, and File + +In general, every file in the SparseZoo model shares a set of attributes: `name`, `path`, `url`, and `parent_directory`: +- `name` serves as an identifier of the file/directory +- `path` points to the location of the file/directory +- `url` specifies the server address of the file/directory in question +- `parent_directory` points to the location of the parent directory of the file/directory in question + +A directory is a special type of file that contains other files. For that reason, it has an additional `files` attribute. + +```python +print(model.onnx_model) + +>> File(name=model.onnx) + +print(f"File name: {model.onnx_model.name}\n" + f"File path: {model.onnx_model.path}\n" + f"File URL: {model.onnx_model.url}\n" + f"Parent directory: {model.onnx_model.parent_directory}") + +>> File name: model.onnx +>> File path: .../.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0/model.onnx +>> File URL: https://models.neuralmagic.com/cv-classification/... 
+>> Parent directory: .../.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0 +``` + +```python +print(model.recipes) + +>> Directory(name=recipe) + +print(f"File name: {model.recipes.name}\n" + f"Contains: {[file.name for file in model.recipes.files]}\n" + f"File path: {model.recipes.path}\n" + f"File URL: {model.recipes.url}\n" + f"Parent directory: {model.recipes.parent_directory}") + +>> File name: recipe +>> Contains: ['recipe_original.md', 'recipe_transfer-classification.md'] +>> File path: /home/user/.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0/recipe +>> File URL: None +>> Parent directory: /home/user/.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0 +``` + +### Selecting Checkpoint-Specific Data + +A SparseZoo model may contain several checkpoints. The model may contain a checkpoint that had been saved before the model was quantized - that checkpoint would be used for transfer learning. Another checkpoint might have been saved after the quantization step - that one is usually directly used for inference. + +The recipes may also vary depending on the use case. We may want to access a recipe that was used to sparsify the dense model (`recipe_original`) or the one that enables us to sparse transfer learn from the already sparsified model (`recipe_transfer`). + +There are two ways to access those specific files. 
+ +#### Accessing Recipes (Through Python API) +```python +available_recipes = model.recipes.available +print(available_recipes) + +>> ['original', 'transfer-classification'] + +transfer_recipe = model.recipes["transfer-classification"] +print(transfer_recipe) + +>> File(name=recipe_transfer-classification.md) + +original_recipe = model.recipes.default # recipe defaults to `original` +original_recipe_path = original_recipe.path # downloads the recipe and returns its path +print(original_recipe_path) + +>> .../.cache/sparsezoo/eb977dae-2454-471b-9870-4cf38074acf0/recipe/recipe_original.md +``` + +#### Accessing Checkpoints (Through Python API) +In general, we expect the following checkpoints to be included in the model: + +- `checkpoint_prepruning` +- `checkpoint_postpruning` +- `checkpoint_preqat` +- `checkpoint_postqat` + +The checkpoint that the model defaults to is the `preqat` state (just before the quantization step). ```python from sparsezoo import Model -# copied from https://sparsezoo.neuralmagic.com/ -stub = "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned90_quant-none" +stub = "zoo:nlp/question_answering/bert-base/pytorch/huggingface/squad/pruned_quant_3layers-aggressive_84" + model = Model(stub) -print(model) +available_checkpoints = model.training.available +print(available_checkpoints) + +>> ['preqat'] + +preqat_checkpoint = model.training.default # checkpoint defaults to `preqat` +preqat_checkpoint_path = preqat_checkpoint.path # downloads the checkpoint and returns its path +print(preqat_checkpoint_path) + +>> .../.cache/sparsezoo/0857c6f2-13c1-43c9-8db8-8f89a548dccd/training + +[print(file.name) for file in preqat_checkpoint.files] + +>> vocab.txt +>> special_tokens_map.json +>> pytorch_model.bin +>> config.json +>> training_args.bin +>> tokenizer_config.json +>> trainer_state.json +>> tokenizer.json ``` -#### Searching the Zoo + +#### Accessing Recipes (Through Stub String Arguments) + +You can also directly request a 
specific recipe/checkpoint type by appending the appropriate URL query arguments to the stub: +```python +from sparsezoo import Model + +stub = "zoo:cv/classification/resnet_v1-50/pytorch/sparseml/imagenet/pruned95_quant-none?recipe=transfer" + +model = Model(stub) + +# Inspect which files are present. +# Note that the available recipes are restricted +# according to the specified URL query arguments +print(model.recipes.available) + +>> ['transfer-classification'] + +transfer_recipe = model.recipes.default # Now the recipes default to the one selected by the stub string arguments +print(transfer_recipe) + +>> File(name=recipe_transfer-classification.md) +``` + +### Accessing Sample Data + +The user may easily request a sample batch of data that represents the inputs and outputs of the model. + +```python +sample_data = model.sample_batch(batch_size = 10) + +print(sample_data['sample_inputs'][0].shape) +>> (10, 3, 224, 224) # (batch_size, num_channels, image_dim, image_dim) + +print(sample_data['sample_outputs'][0].shape) +>> (10, 1000) # (batch_size, num_classes) +``` + +### Model Search +The function `search_models` enables the user to quickly filter the contents of the SparseZoo repository to find the stubs of interest: ```python from sparsezoo import search_models -models = search_models( - domain="cv", - sub_domain="classification", - return_stubs=True, -) -print(models) +args = { + "domain": "cv", + "sub_domain": "segmentation", + "architecture": "yolact", +} + +models = search_models(**args) +[print(model) for model in models] + +>> Model(stub=zoo:cv/segmentation/yolact-darknet53/pytorch/dbolya/coco/pruned82_quant-none) +>> Model(stub=zoo:cv/segmentation/yolact-darknet53/pytorch/dbolya/coco/pruned90-none) +>> Model(stub=zoo:cv/segmentation/yolact-darknet53/pytorch/dbolya/coco/base-none) ``` ### Environmental Variables