Merge remote-tracking branch 'origin/main' into resume_train
Abhishek-TAMU committed Sep 3, 2024
2 parents 37d5e83 + 6cfb2ff commit fcad583
Showing 12 changed files with 783 additions and 127 deletions.
67 changes: 54 additions & 13 deletions README.md
@@ -116,9 +116,9 @@ Formatting will happen on the fly while tuning. The keys in template should matc

##### In conclusion, if using the response_template and a single sequence, either the `data_formatter_template` argument or `dataset_text_field` needs to be supplied to the trainer.
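
For illustration, a minimal sketch of how these arguments might appear in a JSON tuning configuration; the path and template values below are placeholders, not taken from this repository:

```json
{
    "training_data_path": "path/to/train.jsonl",
    "dataset_text_field": "output",
    "response_template": "\n### Label:"
}
```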

### 2. JSONL with input and output fields (no response template)
### 2. JSON/JSONL with input and output fields (no response template)

Pass a JSONL containing fields "input" with source text and "output" with class labels. Pre-format the input as you see fit. The output field will simply be concatenated to the end of the input to create a single sequence, and the input will be masked.
Pass a JSON/JSONL containing fields "input" with source text and "output" with class labels. Pre-format the input as you see fit. The output field will simply be concatenated to the end of the input to create a single sequence, and the input will be masked.

The "input" and "output" field names are mandatory and cannot be changed.

@@ -270,6 +270,13 @@ generation_config.json model-00005-of-00006.safetensors tokenizer.model

</details>

#### Optimizing checkpoint writes
Writing models to Cloud Object Storage (COS) is an expensive operation. Saving model checkpoints to a local directory results in much faster training times than writing them to COS. Use `output_dir` and `save_model_dir` to control which type of storage your checkpoints and final model are written to.

You can set `output_dir` to a local directory and set `save_model_dir` to COS to save time on write operations while ensuring checkpoints are saved.

To achieve the fastest training time, set `save_strategy="no"`; saving no checkpoints except for the final model removes the intermediate write operations altogether.
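
For example, a sketch of how these options might be combined (the paths are placeholders):

```json
{
    "output_dir": "/tmp/local-checkpoints",
    "save_model_dir": "/cos-mount/final-model",
    "save_strategy": "epoch"
}
```

Setting `save_strategy` to `"no"` instead skips the intermediate checkpoints entirely and writes only the final model to `save_model_dir`.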

## Tuning Techniques:

### LoRA Tuning Example
@@ -280,15 +287,15 @@ Set `peft_method` to `"lora"`. You can additionally pass any arguments from [Lor
r: int =8
lora_alpha: int = 32
target_modules: List[str] = field(
default_factory=lambda: ["q_proj", "v_proj"],
metadata={
"help": "The names of the modules to apply LORA to. LORA selects modules which either \
completely match or "
'end with one of the strings. If the value is ["all-linear"], \
then LORA selects all linear and Conv1D '
"modules except for the output layer."
},
)
default=None,
metadata={
"help": "The names of the modules to apply LORA to. LORA selects modules which either \
completely match or "
'end with one of the strings. If the value is ["all-linear"], \
then LORA selects all linear and Conv1D '
"modules except for the output layer."
},
)
bias = "none"
lora_dropout: float = 0.05
```
@@ -331,8 +338,11 @@ Equally you can pass in a JSON configuration for running tuning. See [build doc]
}
```
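
As a rough sketch, such a configuration might combine the base training arguments with the LoRA fields shown above (the model and data paths are placeholders):

```json
{
    "model_name_or_path": "path/to/base/model",
    "training_data_path": "path/to/train.jsonl",
    "output_dir": "path/to/output",
    "peft_method": "lora",
    "r": 8,
    "lora_alpha": 32,
    "target_modules": ["q_proj", "v_proj"]
}
```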

Notice the `target_modules` that are set are the default values. `target_modules` are the names of the modules to apply the adapter to. If this is specified, only the modules with the specified names will be replaced. When passing a list of strings, either an exact match will be performed or it is checked if the name of the module ends with any of the passed strings. If this is specified as `all-linear`, then all linear/Conv1D modules are chosen, excluding the output layer. If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised — in this case, you should specify the target modules manually. See [HuggingFace docs](https://huggingface.co/docs/peft/en/package_reference/lora#peft.LoraConfig) for more details.
Notice the `target_modules` are the names of the modules to apply the adapter to.
- If this is specified, only the modules with the specified names will be replaced. When passing a list of strings, either an exact match is performed or the module name is checked to see whether it ends with any of the passed strings. If this is specified as `all-linear`, then all linear/Conv1D modules are chosen, excluding the output layer. If this is specified as `lm_head`, which is an output layer, the `lm_head` layer will be chosen. See the note in this [section](#recommended-target-modules-per-model-architecture) on recommended target modules per model architecture.
- If this is not specified, modules will be chosen according to the model architecture. If the architecture is not known, an error will be raised; in this case, you should specify the target modules manually. See [HuggingFace docs](https://huggingface.co/docs/peft/en/package_reference/lora#peft.LoraConfig) for more details.

#### How to get the list of LoRA target_modules for a model
For each model, the `target_modules` will depend on the model architecture. You can specify linear or attention layers in `target_modules`. To obtain the list of `target_modules` for a model:

```py
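# A minimal sketch for inspecting a model's module names (assumes the
# Hugging Face transformers API; the model path below is a placeholder).
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("path/to/base/model")

# Printing the model shows its module tree, including candidate
# target_modules such as q_proj, v_proj, k_proj and o_proj.
print(model)
```
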
@@ -387,7 +397,38 @@ For example for LLaMA model the modules look like:
You can specify attention or linear layers. With the CLI, you can specify layers with `--target_modules "q_proj" "v_proj" "k_proj" "o_proj"` or `--target_modules "all-linear"`.

#### Recommended target modules per model architecture
As per the [LoRA paper](https://arxiv.org/pdf/2106.09685), section 4.2, using the query and value projection matrices can achieve reasonable quality with efficient GPU utilization. Hence, when deciding which modules to target with LoRA adapters, we recommend starting with the query and value matrices. You can also refer to the defaults specified by the PEFT library for popular model architectures in [TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING](https://github.com/huggingface/peft/blob/7b1c08d2b5e13d3c99b7d6ee83eab90e1216d4ba/src/peft/utils/constants.py#L70) as a good starting point.

<details>

<summary>How to specify lm_head as a target module</summary>

Since `lm_head` is an output layer, it will **not** be included as a target module if you specify `all-linear`. You can, however, apply the LoRA adapter to the `lm_head` layer by explicitly naming it in the `target_modules` arg.

**NOTE**: Specifying `["lm_head", "all-linear"]` will not tune the `lm_head` layer; it will run the equivalent of `["all-linear"]`. To include `lm_head`, you must explicitly specify all of the layers to tune. Using the example of the Llama model above, you would need to list `"q_proj" "v_proj" "k_proj" "o_proj" "lm_head"` to tune all of the linear layers including `lm_head`. These five layers will be included in the LoRA adapter.

Example 1:
```json
{
"target_modules": ["lm_head"] // this produces lm_head layer only
}
```

Example 2:
```json
{
"target_modules": ["lm_head", "c_proj", "c_attn", "c_fc"] // this produces lm_head, c_proj, c_attn and c_fc layers
}
```

Example 3:
```json
{
"target_modules": ["lm_head", "all-linear"] // this produces the equivalent of all-linear only, no lm_head
}
```

</details>

_________________________

12 changes: 6 additions & 6 deletions build/Dockerfile
@@ -21,7 +21,7 @@ ARG PYTHON_VERSION=3.11
ARG WHEEL_VERSION=""

## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} as base
FROM registry.access.redhat.com/ubi9/ubi:${BASE_UBI_IMAGE_TAG} AS base

ARG PYTHON_VERSION
ARG USER
@@ -44,14 +44,14 @@ RUN useradd -u $USER_UID ${USER} -m -g 0 --system && \
chmod g+rx /home/${USER}

## Used as the base of the Release stage to remove unrelated packages and CVEs
FROM base as release-base
FROM base AS release-base

# Removes the python3.9 code to eliminate possible CVEs. Also removes dnf
RUN rpm -e $(dnf repoquery python3-* -q --installed) dnf python3 yum crypto-policies-scripts


## CUDA Base ###################################################################
FROM base as cuda-base
FROM base AS cuda-base

# Ref: https://docs.nvidia.com/cuda/archive/12.1.0/cuda-toolkit-release-notes/
ENV CUDA_VERSION=12.1.0 \
@@ -75,7 +75,7 @@ ENV CUDA_HOME="/usr/local/cuda" \
LD_LIBRARY_PATH="/usr/local/nvidia/lib:/usr/local/nvidia/lib64:$CUDA_HOME/lib64:$CUDA_HOME/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"

## CUDA Development ############################################################
FROM cuda-base as cuda-devel
FROM cuda-base AS cuda-devel

# Ref: https://developer.nvidia.com/nccl/nccl-legacy-downloads
ENV NV_CUDA_CUDART_DEV_VERSION=12.1.55-1 \
@@ -99,7 +99,7 @@ RUN dnf config-manager \

ENV LIBRARY_PATH="$CUDA_HOME/lib64/stubs"

FROM cuda-devel as python-installations
FROM cuda-devel AS python-installations

ARG WHEEL_VERSION
ARG USER
@@ -142,7 +142,7 @@ RUN --mount=type=cache,target=/home/${USER}/.cache/pip,uid=${USER_UID} \
rm $(head bdist_name) /tmp/bdist_name

## Final image ################################################
FROM release-base as release
FROM release-base AS release
ARG USER
ARG PYTHON_VERSION

4 changes: 2 additions & 2 deletions tests/build/test_launch_script.py
@@ -26,7 +26,7 @@
# First Party
from build.accelerate_launch import main
from build.utils import serialize_args, get_highest_checkpoint
from tests.data import TWITTER_COMPLAINTS_DATA
from tests.data import TWITTER_COMPLAINTS_DATA_JSONL
from tuning.utils.error_logging import (
USER_ERROR_EXIT_CODE,
INTERNAL_ERROR_EXIT_CODE,
@@ -37,7 +37,7 @@
MODEL_NAME = "Maykeye/TinyLLama-v0"
BASE_KWARGS = {
"model_name_or_path": MODEL_NAME,
"training_data_path": TWITTER_COMPLAINTS_DATA,
"training_data_path": TWITTER_COMPLAINTS_DATA_JSONL,
"num_train_epochs": 5,
"per_device_train_batch_size": 4,
"per_device_eval_batch_size": 4,
14 changes: 10 additions & 4 deletions tests/data/__init__.py
@@ -19,12 +19,18 @@

### Constants used for data
DATA_DIR = os.path.join(os.path.dirname(__file__))
TWITTER_COMPLAINTS_DATA = os.path.join(DATA_DIR, "twitter_complaints_small.jsonl")
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT = os.path.join(
TWITTER_COMPLAINTS_DATA_JSON = os.path.join(DATA_DIR, "twitter_complaints_small.json")
TWITTER_COMPLAINTS_DATA_JSONL = os.path.join(DATA_DIR, "twitter_complaints_small.jsonl")
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSON = os.path.join(
DATA_DIR, "twitter_complaints_input_output.json"
)
TWITTER_COMPLAINTS_DATA_INPUT_OUTPUT_JSONL = os.path.join(
DATA_DIR, "twitter_complaints_input_output.jsonl"
)
TWITTER_COMPLAINTS_JSON_FORMAT = os.path.join(DATA_DIR, "twitter_complaints_json.json")
TWITTER_COMPLAINTS_TOKENIZED = os.path.join(
TWITTER_COMPLAINTS_TOKENIZED_JSON = os.path.join(
DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.json"
)
TWITTER_COMPLAINTS_TOKENIZED_JSONL = os.path.join(
DATA_DIR, "twitter_complaints_tokenized_with_maykeye_tinyllama_v0.jsonl"
)
EMPTY_DATA = os.path.join(DATA_DIR, "empty_data.json")