move code under executorch/examples (#3176)
Summary:
Pull Request resolved: #3176
This diff moves the LLM manual code from outside GitHub (Dave's and Georgey's) into the executorch codebase so it can be referenced directly.
After this diff, //executorch/examples/llm_manual will become the only source of truth for our LLM manual code.

Reviewed By: byjlw, dbort

Differential Revision: D56365058

fbshipit-source-id: 97280fc0ca955caabb6056cddbb72102ed711f2c
(cherry picked from commit b6e54d0)
Gasoonjia authored and pytorchbot committed Apr 24, 2024
1 parent eabdeb0 commit a9074fd
Showing 7 changed files with 489 additions and 0 deletions.
33 changes: 33 additions & 0 deletions examples/llm_manual/CMakeLists.txt
@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)
project(nanogpt_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Set options for executorch build.
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
option(EXECUTORCH_BUILD_OPTIMIZED "" ON)
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend

# Include the executorch subdirectory.
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/third-party/executorch
${CMAKE_BINARY_DIR}/executorch)

# include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)

add_executable(nanogpt_runner main.cpp)
target_link_libraries(
nanogpt_runner
PRIVATE
executorch
extension_module_static # Provides the Module class
optimized_native_cpu_ops_lib # Provides baseline cross-platform kernels
xnnpack_backend) # Provides the XNNPACK CPU acceleration backend
3 changes: 3 additions & 0 deletions examples/llm_manual/README.md
@@ -0,0 +1,3 @@
# LLM Manual

This directory stores the files that the [LLM Manual](https://pytorch.org/executorch/main/llm/getting-started.html) needs. Please refer to the documentation website for more information.
20 changes: 20 additions & 0 deletions examples/llm_manual/basic_sampler.h
@@ -0,0 +1,20 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <algorithm>
#include <cstdint>
#include <vector>

class BasicSampler {
public:
BasicSampler() {}
int64_t sample(std::vector<float> logits) {
// Find the token with the highest log probability.
int64_t max_index =
std::max_element(logits.begin(), logits.end()) - logits.begin();
return max_index;
}
};
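For reference, a minimal usage sketch of BasicSampler (not part of this commit; the logits values below are made-up scores standing in for the model's output):

// sampler_example.cpp -- hedged sketch, assumes basic_sampler.h is on the include path.
#include <cstdint>
#include <iostream>
#include <vector>

#include "basic_sampler.h"

int main() {
  BasicSampler sampler;
  std::vector<float> logits = {0.1f, 2.5f, -0.3f, 0.9f}; // hypothetical next-token scores
  int64_t next_token = sampler.sample(logits); // argmax -> index 1
  std::cout << "Next token id: " << next_token << std::endl;
  return 0;
}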
192 changes: 192 additions & 0 deletions examples/llm_manual/basic_tokenizer.h
@@ -0,0 +1,192 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <cctype>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

class BasicTokenizer {
public:
BasicTokenizer(const std::string& filePath) {
std::ifstream file(filePath);

if (!file) {
std::cerr << "Unable to open file";
exit(9); // return with error code
}
std::string str(
(std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());

size_t i = 0u;
i = consume_whitespace(str, i);
i = expect(str, i, '{');

while (i < str.size() && str[i] != '}') {
i = consume_field(str, i);
}

// Build decode map as inverse of encode.
for (auto& i : encode_) {
decode_[i.second] = i.first;
}
}

std::vector<int64_t> encode(const std::string& prompt) {
std::vector<std::string> words = parse_prompt(prompt);
std::vector<int64_t> result;
for (auto word : words) {
result.push_back(encode_[word]);
}
return result;
}

std::string decode(const std::vector<int64_t>& indices) {
std::string result;
for (const auto& index : indices) {
result += decode_[index];
}
return result;
}

private:
std::unordered_map<std::string, int64_t> encode_;
std::unordered_map<int64_t, std::string> decode_;

// Advance the input string index until a non-whitespace character is found
// or it reaches the end of string.
size_t consume_whitespace(const std::string& data, size_t i) {
while (i < data.size() && std::isspace(static_cast<unsigned char>(data[i]))) {
i++;
}

return i;
}

// Consumes a JSON field of the form
// "str": id,
size_t consume_field(const std::string& data, size_t i) {
i = consume_whitespace(data, i);

// Parse the key literal.
i = expect(data, i, '"');

auto in_escape = false;
std::string key = "";
while (i < data.size()) {
if (in_escape) {
key += data[i];
i++;
in_escape = false;
} else { // !in_escape
if (data[i] == '"') { // End of string literal
i++;
break;
} else if (data[i] == '\\') { // Escaped code point
in_escape = true;
}
key += data[i];
i++;
}
}

key = post_process_key(key);

i = expect(data, i, ':');
i = consume_whitespace(data, i);

// Read unsigned integer value
auto value_start = i;
while (i < data.size() && std::isdigit(static_cast<unsigned char>(data[i]))) {
i++;
}
auto value = static_cast<int64_t>(
std::stol(data.substr(value_start, i - value_start)));

encode_[key] = value;

i = consume_whitespace(data, i);
if (i < data.size() && data[i] == ',') {
i++;
}

return i;
}

// Assert that the next character in the input string is equal to c, and
// return the index advanced past it.
size_t expect(const std::string& data, size_t i, char c) {
if (i >= data.size() || data[i] != c) {
std::cerr << "Invalid tokenizer vocabulary file. Expected '" << c
<< "' at index " << i << std::endl;
exit(1);
}

return i + 1;
}

std::string post_process_key(std::string key) {
// Replace escaped unicode characters with the bytes they stand for in
// GPT-2's byte-level encoding.
// TODO: adopt a byte encoder to handle unicode characters in the JSON file.

std::unordered_map<std::string, std::string> replacements = {
{"\\u0120", " "},
{"\\u010a", "\n"},
};

for (const auto& replacement : replacements) {
size_t pos = 0;
// Loop over all instances of the substring in the string
while ((pos = key.find(replacement.first, pos)) != std::string::npos) {
key.replace(pos, replacement.first.length(), replacement.second);
pos += replacement.second.length();
}
}

// Remove escaping backslashes; a doubled backslash collapses to a single one.
for (size_t idx = 0; idx < key.length(); idx++) {
if (key[idx] == '\\') {
key.erase(idx, 1);
if (key[idx] == '\\') {
// If there are two backslashes, keep the second one
idx += 1;
}
}
}

return key;
}

// Split the prompt into words, keeping each leading space attached to the
// following word and treating punctuation as separate tokens.
std::vector<std::string> parse_prompt(const std::string& prompt) {
std::vector<std::string> result;
std::string word;
for (char c : prompt) {
if (c == ' ') {
if (!word.empty()) {
result.push_back(word);
word.clear();
}
word += c;
} else if (std::ispunct(static_cast<unsigned char>(c))) {
if (!word.empty()) {
result.push_back(word);
word.clear();
}
result.push_back(std::string(1, c));
} else {
word += c;
}
}
if (!word.empty()) {
result.push_back(word);
}
return result;
}
};
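For reference, a minimal usage sketch of BasicTokenizer (not part of this commit; vocab.json is a hypothetical GPT-2-style vocabulary file mapping token strings to integer ids):

// tokenizer_example.cpp -- hedged sketch, assumes basic_tokenizer.h is on the include path.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "basic_tokenizer.h"

int main() {
  BasicTokenizer tokenizer("vocab.json"); // hypothetical vocabulary file path
  std::vector<int64_t> ids = tokenizer.encode("Hello, world!");
  std::string round_trip = tokenizer.decode(ids);
  std::cout << round_trip << std::endl;
  return 0;
}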
45 changes: 45 additions & 0 deletions examples/llm_manual/export_nanogpt.py
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# export_nanogpt.py

# Load the partitioner for the XNNPACK backend
import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner

# A model delegated to a specific backend should use that backend's edge compile config
from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
from executorch.exir import to_edge

from model import GPT
from torch._export import capture_pre_autograd_graph
from torch.export import export
from torch.nn.attention import sdpa_kernel, SDPBackend

model = GPT.from_pretrained("gpt2") # use the GPT-2 weights as the pretrained weights
example_inputs = (
torch.randint(0, 100, (1, model.config.block_size), dtype=torch.long),
)
dynamic_shape = ({1: torch.export.Dim("token_dim", max=model.config.block_size)},)

# Trace the model, converting it to a portable intermediate representation.
# The torch.no_grad() call tells PyTorch to exclude training-specific logic.
with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
m = capture_pre_autograd_graph(model, example_inputs, dynamic_shapes=dynamic_shape)
traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)

# Convert the model into a runnable ExecuTorch program.
# To be further lowered to the XNNPACK backend, `traced_model` needs an XNNPACK-specific edge compile config
edge_config = get_xnnpack_edge_compile_config()
edge_manager = to_edge(traced_model, compile_config=edge_config)

# Delegate the exported model to the XNNPACK backend by invoking `to_backend` with the XNNPACK partitioner.
edge_manager = edge_manager.to_backend(XnnpackPartitioner())
et_program = edge_manager.to_executorch()

# Save the Xnnpack-delegated ExecuTorch program to a file.
with open("nanogpt.pte", "wb") as file:
file.write(et_program.buffer)
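For illustration only, a hedged sketch of how BasicTokenizer and BasicSampler might compose into a greedy generation loop around the exported program; model_forward below is a hypothetical stand-in for invoking nanogpt.pte (for example through the ExecuTorch Module class linked in CMakeLists.txt) and is not part of this commit:

// generation_sketch.cpp -- hypothetical composition of the pieces above.
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

#include "basic_sampler.h"
#include "basic_tokenizer.h"

// Stand-in for a call into the exported model: given the token ids produced so
// far, return the logits for the next token.
using ModelForward = std::function<std::vector<float>(const std::vector<int64_t>&)>;

std::string generate(
    ModelForward model_forward,
    BasicTokenizer& tokenizer,
    BasicSampler& sampler,
    const std::string& prompt,
    size_t max_new_tokens) {
  std::vector<int64_t> tokens = tokenizer.encode(prompt);
  for (size_t step = 0; step < max_new_tokens; step++) {
    std::vector<float> logits = model_forward(tokens); // next-token scores
    int64_t next_token = sampler.sample(logits); // greedy argmax
    tokens.push_back(next_token);
  }
  return tokenizer.decode(tokens);
}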