move code under executorch/examples (#3176)
Summary:
Pull Request resolved: #3176
This diff moves the LLM manual code from outside GitHub (Dave's and Georgey's) into the executorch codebase so it can be referenced directly.
After this diff, //executorch/examples/llm_manual will become the only source of truth for our LLM manual code.

Reviewed By: byjlw, dbort

Differential Revision: D56365058

fbshipit-source-id: 97280fc0ca955caabb6056cddbb72102ed711f2c
(cherry picked from commit b6e54d0)
Gasoonjia authored and pytorchbot committed Apr 24, 2024
1 parent eabdeb0 commit a9074fd
Showing 7 changed files with 489 additions and 0 deletions.
33 changes: 33 additions & 0 deletions examples/llm_manual/CMakeLists.txt
@@ -0,0 +1,33 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

cmake_minimum_required(VERSION 3.19)
project(nanogpt_runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Set options for executorch build.
option(EXECUTORCH_BUILD_EXTENSION_DATA_LOADER "" ON)
option(EXECUTORCH_BUILD_EXTENSION_MODULE "" ON)
option(EXECUTORCH_BUILD_OPTIMIZED "" ON)
option(EXECUTORCH_BUILD_XNNPACK "" ON) # Build with Xnnpack backend

# Include the executorch subdirectory.
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/third-party/executorch
${CMAKE_BINARY_DIR}/executorch)

# include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src)

add_executable(nanogpt_runner main.cpp)
target_link_libraries(
nanogpt_runner
PRIVATE
executorch
extension_module_static # Provides the Module class
optimized_native_cpu_ops_lib # Provides baseline cross-platform kernels
xnnpack_backend) # Provides the XNNPACK CPU acceleration backend
3 changes: 3 additions & 0 deletions examples/llm_manual/README.md
@@ -0,0 +1,3 @@
# LLM Manual

This directory stores the files that the [LLM Manual](https://pytorch.org/executorch/main/llm/getting-started.html) needs. Please refer to the documentation website for more information.
20 changes: 20 additions & 0 deletions examples/llm_manual/basic_sampler.h
@@ -0,0 +1,20 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <algorithm>
#include <cstdint>
#include <vector>

class BasicSampler {
public:
BasicSampler() {}
int64_t sample(std::vector<float> logits) {
// Find the token with the highest log probability.
int64_t max_index =
std::max_element(logits.begin(), logits.end()) - logits.begin();
return max_index;
}
};
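For reference, a minimal usage sketch of BasicSampler (not part of this commit; the logits values below are made-up scores standing in for the model's output):

// sampler_example.cpp -- hedged sketch, assumes basic_sampler.h is on the include path.
#include <cstdint>
#include <iostream>
#include <vector>

#include "basic_sampler.h"

int main() {
  BasicSampler sampler;
  std::vector<float> logits = {0.1f, 2.5f, -0.3f, 0.9f}; // hypothetical next-token scores
  int64_t next_token = sampler.sample(logits); // argmax -> index 1
  std::cout << "Next token id: " << next_token << std::endl;
  return 0;
}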
192 changes: 192 additions & 0 deletions examples/llm_manual/basic_tokenizer.h
@@ -0,0 +1,192 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <cctype>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>

class BasicTokenizer {
public:
BasicTokenizer(const std::string& filePath) {
std::ifstream file(filePath);

if (!file) {
std::cerr << "Unable to open file";
exit(9); // return with error code
}
std::string str(
(std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());

size_t i = 0u;
i = consume_whitespace(str, i);
i = expect(str, i, '{');

while (i < str.size() && str[i] != '}') {
i = consume_field(str, i);
}

// Build decode map as inverse of encode.
for (auto& i : encode_) {
decode_[i.second] = i.first;
}
}

std::vector<int64_t> encode(const std::string& prompt) {
std::vector<std::string> words = parse_prompt(prompt);
std::vector<int64_t> result;
for (auto word : words) {
result.push_back(encode_[word]);
}
return result;
}

std::string decode(const std::vector<int64_t>& indices) {
std::string result;
for (const auto& index : indices) {
result += decode_[index];
}
return result;
}

private:
std::unordered_map<std::string, int64_t> encode_;
std::unordered_map<int64_t, std::string> decode_;

// Advance the input string index until a non-whitespace character is found
// or it reaches the end of string.
size_t consume_whitespace(const std::string& data, size_t i) {
while (i < data.size() && std::isspace(static_cast<unsigned char>(data[i]))) {
i++;
}

return i;
}

// Consumes a JSON field of the form
// "str": id,
size_t consume_field(const std::string& data, size_t i) {
i = consume_whitespace(data, i);

// Parse the key literal.
i = expect(data, i, '"');

auto in_escape = false;
std::string key = "";
while (i < data.size()) {
if (in_escape) {
key += data[i];
i++;
in_escape = false;
} else { // !in_escape
if (data[i] == '"') { // End of string literal
i++;
break;
} else if (data[i] == '\\') { // Escaped code point
in_escape = true;
}
key += data[i];
i++;
}
}

key = post_process_key(key);

i = expect(data, i, ':');
i = consume_whitespace(data, i);

// Read unsigned integer value
auto value_start = i;
while (i < data.size() && std::isdigit(static_cast<unsigned char>(data[i]))) {
i++;
}
auto value = static_cast<int64_t>(
std::stol(data.substr(value_start, i - value_start)));

encode_[key] = value;

i = consume_whitespace(data, i);
if (i < data.size() && data[i] == ',') {
i++;
}

return i;
}

// Assert that the next character in the input string is equal to c, and
// return the index advanced past it.
size_t expect(const std::string& data, size_t i, char c) {
if (i >= data.size() || data[i] != c) {
std::cerr << "Invalid tokenizer vocabulary file. Expected '" << c
<< "' at index " << i << std::endl;
exit(1);
}

return i + 1;
}

std::string post_process_key(std::string key) {
// Replace escaped unicode characters with the bytes they stand for in
// GPT-2's byte-level encoding.
// TODO: adopt a byte encoder to handle unicode characters in the JSON file.

std::unordered_map<std::string, std::string> replacements = {
{"\\u0120", " "},
{"\\u010a", "\n"},
};

for (const auto& replacement : replacements) {
size_t pos = 0;
// Loop over all instances of the substring in the string
while ((pos = key.find(replacement.first, pos)) != std::string::npos) {
key.replace(pos, replacement.first.length(), replacement.second);
pos += replacement.second.length();
}
}

// Remove escaping backslashes; a doubled backslash collapses to a single one.
for (size_t idx = 0; idx < key.length(); idx++) {
if (key[idx] == '\\') {
key.erase(idx, 1);
if (key[idx] == '\\') {
// If there are two backslashes, keep the second one
idx += 1;
}
}
}

return key;
}

// Split the prompt into words, keeping each leading space attached to the
// following word and treating punctuation as separate tokens.
std::vector<std::string> parse_prompt(const std::string& prompt) {
std::vector<std::string> result;
std::string word;
for (char c : prompt) {
if (c == ' ') {
if (!word.empty()) {
result.push_back(word);
word.clear();
}
word += c;
} else if (std::ispunct(static_cast<unsigned char>(c))) {
if (!word.empty()) {
result.push_back(word);
word.clear();
}
result.push_back(std::string(1, c));
} else {
word += c;
}
}
if (!word.empty()) {
result.push_back(word);
}
return result;
}
};
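For reference, a minimal usage sketch of BasicTokenizer (not part of this commit; vocab.json is a hypothetical GPT-2-style vocabulary file mapping token strings to integer ids):

// tokenizer_example.cpp -- hedged sketch, assumes basic_tokenizer.h is on the include path.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "basic_tokenizer.h"

int main() {
  BasicTokenizer tokenizer("vocab.json"); // hypothetical vocabulary file path
  std::vector<int64_t> ids = tokenizer.encode("Hello, world!");
  std::string round_trip = tokenizer.decode(ids);
  std::cout << round_trip << std::endl;
  return 0;
}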
45 changes: 45 additions & 0 deletions examples/llm_manual/export_nanogpt.py
@@ -0,0 +1,45 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# export_nanogpt.py

# Load the partitioner for the XNNPACK backend
import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner

# A model delegated to a specific backend should use that backend's edge compile config
from executorch.backends.xnnpack.utils.configs import get_xnnpack_edge_compile_config
from executorch.exir import to_edge

from model import GPT
from torch._export import capture_pre_autograd_graph
from torch.export import export
from torch.nn.attention import sdpa_kernel, SDPBackend

model = GPT.from_pretrained("gpt2") # use the GPT-2 weights as the pretrained weights
example_inputs = (
torch.randint(0, 100, (1, model.config.block_size), dtype=torch.long),
)
dynamic_shape = ({1: torch.export.Dim("token_dim", max=model.config.block_size)},)

# Trace the model, converting it to a portable intermediate representation.
# The torch.no_grad() call tells PyTorch to exclude training-specific logic.
with sdpa_kernel([SDPBackend.MATH]), torch.no_grad():
m = capture_pre_autograd_graph(model, example_inputs, dynamic_shapes=dynamic_shape)
traced_model = export(m, example_inputs, dynamic_shapes=dynamic_shape)

# Convert the model into a runnable ExecuTorch program.
# To be further lowered to the XNNPACK backend, `traced_model` needs an XNNPACK-specific edge compile config
edge_config = get_xnnpack_edge_compile_config()
edge_manager = to_edge(traced_model, compile_config=edge_config)

# Delegate the exported model to the XNNPACK backend by invoking `to_backend` with the XNNPACK partitioner.
edge_manager = edge_manager.to_backend(XnnpackPartitioner())
et_program = edge_manager.to_executorch()

# Save the Xnnpack-delegated ExecuTorch program to a file.
with open("nanogpt.pte", "wb") as file:
file.write(et_program.buffer)
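For illustration only, a hedged sketch of how BasicTokenizer and BasicSampler might compose into a greedy generation loop around the exported program; model_forward below is a hypothetical stand-in for invoking nanogpt.pte (for example through the ExecuTorch Module class linked in CMakeLists.txt) and is not part of this commit:

// generation_sketch.cpp -- hypothetical composition of the pieces above.
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

#include "basic_sampler.h"
#include "basic_tokenizer.h"

// Stand-in for a call into the exported model: given the token ids produced so
// far, return the logits for the next token.
using ModelForward = std::function<std::vector<float>(const std::vector<int64_t>&)>;

std::string generate(
    ModelForward model_forward,
    BasicTokenizer& tokenizer,
    BasicSampler& sampler,
    const std::string& prompt,
    size_t max_new_tokens) {
  std::vector<int64_t> tokens = tokenizer.encode(prompt);
  for (size_t step = 0; step < max_new_tokens; step++) {
    std::vector<float> logits = model_forward(tokens); // next-token scores
    int64_t next_token = sampler.sample(logits); // greedy argmax
    tokens.push_back(next_token);
  }
  return tokenizer.decode(tokens);
}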