
Add basic alpaca REPL mode #29

Merged · 11 commits · Mar 21, 2023
228 changes: 228 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default.

25 changes: 17 additions & 8 deletions README.md
@@ -32,14 +32,6 @@ Make sure you have a rust toolchain set up.
3. Build (`cargo build --release`)
4. Run with `cargo run --release -- <ARGS>`

Some additional things to try:

- Use `--help` to see a list of available options.
- Prompt files can be precomputed to speed up processing using the
`--cache-prompt` and `--restore-prompt` flags.

[^1]: The only legal source to get the weights at the time of writing is [this repository](https://github.com/facebookresearch/llama/blob/main/README.md#llama). The choice of words also may or may not hint at the existence of other kinds of sources.

**NOTE**: For best results, make sure to build and run in release mode. Debug builds are going to be very slow.

For example, you can try the following prompt:
@@ -48,6 +40,23 @@
```
cargo run --release -- -m /data/Llama/LLaMA/7B/ggml-model-q4_0.bin -p "Tell me how cool the Rust programming language is:"
```

Some additional things to try:

- Use `--help` to see a list of available options.
- If you have the [alpaca-lora](https://github.com/tloen/alpaca-lora) weights,
try `--repl` mode! `cargo run --release -- -m <path>/ggml-alpaca-7b-q4.bin
-f examples/alpaca_prompt.txt --repl`.

![Gif showcasing alpaca repl mode](./doc/resources/alpaca_repl_screencap.gif)

- Prompt files can be precomputed with the `--cache-prompt` and
  `--restore-prompt` flags, saving processing time on lengthy prompts (see the
  sketch after the gif below).

![Gif showcasing prompt caching](./doc/resources/prompt_caching_screencap.gif)
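
A minimal sketch of how these two flags might be used together, assuming both take a path to the cached prompt state (check `--help` for the exact argument forms; `long_prompt.txt` and `cached_prompt.bin` are placeholder names):

```
# Process the prompt once and save the resulting state (assumed flag shape):
cargo run --release -- -m <path>/ggml-model-q4_0.bin -f long_prompt.txt --cache-prompt cached_prompt.bin

# Later runs restore the saved state instead of reprocessing the prompt:
cargo run --release -- -m <path>/ggml-model-q4_0.bin --restore-prompt cached_prompt.bin
```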

[^1]: The only legal source to get the weights at the time of writing is [this repository](https://github.com/facebookresearch/llama/blob/main/README.md#llama). The choice of words also may or may not hint at the existence of other kinds of sources.

## Q&A

- **Q: Why did you do this?**
Binary file added doc/resources/alpaca_repl_screencap.gif
Binary file added doc/resources/prompt_caching_screencap.gif
7 changes: 7 additions & 0 deletions examples/alpaca_prompt.txt
@@ -0,0 +1,7 @@
Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:

$PROMPT

### Response:
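
The `$PROMPT` placeholder implies the REPL splices each typed line into this template before running inference. A minimal sketch of that substitution, assuming a plain string replace (the helper name is hypothetical, not taken from this PR):

```rust
/// Hypothetical helper: splice the user's REPL input into the prompt
/// template wherever `$PROMPT` appears.
fn build_prompt(template: &str, user_input: &str) -> String {
    template.replace("$PROMPT", user_input)
}

fn main() {
    let template = "### Instruction:\n\n$PROMPT\n\n### Response:";
    println!("{}", build_prompt(template, "What is the capital of France?"));
}
```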
5 changes: 4 additions & 1 deletion llama-cli/Cargo.toml
@@ -14,4 +14,7 @@ num_cpus = "1.15.0"

llama-rs = { path = "../llama-rs" }

rand = { workspace = true }
rand = { workspace = true }

rustyline = "11.0.0"
spinners = "4.1.0"
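
These two new dependencies carry the REPL: `rustyline` provides the interactive line editor (prompt, history, Ctrl-C/Ctrl-D handling) and `spinners` draws a progress spinner while the model processes input. A minimal sketch of the loop they enable, assuming roughly this shape rather than the PR's exact code:

```rust
use rustyline::{error::ReadlineError, DefaultEditor};
use spinners::{Spinner, Spinners};

fn main() -> rustyline::Result<()> {
    let mut rl = DefaultEditor::new()?;
    loop {
        match rl.readline(">> ") {
            Ok(line) => {
                let _ = rl.add_history_entry(&line);
                // Show a spinner while the model works on this input.
                let mut sp = Spinner::new(Spinners::Dots2, String::new());
                // ... substitute `line` into the prompt template and run inference here ...
                sp.stop();
            }
            // Ctrl-C or Ctrl-D exits the loop.
            Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => break,
            Err(err) => return Err(err),
        }
    }
    Ok(())
}
```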
4 changes: 4 additions & 0 deletions llama-cli/src/cli_args.rs
@@ -16,6 +16,10 @@ pub struct Args {
#[arg(long, short = 'f', default_value = None)]
pub prompt_file: Option<String>,

/// Run in REPL mode.
#[arg(long, short = 'R', default_value_t = false)]
pub repl: bool,

/// Sets the number of threads to use
#[arg(long, short = 't', default_value_t = num_cpus::get_physical())]
pub num_threads: usize,
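
Downstream, `main` would branch on this new flag; a hypothetical sketch of that wiring, with the struct trimmed to the flag shown above:

```rust
use clap::Parser;

/// Trimmed-down stand-in for the real Args struct.
#[derive(Parser)]
struct Args {
    /// Run in REPL mode.
    #[arg(long, short = 'R', default_value_t = false)]
    repl: bool,
}

fn main() {
    let args = Args::parse();
    if args.repl {
        // Enter the interactive loop (see the rustyline sketch above).
    } else {
        // Existing one-shot inference path.
    }
}
```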