Implements modularized system for use-cases

Using the new @use_case and @configurable system, it is now possible to add further use-cases to the wintermute project while having all required configurations injected automatically. It also formalizes some parts of the code, such as LLM capabilities, which are now created as dedicated classes. This is still a point where some functionality is missing, though, as the capabilities are not yet properly passed to function-calling LLMs, which should be done in a future release. Care was taken not to change the semantics of the resulting use-cases (such as local_privesc_linux), other than that the configuration now works differently. If there are regressions, they are not on purpose and should be reported. This commit also contains various code-cleanup operations, which happened during the re-work for the new system. However, the helper methods in particular and the contents of the privesc use-case have largely stayed unchanged, other than being adapted to the new surrounding infrastructure.
ipa-lab · Apr 6, 2024 · d6fe107 · d6fe107
1 parent 394d89b
commit d6fe107
Show file tree

Hide file tree

Showing 47 changed files with 1,003 additions and 839 deletions.
diff --git a/.env.example b/.env.example
@@ -1,16 +1,18 @@
-OPENAI_KEY="your-openai-key"
-MODEL="gpt-4"
-CONTEXT_SIZE=7000
+llm.api_key='your-openai-key'
+log_db.connection_string='log_db.sqlite3'
 
 # exchange with the IP of your target VM
-TARGET_IP='enter-the-private-ip-of-some-vm.local'
+conn.host='enter-the-private-ip-of-some-vm.local'
+conn.hostname='the-hostname-of-the-vm-used-for-root-detection'
+conn.port=2222
 
 # exchange with the user for your target VM
-TARGET_USER='bob'
-TARGET_PASSWORD='secret'
+conn.username='bob'
+conn.password='secret'
 
-# which LLM driver to use (can be openai_rest or oobabooga for now)
-LLM_CONNECTION = "openai_rest"
+# which LLM model to use (can be anything openai supports, or if you use a custom llm.api_url, anything your api provides for the model parameter)
+llm.model='gpt-3.5-turbo'
+llm.context_size=16385
 
 # how many rounds should this thing go?
-MAX_ROUNDS = 20
+max_turns = 20
diff --git a/.gitignore b/.gitignore
@@ -3,3 +3,6 @@ venv/
 __pycache__/
 *.swp
 *.log
+.idea/
+*.sqlite3
+*.sqlite3-journal
diff --git a/README.md b/README.md
@@ -67,7 +67,7 @@ This work is partially based upon our empiric research into [how hackers work](h
 
 This is a simple example run of `wintermute.py` using GPT-4 against a vulnerable VM. More example runs can be seen in [our collection of historic runs](docs/old_runs/old_runs.md).
 
-![Example wintermute run](example_run_gpt4.png)
+![Example wintermute run](docs/example_run_gpt4.png)
 
 Some things to note:
 
@@ -105,8 +105,13 @@ $ cp .env.example .env
 # IMPORTANT: setup your OpenAI API key, the VM's IP and credentials within .env
 $ vi .env
 
-# start wintermute, i.e., attack the configured virtual machine
+# if you start wintermute without parameters, it will list all available use cases
 $ python wintermute.py
+usage: wintermute.py [-h] {linux_privesc,windows privesc} ...
+wintermute.py: error: the following arguments are required: {linux_privesc,windows privesc}
+
+# start wintermute, i.e., attack the configured virtual machine
+$ python wintermute.py linux_privesc --enable_explanation true --enable_update_state true
 ~~~
 
 # Disclaimers

diff --git a/args.py b/args.py
diff --git a/capabilities/__init__.py b/capabilities/__init__.py
@@ -0,0 +1,5 @@
+from .capability import Capability
+from .psexec_test_credential import PSExecTestCredential
+from .psexec_run_command import PSExecRunCommand
+from .ssh_run_command import SSHRunCommand
+from .ssh_test_credential import SSHTestCredential
diff --git a/capabilities/capability.py b/capabilities/capability.py
@@ -0,0 +1,37 @@
+import abc
+
+
+class Capability(abc.ABC):
+    """
+    Base class for everything an LLM can use to perform a task.
+
+    The signature of the __call__ method is not fixed yet and will probably differ between different types of
+    capabilities. Capabilities that accomplish the same task, only slightly differently or for a different target,
+    are recommended to share the same signature.
+
+    This class is not very powerful yet, but in the near-term future it is meant to provide an automated way of
+    producing a json schema for the capabilities, which can then be used for function-calling LLMs.
+    """
+
+    @abc.abstractmethod
+    def describe(self, name: str = None) -> str:
+        """
+        Return a string that describes the capability; it is used to generate the help text for the LLM.
+
+        The name under which the capability is available to the LLM may be passed in. This is only allowed to stay
+        backwards compatible, so please do not use the name if you don't really have to, which keeps the option of
+        removing the parameter in the future.
+
+        This is a method and not a simple property on purpose (though it could become a @property once the name
+        parameter is not needed anymore), so that implementations can template some of the capability's parameters
+        into the description.
+        """
+
+    @abc.abstractmethod
+    def __call__(self, *args, **kwargs):
+        """
+        Execute the capability.
+
+        Please make sure that the parameters and the return type of your implementation are well typed, as this will
+        make it easier to support full function calling soon.
+        """
diff --git a/capabilities/psexec_run_command.py b/capabilities/psexec_run_command.py
@@ -0,0 +1,30 @@
+from dataclasses import dataclass
+from typing import Tuple
+
+from utils import PSExecConnection
+from .capability import Capability
+
+
+@dataclass
+class PSExecRunCommand(Capability):
+    """Capability that executes a single shell command on the windows machine through a PSExec connection."""
+
+    # connection on which the commands are run
+    conn: PSExecConnection
+
+    def describe(self, name: str = None) -> str:
+        """
+        Return the help text that tells the LLM how to use this capability; the name is not part of the text.
+
+        This must be a regular method, exactly as declared by the Capability base class. As a @property, a call
+        like `capability.describe(name)` would try to call the returned string and fail with a TypeError.
+        """
+        return "give a command to be executed on the shell and I will respond with the terminal output when running this command on the windows machine. The given command must not require user interaction. Only state the to be executed command. The command should be used for enumeration or privilege escalation."
+
+    def __call__(self, command: str) -> Tuple[str, bool]:
+        """
+        Run the command on the connection and return the first element of its result.
+
+        The second element of the returned tuple is always False here.
+        """
+        return self.conn.run(command)[0], False
diff --git a/capabilities/psexec_test_credential.py b/capabilities/psexec_test_credential.py
@@ -0,0 +1,33 @@
+import warnings
+from dataclasses import dataclass
+from typing import Tuple
+
+from utils import PSExecConnection
+from .capability import Capability
+
+
+@dataclass
+class PSExecTestCredential(Capability):
+    """Capability that checks credentials by opening a new PSExec connection with them."""
+
+    # connection from which the connections used for testing are derived via new_with
+    conn: PSExecConnection
+
+    def describe(self, name: str = None) -> str:
+        """Return the help text for the LLM, which contains the name under which the capability is available."""
+        return f"give credentials to be tested by stating `{name} username password`"
+
+    def __call__(self, username: str, password: str) -> Tuple[str, bool]:
+        """
+        Try to log in with the given credentials and return a message for the LLM plus a success flag.
+
+        Every exception raised while connecting is reported as wrong credentials. A successful login is always
+        reported as root, as it is not yet checked which user was logged in (a warning is emitted about that).
+        """
+        try:
+            login_attempt = self.conn.new_with(username=username, password=password)
+            login_attempt.init()
+            warnings.warn("full credential testing is not implemented yet for psexec, we have logged in, but do not know who we are, returning True for now")
+        except Exception:
+            return "Authentication error, credentials are wrong\n", False
+        return "Login as root was successful\n", True
diff --git a/capabilities/ssh_run_command.py b/capabilities/ssh_run_command.py
@@ -0,0 +1,69 @@
+import re
+from dataclasses import dataclass
+from typing import Tuple
+
+from invoke import Responder
+
+from utils import SSHConnection
+from .capability import Capability
+
+
+# prompts that indicate a root shell, they have to match the complete last line of the output
+GOT_ROOT_REXEXPs = [
+    re.compile(r"^# $"),
+    # the dot of the bash version is escaped, so that it does not match arbitrary characters
+    re.compile(r"^bash-[0-9]+\.[0-9]# $")
+]
+
+# matches ansi shell codes
+ANSI_ESCAPE_REGEXP = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+
+
+@dataclass
+class SSHRunCommand(Capability):
+    """Capability that executes a single shell command on the linux server over SSH."""
+
+    # connection on which the commands are run, its username, password and hostname are read as well
+    conn: SSHConnection
+
+    def describe(self, name: str = None) -> str:
+        """Return the help text that tells the LLM how to use this capability; the name is not part of the text."""
+        return "give a command to be executed on the shell and I will respond with the terminal output when running this command on the linux server. The given command must not require user interaction. Only state the to be executed command. The command should be used for enumeration or privilege escalation."
+
+    def __call__(self, command: str) -> Tuple[str, bool]:
+        """
+        Run the command and return its output together with a flag that tells if a root shell was detected.
+
+        Sudo password prompts are answered with the password of the connection and lines starting with such a
+        prompt are removed from the returned output. Root is detected on the last remaining line of the output,
+        which either has to be one of the known root prompts or has to start with `root@<hostname>:`.
+
+        If running the command raises an exception (e.g., because of the timeout of 10 seconds), the output is
+        treated as empty instead of passing the exception on.
+        """
+        sudo_pass = Responder(
+            # the pattern is used as a regular expression, so the username is escaped to be matched literally
+            pattern=r'\[sudo\] password for ' + re.escape(self.conn.username) + ':',
+            response=self.conn.password + '\n',
+        )
+
+        try:
+            stdout, _, _ = self.conn.run(command, pty=True, warn=True, watchers=[sudo_pass], timeout=10)
+        except Exception:
+            # best effort: a command that hangs may mean that an interactive (root) shell was opened
+            print("TIMEOUT! Could we have become root?")
+            stdout = ""
+
+        sudo_prompt = '[sudo] password for ' + self.conn.username + ':'
+        lines = [line for line in stdout.splitlines() if not line.startswith(sudo_prompt)]
+
+        # remove ansi shell codes, this is only done for the prompt detection and not for the returned output
+        last_line = ANSI_ESCAPE_REGEXP.sub('', lines[-1]) if lines else ""
+
+        got_root = (
+            any(root_prompt.fullmatch(last_line) for root_prompt in GOT_ROOT_REXEXPs)
+            or last_line.startswith(f'root@{self.conn.hostname}:')
+        )
+
+        # the lines are joined with newlines, as they would otherwise be glued together without any separator
+        return "\n".join(lines), got_root
diff --git a/capabilities/ssh_test_credential.py b/capabilities/ssh_test_credential.py
@@ -0,0 +1,46 @@
+from dataclasses import dataclass
+from typing import Tuple
+
+import paramiko
+
+from utils import SSHConnection
+from .capability import Capability
+
+
+@dataclass
+class SSHTestCredential(Capability):
+    """Capability that checks credentials by opening a new SSH connection with them."""
+
+    # connection from which the connections used for testing are derived via new_with
+    conn: SSHConnection
+
+    def describe(self, name: str = None) -> str:
+        """Return the help text for the LLM, which contains the name under which the capability is available."""
+        return f"give credentials to be tested by stating `{name} username password`"
+
+    def __call__(self, command: str) -> Tuple[str, bool]:
+        """
+        Test the credentials stated as `test_credential username password`.
+
+        Returns a message for the LLM and a flag, which is only True if the login worked and `whoami` reports root.
+        Raises an AssertionError if the command does not start with `test_credential`.
+        """
+        # split on any whitespace, so that repeated or surrounding spaces do not create empty parts
+        cmd_parts = command.split()
+        # checked explicitly instead of with `assert`, so that the check is not removed when running with -O
+        if not cmd_parts or cmd_parts[0] != "test_credential":
+            raise AssertionError("the command has to start with test_credential")
+
+        if len(cmd_parts) != 3:
+            return "didn't provide username/password", False
+
+        test_conn = self.conn.new_with(username=cmd_parts[1], password=cmd_parts[2])
+        try:
+            test_conn.init()
+            user = test_conn.run("whoami")[0].strip('\n\r ')
+        except paramiko.ssh_exception.AuthenticationException:
+            return "Authentication error, credentials are wrong\n", False
+
+        if user == "root":
+            return "Login as root was successful\n", True
+        return "Authentication successful, but user is not root\n", False
diff --git a/cmd_cleaner.py b/cmd_cleaner.py
diff --git a/example_run_gpt4.png → docs/example_run_gpt4.png b/example_run_gpt4.png → docs/example_run_gpt4.png
diff --git a/handlers.py b/handlers.py