Skip to content

Commit

Permalink
FEAT: adding hex code converter (Azure#666) (Azure#681)
Browse files Browse the repository at this point in the history
  • Loading branch information
millashin authored Feb 1, 2025
1 parent 451428c commit 00ac5de
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pyrit/prompt_converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
from pyrit.prompt_converter.string_join_converter import StringJoinConverter
from pyrit.prompt_converter.suffix_append_converter import SuffixAppendConverter
from pyrit.prompt_converter.tense_converter import TenseConverter
from pyrit.prompt_converter.text_to_hex_converter import TextToHexConverter
from pyrit.prompt_converter.tone_converter import ToneConverter
from pyrit.prompt_converter.translation_converter import TranslationConverter
from pyrit.prompt_converter.unicode_confusable_converter import UnicodeConfusableConverter
Expand Down Expand Up @@ -90,6 +91,7 @@
"SearchReplaceConverter",
"StringJoinConverter",
"SuffixAppendConverter",
"TextToHexConverter",
"TenseConverter",
"ToneConverter",
"TranslationConverter",
Expand Down
27 changes: 27 additions & 0 deletions pyrit/prompt_converter/text_to_hex_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import base64

from pyrit.models import PromptDataType
from pyrit.prompt_converter import ConverterResult, PromptConverter


class TextToHexConverter(PromptConverter):
    """Prompt converter that renders text as the uppercase hex digits of its UTF-8 bytes."""

    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text") -> ConverterResult:
        """
        Encodes the prompt as UTF-8 and returns the bytes as an uppercase hexadecimal string.

        Args:
            prompt: The text to convert.
            input_type: The data type of the prompt; only "text" is accepted.

        Returns:
            A ConverterResult whose output_text is the hex string and whose output_type is "text".

        Raises:
            ValueError: If input_supported() rejects input_type.
        """
        # Guard clause: reject unsupported input before doing any work.
        if not self.input_supported(input_type):
            raise ValueError("Input type not supported")

        utf8_bytes = prompt.encode("utf-8")
        # bytes.hex() yields lowercase digits; the converter's output is uppercase.
        hex_digits = utf8_bytes.hex().upper()
        return ConverterResult(output_text=hex_digits, output_type="text")

    def input_supported(self, input_type: PromptDataType) -> bool:
        """Returns True only when input_type is "text"."""
        return input_type == "text"

60 changes: 60 additions & 0 deletions tests/unit/converter/test_text_to_hex_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import pytest

from pyrit.prompt_converter import TextToHexConverter, ConverterResult


@pytest.mark.asyncio
async def test_text_to_hex_converter_ascii():
    """Plain ASCII text, including punctuation and a trailing newline, maps to one hex byte per character."""
    source_text = "Test random string[#$!; > 18% \n"  # String of ascii characters
    # Uppercase hex of the UTF-8 bytes of source_text; the final "0A" is the newline.
    expected_hex = "546573742072616E646F6D20737472696E675B2324213B203E20313825200A"

    hex_converter = TextToHexConverter()
    converted = await hex_converter.convert_async(prompt=source_text, input_type="text")

    assert isinstance(converted, ConverterResult)
    assert converted.output_text == expected_hex
    assert converted.output_type == "text"


@pytest.mark.asyncio
async def test_text_to_hex_converter_extended_ascii():
    """Latin-1 ("extended ASCII") characters are emitted as their two-byte UTF-8 sequences."""
    converter = TextToHexConverter()
    # The prompt is "éÄ§æ" (U+00E9, U+00C4, U+00A7, U+00E6), spelled with escapes so the
    # literal cannot be silently altered by an encoding round-trip of this file. In
    # particular "Ä§" must stay two characters: collapsing it into U+0127 would encode
    # to C4A7 and no longer match expected_output.
    prompt = "\u00e9\u00c4\u00a7\u00e6"  # String of extended ascii characters
    # UTF-8: C3A9 (U+00E9) + C384 (U+00C4) + C2A7 (U+00A7) + C3A6 (U+00E6)
    expected_output = "C3A9C384C2A7C3A6"  # hex representation of extended ascii characters
    result = await converter.convert_async(prompt=prompt, input_type="text")
    assert isinstance(result, ConverterResult)
    assert result.output_text == expected_output
    assert result.output_type == "text"


@pytest.mark.asyncio
async def test_text_to_hex_converter_empty_string():
    """An empty prompt converts to an empty hex string."""
    empty_text = ""  # Empty input string
    expected_hex = ""  # Empty output string

    hex_converter = TextToHexConverter()
    converted = await hex_converter.convert_async(prompt=empty_text, input_type="text")

    assert converted.output_text == expected_hex
    assert converted.output_type == "text"


@pytest.mark.asyncio
async def test_text_to_hex_converter_multilingual():
    """Multi-byte scripts are converted to the hex of their full UTF-8 sequences."""
    source_text = "বাংলা 日本語 ᬅᬓ᭄ᬱᬭᬩᬮᬶ"  # Bengali, Japanese, Balinese
    # hex representation of multilingual string; the two "20" bytes are the separating spaces
    expected_hex = (
        "E0A6ACE0A6BEE0A682E0A6B2E0A6BE20E697A5E69CACE8AA9E20E1AC85E1AC93E1"
        "AD84E1ACB1E1ACADE1ACA9E1ACAEE1ACB6"
    )

    hex_converter = TextToHexConverter()
    converted = await hex_converter.convert_async(prompt=source_text, input_type="text")

    assert converted.output_text == expected_hex
    assert converted.output_type == "text"


@pytest.mark.asyncio
async def test_text_to_hex_converter_emoji():
    """A code point outside the Basic Multilingual Plane converts to its four-byte UTF-8 hex."""
    source_text = "😊"  # Emoji character with code point U+1F60A
    expected_hex = "F09F988A"  # hex representation of '😊'

    hex_converter = TextToHexConverter()
    converted = await hex_converter.convert_async(prompt=source_text, input_type="text")

    assert isinstance(converted, ConverterResult)
    assert converted.output_text == expected_hex
    assert converted.output_type == "text"

0 comments on commit 00ac5de

Please sign in to comment.