Skip to content

Commit

Permalink
Added mostly-proper Unicode support (UTF-16 string descriptors are now converted to UTF-8).
Browse files Browse the repository at this point in the history
Note: the single-character decoding will not work for characters that span multiple UTF-16 code units (surrogate pairs).
To fix that, the code-unit-by-code-unit decoding would need to detect code units in the surrogate range (0xD800–0xDFFF) and keep consuming code units until a complete code point has been decoded.
Fortunately, the characters of most languages lie within the Basic Multilingual Plane, where each character is a single code unit.
  • Loading branch information
Marcus10110 committed Oct 12, 2023
1 parent 8eae758 commit ab7de26
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 6 deletions.
11 changes: 9 additions & 2 deletions src/USBAnalyzerResults.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#include <iostream>
#include <fstream>
#include <algorithm>

#include <locale>
#include <codecvt>
#include <stdio.h>

#include <AnalyzerHelpers.h>
Expand Down Expand Up @@ -517,7 +518,13 @@ void GetCtrlTransFrameDesc( const Frame& frm, DisplayBase display_base, std::vec
}
else if( formatter == Fld_Wchar )
{
desc = std::string( " char='" ) + char( val & 0xff ) + '\'';
// UTF-8 encode the single UTF-16 code unit.
std::u16string utf_16_str;
char16_t u16_char = static_cast<char16_t>( val );
utf_16_str.push_back( u16_char );
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
std::string utf8_str = convert.to_bytes( utf_16_str );
desc = std::string( " char='" ) + utf8_str + '\'';
}
else if( formatter == Fld_wLANGID )
{
Expand Down
11 changes: 8 additions & 3 deletions src/USBControlTransfers.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include <algorithm>
#include <cassert>
#include <iostream>
#include <locale>
#include <codecvt>

#include <AnalyzerChannelData.h>
#include <AnalyzerHelpers.h>
Expand Down Expand Up @@ -371,12 +373,13 @@ bool USBControlTransferParser::ParseStringDescriptor()
else
{
if( mParsedOffset == 2 )
stringDescriptor.clear();
utf16StringDescriptor.clear();

// the actual UNICODE string
while( mDescBytes > mParsedOffset && mPacketDataBytes > mPacketOffset )
{
stringDescriptor += ( char )pPacket->GetDataPayload( mPacketOffset, 1 );
char16_t code_unit = static_cast<char16_t>( pPacket->GetDataPayload( mPacketOffset, 2 ) );
utf16StringDescriptor.push_back( code_unit );

pResults->AddFrame( pPacket->GetDataPayloadField( mPacketOffset, 2, mAddress, "wchar", Fld_Wchar ) );
mPacketOffset += 2;
Expand All @@ -386,7 +389,9 @@ bool USBControlTransferParser::ParseStringDescriptor()
// do we have the entire string?
if( mDescBytes == mParsedOffset )
{
pResults->AddStringDescriptor( mAddress, mRequest.GetRequestedDescriptorIndex(), stringDescriptor );
std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t> convert;
std::string utf8_string_descriptor = convert.to_bytes( utf16StringDescriptor );
pResults->AddStringDescriptor( mAddress, mRequest.GetRequestedDescriptorIndex(), utf8_string_descriptor );
}
}
// return true if we've finished parsing the string descriptor, according to the parsed bLength
Expand Down
3 changes: 2 additions & 1 deletion src/USBControlTransfers.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,8 @@ class USBControlTransferParser

USBRequest mRequest;

std::string stringDescriptor;
std::u16string utf16StringDescriptor;


// these are only valid during a ParsePacket call, and invalid before and after
USBPacket* pPacket;
Expand Down

0 comments on commit ab7de26

Please sign in to comment.