From 78ce7e567fce86d611353c5a9194833a54a6fbe0 Mon Sep 17 00:00:00 2001 From: Marius Hillenbrand Date: Tue, 19 Oct 2021 18:09:52 +0200 Subject: [PATCH] Fix encoding/decoding of string literals for big-endian systems Per SPIR-V spec, a string literal's UTF-8 octets are encoded packed into words with little-endian convention. Explicitly perform that encoding instead of assuming that the host system is little-endian. Note that this change requires corresponding fixes in SPIRV-Tools. Fixes #202 --- SPIRV/SPVRemapper.cpp | 18 +++++++++++------ SPIRV/disassemble.cpp | 47 ++++++++++++++++++++++++++++--------------- SPIRV/spvIR.h | 22 +++++++++----------- 3 files changed, 52 insertions(+), 35 deletions(-) diff --git a/SPIRV/SPVRemapper.cpp b/SPIRV/SPVRemapper.cpp index 56d6d5d4a5..fdfbeb90cd 100644 --- a/SPIRV/SPVRemapper.cpp +++ b/SPIRV/SPVRemapper.cpp @@ -297,15 +297,21 @@ namespace spv { std::string spirvbin_t::literalString(unsigned word) const { std::string literal; + const spirword_t * pos = spv.data() + word; literal.reserve(16); - const char* bytes = reinterpret_cast(spv.data() + word); - - while (bytes && *bytes) - literal += *bytes++; - - return literal; + do { + spirword_t word = *pos; + for (int i = 0; i < 4; i++) { + char c = word & 0xff; + if (c == '\0') + return literal; + literal += c; + word >>= 8; + } + pos++; + } while (true); } void spirvbin_t::applyMap() diff --git a/SPIRV/disassemble.cpp b/SPIRV/disassemble.cpp index 73c988c5b3..74dd605409 100644 --- a/SPIRV/disassemble.cpp +++ b/SPIRV/disassemble.cpp @@ -43,6 +43,7 @@ #include #include #include +#include #include "disassemble.h" #include "doc.h" @@ -100,6 +101,7 @@ class SpirvStream { void outputMask(OperandClass operandClass, unsigned mask); void disassembleImmediates(int numOperands); void disassembleIds(int numOperands); + std::pair decodeString(); int disassembleString(); void disassembleInstruction(Id resultId, Id typeId, Op opCode, int numOperands); @@ -290,31 +292,44 @@ void SpirvStream::disassembleIds(int numOperands) } } -// return the number of operands consumed by the string -int SpirvStream::disassembleString() +// decode string from words at current position (non-consuming) +std::pair SpirvStream::decodeString() { - int startWord = word; - - out << " \""; - - const char* wordString; + std::string res; + int wordPos = word; + char c; bool done = false; + do { - unsigned int content = stream[word]; - wordString = (const char*)&content; + unsigned int content = stream[wordPos]; for (int charCount = 0; charCount < 4; ++charCount) { - if (*wordString == 0) { + c = content & 0xff; + content >>= 8; + if (c == '\0') { done = true; break; } - out << *(wordString++); + res += c; } - ++word; - } while (! done); + ++wordPos; + } while(! done); + + return std::make_pair(wordPos - word, res); +} + +// return the number of operands consumed by the string +int SpirvStream::disassembleString() +{ + out << " \""; + std::pair decoderes = decodeString(); + + out << decoderes.second; out << "\""; - return word - startWord; + word += decoderes.first; + + return decoderes.first; } void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, int numOperands) @@ -331,7 +346,7 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, nextNestedControl = 0; } } else if (opCode == OpExtInstImport) { - idDescriptor[resultId] = (const char*)(&stream[word]); + idDescriptor[resultId] = decodeString().second; } else { if (resultId != 0 && idDescriptor[resultId].size() == 0) { @@ -428,7 +443,7 @@ void SpirvStream::disassembleInstruction(Id resultId, Id /*typeId*/, Op opCode, --numOperands; // Get names for printing "(XXX)" for readability, *after* this id if (opCode == OpName) - idDescriptor[stream[word - 1]] = (const char*)(&stream[word]); + idDescriptor[stream[word - 1]] = decodeString().second; break; case OperandVariableIds: disassembleIds(numOperands); diff --git a/SPIRV/spvIR.h b/SPIRV/spvIR.h index 486e80d000..5249a5ba73 100644 --- a/SPIRV/spvIR.h +++ b/SPIRV/spvIR.h @@ -111,27 +111,23 @@ class Instruction { void addStringOperand(const char* str) { - unsigned int word; - char* wordString = (char*)&word; - char* wordPtr = wordString; - int charCount = 0; + unsigned int word = 0; + unsigned int shiftAmount = 0; char c; + do { c = *(str++); - *(wordPtr++) = c; - ++charCount; - if (charCount == 4) { + word |= ((unsigned int)c) << shiftAmount; + shiftAmount += 8; + if (shiftAmount == 32) { addImmediateOperand(word); - wordPtr = wordString; - charCount = 0; + word = 0; + shiftAmount = 0; } } while (c != 0); // deal with partial last word - if (charCount > 0) { - // pad with 0s - for (; charCount < 4; ++charCount) - *(wordPtr++) = 0; + if (shiftAmount > 0) { addImmediateOperand(word); } }