From 58577ca64affdf784efd675aa7d157040694404d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Klaus=20K=C3=A4mpf?= Date: Sun, 30 Oct 2022 20:01:10 +0100 Subject: [PATCH 3/8] Introduce operand_offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Klaus Kämpf --- .../Decompiler/src/decompile/cpp/pcodeparse.y | 1 + .../Decompiler/src/decompile/cpp/semantics.cc | 9 +++ .../Decompiler/src/decompile/cpp/semantics.hh | 2 +- .../Decompiler/src/decompile/cpp/slaformat.cc | 4 ++ .../Decompiler/src/decompile/cpp/slaformat.hh | 4 ++ .../src/decompile/cpp/slgh_compile.cc | 4 +- .../src/decompile/cpp/slghpatexpress.cc | 16 +++++ .../src/decompile/cpp/slghpatexpress.hh | 14 ++++ .../src/decompile/cpp/slghsymbol.cc | 69 ++++++++++++++++++- .../src/decompile/cpp/slghsymbol.hh | 19 ++++- GhidraDocs/languages/html/sleigh_symbols.html | 7 ++ 11 files changed, 145 insertions(+), 4 deletions(-) diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y b/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y index 87cd725974..27667bf777 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/pcodeparse.y @@ -741,6 +741,7 @@ int4 PcodeSnippet::lex(void) yylval.operandsym = (OperandSymbol *)sym; return OPERANDSYM; case SleighSymbol::start_symbol: + case SleighSymbol::offset_symbol: case SleighSymbol::end_symbol: case SleighSymbol::next2_symbol: case SleighSymbol::flowdest_symbol: diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc index cd9b9835b1..d9b02eea18 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.cc @@ -121,6 +121,8 @@ uintb ConstTpl::fix(const ParserWalker &walker) const switch(type) { case j_start: return walker.getAddr().getOffset(); // Fill in starting address placeholder with real 
address + case j_offset: + return walker.getAddr().getOffset(); // Fill in starting address placeholder with real address case j_next: return walker.getNaddr().getOffset(); // Fill in next address placeholder with real address case j_next2: @@ -318,6 +320,10 @@ void ConstTpl::encode(Encoder &encoder) const encoder.openElement(sla::ELEM_CONST_START); encoder.closeElement(sla::ELEM_CONST_START); break; + case j_offset: + encoder.openElement(sla::ELEM_CONST_OFFSET); + encoder.closeElement(sla::ELEM_CONST_OFFSET); + break; case j_next: encoder.openElement(sla::ELEM_CONST_NEXT); encoder.closeElement(sla::ELEM_CONST_NEXT); @@ -417,6 +423,9 @@ void ConstTpl::decode(Decoder &decoder) else if (el == sla::ELEM_CONST_FLOWDEST_SIZE) { type = j_flowdest_size; } + else if (el == sla::ELEM_CONST_OFFSET) { + type = j_offset; + } else throw LowlevelError("Bad constant type"); decoder.closeElement(el); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh index e0b069959d..c8ca547856 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/semantics.hh @@ -35,7 +35,7 @@ class ConstTpl { public: enum const_type { real=0, handle=1, j_start=2, j_next=3, j_next2=4, j_curspace=5, j_curspace_size=6, spaceid=7, j_relative=8, - j_flowref=9, j_flowref_size=10, j_flowdest=11, j_flowdest_size=12 }; + j_flowref=9, j_flowref_size=10, j_flowdest=11, j_flowdest_size=12, j_offset=13 }; enum v_field { v_space=0, v_offset=1, v_size=2, v_offset_plus=3 }; private: const_type type; diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc index f8b3bcfa73..59269347ff 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.cc @@ -166,6 +166,10 @@ ElementId ELEM_CONST_FLOWREF = ElementId("const_flowref", 85, 
FORMAT_SCOPE); ElementId ELEM_CONST_FLOWREF_SIZE = ElementId("const_flowref_size", 86, FORMAT_SCOPE); ElementId ELEM_CONST_FLOWDEST = ElementId("const_flowdest", 87, FORMAT_SCOPE); ElementId ELEM_CONST_FLOWDEST_SIZE = ElementId("const_flowdest_size", 88, FORMAT_SCOPE); +ElementId ELEM_OFFSET_EXP = ElementId("offset_exp", 89, FORMAT_SCOPE); +ElementId ELEM_OFFSET_SYM = ElementId("offset_sym", 90, FORMAT_SCOPE); +ElementId ELEM_OFFSET_SYM_HEAD = ElementId("offset_sym_head", 91, FORMAT_SCOPE); +ElementId ELEM_CONST_OFFSET = ElementId("const_offset", 92, FORMAT_SCOPE); /// The bytes of the header are read from the stream and verified against the required form and current version. /// If the form matches, \b true is returned. No additional bytes are read. diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh index a8eb11b63c..7034cf794e 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slaformat.hh @@ -172,6 +172,10 @@ extern ElementId ELEM_CONST_FLOWREF; ///< SLA format element "const_flowref" extern ElementId ELEM_CONST_FLOWREF_SIZE; ///< SLA format element "const_flowref_size" extern ElementId ELEM_CONST_FLOWDEST; ///< SLA format element "const_flowdest" extern ElementId ELEM_CONST_FLOWDEST_SIZE; ///< SLA format element "const_flowdest_size" +extern ElementId ELEM_OFFSET_EXP; ///< SLA format element "offset_exp" +extern ElementId ELEM_OFFSET_SYM; ///< SLA format element "offset_sym" +extern ElementId ELEM_OFFSET_SYM_HEAD; ///< SLA format element "offset_sym_head" +extern ElementId ELEM_CONST_OFFSET; ///< SLA format element "const_offset" extern bool isSlaFormat(istream &s); ///< Verify a .sla file header at the current point of the given stream extern void writeSlaHeader(ostream &s); ///< Write a .sla file header to the given stream diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc
b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc index 50d85e22ba..6f311f230c 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slgh_compile.cc @@ -1796,7 +1796,7 @@ SleighCompile::SleighCompile(void) } /// Create the address spaces: \b const, \b unique, and \b other. -/// Define the special symbols: \b inst_start, \b inst_next, \b inst_next2, \b epsilon. +/// Define the special symbols: \b inst_start, \b operand_offset, \b inst_next, \b inst_next2, \b epsilon. /// Define the root subtable symbol: \b instruction void SleighCompile::predefinedSymbols(void) @@ -1818,6 +1818,8 @@ void SleighCompile::predefinedSymbols(void) symtab.addSymbol(spacesym); StartSymbol *startsym = new StartSymbol("inst_start",getConstantSpace()); symtab.addSymbol(startsym); + OffsetSymbol *offsetsym = new OffsetSymbol("operand_offset",getConstantSpace()); + symtab.addSymbol(offsetsym); EndSymbol *endsym = new EndSymbol("inst_next",getConstantSpace()); symtab.addSymbol(endsym); Next2Symbol *next2sym = new Next2Symbol("inst_next2",getConstantSpace()); diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc index 9410978595..e16bf18b74 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.cc @@ -502,6 +502,8 @@ PatternExpression *PatternExpression::decodeExpression(Decoder &decoder,Translat res = new MinusExpression(); else if (el == sla::ELEM_NOT_EXP) res = new NotExpression(); + else if (el == sla::ELEM_OFFSET_EXP) + res = new OffsetInstructionValue(); else return (PatternExpression *)0; @@ -711,6 +713,20 @@ void StartInstructionValue::decode(Decoder &decoder,Translate *trans) decoder.closeElement(el); } +void OffsetInstructionValue::encode(Encoder &encoder) const + +{ + encoder.openElement(sla::ELEM_OFFSET_EXP); + 
encoder.closeElement(sla::ELEM_OFFSET_EXP); +} + +void OffsetInstructionValue::decode(Decoder &decoder,Translate *trans) + +{ + uint4 el = decoder.openElement(sla::ELEM_OFFSET_EXP); + decoder.closeElement(el); +} + void EndInstructionValue::encode(Encoder &encoder) const { diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh index 118fe3cc94..6e264ded3f 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghpatexpress.hh @@ -153,6 +153,20 @@ public: virtual void encode(Encoder &encoder) const; virtual void decode(Decoder &decoder,Translate *trans); }; + +class OffsetInstructionValue : public PatternValue { +public: + OffsetInstructionValue(void) {} + virtual intb getValue(ParserWalker &walker) const { + return (intb)walker.getOffset(-1); // NOTE(review): -1 presumably selects the offset of the constructor currently being walked - confirm against ParserWalker::getOffset + } + virtual TokenPattern genMinPattern(const vector<TokenPattern> &ops) const { return TokenPattern(); } + virtual TokenPattern genPattern(intb val) const { return TokenPattern(); } + virtual intb minValue(void) const { return (intb)0; } + virtual intb maxValue(void) const { return (intb)0; } + virtual void encode(Encoder &encoder) const; + virtual void decode(Decoder &decoder,Translate *trans); +}; class EndInstructionValue : public PatternValue { public: diff --git a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc index b35dd6ec79..7c9f2abe69 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.cc @@ -1,4 +1,4 @@ -/* ### +/* ### * IP: GHIDRA * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -224,6 +224,8 @@ void SymbolTable::decodeSymbolHeader(Decoder &decoder) sym = new OperandSymbol(); else if (el == sla::ELEM_START_SYM_HEAD) sym = new StartSymbol(); + else if (el == sla::ELEM_OFFSET_SYM_HEAD) + sym = new
OffsetSymbol(); else if (el == sla::ELEM_END_SYM_HEAD) sym = new EndSymbol(); else if (el == sla::ELEM_NEXT2_SYM_HEAD) @@ -1137,6 +1139,71 @@ void StartSymbol::decode(Decoder &decoder,SleighBase *trans) decoder.closeElement(sla::ELEM_START_SYM.getId()); } +OffsetSymbol::OffsetSymbol(const string &nm,AddrSpace *cspc) : SpecificSymbol(nm) + +{ + const_space = cspc; + patexp = new OffsetInstructionValue(); + patexp->layClaim(); +} + +OffsetSymbol::~OffsetSymbol(void) + +{ + if (patexp != (PatternExpression *)0) + PatternExpression::release(patexp); +} + +VarnodeTpl *OffsetSymbol::getVarnode(void) const + +{ // Returns current operand offset as a constant + ConstTpl spc(const_space); + ConstTpl off(ConstTpl::j_offset); + ConstTpl sz_zero; + return new VarnodeTpl(spc,off,sz_zero); +} + +void OffsetSymbol::getFixedHandle(FixedHandle &hand,ParserWalker &walker) const + +{ + hand.space = walker.getCurSpace(); + hand.offset_space = (AddrSpace *)0; + hand.offset_offset = walker.getAddr().getOffset(); // Get starting address of instruction + hand.size = hand.space->getAddrSize(); +} + +void OffsetSymbol::print(ostream &s,ParserWalker &walker) const + +{ + intb val = (intb) walker.getAddr().getOffset(); + s << "0x" << std::hex << val << std::dec; +} + +void OffsetSymbol::encode(Encoder &encoder) const + +{ + encoder.openElement(sla::ELEM_OFFSET_SYM); + encoder.writeUnsignedInteger(sla::ATTRIB_ID, getId()); + encoder.closeElement(sla::ELEM_OFFSET_SYM); +} + +void OffsetSymbol::encodeHeader(Encoder &encoder) const + +{ + encoder.openElement(sla::ELEM_OFFSET_SYM_HEAD); + SleighSymbol::encodeHeader(encoder); + encoder.closeElement(sla::ELEM_OFFSET_SYM_HEAD); +} + +void OffsetSymbol::decode(Decoder &decoder,SleighBase *trans) + +{ + const_space = trans->getConstantSpace(); + patexp = new OffsetInstructionValue(); // Same expression type the naming constructor installs + patexp->layClaim(); + decoder.closeElement(sla::ELEM_OFFSET_SYM.getId()); +} + EndSymbol::EndSymbol(const string &nm,AddrSpace *cspc) : SpecificSymbol(nm) { diff --git
a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh index 5e8b4d3dfd..e460a43bac 100644 --- a/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh +++ b/Ghidra/Features/Decompiler/src/decompile/cpp/slghsymbol.hh @@ -27,7 +27,7 @@ class SleighSymbol { public: enum symbol_type { space_symbol, token_symbol, userop_symbol, value_symbol, valuemap_symbol, name_symbol, varnode_symbol, varnodelist_symbol, operand_symbol, - start_symbol, end_symbol, next2_symbol, subtable_symbol, macro_symbol, section_symbol, + start_symbol, offset_symbol, end_symbol, next2_symbol, subtable_symbol, macro_symbol, section_symbol, bitrange_symbol, context_symbol, epsilon_symbol, label_symbol, flowdest_symbol, flowref_symbol, dummy_symbol }; private: @@ -373,6 +373,23 @@ public: virtual void decode(Decoder &decoder,SleighBase *trans); }; +class OffsetSymbol : public SpecificSymbol { + AddrSpace *const_space; + PatternExpression *patexp; +public: + OffsetSymbol(void) { patexp = (PatternExpression *)0; } // For use with decode + OffsetSymbol(const string &nm,AddrSpace *cspc); + virtual ~OffsetSymbol(void); + virtual VarnodeTpl *getVarnode(void) const; + virtual PatternExpression *getPatternExpression(void) const { return patexp; } + virtual void getFixedHandle(FixedHandle &hand,ParserWalker &walker) const; + virtual void print(ostream &s,ParserWalker &walker) const; + virtual symbol_type getType(void) const { return offset_symbol; } + virtual void encode(Encoder &encoder) const; + virtual void encodeHeader(Encoder &encoder) const; + virtual void decode(Decoder &decoder,SleighBase *trans); +}; + class EndSymbol : public SpecificSymbol { AddrSpace *const_space; PatternExpression *patexp; diff --git a/GhidraDocs/languages/html/sleigh_symbols.html b/GhidraDocs/languages/html/sleigh_symbols.html index 70598b7310..f5ee18878e 100644 --- a/GhidraDocs/languages/html/sleigh_symbols.html +++ 
b/GhidraDocs/languages/html/sleigh_symbols.html @@ -186,6 +186,10 @@ We list all of the symbols that are predefined by SLEIGH. epsilon A special identifier indicating an empty bit pattern. + + operand_offset + Offset of the address of the current operand. Useful for variable-length instructions. + @@ -205,6 +209,9 @@ identifiers are address spaces. The epsiloninstruction identifier is the root instruction table. +operand_offset was introduced to support VAX +variable-length, multi-operand instructions. PC-relative addressing in +VAX is relative to the operand address, not the instruction address.

-- 2.50.0