Skip to content

Commit 77f43b8

Browse files
committed
Add a Ghidra plugin to do "PCode op List" -> "signature string"
This is intended to be a P-Code analogue of the StmtParser.cpp Clang front-end plugin. This Ghidra script collects the sequence of P-Code operations for each function and writes them out to a JSON object in which each key is a function name and each value is the list of that function's opcode names, in order from the first instruction to the last. Optionally, it can also write a condensed form of this (one character per opcode) that matches the function signature string output from StmtParser.cpp.
1 parent 6b5be49 commit 77f43b8

File tree

1 file changed

+147
-0
lines changed

1 file changed

+147
-0
lines changed

GetAllFunctionsPcodeToJSON.py

+147
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# Retrieves the sequence of PCode ops for each function, and organizes these lists
2+
# into a JSON file
3+
#
4+
# @category colemankane
5+
#
6+
# Use the Python json library
7+
import json
8+
9+
# Add the Python argument parser
10+
from argparse import ArgumentParser
11+
12+
# Import some of the Ghidra classes we will be using
13+
from ghidra.util.task import ConsoleTaskMonitor
14+
15+
# Map each Ghidra P-Code op mnemonic to a unique single character, so that a
# function's op sequence can be condensed into a "signature string".
#
# Every value must be distinct, otherwise two different ops become
# indistinguishable in the signature. 'NEW' used to share '0' with 'INT_SEXT';
# it now has its own character ('+').
# NOTE(review): signature strings generated before this change encode NEW as
# '0' -- regenerate them if they are compared against new output.
#
# ':' must never be used as a value: it separates the function name from the
# signature in the strings output file.
pcode_maps = {
    'BOOL_AND': 'A',
    'BOOL_NEGATE': 'B',
    'BOOL_OR': 'C',
    'BOOL_XOR': 'D',
    'BRANCH': 'E',
    'BRANCHIND': 'F',
    'CALL': 'G',
    'CALLIND': 'H',
    'CALLOTHER': 'I',
    'CAST': 'J',
    'CBRANCH': 'K',
    'COPY': 'L',
    'CPOOLREF': 'M',
    'EXTRACT': 'N',
    'FLOAT_ABS': 'O',
    'FLOAT_ADD': 'P',
    'FLOAT_CEIL': 'Q',
    'FLOAT_DIV': 'R',
    'FLOAT_EQUAL': 'S',
    'FLOAT_FLOAT2FLOAT': 'T',
    'FLOAT_FLOOR': 'U',
    'FLOAT_INT2FLOAT': 'V',
    'FLOAT_LESS': 'W',
    'FLOAT_LESSEQUAL': 'X',
    'FLOAT_MULT': 'Y',
    'FLOAT_NAN': 'Z',
    'FLOAT_NEG': 'a',
    'FLOAT_NOTEQUAL': 'b',
    'FLOAT_ROUND': 'c',
    'FLOAT_SQRT': 'd',
    'FLOAT_SUB': 'e',
    'FLOAT_TRUNC': 'f',
    'INDIRECT': 'g',
    'INSERT': 'h',
    'INT_2COMP': 'i',
    'INT_ADD': 'j',
    'INT_AND': 'k',
    'INT_CARRY': 'l',
    'INT_DIV': 'm',
    'INT_EQUAL': 'n',
    'INT_LEFT': 'o',
    'INT_LESS': 'p',
    'INT_LESSEQUAL': 'q',
    'INT_MULT': 'r',
    'INT_NEGATE': 's',
    'INT_NOTEQUAL': 't',
    'INT_OR': 'u',
    'INT_REM': 'v',
    'INT_RIGHT': 'w',
    'INT_SBORROW': 'x',
    'INT_SCARRY': 'y',
    'INT_SDIV': 'z',
    'INT_SEXT': '0',
    'INT_SLESS': '1',
    'INT_SLESSEQUAL': '2',
    'INT_SREM': '3',
    'INT_SRIGHT': '4',
    'INT_SUB': '5',
    'INT_XOR': '6',
    'INT_ZEXT': '7',
    'LOAD': '8',
    'MULTIEQUAL': '9',
    'NEW': '+',  # was '0', which collided with INT_SEXT
    'PCODE_MAX': '~',
    'PIECE': '!',
    'POPCOUNT': '@',
    'PTRADD': '#',
    'PTRSUB': '$',
    'RETURN': '%',
    'SEGMENTOP': '^',
    'STORE': '&',
    'SUBPIECE': '*',
    'UNIMPLEMENTED': '-',
}
91+
92+
# Accumulator for the "all functions" report:
# function name -> list of P-Code op mnemonics
fn_report = dict()

# Command-line interface of the script. Options are introduced with '+'
# instead of the usual '-' (see prefix_chars).
# NOTE(review): presumably this keeps the script's options from clashing with
# the '-' options of whatever launches the script -- confirm.
arg_parser = ArgumentParser(
    prog='script',
    prefix_chars='+',
    description="P-Code statistical analysis",
)

# Mandatory: where the "raw" JSON report with all the details is written
arg_parser.add_argument(
    '+o', '++output',
    help='Output file for JSON',
    required=True,
)

# Optional: where the condensed "signature strings" are written
arg_parser.add_argument(
    '+s', '++strings',
    help='Output file for strings',
    default=None,
    required=False,
)

# Feed the arguments handed to the script by Ghidra (getScriptArgs) through
# the parser, instead of letting argparse read sys.argv
args = arg_parser.parse_args(args=getScriptArgs())
106+
107+
# Program.getFunctionManager() provides an interface to navigate the functions
# that Ghidra has found within the program. getFunctions() returns an iterator
# that walks them forward (True) or backward (False).
#
# Result: fn_report maps each function name to the list of P-Code op
# mnemonics expanded from its instructions, in forward instruction order.
listing = getCurrentProgram().getListing()

for fn in getCurrentProgram().getFunctionManager().getFunctions(True):

    # Look these up once per function rather than once per P-Code op
    fn_name = fn.getName()
    fn_body = fn.getBody()

    # Ask the listing for exactly the instructions that start inside the
    # function body. Compared with starting at getInstructionAt(<min address>)
    # and following getNext(), this
    #   * still finds the code when no instruction begins exactly at the
    #     lowest body address (getInstructionAt would return None and the
    #     whole function would be skipped), and
    #   * does not step through unrelated instructions that lie between the
    #     pieces of a non-contiguous body.
    fn_ops = []
    for instr in listing.getInstructions(fn_body, True):
        # Each machine instruction expands to zero or more P-Code operations
        for pcode_op in instr.getPcode():
            fn_ops.append(pcode_op.getMnemonic())

    # As before, a function only gets an entry once it has produced at least
    # one op, and functions that share a name are appended to the same list.
    if fn_ops:
        fn_report.setdefault(fn_name, []).extend(fn_ops)
136+
137+
# Write the full report (function name -> list of P-Code mnemonics) as JSON
# into the file named by the +o/++output argument
with open(args.output, 'w') as outfile:
    json.dump(fn_report, outfile)

if args.strings:
    # If +s/++strings was given, condense each function's P-Code list into a
    # string with one character per op, as mapped by pcode_maps, and write one
    # "<function name>:<signature>" line per function.

    # Stand-in for a mnemonic that has no entry in pcode_maps. Indexing the
    # table directly would raise KeyError on such an op and abort the script
    # part-way through the file; '?' is not used by any mapped op.
    unknown_op_char = '?'
    unmapped_ops = set()

    with open(args.strings, 'w') as stringsfile:
        for fn_name, fn_ops in fn_report.items():
            signature = []
            for op_name in fn_ops:
                op_char = pcode_maps.get(op_name)
                if op_char is None:
                    unmapped_ops.add(op_name)
                    op_char = unknown_op_char
                signature.append(op_char)

            # Write one signature per line
            stringsfile.write('{fn}:{s}\n'.format(fn=fn_name, s=''.join(signature)))

    # Make the substitution visible instead of silently degrading the output
    if unmapped_ops:
        print('P-Code ops without a signature character (written as {c!r}): {ops}'.format(
            c=unknown_op_char, ops=', '.join(sorted(unmapped_ops))))

0 commit comments

Comments
 (0)