Skip to content

Commit fbb4342

Browse files
authored
Merge pull request #2662 from crytic/fix-unicode-src-mappings
Improved unicode support in mutator, flattener, and more
2 parents 2923fcd + 260b744 commit fbb4342

35 files changed

+338
-268
lines changed

Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ PY_MODULE := slither
44
TEST_MODULE := tests
55

66
ALL_PY_SRCS := $(shell find $(PY_MODULE) -name '*.py') \
7-
$(shell find test -name '*.py')
7+
$(shell find tests -name '*.py')
88

99
# Optionally overridden by the user, if they're using a virtual environment manager.
1010
VENV ?= env
@@ -85,4 +85,4 @@ package: $(VENV)/pyvenv.cfg
8585

8686
.PHONY: edit
8787
edit:
88-
$(EDITOR) $(ALL_PY_SRCS)
88+
$(EDITOR) $(ALL_PY_SRCS)

slither/analyses/evm/convert.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -133,12 +133,12 @@ def _get_evm_instructions_node(node_info):
133133
contract_file = (
134134
node_info["slither"]
135135
.source_code[node_info["contract"].source_mapping.filename.absolute]
136-
.encode("utf-8")
136+
.encode("utf8")
137137
)
138138

139139
# Get evm instructions corresponding to node's source line number
140140
node_source_line = (
141-
contract_file[0 : node_info["node"].source_mapping.start].count("\n".encode("utf-8")) + 1
141+
contract_file[0 : node_info["node"].source_mapping.start].count("\n".encode("utf8")) + 1
142142
)
143143
node_pcs = contract_pcs.get(node_source_line, [])
144144
node_ins = []
@@ -169,7 +169,7 @@ def generate_source_to_evm_ins_mapping(evm_instructions, srcmap_runtime, slither
169169
"""
170170

171171
source_to_evm_mapping = {}
172-
file_source = slither.source_code[filename].encode("utf-8")
172+
file_source = slither.source_code[filename].encode("utf8")
173173
prev_mapping = []
174174

175175
for idx, mapping in enumerate(srcmap_runtime):
@@ -193,7 +193,7 @@ def generate_source_to_evm_ins_mapping(evm_instructions, srcmap_runtime, slither
193193
# See https://github.com/ethereum/solidity/issues/6119#issuecomment-467797635
194194
continue
195195

196-
line_number = file_source[0 : int(offset)].count("\n".encode("utf-8")) + 1
196+
line_number = file_source[0 : int(offset)].count("\n".encode("utf8")) + 1
197197

198198
# Append evm instructions to the corresponding source line number
199199
# Note: Some evm instructions in mapping are not necessarily in program execution order

slither/core/source_mapping/source_mapping.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -73,13 +73,20 @@ def content(self) -> str:
7373
"""
7474
Return the txt content of the Source
7575
76-
Returns:
76+
Use this property instead of eg source_code[start:end]
77+
Above will return incorrect content if source_code contains any non-ASCII (multi-byte) characters
78+
because self.start and self.end are byte offsets, not char offsets
7779
80+
Returns: str
7881
"""
7982
# If the compilation unit was not initialized, it means that the set_offset was never called
8083
# on the corresponding object, which should not happen
8184
assert self.compilation_unit
82-
return self.compilation_unit.core.source_code[self.filename.absolute][self.start : self.end]
85+
return (
86+
self.compilation_unit.core.source_code[self.filename.absolute]
87+
.encode("utf8")[self.start : self.end]
88+
.decode("utf8")
89+
)
8390

8491
@property
8592
def content_hash(self) -> str:

slither/detectors/source/rtlo.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -57,16 +57,16 @@ class RightToLeftOverride(AbstractDetector):
5757

5858
WIKI_RECOMMENDATION = "Special control characters must not be allowed."
5959

60-
RTLO_CHARACTER_ENCODED = "\u202e".encode("utf-8")
60+
RTLO_CHARACTER_ENCODED = "\u202e".encode("utf8")
6161
STANDARD_JSON = False
6262

6363
def _detect(self) -> List[Output]:
6464
results = []
65-
pattern = re.compile(".*\u202e.*".encode("utf-8"))
65+
pattern = re.compile(".*\u202e.*".encode("utf8"))
6666

6767
for filename, source in self.slither.source_code.items():
6868
# Attempt to find all RTLO characters in this source file.
69-
original_source_encoded = source.encode("utf-8")
69+
original_source_encoded = source.encode("utf8")
7070
start_index = 0
7171

7272
# Keep searching all file contents for the character.

slither/formatters/attributes/const_functions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def _patch(
4343
in_file_str = compilation_unit.core.source_code[in_file].encode("utf8")
4444
old_str_of_interest = in_file_str[modify_loc_start:modify_loc_end]
4545
# Find the keywords view|pure|constant and remove them
46-
m = re.search("(view|pure|constant)", old_str_of_interest.decode("utf-8"))
46+
m = re.search("(view|pure|constant)", old_str_of_interest.decode("utf8"))
4747
if m:
4848
create_patch(
4949
result,

slither/formatters/functions/external_function.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,16 @@ def _patch(
3232
old_str_of_interest = in_file_str[modify_loc_start:modify_loc_end]
3333
# Search for 'public' keyword which is in-between the function name and modifier name (if present)
3434
# regex: 'public' could have spaces around or be at the end of the line
35-
m = re.search(r"((\spublic)\s+)|(\spublic)$|(\)public)$", old_str_of_interest.decode("utf-8"))
35+
m = re.search(r"((\spublic)\s+)|(\spublic)$|(\)public)$", old_str_of_interest.decode("utf8"))
3636
if m is None:
3737
# No visibility specifier exists; public by default.
3838
create_patch(
3939
result,
4040
in_file,
4141
# start after the function definition's closing parenthesis
42-
modify_loc_start + len(old_str_of_interest.decode("utf-8").split(")")[0]) + 1,
42+
modify_loc_start + len(old_str_of_interest.decode("utf8").split(")")[0]) + 1,
4343
# end is same as start because we insert the keyword `external` at that location
44-
modify_loc_start + len(old_str_of_interest.decode("utf-8").split(")")[0]) + 1,
44+
modify_loc_start + len(old_str_of_interest.decode("utf8").split(")")[0]) + 1,
4545
"",
4646
" external",
4747
) # replace_text is `external`

slither/formatters/naming_convention/naming_convention.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,10 @@ def _is_var_declaration(slither: SlitherCompilationUnit, filename: str, start: i
339339
:return:
340340
"""
341341
v = "var "
342-
return slither.core.source_code[filename][start : start + len(v)] == v
342+
return (
343+
slither.core.source_code[filename].encode("utf8")[start : start + len(v)].decode("utf8")
344+
== v
345+
)
343346

344347

345348
def _explore_type( # pylint: disable=too-many-arguments,too-many-locals,too-many-branches

slither/formatters/variables/unchanged_state_variables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def _patch( # pylint: disable=too-many-arguments
4141
old_str_of_interest = in_file_str[modify_loc_start:modify_loc_end]
4242
# Add keyword `constant` before the variable name
4343
(new_str_of_interest, num_repl) = re.subn(
44-
match_text, replace_text, old_str_of_interest.decode("utf-8"), 1
44+
match_text, replace_text, old_str_of_interest.decode("utf8"), 1
4545
)
4646
if num_repl != 0:
4747
create_patch(

slither/formatters/variables/unused_state_variables.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@ def _patch(
2222
in_file_str = compilation_unit.core.source_code[in_file].encode("utf8")
2323
old_str_of_interest = in_file_str[modify_loc_start:]
2424
old_str = (
25-
old_str_of_interest.decode("utf-8").partition(";")[0]
26-
+ old_str_of_interest.decode("utf-8").partition(";")[1]
25+
old_str_of_interest.decode("utf8").partition(";")[0]
26+
+ old_str_of_interest.decode("utf8").partition(";")[1]
2727
)
2828

2929
create_patch(
3030
result,
3131
in_file,
3232
int(modify_loc_start),
3333
# Remove the entire declaration until the semicolon
34-
int(modify_loc_start + len(old_str_of_interest.decode("utf-8").partition(";")[0]) + 1),
34+
int(modify_loc_start + len(old_str_of_interest.decode("utf8").partition(";")[0]) + 1),
3535
old_str,
3636
"",
3737
)

slither/slithir/convert.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -196,7 +196,7 @@ def _fits_under_byte(val: Union[int, str]) -> List[str]:
196196
size = len(hex_val) // 2
197197
return [f"bytes{size}"]
198198
# val is a str
199-
length = len(val.encode("utf-8"))
199+
length = len(val.encode("utf8"))
200200
return [f"bytes{f}" for f in range(length, 33)] + ["bytes"]
201201

202202

slither/tools/documentation/__main__.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -154,15 +154,17 @@ def _handle_function(
154154
):
155155
return overwrite
156156
prompt = "Create a natpsec documentation for this solidity code with only notice and dev.\n"
157-
src_mapping = function.source_mapping
158-
content = function.compilation_unit.core.source_code[src_mapping.filename.absolute]
159-
start = src_mapping.start
160-
end = src_mapping.start + src_mapping.length
161-
prompt += content[start:end]
162-
163-
use_tab = _use_tab(content[start - 1])
164-
if use_tab is None and src_mapping.starting_column > 1:
165-
logger.info(f"Non standard space indentation found {content[start - 1:end]}")
157+
srcmap = function.source_mapping
158+
src = function.compilation_unit.core.source_code[srcmap.filename.absolute]
159+
first_char_index = len(
160+
src.encode("utf8")[: srcmap.start].decode("utf8")
161+
) # convert byte offset to char offset
162+
prev_char = src[first_char_index - 1]
163+
prompt += srcmap.content
164+
165+
use_tab = _use_tab(prev_char)
166+
if use_tab is None and srcmap.starting_column > 1:
167+
logger.info(f"Non standard indentation found: '{prev_char}'")
166168
if overwrite:
167169
logger.info("Disable overwrite to avoid mistakes")
168170
overwrite = False
@@ -189,7 +191,7 @@ def _handle_function(
189191
if logging_file:
190192
codex.log_codex(logging_file, "A: " + str(answer))
191193

192-
answer_processed = _handle_codex(answer, src_mapping.starting_column, use_tab, force)
194+
answer_processed = _handle_codex(answer, srcmap.starting_column, use_tab, force)
193195
if answer_processed:
194196
break
195197

@@ -201,7 +203,9 @@ def _handle_function(
201203
if not answer_processed:
202204
return overwrite
203205

204-
create_patch(all_patches, src_mapping.filename.absolute, start, start, "", answer_processed)
206+
create_patch(
207+
all_patches, srcmap.filename.absolute, srcmap.start, srcmap.start, "", answer_processed
208+
)
205209

206210
return overwrite
207211

slither/tools/flattening/flattening.py

+36-15
Original file line numberDiff line numberDiff line change
@@ -107,9 +107,9 @@ def _get_source_code(
107107
:return:
108108
"""
109109
src_mapping = contract.source_mapping
110-
content = self._compilation_unit.core.source_code[src_mapping.filename.absolute]
111-
start = src_mapping.start
112-
end = src_mapping.start + src_mapping.length
110+
src_bytes = self._compilation_unit.core.source_code[src_mapping.filename.absolute].encode(
111+
"utf8"
112+
)
113113

114114
to_patch = []
115115
# interface must use external
@@ -124,7 +124,7 @@ def _get_source_code(
124124
+ f.parameters_src().source_mapping.length
125125
)
126126
attributes_end = f.returns_src().source_mapping.start
127-
attributes = content[attributes_start:attributes_end]
127+
attributes = src_bytes[attributes_start:attributes_end].decode("utf8")
128128
regex = re.search(r"((\sexternal)\s+)|(\sexternal)$|(\)external)$", attributes)
129129
if regex:
130130
to_patch.append(
@@ -140,7 +140,7 @@ def _get_source_code(
140140
if var.location == "calldata":
141141
calldata_start = var.source_mapping.start
142142
calldata_end = calldata_start + var.source_mapping.length
143-
calldata_idx = content[calldata_start:calldata_end].find(" calldata ")
143+
calldata_idx = src_bytes[calldata_start:calldata_end].find(" calldata ")
144144
to_patch.append(
145145
Patch(
146146
calldata_start + calldata_idx + 1,
@@ -158,7 +158,7 @@ def _get_source_code(
158158
+ f.parameters_src().source_mapping["length"]
159159
)
160160
attributes_end = f.returns_src().source_mapping["start"]
161-
attributes = content[attributes_start:attributes_end]
161+
attributes = src_bytes[attributes_start:attributes_end].decode("utf8")
162162
regex = (
163163
re.search(r"((\sexternal)\s+)|(\sexternal)$|(\)external)$", attributes)
164164
if visibility == "external"
@@ -183,7 +183,7 @@ def _get_source_code(
183183
if variable.visibility == "private":
184184
attributes_start = variable.source_mapping.start
185185
attributes_end = attributes_start + variable.source_mapping.length
186-
attributes = content[attributes_start:attributes_end]
186+
attributes = src_bytes[attributes_start:attributes_end].decode("utf8")
187187
regex = re.search(r" private ", attributes)
188188
if regex:
189189
to_patch.append(
@@ -211,26 +211,47 @@ def _get_source_code(
211211

212212
to_patch.sort(key=lambda x: x.index, reverse=True)
213213

214-
content = content[start:end]
214+
content = src_mapping.content.encode("utf8")
215+
start = src_mapping.start
215216
for patch in to_patch:
216217
patch_type = patch.patch_type
217218
index = patch.index
218219
index = index - start
219220
if patch_type == "public_to_external":
220-
content = content[:index] + "public" + content[index + len("external") :]
221+
content = (
222+
content[:index].decode("utf8")
223+
+ "public"
224+
+ content[index + len("external") :].decode("utf8")
225+
)
221226
elif patch_type == "external_to_internal":
222-
content = content[:index] + "internal" + content[index + len("external") :]
227+
content = (
228+
content[:index].decode("utf8")
229+
+ "internal"
230+
+ content[index + len("external") :].decode("utf8")
231+
)
223232
elif patch_type == "public_to_internal":
224-
content = content[:index] + "internal" + content[index + len("public") :]
233+
content = (
234+
content[:index].decode("utf8")
235+
+ "internal"
236+
+ content[index + len("public") :].decode("utf8")
237+
)
225238
elif patch_type == "private_to_internal":
226-
content = content[:index] + "internal" + content[index + len("private") :]
239+
content = (
240+
content[:index].decode("utf8")
241+
+ "internal"
242+
+ content[index + len("private") :].decode("utf8")
243+
)
227244
elif patch_type == "calldata_to_memory":
228-
content = content[:index] + "memory" + content[index + len("calldata") :]
245+
content = (
246+
content[:index].decode("utf8")
247+
+ "memory"
248+
+ content[index + len("calldata") :].decode("utf8")
249+
)
229250
else:
230251
assert patch_type == "line_removal"
231-
content = content[:index] + " // " + content[index:]
252+
content = content[:index].decode("utf8") + " // " + content[index:].decode("utf8")
232253

233-
self._source_codes[contract] = content
254+
self._source_codes[contract] = content.decode("utf8")
234255

235256
def _pragmas(self) -> str:
236257
"""

slither/tools/mutator/__main__.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,10 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too
167167
# get all the contracts as a list from given codebase
168168
sol_file_list: List[str] = get_sol_file_list(Path(args.codebase), paths_to_ignore_list)
169169

170-
logger.info(blue("Preparing to mutate files:\n- " + "\n- ".join(sol_file_list)))
170+
if not contract_names:
171+
logger.info(blue("Preparing to mutate files:\n- " + "\n- ".join(sol_file_list)))
172+
else:
173+
logger.info(blue("Preparing to mutate contracts:\n- " + "\n- ".join(contract_names)))
171174

172175
# folder where backup files and uncaught mutants are saved
173176
if output_dir is None:
@@ -240,7 +243,8 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too
240243

241244
# perform mutations on {target_contract} in file {file_name}
242245
# setup placeholder val to signal whether we need to skip if no target_contract is found
243-
target_contract = "SLITHER_SKIP_MUTATIONS" if contract_names else ""
246+
skip_flag = "SLITHER_SKIP_MUTATIONS"
247+
target_contract = skip_flag if contract_names else ""
244248
try:
245249
# loop through all contracts in file_name
246250
for compilation_unit_of_main_file in sl.compilation_units:
@@ -258,8 +262,7 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too
258262
)
259263
continue
260264

261-
if target_contract == "SLITHER_SKIP_MUTATIONS":
262-
logger.debug(f"Skipping mutations in {filename}")
265+
if target_contract == skip_flag:
263266
continue
264267

265268
# TODO: find a more specific way to omit interfaces
@@ -334,6 +337,10 @@ def main() -> None: # pylint: disable=too-many-statements,too-many-branches,too
334337
# transfer and delete the backup files
335338
transfer_and_delete(files_dict)
336339

340+
if target_contract == skip_flag:
341+
logger.debug(f"No target contracts found in {filename}, skipping")
342+
continue
343+
337344
# log results for this file
338345
logger.info(blue(f"Done mutating {target_contract}."))
339346
if total_mutant_counts[0] > 0:

0 commit comments

Comments
 (0)