diff --git a/.travis.yml b/.travis.yml index 983ecab..feaa795 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,6 @@ python: - 3.5 - 3.4.2 install: - - pip install git+https://github.com/juhakivekas/multidiff + - pip install . script: - python -m pytest diff --git a/multidiff/Render.py b/multidiff/Render.py index eb8d538..7776d6a 100644 --- a/multidiff/Render.py +++ b/multidiff/Render.py @@ -1,9 +1,11 @@ from multidiff.Ansi import Ansi import binascii import html +import textwrap +import re class Render(): - def __init__(self, encoder='hexdump', color='ansi'): + def __init__(self, encoder='hexdump', color='ansi', bytes=16, width=None): '''Configure the output encoding and coloring method of this rendering object''' if color == 'ansi': self.highligther = ansi_colored @@ -16,18 +18,45 @@ def __init__(self, encoder='hexdump', color='ansi'): self.encoder = HexEncoder elif encoder == 'utf8': self.encoder = Utf8Encoder - + + self.width = width + self.bytes = bytes + self.stats = [] + def render(self, model, diff): '''Render the diff in the given model into a UTF-8 String''' result = self.encoder(self.highligther) obj = model.objects[diff.target] for op in diff.opcodes: data = obj.data[op[3]:op[4]] + data2 = "" if type(data) == bytes: - result.append(data, op[0]) + result.append(data, op[0], self.width, self.bytes, data2) elif type(data) == str: - result.append(bytes(data, "utf8"), op[0]) - return result.final() + result.append(bytes(data, "utf8"), op[0], self.width, self.bytes, bytes(data2, "utf8")) + if result.additions or result.deletions: + self.stats = [result.additions, result.deletions] + if self.bytes != 16: + return result.reformat(result.final(data2), int(self.bytes)) + return result.final(data2) + + def diff_render(self, model, diff): + '''Render the smaller diff in the given model into a UTF-8 String ''' + result = self.encoder(self.highligther) + obj1 = model.objects[diff.target-1] + obj2 = model.objects[diff.target] + for op in diff.opcodes: + data1 = obj1.data[op[1]:op[2]] + data2 = obj2.data[op[3]:op[4]] + if type(data2) == bytes: + result.append(data1, op[0], self.width, self.bytes, data2) + elif type(data2) == str: + result.append(bytes(data1, "utf8"), op[0], self.width, self.bytes, bytes(data2, "utf8")) + if result.additions or result.deletions: + self.stats = [result.additions, result.deletions] + if self.bytes != 16: + return result.reformat(result.final(data2), int(self.bytes)) + return result.final(data2).rstrip() def dumps(self, model): '''Dump all diffs in a model. Mostly good for debugging''' @@ -41,11 +70,16 @@ class Utf8Encoder(): def __init__(self, highligther): self.highligther = highligther self.output = '' + self.additions = 0 + self.deletions = 0 - def append(self, data, color): + def append(self, data, color, width=None, bytes=16, data2=""): self.output += self.highligther(str(data, 'utf8'), color) - - def final(self): + if width: + if len(self.output) > int(width): + self.output = textwrap.fill(self.output, int(width)) + + def final(self, data): return self.output class HexEncoder(): @@ -53,11 +87,17 @@ class HexEncoder(): def __init__(self, highligther): self.highligther = highligther self.output = '' - def append(self, data, color): + self.additions = 0 + self.deletions = 0 + + def append(self, data, color, width=None, bytes=16, data2=""): data = str(binascii.hexlify(data),'utf8') self.output += self.highligther(data, color) - - def final(self): + if width: + if len(self.output) > int(width): + self.output = textwrap.fill(self.output, int(width)) + + def final(self, data): return self.output class HexdumpEncoder(): @@ -70,47 +110,103 @@ def __init__(self, highligther): self.hexrow = '' self.skipspace = False self.asciirow = '' + self.additions = 0 + self.deletions = 0 - def append(self, data, color): - if len(data) == 0: - self._append(data, color) - while len(data) > 0: - if self.rowlen == 16: - self._newrow() - consumed = self._append(data[:16 - self.rowlen], color) - data = data[consumed:] - - def _append(self, data, color): - if len(data) == 0: - #in the case of highlightig a deletion in a target or an - #addition in the source, print a highlighted space and mark - #it skippanble for the next append - hexs = ' ' - self.skipspace = True + def append(self, data, color, width=None, bytes=16, data2=""): + if data2 == "": + if len(data) == 0: + self._append(data, color, width, data2) + while len(data) > 0: + if self.rowlen == 16: + self._newrow(data2) + consumed = self._append(data[:16 - self.rowlen], color, width, data2) + data = data[consumed:] else: - self._add_hex_space() - #encode to hex and add some spaces - hexs = str(binascii.hexlify(data), 'utf8') - hexs = ' '.join([hexs[i:i+2] for i in range(0, len(hexs), 2)]) - asciis = '' - #make the ascii dump - for byte in data: - if 0x20 <= byte <= 0x7E: - asciis += chr(byte) - else: - asciis += '.' - self.asciirow += self.highligther(asciis, color) + data1 = data + if len(data2) == 0: + self._append(data1, color, width, data2) + while len(data2) > 0: + if self.rowlen == 16: + self._newrow(data2) + consumed = self._append(data1[:16 - self.rowlen], color, width, data2[:16 - self.rowlen],) + data2 = data2[consumed:] - self.hexrow += self.highligther(hexs, color) - self.rowlen += len(data) - return len(data) + def stats(self, string, op, string2): + if op == 'insert': + self.additions += len(string.split()) + if op == 'delete': + self.deletions += len(string.split()) + if op == 'replace': + self.additions += len(string.split()) + self.deletions += len(string2.split()) - def _newrow(self): + def _append(self, data, color, width, data2): + if data2 == "": + if len(data) == 0: + #in the case of highlightig a deletion in a target or an + #addition in the source, print a highlighted space and mark + #it skippanble for the next append + hexs = ' ' + self.skipspace = True + else: + self._add_hex_space() + #encode to hex and add some spaces + hexs = str(binascii.hexlify(data), 'utf8') + hexs = ' '.join([hexs[i:i+2] for i in range(0, len(hexs), 2)]) + asciis = '' + #make the ascii dump + for byte in data: + if 0x20 <= byte <= 0x7E: + asciis += chr(byte) + else: + asciis += '.' + self.asciirow += self.highligther(asciis, color) + + self.hexrow += self.highligther(hexs, color) + if width: + if len(self.hexrow) > int(width): + self.hexrow = textwrap.fill(self.hexrow, int(width)) + self.rowlen += len(data) + return len(data) + else: + data1 = data + # + if len(data2) == 0: + hexs = str(binascii.hexlify(data1), 'utf8') + hexs = ' '.join([hexs[i:i+2] for i in range(0, len(hexs), 2)]) + hexs2 = str(binascii.hexlify(data2), 'utf8') + hexs2 = ' '.join([hexs[i:i+2] for i in range(0, len(hexs), 2)]) + else: + self._add_hex_space() + #encode to hex and add some spaces + hexs = str(binascii.hexlify(data2), 'utf8') + hexs = ' '.join([hexs[i:i+2] for i in range(0, len(hexs), 2)]) + hexs2 = str(binascii.hexlify(data), 'utf8') + hexs2 = ' '.join([hexs[i:i+2] for i in range(0, len(hexs), 2)]) + + self.hexrow += self.highligther(hexs, color) + self.stats(hexs, color, hexs2) + if width: + if len(self.hexrow) > int(width): + self.hexrow = textwrap.fill(self.hexrow, int(width)) + self.rowlen += len(data2) + return len(data2) + + def _newrow(self, data): self._add_hex_space() - if self.addr != 0: - self.body += '\n' - self.body += "{:06x}:{:s}|{:s}|".format( - self.addr, self.hexrow, self.asciirow); + ops = ['insert', 'delete', 'replace', Ansi.delete, Ansi.replace, Ansi.insert] + if data == "": + if self.addr != 0: + self.body += '\n' + self.body += "{:06x}:{:s}|{:s}|".format( + self.addr, self.hexrow, self.asciirow); + else: + if self.addr != 0: + self.body = self.body + if any(ext in self.hexrow for ext in ops): + self.body += "{:06x}:{:s}\n".format( + self.addr, self.hexrow); self.addr += 16 self.rowlen = 0 self.hexrow = '' @@ -121,14 +217,61 @@ def _add_hex_space(self): self.skipspace = False else: self.hexrow += ' ' - - def final(self): + + def final(self, data=""): self.hexrow += 3*(16 - self.rowlen) * ' ' self.asciirow += (16 - self.rowlen) * ' ' - self._newrow() + self._newrow(data) return self.body + def reformat(self, body, n=16): + asciis = '' + instring = '' + foo = body.split('\n') + for line in foo: + line.rstrip() + line = line[line.find(':')+1:line.find('|')] + instring += line + instring += '\n' + outstring = '' + # Remove line numbers and newlines. + clean_string = instring.replace(r'\d+:|\n', '') + # Split on spaces that are not in tags. + elements = re.split(r'\s+(?![^<]+>)', clean_string) + # Omit first tag so that everything else can be chunked by n. + clean_elements = elements[1:] + # Chunk by n. + chunks = [' '.join(clean_elements[i:i+n]) + for i in range(0, len(clean_elements), n)] + # Concatenate the chunks as a line in outstring, with a line number. + for i, chunk in enumerate(chunks): + asciis = '' + ansi = [Ansi.reset, Ansi.delete, Ansi.replace, Ansi.insert] + html = ["", "", "", ""] + res = chunk + if self.highligther == html_colored: + ops = html + else: + ops = ansi + for op in ops: + res = res.replace(op, "") + res = res.replace(" ", "").strip() + res = str(binascii.unhexlify(res), 'utf8') + res = res.encode('utf-8') + #make the ascii dump + for byte in res: + if 0x20 <= byte <= 0x7E: + asciis += chr(byte) + else: + asciis += '.' + addr = '{:06x}'.format(i*n) + if i == 0: + outstring += '{}:{} {} |{}|\n'.format(addr, elements[0], chunk, asciis) + else: + outstring += '{}: {} |{}|\n'.format(addr, chunk, asciis) + return outstring + def ansi_colored(string, op): if op == 'equal': return string diff --git a/multidiff/StreamView.py b/multidiff/StreamView.py index a196878..20733f4 100644 --- a/multidiff/StreamView.py +++ b/multidiff/StreamView.py @@ -4,11 +4,15 @@ class StreamView(): '''A class for building UIs. Has some pretty serious side effects. Use Render instead if you're not making a long-running UI''' - def __init__(self, model, encoding='hexdump', mode='sequence', color='ansi'): + def __init__(self, model, encoding='hexdump', mode='sequence', color='ansi', bytes=16, width=None, diff=False): self.color = color - self.render = Render(color=color, encoder=encoding) + self.render = Render(color=color, encoder=encoding, bytes=bytes, width=width) self.mode = mode self.model = model + self.bytes = bytes + self.diff = diff + self.differ = None + self.stats = [0, 0] model.add_listener(self) def object_added(self, index): @@ -21,7 +25,11 @@ def diff_added(self, diff): print(Ansi.bold + self.model.objects[diff.target].info + Ansi.reset) elif self.color == 'html': print('' + html.escape(self.model.objects[diff.target].info) + '') - print(self.render.render(self.model, diff)) + if self.diff: + self.differ = self.render.diff_render(self.model, diff) + else: + self.differ = self.render.render(self.model, diff) + self.stats = self.render.stats #StreamView is designed to run for long times so we delete #old objects and diffs to not leak memory del(self.model.diffs[0]) diff --git a/multidiff/command_line_interface.py b/multidiff/command_line_interface.py index 020538a..732c120 100755 --- a/multidiff/command_line_interface.py +++ b/multidiff/command_line_interface.py @@ -1,16 +1,19 @@ #!/usr/bin/python3 import argparse from multidiff import MultidiffModel, StreamView, SocketController, FileController, StdinController +import shutil +import sys -def main(): - args = make_parser().parse_args() +def main(args=None): + + if args is None: + args = sys.argv[1:] + args = make_parser().parse_args(args) m = MultidiffModel() - v = StreamView(m, encoding=args.outformat, mode=args.mode, color=args.color) - + if len(args.file) > 0: - informat = args.informat if args.informat else 'raw' - files = FileController(m, informat) - files.add_paths(args.file) + result = diff_files(args) + print(result) if args.stdin: informat = args.informat if args.informat else 'utf8' stdin = StdinController(m, informat) @@ -20,6 +23,25 @@ def main(): server = SocketController(('127.0.0.1', args.port), m, informat) server.serve_forever() +def diff_files(args): + m = MultidiffModel() + v = StreamView(m, encoding=args.outformat, mode=args.mode, color=args.color, bytes=args.bytes, width=args.width, diff=args.diff) + + if args.width == 'max': + args.width = get_max_width(args) + + informat = args.informat if args.informat else 'raw' + files = FileController(m, informat) + files.add_paths(args.file) + if v.stats: + print('{} additions, {} deletions'.format(v.stats[0], v.stats[1])) + return v.differ + +def get_max_width(args): + columns = int(shutil.get_terminal_size((120,30)).columns) + args.width = columns + return args.width + def make_parser(): parser = argparse.ArgumentParser( formatter_class=argparse.RawTextHelpFormatter, @@ -32,17 +54,17 @@ def make_parser(): │ sensor module │ └───────────────┘ """) - + parser.add_argument('file', type=str, nargs='*', help='file or directory to include in multidiff') - + parser.add_argument('-p','--port', dest='port', type=int, help='start a local socket server on the given port') - + parser.add_argument('-s','--stdin', dest='stdin', action='store_true', @@ -52,7 +74,7 @@ def make_parser(): dest='mode', default='sequence', help='mode of operation, either "baseline" or "sequence"') - + parser.add_argument('-i','--informat', dest='informat', help='input data format:\n' + @@ -60,13 +82,13 @@ def make_parser(): ' raw (file and server default)\n' + ' hex\n'+ ' json') - + parser.add_argument('-o','--outformat', dest='outformat', default='hexdump', - help='output data format:\n' + - ' utf8\n' + - ' hex\n' + + help='output data format:\n' + + ' utf8\n' + + ' hex\n' + ' hexdump (default)') parser.add_argument('--html', @@ -75,6 +97,21 @@ def make_parser(): const='html', default='ansi', help='use html for colors instead of ansi codes') + + parser.add_argument('-w', '--width', + dest='width', + default='82', + help='number of bytes printed per line, either an integer or max(width of console)') + + parser.add_argument('-b', '--bytes', + dest='bytes', + default=16, + help='number of hexs printed per line, either an integer or max(width of console)') + + parser.add_argument('-d', '--diff', + dest='diff', + action='store_true', + help='only show addresses where bytes have changed(only for hexdump outformat)') return parser if __name__ == '__main__': diff --git a/test/bin_file1 b/test/bin_file1 new file mode 100644 index 0000000..7cd027a --- /dev/null +++ b/test/bin_file1 @@ -0,0 +1 @@ +0123456789abcdef012345678 \ No newline at end of file diff --git a/test/bin_file2 b/test/bin_file2 new file mode 100644 index 0000000..454f6b3 --- /dev/null +++ b/test/bin_file2 @@ -0,0 +1 @@ +0123456789abcdef \ No newline at end of file diff --git a/test/cli_test.py b/test/cli_test.py new file mode 100644 index 0000000..5dd3867 --- /dev/null +++ b/test/cli_test.py @@ -0,0 +1,82 @@ +from multidiff.Render import * +from multidiff import Ansi + +import unittest +from pathlib import Path +import subprocess + + +class MainCLITests(unittest.TestCase): + + def call_run(self, expected_stdout, args): + got_stdout = '(no stdout)' + cmd = ['multidiff'] + args + try: + got_stdout = subprocess.check_output(cmd, universal_newlines=True) + except subprocess.CalledProcessError as err: + print('Got stderr: `{err_message}`'.format(err_message=err)) + finally: + print('Got stdout: `{stdout}`'.format(stdout=got_stdout)) + + self.assertEqual(expected_stdout, got_stdout) + + def test_diff_cli_no_args(self): + expected_output = '' + self.call_run(expected_output, []) + + def test_diff_cli_simple(self): + p = Path(".") + res = list(p.glob("**/bin_file*")) + res = [str(x) for x in res] + + dump = Ansi.bold + res[1] + Ansi.reset + dump += "\n000000: " + dump += "30 31 32 33 34 35 36 37 38 39 61 62 63 64 65 66" + dump += Ansi.delete + " " + Ansi.reset + dump += "|0123456789abcdef|\n" + + expected_output = dump + self.call_run(expected_output, res) + + def test_diff_cli_with_width_flag(self): + p = Path('.') + res = res = list(p.glob('**/bin_file*')) + res = [str(x) for x in res] + res += ['--width', '25'] + + dump = Ansi.bold + res[1] + Ansi.reset + dump += "\n000000: " + dump += "30 31 32 33 34 35 36 37" + dump += "\n38 39 61 62 63 64 65" + dump += "\n66" + dump += Ansi.delete + " " + Ansi.reset + dump += "|0123456789abcdef|\n" + + expected_output = dump + self.call_run(expected_output, res) + + def test_diff_cli_with_bytes_flag(self): + p = Path('.') + res = res = list(p.glob('**/bin_file*')) + res = [str(x) for x in res] + res += ['--bytes', '6'] + + dump = Ansi.bold + res[1] + Ansi.reset + dump += "\n000000: " + dump += "30 31 32 33 34 35" + dump += ' |012345|' + dump += "\n000006: " + dump += "36 37 38 39 61 62" + dump += ' |6789ab|' + dump += "\n00000c: " + dump += "63 64 65 66" + dump += Ansi.delete + " " + Ansi.reset + dump += ' |cdef|' + dump += '\n\n' + print(dump) + + expected_output = dump + self.call_run(expected_output, res) + +if __name__ == '__main__': + unittest.main() diff --git a/test/width_test.py b/test/width_test.py new file mode 100644 index 0000000..d8f47cc --- /dev/null +++ b/test/width_test.py @@ -0,0 +1,88 @@ +from multidiff.Render import * +from multidiff import Ansi +from multidiff.command_line_interface import make_parser, get_max_width + +import shutil +from unittest import TestCase +import argparse + + +class WidthTests(TestCase): + + def setUp(self): + pass + + def test_parser(self): + args = make_parser().parse_args([]) + res = argparse.Namespace( + bytes=16, color='ansi', diff=False, file=[], informat=None, + mode='sequence', outformat='hexdump', port=None, stdin=False, + width='82' + ) + assert(args == res) + + def test_get_max_width(self): + args = make_parser().parse_args(['--width', 'max']) + args.width = get_max_width(args) + columns = int(shutil.get_terminal_size((120, 30)).columns) + assert(args.width == columns) + + def test_width_default(self): + # width = 82(default) + args = make_parser().parse_args([]) + args.width = get_max_width(args) + hd = HexdumpEncoder(html_colored) + hd.append(bytes("012", "utf8"), "replace", args.width) + hd.append(bytes("3456789ab", "utf8"), "equal", args.width) + hd.append(bytes("", "utf8"), "delete", args.width) + hd.append(bytes("cdef", "utf8"), "equal", args.width) + result = hd.final() + + dump = "000000: " + dump += "30 31 32 " + dump += "33 34 35 36 37 38 39 61 62" + dump += " " + dump += "63 64 65 66" + dump += " |" + dump += "