Skip to content

Commit 4c6fba3

Browse files
committed
[jsinterp] Improve try/catch/finally support
1 parent: d619dd7 · commit: 4c6fba3

File tree

2 files changed: 63 additions and 43 deletions

test/test_jsinterp.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,6 +74,9 @@ def test_operators(self):
7474
jsi = JSInterpreter('function f(){return 0 ?? 42;}')
7575
self.assertEqual(jsi.call_function('f'), 0)
7676

77+
jsi = JSInterpreter('function f(){return "life, the universe and everything" < 42;}')
78+
self.assertFalse(jsi.call_function('f'))
79+
7780
def test_array_access(self):
7881
jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}')
7982
self.assertEqual(jsi.call_function('f'), [5, 2, 7])
@@ -198,7 +201,6 @@ def test_catch(self):
198201
''')
199202
self.assertEqual(jsi.call_function('x'), 5)
200203

201-
@unittest.expectedFailure
202204
def test_finally(self):
203205
jsi = JSInterpreter('''
204206
function x() { try{throw 10} finally {return 42} }
@@ -212,7 +214,7 @@ def test_finally(self):
212214
def test_nested_try(self):
213215
jsi = JSInterpreter('''
214216
function x() {try {
215-
try{throw 10} finally {throw 42}
217+
try{throw 10} finally {throw 42}
216218
} catch(e){return 5} }
217219
''')
218220
self.assertEqual(jsi.call_function('x'), 5)
@@ -229,6 +231,14 @@ def test_for_loop_break(self):
229231
''')
230232
self.assertEqual(jsi.call_function('x'), 0)
231233

234+
def test_for_loop_try(self):
235+
jsi = JSInterpreter('''
236+
function x() {
237+
for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} };
238+
return 42 }
239+
''')
240+
self.assertEqual(jsi.call_function('x'), 42)
241+
232242
def test_literal_list(self):
233243
jsi = JSInterpreter('''
234244
function x() { return [1, 2, "asdf", [5, 6, 7]][3] }

youtube_dl/jsinterp.py

Lines changed: 51 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,6 @@
55
import math
66
import operator
77
import re
8-
from collections import Counter
98

109
from .utils import (
1110
error_to_compat_str,
@@ -15,6 +14,7 @@
1514
unified_timestamp,
1615
)
1716
from .compat import (
17+
compat_basestring,
1818
compat_collections_chain_map as ChainMap,
1919
compat_itertools_zip_longest as zip_longest,
2020
compat_str,
@@ -76,6 +76,10 @@ def _js_comp_op(op):
7676
def wrapped(a, b):
7777
if JS_Undefined in (a, b):
7878
return False
79+
if isinstance(a, compat_basestring):
80+
b = compat_str(b or 0)
81+
elif isinstance(b, compat_basestring):
82+
a = compat_str(a or 0)
7983
return op(a or 0, b or 0)
8084

8185
return wrapped
@@ -195,7 +199,6 @@ class JSInterpreter(object):
195199
'y': 4096, # Perform a "sticky" search that matches starting at the current position in the target string
196200
}
197201

198-
_EXC_NAME = '__youtube_dl_exception__'
199202
_OBJ_NAME = '__youtube_dl_jsinterp_obj'
200203

201204
OP_CHARS = None
@@ -242,9 +245,8 @@ def _regex_flags(cls, expr):
242245
def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
243246
if not expr:
244247
return
245-
# collections.Counter() is ~10% slower
248+
# collections.Counter() is ~10% slower in both 2.7 and 3.9
246249
counters = {k: 0 for k in _MATCHING_PARENS.values()}
247-
# counters = Counter()
248250
start, splits, pos, delim_len = 0, 0, 0, len(delim) - 1
249251
in_quote, escaping, skipping = None, False, 0
250252
after_op, in_regex_char_group, skip_re = True, False, 0
@@ -291,7 +293,9 @@ def _separate(cls, expr, delim=',', max_split=None, skip_delims=None):
291293
yield expr[start:]
292294

293295
@classmethod
294-
def _separate_at_paren(cls, expr, delim):
296+
def _separate_at_paren(cls, expr, delim=None):
297+
if delim is None:
298+
delim = expr and _MATCHING_PARENS[expr[0]]
295299
separated = list(cls._separate(expr, delim, 1))
296300

297301
if len(separated) < 2:
@@ -376,7 +380,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
376380
if expr.startswith('new '):
377381
obj = expr[4:]
378382
if obj.startswith('Date('):
379-
left, right = self._separate_at_paren(obj[4:], ')')
383+
left, right = self._separate_at_paren(obj[4:])
380384
expr = unified_timestamp(
381385
self.interpret_expression(left, local_vars, allow_recursion), False)
382386
if not expr:
@@ -390,7 +394,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
390394
return None, should_return
391395

392396
if expr.startswith('{'):
393-
inner, outer = self._separate_at_paren(expr, '}')
397+
inner, outer = self._separate_at_paren(expr)
394398
# try for object expression (Map)
395399
sub_expressions = [list(self._separate(sub_expr.strip(), ':', 1)) for sub_expr in self._separate(inner)]
396400
if all(len(sub_expr) == 2 for sub_expr in sub_expressions):
@@ -406,65 +410,71 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
406410
expr = self._dump(inner, local_vars) + outer
407411

408412
if expr.startswith('('):
409-
inner, outer = self._separate_at_paren(expr, ')')
413+
inner, outer = self._separate_at_paren(expr)
410414
inner, should_abort = self.interpret_statement(inner, local_vars, allow_recursion)
411415
if not outer or should_abort:
412416
return inner, should_abort or should_return
413417
else:
414418
expr = self._dump(inner, local_vars) + outer
415419

416420
if expr.startswith('['):
417-
inner, outer = self._separate_at_paren(expr, ']')
421+
inner, outer = self._separate_at_paren(expr)
418422
name = self._named_object(local_vars, [
419423
self.interpret_expression(item, local_vars, allow_recursion)
420424
for item in self._separate(inner)])
421425
expr = name + outer
422426

423427
m = re.match(r'''(?x)
424-
(?P<try>try|finally)\s*|
425-
(?P<catch>catch\s*(?P<err>\(\s*{_NAME_RE}\s*\)))|
426-
(?P<switch>switch)\s*\(|
427-
(?P<for>for)\s*\(|
428-
'''.format(**globals()), expr)
428+
(?P<try>try)\s*\{|
429+
(?P<switch>switch)\s*\(|
430+
(?P<for>for)\s*\(
431+
''', expr)
429432
md = m.groupdict() if m else {}
430433
if md.get('try'):
431-
if expr[m.end()] == '{':
432-
try_expr, expr = self._separate_at_paren(expr[m.end():], '}')
433-
else:
434-
try_expr, expr = expr[m.end() - 1:], ''
434+
try_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
435+
err = None
435436
try:
436437
ret, should_abort = self.interpret_statement(try_expr, local_vars, allow_recursion)
437438
if should_abort:
438439
return ret, True
439-
except JS_Throw as e:
440-
local_vars[self._EXC_NAME] = e.error
441440
except Exception as e:
442441
# XXX: This works for now, but makes debugging future issues very hard
443-
local_vars[self._EXC_NAME] = e
444-
ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
445-
return ret, should_abort or should_return
446-
447-
elif md.get('catch'):
448-
449-
catch_expr, expr = self._separate_at_paren(expr[m.end():], '}')
450-
if self._EXC_NAME in local_vars:
451-
catch_vars = local_vars.new_child({m.group('err'): local_vars.pop(self._EXC_NAME)})
452-
ret, should_abort = self.interpret_statement(catch_expr, catch_vars, allow_recursion)
442+
err = e
443+
444+
pending = (None, False)
445+
m = re.match(r'catch\s*(?P<err>\(\s*{_NAME_RE}\s*\))?\{{'.format(**globals()), expr)
446+
if m:
447+
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
448+
if err:
449+
catch_vars = {}
450+
if m.group('err'):
451+
catch_vars[m.group('err')] = err.error if isinstance(err, JS_Throw) else err
452+
catch_vars = local_vars.new_child(m=catch_vars)
453+
err = None
454+
pending = self.interpret_statement(sub_expr, catch_vars, allow_recursion)
455+
456+
m = re.match(r'finally\s*\{', expr)
457+
if m:
458+
sub_expr, expr = self._separate_at_paren(expr[m.end() - 1:])
459+
ret, should_abort = self.interpret_statement(sub_expr, local_vars, allow_recursion)
453460
if should_abort:
454461
return ret, True
455462

456-
ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
463+
ret, should_abort = pending
464+
if should_abort:
465+
return ret, True
457466

458-
return ret, should_abort or should_return
467+
if err:
468+
raise err
459469

460470
elif md.get('for'):
461-
constructor, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
471+
constructor, remaining = self._separate_at_paren(expr[m.end() - 1:])
462472
if remaining.startswith('{'):
463-
body, expr = self._separate_at_paren(remaining, '}')
473+
body, expr = self._separate_at_paren(remaining)
464474
else:
465475
switch_m = re.match(r'switch\s*\(', remaining) # FIXME
466476
if switch_m:
467-
switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:], ')')
477+
switch_val, remaining = self._separate_at_paren(remaining[switch_m.end() - 1:])
468478
body, expr = self._separate_at_paren(remaining, '}')
469479
body = 'switch(%s){%s}' % (switch_val, body)
470480
else:
@@ -483,11 +493,9 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
483493
except JS_Continue:
484494
pass
485495
self.interpret_expression(increment, local_vars, allow_recursion)
486-
ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
487-
return ret, should_abort or should_return
488496

489497
elif md.get('switch'):
490-
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:], ')')
498+
switch_val, remaining = self._separate_at_paren(expr[m.end() - 1:])
491499
switch_val = self.interpret_expression(switch_val, local_vars, allow_recursion)
492500
body, expr = self._separate_at_paren(remaining, '}')
493501
items = body.replace('default:', 'case default:').split('case ')[1:]
@@ -510,6 +518,8 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
510518
break
511519
if matched:
512520
break
521+
522+
if md:
513523
ret, should_abort = self.interpret_statement(expr, local_vars, allow_recursion)
514524
return ret, should_abort or should_return
515525

@@ -618,7 +628,7 @@ def interpret_statement(self, stmt, local_vars, allow_recursion=100):
618628
member = self.interpret_expression(m.group('member2'), local_vars, allow_recursion)
619629
arg_str = expr[m.end():]
620630
if arg_str.startswith('('):
621-
arg_str, remaining = self._separate_at_paren(arg_str, ')')
631+
arg_str, remaining = self._separate_at_paren(arg_str)
622632
else:
623633
arg_str, remaining = None, arg_str
624634

@@ -795,7 +805,7 @@ def extract_function_code(self, funcname):
795805
\((?P<args>[^)]*)\)\s*
796806
(?P<code>{.+})''' % {'name': re.escape(funcname)},
797807
self.code)
798-
code, _ = self._separate_at_paren(func_m.group('code'), '}') # refine the match
808+
code, _ = self._separate_at_paren(func_m.group('code')) # refine the match
799809
if func_m is None:
800810
raise self.Exception('Could not find JS function "{funcname}"'.format(**locals()))
801811
return self.build_arglist(func_m.group('args')), code
@@ -810,7 +820,7 @@ def extract_function_from_code(self, argnames, code, *global_stack):
810820
if mobj is None:
811821
break
812822
start, body_start = mobj.span()
813-
body, remaining = self._separate_at_paren(code[body_start - 1:], '}')
823+
body, remaining = self._separate_at_paren(code[body_start - 1:])
814824
name = self._named_object(
815825
local_vars,
816826
self.extract_function_from_code(

Comments (0)