Skip to content

Ensure fsencode/fsdecode works on ASCII FS. Use tarball for unicode test files #758

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 2, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def read(*names, **kwargs):
'bz2file >= 0.98',

# commoncode
'backports.os == 0.1rc1',
'backports.os == 0.1.1',
'future >= 0.16.0, < 0.17.0',
'text-unidecode >= 1.0, < 2.0',

Expand Down
7 changes: 6 additions & 1 deletion src/commoncode/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@
# Python 3
unicode = str

try:
from os import fsencode
except ImportError:
from backports.os import fsencode


"""
Minimal wrapper for executing external commands in sub-processes. The approach
Expand Down Expand Up @@ -344,7 +349,7 @@ def load_lib(libname, root_dir):
if os.path.exists(so):
if not isinstance(so, bytes):
# ensure that the path is not Unicode...
so = so.encode(fileutils.FS_ENCODING)
so = fsencode(so)
lib = ctypes.CDLL(so)
if lib and lib._name:
return lib
Expand Down
18 changes: 11 additions & 7 deletions src/commoncode/fileutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@
# Python 3
unicode = str

# Python 2 and 3 support: os.fsencode/os.fsdecode are only available in the
# Python 3 standard library; otherwise fall back to the backports.os package.
# Both names must be bound on BOTH branches: path_to_unicode() calls
# fsdecode() and path_to_bytes() calls fsencode(). Importing fsdecode only in
# the fallback branch would leave it undefined whenever the stdlib import
# succeeds.
try:
    from os import fsencode
    from os import fsdecode
except ImportError:
    from backports.os import fsencode
    from backports.os import fsdecode


import codecs
import errno
import os
Expand All @@ -45,7 +52,6 @@
import sys
import tempfile

from backports import os as osb

from commoncode import filetype
from commoncode.filetype import is_rwx
Expand Down Expand Up @@ -77,8 +83,6 @@ def logger_debug(*args):
return logger.debug(' '.join(isinstance(a, basestring) and a or repr(a) for a in args))


FS_ENCODING = sys.getfilesystemencoding() or sys.getdefaultencoding()

# Paths can only be sanely handled as raw bytes on Linux
PATH_TYPE = bytes if on_linux else unicode
POSIX_PATH_SEP = b'/' if on_linux else '/'
Expand Down Expand Up @@ -217,8 +221,8 @@ def path_to_unicode(path):
"""
if isinstance(path, unicode):
return path
if TRACE: logger_debug('path_to_unicode:', osb.fsdecode(path))
return osb.fsdecode(path)
if TRACE: logger_debug('path_to_unicode:', fsdecode(path))
return fsdecode(path)


def path_to_bytes(path):
Expand All @@ -227,8 +231,8 @@ def path_to_bytes(path):
"""
if isinstance(path, bytes):
return path
if TRACE: logger_debug('path_to_bytes:' , repr(osb.fsencode(path)))
return osb.fsencode(path)
if TRACE: logger_debug('path_to_bytes:' , repr(fsencode(path)))
return fsencode(path)


def is_posixpath(location):
Expand Down
20 changes: 14 additions & 6 deletions src/commoncode/testcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@
from __future__ import division
from __future__ import unicode_literals

from unittest import TestCase as TestCaseClass

import filecmp
from functools import partial
import os
import shutil
import stat
import sys
import tarfile
from unittest import TestCase as TestCaseClass
import zipfile

from commoncode import fileutils
Expand Down Expand Up @@ -278,19 +278,27 @@ def extract_test_tar(self, test_path, verbatim=False):
def extract_test_tar_raw(self, test_path, *args, **kwargs):
return self.__extract(test_path, extract_tar_raw)

def extract_test_tar_unicode(self, test_path, *args, **kwargs):
return self.__extract(test_path, extract_tar_uni)

def extract_tar_raw(test_path, target_dir, *args, **kwargs):

def _extract_tar_raw(test_path, target_dir, to_bytes, *args, **kwargs):
"""
Raw simplified extract for certain really weird paths and file
names.
"""
# use bytes for paths on ALL OSes (though this may fail on macOS)
target_dir = path_to_bytes(target_dir)
test_path = path_to_bytes(test_path)
if to_bytes:
# use bytes for paths on ALL OSes (though this may fail on macOS)
target_dir = path_to_bytes(target_dir)
test_path = path_to_bytes(test_path)
tar = tarfile.open(test_path)
tar.extractall(path=target_dir)
tar.close()

extract_tar_raw = partial(_extract_tar_raw, to_bytes=True)

extract_tar_uni = partial(_extract_tar_raw, to_bytes=False)


def extract_tar(location, target_dir, verbatim=False, *args, **kwargs):
"""
Expand Down
9 changes: 8 additions & 1 deletion src/extractcode/libarchive2.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@
from extractcode import ExtractErrorPasswordProtected


# Python 2 and 3 support
try:
from os import fsencode
except ImportError:
from backports.os import fsencode


logger = logging.getLogger(__name__)
DEBUG = False
# logging.basicConfig(level=logging.DEBUG)
Expand Down Expand Up @@ -104,7 +111,7 @@ def load_lib():
if os.path.exists(libarchive):
if not isinstance(libarchive, bytes):
# ensure that the path is not Unicode...
libarchive = libarchive.encode(fileutils.FS_ENCODING)
libarchive = fsencode(libarchive)
lib = ctypes.CDLL(libarchive)
if lib and lib._name:
return lib
Expand Down
10 changes: 8 additions & 2 deletions src/typecode/magic2.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,17 @@

import os.path
import ctypes
import sys

from commoncode import system
from commoncode import command

# Python 2 and 3 support
try:
from os import fsencode
except ImportError:
from backports.os import fsencode


"""
magic2 is minimal and specialized wrapper around a vendored libmagic file
identification library. This is NOT thread-safe. It is based on python-magic
Expand Down Expand Up @@ -204,7 +210,7 @@ def load_lib():
if os.path.exists(magic_so):
if not isinstance(magic_so, bytes):
# ensure that the path is not Unicode...
magic_so = magic_so.encode(sys.getfilesystemencoding() or sys.getdefaultencoding())
magic_so = fsencode(magic_so)
lib = ctypes.CDLL(magic_so)
if lib and lib._name:
return lib
Expand Down
Binary file added tests/scancode/data/unicode_fixtures.tar.gz
Binary file not shown.
1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/.hidden

This file was deleted.

Empty file.
1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/foo bar

This file was deleted.

1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/nums

This file was deleted.

1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/pets/names.txt

This file was deleted.

Empty file.
1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/todo.html

This file was deleted.

1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/todo.txt

This file was deleted.

1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/users/index.html

This file was deleted.

1 change: 0 additions & 1 deletion tests/scancode/data/unicode_fixtures/users/tobi.txt

This file was deleted.

20 changes: 13 additions & 7 deletions tests/scancode/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
# from click.testing import CliRunner

from commoncode import fileutils
from commoncode.fileutils import path_to_bytes
from commoncode.testcase import FileDrivenTesting
from commoncode.system import on_linux
from commoncode.system import on_mac
Expand All @@ -47,7 +48,6 @@
from scancode.cli_test_utils import run_scan_plain

from scancode import cli
from commoncode.fileutils import path_to_bytes


test_env = FileDrivenTesting()
Expand Down Expand Up @@ -206,23 +206,23 @@ def test_scan_mark_source_without_info(monkeypatch):
result_file = test_env.get_temp_file('json')
expected_file = test_env.get_test_loc('mark_source/without_info.expected.json')

result = run_scan_click(['--mark-source', test_dir, result_file], monkeypatch)
_result = run_scan_click(['--mark-source', test_dir, result_file], monkeypatch)
check_json_scan(expected_file, result_file)

def test_scan_mark_source_with_info(monkeypatch):
test_dir = test_env.extract_test_tar('mark_source/JGroups.tgz')
result_file = test_env.get_temp_file('json')
expected_file = test_env.get_test_loc('mark_source/with_info.expected.json')

result = run_scan_click(['--info', '--mark-source', test_dir, result_file], monkeypatch)
_result = run_scan_click(['--info', '--mark-source', test_dir, result_file], monkeypatch)
check_json_scan(expected_file, result_file)

def test_scan_only_findings(monkeypatch):
test_dir = test_env.extract_test_tar('info/basic.tgz')
result_file = test_env.get_temp_file('json')
expected_file = test_env.get_test_loc('only_findings/expected.json')

result = run_scan_click(['--only-findings', test_dir, result_file], monkeypatch)
_result = run_scan_click(['--only-findings', test_dir, result_file], monkeypatch)
check_json_scan(expected_file, result_file)


Expand Down Expand Up @@ -451,11 +451,17 @@ def test_scan_does_not_fail_when_scanning_unicode_files_and_paths():
check_json_scan(test_env.get_test_loc(expected), result_file, strip_dates=True, regen=False)


@skipIf(on_windows, 'Python tar cannot extract these files on Windows')
def test_scan_does_not_fail_when_scanning_unicode_test_files_from_express():
test_dir = test_env.get_test_loc(u'unicode_fixtures')

if on_linux:
test_dir = path_to_bytes(test_dir)
# On Windows, Python tar cannot extract these files. Other
# extractors either fail or change the file name, making the test
# moot. Git cannot check out these files. So for now it makes no sense
# to test this on Windows at all. Extractcode works fine, but does
# rename the problematic files.

test_dir = test_env.extract_test_tar_raw(b'unicode_fixtures.tar.gz')
test_dir = path_to_bytes(test_dir)

args = ['-n0', '--info', '--license', '--copyright',
'--package', '--email', '--url', '--strip-root',
Expand Down
Binary file not shown.
Binary file removed thirdparty/prod/backports.os-0.1rc1-py2-none-any.whl
Binary file not shown.
6 changes: 3 additions & 3 deletions thirdparty/prod/backports.os.ABOUT
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
about_resource: backports.os-0.1rc1-py2-none-any.whl
version: 0.1rc1
about_resource: backports.os-0.1.1-py2.py3-none-any.whl
version: 0.1.1
name: backports.os
home_url: https://github.com/pjdelport/backports.os
download_url: https://pypi.python.org/packages/82/1d/461604fd8b2c6f798a18fb9161019d35c005a489bdd91279dfcf9b65859e/backports.os-0.1rc1-py2-none-any.whl#md5=7677278e0dd1135b7c422495daa72573
download_url: https://pypi.python.org/packages/5b/0b/5ba79ac5d09e0b38725498e0a212ace2d166c0b7a38a42045df824b68d59/backports.os-0.1.1-py2.py3-none-any.whl
license_text:
- backports.os.LICENSE
- PSF.LICENSE
Expand Down
1 change: 0 additions & 1 deletion thirdparty/prod/backports.os.LICENSE
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

Copyright (c) Piët Delport, and Python Software Foundation
Licensed under the Python license

Expand Down