From f15fbdd3405a7b6f0929a6befdb870970e5402c7 Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 4 Oct 2016 11:41:29 +0100 Subject: [PATCH] Support use of Path in ParsingUtils, SeekableStreamFactory, AbstractVCFCodec. --- build.gradle | 1 + .../htsjdk/samtools/SamInputResource.java | 5 +- .../seekablestream/SeekableStreamFactory.java | 3 + .../java/htsjdk/samtools/util/IOUtil.java | 44 +++++++++++++ .../htsjdk/tribble/util/ParsingUtils.java | 6 ++ .../htsjdk/variant/vcf/AbstractVCFCodec.java | 10 ++- .../htsjdk/tribble/util/ParsingUtilsTest.java | 61 +++++++++++++++++++ 7 files changed, 125 insertions(+), 5 deletions(-) diff --git a/build.gradle b/build.gradle index b29f8e3d8d..ddd12a34d8 100644 --- a/build.gradle +++ b/build.gradle @@ -42,6 +42,7 @@ dependencies { compile "gov.nih.nlm.ncbi:ngs-java:1.2.4" testCompile "org.testng:testng:6.9.9" + testCompile "com.google.jimfs:jimfs:1.1" } sourceCompatibility = 1.8 diff --git a/src/main/java/htsjdk/samtools/SamInputResource.java b/src/main/java/htsjdk/samtools/SamInputResource.java index d76694eeb0..c1ffb2dd4d 100644 --- a/src/main/java/htsjdk/samtools/SamInputResource.java +++ b/src/main/java/htsjdk/samtools/SamInputResource.java @@ -29,6 +29,7 @@ import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.seekablestream.SeekableStreamFactory; import htsjdk.samtools.sra.SRAAccession; +import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.Lazy; import htsjdk.samtools.util.RuntimeIOException; @@ -366,8 +367,8 @@ public File asFile() { @Override public Path asPath() { try { - return Paths.get(urlResource.toURI()); - } catch (URISyntaxException | IllegalArgumentException | + return IOUtil.getPath(urlResource.toExternalForm()); + } catch (IOException | IllegalArgumentException | FileSystemNotFoundException | SecurityException e) { return null; } diff --git a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java index fe8f42a561..ec8b9526e5 100644 --- a/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java +++ b/src/main/java/htsjdk/samtools/seekablestream/SeekableStreamFactory.java @@ -23,6 +23,7 @@ */ package htsjdk.samtools.seekablestream; +import htsjdk.samtools.util.IOUtil; import java.io.File; import java.io.IOException; import java.net.URL; @@ -78,6 +79,8 @@ public SeekableStream getStreamFor(final String path) throws IOException { return new SeekableFTPStream(new URL(path)); } else if (path.startsWith("file:")) { return new SeekableFileStream(new File(new URL(path).getPath())); + } else if (IOUtil.hasScheme(path)) { + return new SeekablePathStream(IOUtil.getPath(path)); } else { return new SeekableFileStream(new File(path)); } diff --git a/src/main/java/htsjdk/samtools/util/IOUtil.java b/src/main/java/htsjdk/samtools/util/IOUtil.java index 07ae9006aa..97d4d9cdc2 100644 --- a/src/main/java/htsjdk/samtools/util/IOUtil.java +++ b/src/main/java/htsjdk/samtools/util/IOUtil.java @@ -48,14 +48,20 @@ import java.io.Reader; import java.io.Writer; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.nio.charset.Charset; +import java.nio.file.FileSystemNotFoundException; +import java.nio.file.FileSystems; import java.nio.file.Files; import java.nio.file.Path; +import java.nio.file.Paths; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Scanner; @@ -943,4 +949,42 @@ public static List unrollFiles(final Collection inputs, final String return output; } + + /** + * Check if the given URI has a scheme. + * + * @param uriString the URI to check + * @return true if the given URI has a scheme, false if + * not, or if the URI is malformed. + */ + public static boolean hasScheme(String uriString) { + try { + return new URI(uriString).getScheme() != null; + } catch (URISyntaxException e) { + return false; + } + } + + /** + * Converts the given URI to a {@link Path} object. If the filesystem cannot be found in the usual way, then attempt + * to load the filesystem provider using the thread context classloader. This is needed when the filesystem + * provider is loaded using a URL classloader (e.g. in spark-submit). + * + * @param uriString the URI to convert + * @return the resulting {@code Path} + * @throws IOException an I/O error occurs creating the file system + */ + public static Path getPath(String uriString) throws IOException { + URI uri = URI.create(uriString); + try { + // if the URI has no scheme, then treat as a local file, otherwise use the scheme to determine the filesystem to use + return uri.getScheme() == null ? Paths.get(uriString) : Paths.get(uri); + } catch (FileSystemNotFoundException e) { + ClassLoader cl = Thread.currentThread().getContextClassLoader(); + if (cl == null) { + throw e; + } + return FileSystems.newFileSystem(uri, new HashMap<>(), cl).provider().getPath(uri); + } + } } diff --git a/src/main/java/htsjdk/tribble/util/ParsingUtils.java b/src/main/java/htsjdk/tribble/util/ParsingUtils.java index 2a88ed7355..38cf8ab7f0 100644 --- a/src/main/java/htsjdk/tribble/util/ParsingUtils.java +++ b/src/main/java/htsjdk/tribble/util/ParsingUtils.java @@ -23,6 +23,7 @@ */ package htsjdk.tribble.util; +import htsjdk.samtools.util.IOUtil; import java.awt.Color; import java.io.File; import java.io.FileInputStream; @@ -31,6 +32,7 @@ import java.lang.reflect.Constructor; import java.net.MalformedURLException; import java.net.URL; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -83,6 +85,8 @@ public static InputStream openInputStream(String path) if (path.startsWith("http:") || path.startsWith("https:") || path.startsWith("ftp:")) { inputStream = getURLHelper(new URL(path)).openInputStream(); + } else if (IOUtil.hasScheme(path)) { + inputStream = Files.newInputStream(IOUtil.getPath(path)); } else { File file = new File(path); inputStream = new FileInputStream(file); @@ -400,6 +404,8 @@ public static boolean resourceExists(String resource) throws IOException{ } URLHelper helper = getURLHelper(url); return helper.exists(); + } else if (IOUtil.hasScheme(resource)) { + return Files.exists(IOUtil.getPath(resource)); } else { return (new File(resource)).exists(); } diff --git a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java index 16857b4e6e..04887aeea3 100644 --- a/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java +++ b/src/main/java/htsjdk/variant/vcf/AbstractVCFCodec.java @@ -26,6 +26,7 @@ package htsjdk.variant.vcf; import htsjdk.samtools.util.BlockCompressedInputStream; +import htsjdk.samtools.util.IOUtil; import htsjdk.tribble.AsciiFeatureCodec; import htsjdk.tribble.Feature; import htsjdk.tribble.NameAwareCodec; @@ -45,6 +46,8 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -616,10 +619,11 @@ private static void parseSingleAltAllele(List alleles, String alt, int l public static boolean canDecodeFile(final String potentialInput, final String MAGIC_HEADER_LINE) { try { + Path path = IOUtil.getPath(potentialInput); //isVCFStream closes the stream that's passed in - return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) || - isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) || - isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE); + return isVCFStream(Files.newInputStream(path), MAGIC_HEADER_LINE) || + isVCFStream(new GZIPInputStream(Files.newInputStream(path)), MAGIC_HEADER_LINE) || + isVCFStream(new BlockCompressedInputStream(Files.newInputStream(path)), MAGIC_HEADER_LINE); } catch ( FileNotFoundException e ) { return false; } catch ( IOException e ) { diff --git a/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java b/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java index 1c3ad1f547..85f414e871 100644 --- a/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java +++ b/src/test/java/htsjdk/tribble/util/ParsingUtilsTest.java @@ -1,6 +1,16 @@ package htsjdk.tribble.util; +import com.google.common.jimfs.Configuration; +import com.google.common.jimfs.Jimfs; +import htsjdk.samtools.util.IOUtil; +import java.io.BufferedWriter; +import java.io.File; +import java.io.OutputStream; +import java.io.OutputStreamWriter; +import java.nio.file.FileSystem; +import java.nio.file.Files; +import java.nio.file.Path; import org.testng.Assert; import org.testng.annotations.Test; @@ -117,6 +127,37 @@ public void testSplitJoinEmptyFirst() { testSplitJoinRoundtrip("\ta\tb", '\t', Arrays.asList("", "a", "b")); } + @Test + public void testFileDoesExist() throws IOException{ + File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); + tempFile.deleteOnExit(); + tstExists(tempFile.getAbsolutePath(), true); + tstExists(tempFile.toURI().toString(), true); + } + + @Test + public void testFileDoesNotExist() throws IOException{ + File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); + tempFile.delete(); + tstExists(tempFile.getAbsolutePath(), false); + tstExists(tempFile.toURI().toString(), false); + } + + @Test + public void testInMemoryNioFileDoesExist() throws IOException{ + FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); + Path file = fs.getPath("/file"); + Files.createFile(file); + tstExists(file.toUri().toString(), true); + } + + @Test + public void testInMemoryNioFileDoesNotExist() throws IOException{ + FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); + Path file = fs.getPath("/file"); + tstExists(file.toUri().toString(), false); + } + @Test public void testFTPDoesExist() throws IOException{ tstExists(AVAILABLE_FTP_URL, true); @@ -142,6 +183,26 @@ private void tstExists(String path, boolean expectExists) throws IOException{ Assert.assertEquals(exists, expectExists); } + @Test + public void testFileOpenInputStream() throws IOException{ + File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); + tempFile.deleteOnExit(); + OutputStream os = IOUtil.openFileForWriting(tempFile); + BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); + writer.write("hello"); + writer.close(); + tstStream(tempFile.getAbsolutePath()); + tstStream(tempFile.toURI().toString()); + } + + @Test + public void testInMemoryNioFileOpenInputStream() throws IOException{ + FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); + Path file = fs.getPath("/file"); + Files.write(file, "hello".getBytes("UTF-8")); + tstStream(file.toUri().toString()); + } + @Test public void testFTPOpenInputStream() throws IOException{ tstStream(AVAILABLE_FTP_URL);