-
Notifications
You must be signed in to change notification settings - Fork 243
Support use of Path in ParsingUtils, SeekableStreamFactory, AbstractVCFCodec #724
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,14 +48,20 @@ | |
import java.io.Reader; | ||
import java.io.Writer; | ||
import java.net.MalformedURLException; | ||
import java.net.URI; | ||
import java.net.URISyntaxException; | ||
import java.net.URL; | ||
import java.nio.charset.Charset; | ||
import java.nio.file.FileSystemNotFoundException; | ||
import java.nio.file.FileSystems; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.nio.file.Paths; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.LinkedList; | ||
import java.util.List; | ||
import java.util.Scanner; | ||
|
@@ -943,4 +949,42 @@ public static List<File> unrollFiles(final Collection<File> inputs, final String | |
|
||
return output; | ||
} | ||
|
||
/** | ||
* Check if the given URI has a scheme. | ||
* | ||
* @param uriString the URI to check | ||
* @return <code>true</code> if the given URI has a scheme, <code>false</code> if | ||
* not, or if the URI is malformed. | ||
*/ | ||
public static boolean hasScheme(String uriString) { | ||
try { | ||
return new URI(uriString).getScheme() != null; | ||
} catch (URISyntaxException e) { | ||
return false; | ||
} | ||
} | ||
|
||
/** | ||
* Converts the given URI to a {@link Path} object. If the filesystem cannot be found in the usual way, then attempt | ||
* to load the filesystem provider using the thread context classloader. This is needed when the filesystem | ||
* provider is loaded using a URL classloader (e.g. in spark-submit). | ||
* | ||
* @param uriString the URI to convert | ||
* @return the resulting {@code Path} | ||
* @throws IOException an I/O error occurs creating the file system | ||
*/ | ||
public static Path getPath(String uriString) throws IOException { | ||
URI uri = URI.create(uriString); | ||
try { | ||
// if the URI has no scheme, then treat as a local file, otherwise use the scheme to determine the filesystem to use | ||
return uri.getScheme() == null ? Paths.get(uriString) : Paths.get(uri); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you add a comment here explaining this line. It's going to be very confusing for anyone who doesn't know that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
} catch (FileSystemNotFoundException e) { | ||
ClassLoader cl = Thread.currentThread().getContextClassLoader(); | ||
if (cl == null) { | ||
throw e; | ||
} | ||
return FileSystems.newFileSystem(uri, new HashMap<>(), cl).provider().getPath(uri); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think there's a bug here. I tested this code explicitly by including the gcs provider in the hellbender path and running the following tests: @Test
public void testCompletePath() throws IOException {
newFilesystem(URI.create("gs://hellbender/somefile.txt"));
}
@Test
public void testBucketOnly() throws IOException {
newFilesystem(URI.create("gs://hellbender"));
}
private void newFilesystem(URI uri) throws IOException {
ClassLoader cl = Thread.currentThread().getContextClassLoader();
final FileSystem fileSystem = FileSystems.newFileSystem(uri, new HashMap<>(), cl);
} the first test fails with
The second passes. So we can't just use the @jean-philippe-martin Can you comment on this? It looks like a simple patch to the gcs provider would make it so that this approach would work. @tomwhite Have you tested this running in spark? I don't see how it can work, but maybe I'm missing something important. I do see that our implementation of this method in gatk has a special case for gcs still. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did try running this on HDFS with Spark a while ago and it worked. There's a fix for the GCS issue from @jean-philippe-martin here: googleapis/google-cloud-java#1470 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lbergerson: yes this was a bug, it's fixed now. @tomwhite: Does this mean that on Spark after someone calls getPath on a gs:// URI, the fs provider will be loaded and Path.get will work correctly? Because that would be sweet. |
||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,16 @@ | ||
package htsjdk.tribble.util; | ||
|
||
|
||
import com.google.common.jimfs.Configuration; | ||
import com.google.common.jimfs.Jimfs; | ||
import htsjdk.samtools.util.IOUtil; | ||
import java.io.BufferedWriter; | ||
import java.io.File; | ||
import java.io.OutputStream; | ||
import java.io.OutputStreamWriter; | ||
import java.nio.file.FileSystem; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import org.testng.Assert; | ||
import org.testng.annotations.Test; | ||
|
||
|
@@ -117,6 +127,37 @@ public void testSplitJoinEmptyFirst() { | |
testSplitJoinRoundtrip("\ta\tb", '\t', Arrays.asList("", "a", "b")); | ||
} | ||
|
||
@Test | ||
public void testFileDoesExist() throws IOException{ | ||
File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); | ||
tempFile.deleteOnExit(); | ||
tstExists(tempFile.getAbsolutePath(), true); | ||
tstExists(tempFile.toURI().toString(), true); | ||
} | ||
|
||
@Test | ||
public void testFileDoesNotExist() throws IOException{ | ||
File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); | ||
tempFile.delete(); | ||
tstExists(tempFile.getAbsolutePath(), false); | ||
tstExists(tempFile.toURI().toString(), false); | ||
} | ||
|
||
@Test | ||
public void testInMemoryNioFileDoesExist() throws IOException{ | ||
FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); | ||
Path file = fs.getPath("/file"); | ||
Files.createFile(file); | ||
tstExists(file.toUri().toString(), true); | ||
} | ||
|
||
@Test | ||
public void testInMemoryNioFileDoesNotExist() throws IOException{ | ||
FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); | ||
Path file = fs.getPath("/file"); | ||
tstExists(file.toUri().toString(), false); | ||
} | ||
|
||
@Test | ||
public void testFTPDoesExist() throws IOException{ | ||
tstExists(AVAILABLE_FTP_URL, true); | ||
|
@@ -142,6 +183,26 @@ private void tstExists(String path, boolean expectExists) throws IOException{ | |
Assert.assertEquals(exists, expectExists); | ||
} | ||
|
||
@Test | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need a way of testing non-file URI's. I'm not sure the best way to go about it. One possibility would be to include one of the alternative filesystem providers as a test dependency. Either that or maybe we could mock one. There is a [google in memory filesystem] that might make a good test for the non-file based file systems. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @droazen What do you think about including a new test dependency for providing alternative paths. It might help us avoid problems like the index one we saw before. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea. I've added a test dependency on https://github.com/google/jimfs, and a few tests that use non-file URIs that exercise the NIO file path. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
public void testFileOpenInputStream() throws IOException{ | ||
File tempFile = File.createTempFile(getClass().getSimpleName(), ".tmp"); | ||
tempFile.deleteOnExit(); | ||
OutputStream os = IOUtil.openFileForWriting(tempFile); | ||
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(os)); | ||
writer.write("hello"); | ||
writer.close(); | ||
tstStream(tempFile.getAbsolutePath()); | ||
tstStream(tempFile.toURI().toString()); | ||
} | ||
|
||
@Test | ||
public void testInMemoryNioFileOpenInputStream() throws IOException{ | ||
FileSystem fs = Jimfs.newFileSystem(Configuration.unix()); | ||
Path file = fs.getPath("/file"); | ||
Files.write(file, "hello".getBytes("UTF-8")); | ||
tstStream(file.toUri().toString()); | ||
} | ||
|
||
@Test | ||
public void testFTPOpenInputStream() throws IOException{ | ||
tstStream(AVAILABLE_FTP_URL); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you replace the use of
Paths.get
inSamInputResource.UrlInputResource
with a call to this instead? Just in case...There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done