Skip to content

Commit c5bf7dd

Browse files
committed
feat(Unicode): allows non-panicking handling of invalid unicode characters
1 parent 3d0199d commit c5bf7dd

File tree

2 files changed

+222
-26
lines changed

2 files changed

+222
-26
lines changed

src/app/app.rs

+204-26
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ use std::io::{self, BufRead, BufWriter, Write};
44
use std::path::Path;
55
use std::process;
66
use std::error::Error;
7+
use std::ffi::OsStr;
8+
use std::borrow::Borrow;
79

810
#[cfg(feature = "yaml")]
911
use yaml_rust::Yaml;
@@ -1879,6 +1881,10 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
18791881
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
18801882
/// the real parsing function for all subcommands
18811883
///
1884+
/// # Panics
1885+
///
1886+
/// If any arguments contain invalid unicode characters. If this is not desired it is
1887+
/// recommended to use the `*_safe()` or `*_lossy()` versions of this method.
18821888
///
18831889
/// # Example
18841890
///
@@ -1893,6 +1899,23 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
18931899
self.get_matches_from(env::args())
18941900
}
18951901

1902+
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
1903+
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
1904+
/// `U+FFFD REPLACEMENT CHARACTER`
1905+
///
1906+
/// # Examples
1907+
///
1908+
/// ```no_run
1909+
/// # use clap::{App, Arg};
1910+
/// let matches = App::new("myprog")
1911+
/// // Args and options go here...
1912+
/// .get_matches_lossy();
1913+
/// ```
1914+
pub fn get_matches_lossy(self) -> ArgMatches<'ar, 'ar> {
1915+
// Start the parsing
1916+
self.get_matches_from_lossy(env::args_os())
1917+
}
1918+
18961919
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
18971920
/// the real parsing function for all subcommands
18981921
///
@@ -1911,7 +1934,29 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
19111934
/// ```
19121935
pub fn get_matches_safe(self) -> Result<ArgMatches<'ar, 'ar>, ClapError> {
19131936
// Start the parsing
1914-
self.get_matches_from_safe(env::args())
1937+
self.get_matches_from_safe(env::args_os())
1938+
}
1939+
1940+
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
1941+
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
1942+
/// `U+FFFD REPLACEMENT CHARACTER`
1943+
///
1944+
/// **NOTE:** This method should only be used when it is absolutely necessary to handle errors
1945+
/// manually.
1946+
///
1947+
///
1948+
/// # Example
1949+
///
1950+
/// ```no_run
1951+
/// # use clap::{App, Arg};
1952+
/// let matches = App::new("myprog")
1953+
/// // Args and options go here...
1954+
/// .get_matches_safe_lossy()
1955+
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
1956+
/// ```
1957+
pub fn get_matches_safe_lossy(self) -> Result<ArgMatches<'ar, 'ar>, ClapError> {
1958+
// Start the parsing
1959+
self.get_matches_from_safe_lossy(env::args_os())
19151960
}
19161961

19171962
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
@@ -1938,8 +1983,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
19381983
itr: I)
19391984
-> ArgMatches<'ar, 'ar>
19401985
where I: IntoIterator<Item = T>,
1941-
T: AsRef<str>
1942-
{
1986+
T: AsRef<OsStr> {
19431987
match self.get_matches_from_safe_borrow(itr) {
19441988
Ok(m) => return m,
19451989
Err(e) => {
@@ -1955,6 +1999,47 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
19551999
}
19562000
}
19572001

2002+
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
2003+
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
2004+
/// `U+FFFD REPLACEMENT CHARACTER`
2005+
///
2006+
/// **NOTE:** The first argument will be parsed as the binary name.
2007+
///
2008+
/// **NOTE:** This method should only be used when absolutely necessary, such as needing to
2009+
/// parse arguments from something other than `std::env::args()`. If you are unsure, use
2010+
/// `App::get_matches()`
2011+
///
2012+
///
2013+
/// # Example
2014+
///
2015+
/// ```no_run
2016+
/// # use clap::{App, Arg};
2017+
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
2018+
///
2019+
/// let matches = App::new("myprog")
2020+
/// // Args and options go here...
2021+
/// .get_matches_from_lossy(arg_vec);
2022+
/// ```
2023+
pub fn get_matches_from_lossy<I, T>(mut self,
2024+
itr: I)
2025+
-> ArgMatches<'ar, 'ar>
2026+
where I: IntoIterator<Item = T>,
2027+
T: AsRef<OsStr> {
2028+
match self.get_matches_from_safe_borrow_lossy(itr) {
2029+
Ok(m) => return m,
2030+
Err(e) => {
2031+
wlnerr!("{}", e.error);
2032+
if self.wait_on_error {
2033+
wlnerr!("\nPress [ENTER] / [RETURN] to continue...");
2034+
let mut s = String::new();
2035+
let i = io::stdin();
2036+
i.lock().read_line(&mut s).unwrap();
2037+
}
2038+
process::exit(1);
2039+
}
2040+
}
2041+
}
2042+
19582043
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
19592044
/// the real parsing function for all subcommands
19602045
///
@@ -1967,6 +2052,9 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
19672052
/// **NOTE:** This method should only be used when it is absolutely necessary to handle errors
19682053
/// manually.
19692054
///
2055+
/// **NOTE:** Invalid unicode characters will result in an `Err` with type
2056+
/// `ClapErrorType::InvalidUnicode`
2057+
///
19702058
///
19712059
/// # Example
19722060
///
@@ -1983,14 +2071,13 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
19832071
itr: I)
19842072
-> Result<ArgMatches<'ar, 'ar>, ClapError>
19852073
where I: IntoIterator<Item = T>,
1986-
T: AsRef<str>
1987-
{
2074+
T: AsRef<OsStr> {
19882075
self.get_matches_from_safe_borrow(itr)
19892076
}
19902077

1991-
/// Starts the parsing process without consuming the `App` struct `self`. This is normally not
1992-
/// the desired functionality, instead prefer `App::get_matches_from_safe` which *does*
1993-
/// consume `self`.
2078+
/// Starts the parsing process. Called on top level parent app **ONLY** then recursively calls
2079+
/// the real parsing function for all subcommands. Invalid unicode characters are replaced with
2080+
/// `U+FFFD REPLACEMENT CHARACTER`
19942081
///
19952082
/// **NOTE:** The first argument will be parsed as the binary name.
19962083
///
@@ -2001,23 +2088,32 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
20012088
/// **NOTE:** This method should only be used when it is absolutely necessary to handle errors
20022089
/// manually.
20032090
///
2091+
///
20042092
/// # Example
20052093
///
20062094
/// ```no_run
20072095
/// # use clap::{App, Arg};
20082096
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
20092097
///
2010-
/// let mut app = App::new("myprog");
2098+
/// let matches = App::new("myprog")
20112099
/// // Args and options go here...
2012-
/// let matches = app.get_matches_from_safe_borrow(arg_vec)
2100+
/// .get_matches_from_safe_lossy(arg_vec)
20132101
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
20142102
/// ```
2015-
pub fn get_matches_from_safe_borrow<I, T>(&mut self,
2016-
itr: I)
2103+
pub fn get_matches_from_safe_lossy<I, T>(mut self,
2104+
itr: I)
2105+
-> Result<ArgMatches<'ar, 'ar>, ClapError>
2106+
where I: IntoIterator<Item = T>,
2107+
T: AsRef<OsStr> {
2108+
self._get_matches_from_safe_borrow(itr, true)
2109+
}
2110+
2111+
fn _get_matches_from_safe_borrow<I, T>(&mut self,
2112+
itr: I,
2113+
lossy: bool)
20172114
-> Result<ArgMatches<'ar, 'ar>, ClapError>
20182115
where I: IntoIterator<Item = T>,
2019-
T: AsRef<str>
2020-
{
2116+
T: AsRef<OsStr> {
20212117
// Verify all positional assertions pass
20222118
self.verify_positionals();
20232119
// If there are global arguments, we need to propagate them down to subcommands before
@@ -2044,13 +2140,83 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
20442140
}
20452141

20462142
// do the real parsing
2047-
if let Err(e) = self.get_matches_with(&mut matches, &mut it) {
2143+
if let Err(e) = self.get_matches_with(&mut matches, &mut it, lossy) {
20482144
return Err(e);
20492145
}
20502146

20512147
Ok(matches)
20522148
}
20532149

2150+
/// Starts the parsing process without consuming the `App` struct `self`. This is normally not
2151+
/// the desired functionality, instead prefer `App::get_matches_from_safe` which *does*
2152+
/// consume `self`.
2153+
///
2154+
/// **NOTE:** The first argument will be parsed as the binary name.
2155+
///
2156+
/// **NOTE:** This method should only be used when absolutely necessary, such as needing to
2157+
/// parse arguments from something other than `std::env::args()`. If you are unsure, use
2158+
/// `App::get_matches_safe()`
2159+
///
2160+
/// **NOTE:** This method should only be used when it is absolutely necessary to handle errors
2161+
/// manually.
2162+
///
2163+
/// **NOTE:** Invalid unicode characters will result in an `Err` with type
2164+
/// `ClapErrorType::InvalidUnicode`
2165+
///
2166+
/// # Example
2167+
///
2168+
/// ```no_run
2169+
/// # use clap::{App, Arg};
2170+
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
2171+
///
2172+
/// let mut app = App::new("myprog");
2173+
/// // Args and options go here...
2174+
/// let matches = app.get_matches_from_safe_borrow(arg_vec)
2175+
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
2176+
/// ```
2177+
pub fn get_matches_from_safe_borrow<I, T>(&mut self,
2178+
itr: I)
2179+
-> Result<ArgMatches<'ar, 'ar>, ClapError>
2180+
where I: IntoIterator<Item = T>,
2181+
T: AsRef<OsStr> {
2182+
self._get_matches_from_safe_borrow(itr, false)
2183+
}
2184+
2185+
/// Starts the parsing process without consuming the `App` struct `self`. This is normally not
2186+
/// the desired functionality, instead prefer `App::get_matches_from_safe` which *does*
2187+
/// consume `self`. Invalid unicode characters are replaced with `U+FFFD REPLACEMENT CHARACTER`
2188+
///
2189+
/// **NOTE:** The first argument will be parsed as the binary name.
2190+
///
2191+
/// **NOTE:** This method should only be used when absolutely necessary, such as needing to
2192+
/// parse arguments from something other than `std::env::args()`. If you are unsure, use
2193+
/// `App::get_matches_safe()`
2194+
///
2195+
/// **NOTE:** This method should only be used when it is absolutely necessary to handle errors
2196+
/// manually.
2197+
///
2198+
/// **NOTE:** Invalid unicode characters do not result in an `Err` with type
2199+
/// `ClapErrorType::InvalidUnicode`; they are replaced with `U+FFFD REPLACEMENT CHARACTER`
2200+
///
2201+
/// # Example
2202+
///
2203+
/// ```no_run
2204+
/// # use clap::{App, Arg};
2205+
/// let arg_vec = vec!["my_prog", "some", "args", "to", "parse"];
2206+
///
2207+
/// let mut app = App::new("myprog");
2208+
/// // Args and options go here...
2209+
/// let matches = app.get_matches_from_safe_borrow_lossy(arg_vec)
2210+
/// .unwrap_or_else( |e| { panic!("An error occurs: {}", e) });
2211+
/// ```
2212+
pub fn get_matches_from_safe_borrow_lossy<I, T>(&mut self,
2213+
itr: I)
2214+
-> Result<ArgMatches<'ar, 'ar>, ClapError>
2215+
where I: IntoIterator<Item = T>,
2216+
T: AsRef<OsStr> {
2217+
self._get_matches_from_safe_borrow(itr, true)
2218+
}
2219+
20542220

20552221
fn verify_positionals(&mut self) {
20562222
// Because you must wait until all arguments have been supplied, this is the first chance
@@ -2137,11 +2303,11 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
21372303
// The actual parsing function
21382304
fn get_matches_with<I, T>(&mut self,
21392305
matches: &mut ArgMatches<'ar, 'ar>,
2140-
it: &mut I)
2306+
it: &mut I,
2307+
lossy: bool)
21412308
-> Result<(), ClapError>
21422309
where I: Iterator<Item = T>,
2143-
T: AsRef<str>
2144-
{
2310+
T: AsRef<OsStr> {
21452311
// First we create the `--help` and `--version` arguments and add them if necessary
21462312
self.create_help_and_version();
21472313

@@ -2151,7 +2317,20 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
21512317
let mut pos_counter = 1;
21522318
let mut val_counter = 0;
21532319
while let Some(arg) = it.next() {
2154-
let arg_slice = arg.as_ref();
2320+
let arg_cow = match arg.as_ref().to_str() {
2321+
Some(s) => s.into(),
2322+
None => {
2323+
if !lossy {
2324+
return Err(ClapError{
2325+
error: format!("{} Invalid unicode character in one or more arguments",
2326+
Format::Error("error:")),
2327+
error_type: ClapErrorType::InvalidUnicode
2328+
});
2329+
}
2330+
arg.as_ref().to_string_lossy()
2331+
}
2332+
};
2333+
let arg_slice: &str = arg_cow.borrow();
21552334
let mut skip = false;
21562335

21572336
// we need to know if we're parsing a new argument, or the value of previous argument,
@@ -2186,7 +2365,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
21862365
if num == vals.len() as u8 && !opt.multiple {
21872366
return Err(self.report_error(format!("The argument '{}' \
21882367
was found, but '{}' only expects {} values",
2189-
Format::Warning(arg.as_ref()),
2368+
Format::Warning(arg_slice),
21902369
Format::Warning(opt.to_string()),
21912370
Format::Good(vals.len().to_string())),
21922371
ClapErrorType::InvalidValue,
@@ -2335,7 +2514,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
23352514
format!("The subcommand '{}' isn't valid\n\tDid you mean '{}' ?\n\n\
23362515
If you received this message in error, try \
23372516
re-running with '{} {} {}'",
2338-
Format::Warning(arg.as_ref()),
2517+
Format::Warning(arg_slice),
23392518
Format::Good(candidate_subcommand),
23402519
self.bin_name.clone().unwrap_or(self.name.clone()),
23412520
Format::Good("--"),
@@ -2350,7 +2529,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
23502529
if self.positionals_idx.is_empty() {
23512530
return Err(self.report_error(
23522531
format!("Found argument '{}', but {} wasn't expecting any",
2353-
Format::Warning(arg.as_ref()),
2532+
Format::Warning(arg_slice),
23542533
self.bin_name.clone().unwrap_or(self.name.clone())),
23552534
ClapErrorType::UnexpectedArgument,
23562535
App::get_args(matches)));
@@ -2391,7 +2570,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
23912570
if vals.len() as u8 == num {
23922571
return Err(self.report_error(format!("The argument '{}' \
23932572
was found, but '{}' wasn't expecting any more values",
2394-
Format::Warning(arg.as_ref()),
2573+
Format::Warning(arg_slice),
23952574
Format::Warning(p.to_string())),
23962575
ClapErrorType::TooMuchValues,
23972576
App::get_args(matches)));
@@ -2481,7 +2660,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
24812660
}
24822661
} else {
24832662
return Err(self.report_error(format!("The argument '{}' was found, but '{}' \
2484-
wasn't expecting any", Format::Warning(arg.as_ref()),
2663+
wasn't expecting any", Format::Warning(arg_slice),
24852664
self.bin_name.clone().unwrap_or(self.name.clone())),
24862665
ClapErrorType::UnexpectedArgument,
24872666
App::get_args(matches)));
@@ -2575,7 +2754,7 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
25752754
""
25762755
},
25772756
sc.name.clone()));
2578-
if let Err(e) = sc.get_matches_with(&mut new_matches, it) {
2757+
if let Err(e) = sc.get_matches_with(&mut new_matches, it, lossy) {
25792758
e.exit();
25802759
}
25812760
matches.subcommand = Some(Box::new(SubCommand {
@@ -3554,5 +3733,4 @@ impl<'a, 'v, 'ab, 'u, 'h, 'ar> App<'a, 'v, 'ab, 'u, 'h, 'ar>{
35543733
None => (String::new(), None),
35553734
}
35563735
}
3557-
35583736
}

0 commit comments

Comments
 (0)