-
Notifications
You must be signed in to change notification settings - Fork 271
Add Unified Address parser structural checks #416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
01a8dba
cd94b41
7708b27
b175b9b
384af07
060a15e
77d1f0c
478625f
8527dcb
b875f6c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,11 @@ | ||
use std::cmp; | ||
use std::collections::HashSet; | ||
use std::convert::{TryFrom, TryInto}; | ||
use std::error::Error; | ||
use std::fmt; | ||
use std::iter; | ||
|
||
use crate::{kind, ParseError}; | ||
use crate::kind; | ||
|
||
mod f4jumble; | ||
|
||
|
@@ -24,32 +28,156 @@ pub(crate) const REGTEST: &str = "uregtest"; | |
|
||
const PADDING_LEN: usize = 16; | ||
|
||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] | ||
pub enum Typecode { | ||
P2pkh, | ||
P2sh, | ||
Sapling, | ||
Orchard, | ||
Unknown(u8), | ||
} | ||
|
||
impl Ord for Typecode { | ||
fn cmp(&self, other: &Self) -> cmp::Ordering { | ||
match (self, other) { | ||
// Trivial equality checks. | ||
(Self::Orchard, Self::Orchard) | ||
| (Self::Sapling, Self::Sapling) | ||
| (Self::P2sh, Self::P2sh) | ||
| (Self::P2pkh, Self::P2pkh) => cmp::Ordering::Equal, | ||
|
||
// We don't know for certain the preference order of unknown receivers, but it | ||
// is likely that the higher typecode has higher preference. The exact order | ||
// doesn't really matter, as unknown receivers have lower preference than | ||
// known receivers. | ||
(Self::Unknown(a), Self::Unknown(b)) => b.cmp(a), | ||
|
||
// For the remaining cases, we rely on `match` always choosing the first arm | ||
// with a matching pattern. Patterns below are listed in priority order: | ||
(Self::Orchard, _) => cmp::Ordering::Less, | ||
(_, Self::Orchard) => cmp::Ordering::Greater, | ||
|
||
(Self::Sapling, _) => cmp::Ordering::Less, | ||
(_, Self::Sapling) => cmp::Ordering::Greater, | ||
|
||
(Self::P2sh, _) => cmp::Ordering::Less, | ||
(_, Self::P2sh) => cmp::Ordering::Greater, | ||
|
||
(Self::P2pkh, _) => cmp::Ordering::Less, | ||
(_, Self::P2pkh) => cmp::Ordering::Greater, | ||
} | ||
} | ||
} | ||
|
||
impl PartialOrd for Typecode { | ||
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { | ||
Some(self.cmp(other)) | ||
} | ||
} | ||
|
||
impl From<u8> for Typecode { | ||
fn from(typecode: u8) -> Self { | ||
match typecode { | ||
0x00 => Typecode::P2pkh, | ||
0x01 => Typecode::P2sh, | ||
0x02 => Typecode::Sapling, | ||
0x03 => Typecode::Orchard, | ||
_ => Typecode::Unknown(typecode), | ||
} | ||
} | ||
} | ||
|
||
impl From<Typecode> for u8 { | ||
fn from(t: Typecode) -> Self { | ||
match t { | ||
Typecode::P2pkh => 0x00, | ||
Typecode::P2sh => 0x01, | ||
Typecode::Sapling => 0x02, | ||
Typecode::Orchard => 0x03, | ||
Typecode::Unknown(typecode) => typecode, | ||
} | ||
} | ||
} | ||
|
||
impl Typecode { | ||
fn is_shielded(&self) -> bool { | ||
match self { | ||
Typecode::P2pkh | Typecode::P2sh => false, | ||
// Assume that unknown typecodes are shielded, because they might be. | ||
_ => true, | ||
} | ||
} | ||
str4d marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
/// An error while attempting to parse a string as a Zcash address. | ||
#[derive(Debug, PartialEq)] | ||
pub enum ParseError { | ||
/// The unified address contains both P2PKH and P2SH receivers. | ||
BothP2phkAndP2sh, | ||
/// The unified address contains a duplicated typecode. | ||
DuplicateTypecode(Typecode), | ||
/// The string is an invalid encoding. | ||
InvalidEncoding, | ||
/// The unified address only contains transparent receivers. | ||
OnlyTransparent, | ||
} | ||
|
||
impl fmt::Display for ParseError { | ||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
match self { | ||
ParseError::BothP2phkAndP2sh => write!(f, "UA contains both P2PKH and P2SH receivers"), | ||
ParseError::DuplicateTypecode(typecode) => { | ||
write!(f, "Duplicate typecode {}", u8::from(*typecode)) | ||
} | ||
ParseError::InvalidEncoding => write!(f, "Invalid encoding"), | ||
ParseError::OnlyTransparent => write!(f, "UA only contains transparent receivers"), | ||
} | ||
} | ||
} | ||
|
||
impl Error for ParseError {} | ||
|
||
/// The set of known Receivers for Unified Addresses. | ||
/// | ||
/// This enum is an internal-only type, and is maintained in preference order, so that the | ||
/// derived [`PartialOrd`] will sort receivers correctly. From its documentation: | ||
/// | ||
/// > When derived on enums, variants are ordered by their top-to-bottom discriminant | ||
/// > order. | ||
str4d marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] | ||
pub(crate) enum Receiver { | ||
#[derive(Clone, Debug, PartialEq, Eq, Hash)] | ||
pub enum Receiver { | ||
Orchard([u8; 43]), | ||
Sapling(kind::sapling::Data), | ||
P2pkh(kind::p2pkh::Data), | ||
P2sh(kind::p2sh::Data), | ||
Unknown { typecode: u8, data: Vec<u8> }, | ||
} | ||
|
||
impl cmp::Ord for Receiver { | ||
fn cmp(&self, other: &Self) -> cmp::Ordering { | ||
match self.typecode().cmp(&other.typecode()) { | ||
cmp::Ordering::Equal => self.addr().cmp(other.addr()), | ||
res => res, | ||
} | ||
} | ||
} | ||
|
||
impl cmp::PartialOrd for Receiver { | ||
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { | ||
Some(self.cmp(other)) | ||
} | ||
} | ||
|
||
impl TryFrom<(u8, &[u8])> for Receiver { | ||
type Error = ParseError; | ||
|
||
fn try_from((typecode, addr): (u8, &[u8])) -> Result<Self, Self::Error> { | ||
match typecode { | ||
0x00 => addr.try_into().map(Receiver::P2pkh), | ||
0x01 => addr.try_into().map(Receiver::P2sh), | ||
0x02 => addr.try_into().map(Receiver::Sapling), | ||
0x03 => addr.try_into().map(Receiver::Orchard), | ||
_ => Ok(Receiver::Unknown { | ||
match typecode.into() { | ||
Typecode::P2pkh => addr.try_into().map(Receiver::P2pkh), | ||
Typecode::P2sh => addr.try_into().map(Receiver::P2sh), | ||
Typecode::Sapling => addr.try_into().map(Receiver::Sapling), | ||
Typecode::Orchard => addr.try_into().map(Receiver::Orchard), | ||
Typecode::Unknown(_) => Ok(Receiver::Unknown { | ||
typecode, | ||
data: addr.to_vec(), | ||
}), | ||
|
@@ -59,13 +187,13 @@ impl TryFrom<(u8, &[u8])> for Receiver { | |
} | ||
|
||
impl Receiver { | ||
fn typecode(&self) -> u8 { | ||
fn typecode(&self) -> Typecode { | ||
match self { | ||
Receiver::P2pkh(_) => 0x00, | ||
Receiver::P2sh(_) => 0x01, | ||
Receiver::Sapling(_) => 0x02, | ||
Receiver::Orchard(_) => 0x03, | ||
Receiver::Unknown { typecode, .. } => *typecode, | ||
Receiver::P2pkh(_) => Typecode::P2pkh, | ||
Receiver::P2sh(_) => Typecode::P2sh, | ||
Receiver::Sapling(_) => Typecode::Sapling, | ||
Receiver::Orchard(_) => Typecode::Orchard, | ||
Receiver::Unknown { typecode, .. } => Typecode::Unknown(*typecode), | ||
} | ||
} | ||
|
||
|
@@ -113,7 +241,34 @@ impl TryFrom<&[u8]> for Address { | |
_ => Some(Err(ParseError::InvalidEncoding)), | ||
}) | ||
.collect::<Result<_, _>>() | ||
.map(Address) | ||
.and_then(|receivers: Vec<Receiver>| receivers.try_into()) | ||
} | ||
} | ||
|
||
impl TryFrom<Vec<Receiver>> for Address { | ||
type Error = ParseError; | ||
|
||
fn try_from(receivers: Vec<Receiver>) -> Result<Self, Self::Error> { | ||
let mut typecodes = HashSet::with_capacity(receivers.len()); | ||
for receiver in &receivers { | ||
let t = receiver.typecode(); | ||
if typecodes.contains(&t) { | ||
return Err(ParseError::DuplicateTypecode(t)); | ||
} else if (t == Typecode::P2pkh && typecodes.contains(&Typecode::P2sh)) | ||
|| (t == Typecode::P2sh && typecodes.contains(&Typecode::P2pkh)) | ||
{ | ||
return Err(ParseError::BothP2phkAndP2sh); | ||
} else { | ||
typecodes.insert(t); | ||
} | ||
} | ||
|
||
if !typecodes.iter().any(|t| t.is_shielded()) { | ||
str4d marked this conversation as resolved.
Show resolved
Hide resolved
|
||
Err(ParseError::OnlyTransparent) | ||
} else { | ||
// All checks pass! | ||
Ok(Address(receivers)) | ||
} | ||
} | ||
} | ||
|
||
|
@@ -129,7 +284,7 @@ impl Address { | |
assert!(addr.len() < 256); | ||
|
||
iter::empty() | ||
.chain(Some(receiver.typecode())) | ||
.chain(Some(receiver.typecode().into())) | ||
.chain(Some(addr.len() as u8)) | ||
.chain(addr.iter().cloned()) | ||
}) | ||
|
@@ -138,6 +293,23 @@ impl Address { | |
|
||
f4jumble::f4jumble(&encoded).unwrap() | ||
} | ||
|
||
/// Returns the receivers contained within this address, sorted in preference order. | ||
pub fn receivers(&self) -> Vec<Receiver> { | ||
let mut receivers = self.0.clone(); | ||
// Unstable sorting is fine, because all receivers are guaranteed by construction | ||
// to have distinct typecodes. | ||
receivers.sort_unstable_by_key(|r| r.typecode()); | ||
receivers | ||
} | ||
|
||
/// Returns the receivers contained within this address, in the order they were | ||
/// parsed from the string encoding. | ||
/// | ||
/// This API is for advanced usage; in most cases you should use `Address::receivers`. | ||
pub fn receivers_as_parsed(&self) -> &[Receiver] { | ||
&self.0 | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
|
@@ -149,7 +321,7 @@ mod tests { | |
prelude::*, | ||
}; | ||
|
||
use super::{Address, Receiver}; | ||
use super::{Address, ParseError, Receiver, Typecode}; | ||
|
||
prop_compose! { | ||
fn uniform43()(a in uniform11(0u8..), b in uniform32(0u8..)) -> [u8; 43] { | ||
|
@@ -193,4 +365,73 @@ mod tests { | |
prop_assert_eq!(decoded, Ok(ua)); | ||
} | ||
} | ||
|
||
#[test] | ||
fn duplicate_typecode() { | ||
// Construct and serialize an invalid UA. | ||
let ua = Address(vec![Receiver::Sapling([1; 43]), Receiver::Sapling([2; 43])]); | ||
let encoded = ua.to_bytes(); | ||
assert_eq!( | ||
Address::try_from(&encoded[..]), | ||
Err(ParseError::DuplicateTypecode(Typecode::Sapling)) | ||
); | ||
} | ||
|
||
#[test] | ||
fn p2pkh_and_p2sh() { | ||
// Construct and serialize an invalid UA. | ||
let ua = Address(vec![Receiver::P2pkh([0; 20]), Receiver::P2sh([0; 20])]); | ||
let encoded = ua.to_bytes(); | ||
assert_eq!( | ||
Address::try_from(&encoded[..]), | ||
Err(ParseError::BothP2phkAndP2sh) | ||
); | ||
} | ||
|
||
#[test] | ||
fn only_transparent() { | ||
// Encoding of `Address(vec![Receiver::P2pkh([0; 20])])`. | ||
let encoded = vec![ | ||
0x3b, 0x3d, 0xe6, 0xb3, 0xed, 0xaa, 0x0a, 0x36, 0x12, 0xbc, 0x8d, 0x2b, 0x1a, 0xaa, | ||
0x27, 0x7e, 0x45, 0xc0, 0xc2, 0x0e, 0xf9, 0x6f, 0x24, 0x9b, 0x79, 0x0a, 0x68, 0x76, | ||
0xa8, 0x4c, 0x3f, 0xf0, 0x1f, 0x39, 0x97, 0xbd, 0x15, 0x0d, | ||
]; | ||
|
||
// We can't actually exercise this error, because at present the only transparent | ||
// receivers we can use are P2PKH and P2SH (which cannot be used together), and | ||
// with only one of them we don't have sufficient data for F4Jumble (so we hit a | ||
// different error). | ||
assert_eq!( | ||
Address::try_from(&encoded[..]), | ||
Err(ParseError::InvalidEncoding) | ||
); | ||
} | ||
|
||
#[test] | ||
fn receivers_are_sorted() { | ||
// Construct a UA with receivers in an unsorted order. | ||
let ua = Address(vec![ | ||
Receiver::P2pkh([0; 20]), | ||
Receiver::Orchard([0; 43]), | ||
Receiver::Unknown { | ||
typecode: 0xff, | ||
data: vec![], | ||
}, | ||
Receiver::Sapling([0; 43]), | ||
]); | ||
|
||
// `Address::receivers` sorts the receivers in priority order. | ||
assert_eq!( | ||
ua.receivers(), | ||
vec![ | ||
Receiver::Orchard([0; 43]), | ||
Receiver::Sapling([0; 43]), | ||
Receiver::P2pkh([0; 20]), | ||
Receiver::Unknown { | ||
typecode: 0xff, | ||
data: vec![], | ||
}, | ||
] | ||
) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add a test for this case:
An example is a UA containing a Sapling address with a non-canonical encoding (according to ZIP 216) of pkd. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also:
Examples would be:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Alternatively we could make |
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It'd be better to explicitly list the remaining typecodes here, just for future-proofness. This reminds me, how will TZE interact with the addressing scheme?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note that in zcash/zips#536, I clarified the requirement that uses this as follows:
So it is explicitly only P2SH and P2PKH addresses that are treated as transparent here (which fits with the rationale).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@nuttycom we cannot list the remaining typecodes here, because they include
Typecode::Unknown
which is explicitly unbounded. As for TZE: we would need to decide at the time whether the property we want is "UA must contain a shielded component" (which would preclude TZE-only addrs) or "UA must not contain only a t-address-equivalent component" (which would enable the creation of non-shielded UAs). I suspect the latter is what we will want, treating UAs as the adapter mechanism for plugging in new receivers rather than needing to define a new address encoding every time.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The presence of Typecode::Unknown makes me tempted to say that this function should actually return an
Option<Bool>
because otherwise callers can't use it in isolation of other checks against the typecode. This is probably fine; it just means that this won't be flagged by the compiler as a call site that needs inspection if the set of receivers changes. I'm just lazy, I want the compiler to do that work for me, rather than having to remember. :)