Skip to content

Commit 30d87f6

Browse files
authored
Merge branch 'trunk' into runarorama/avroMagic
2 parents 9da9f56 + 9d7d818 commit 30d87f6

File tree

9 files changed

+482
-187
lines changed

9 files changed

+482
-187
lines changed

unison-runtime/src/Unison/Runtime/ANF.hs

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,7 @@ import Unison.Reference (Id, Reference, Reference' (Builtin, DerivedId), toShort
125125
import Unison.Referent (Referent, pattern Con, pattern Ref)
126126
import Unison.Runtime.Array qualified as PA
127127
import Unison.Runtime.Foreign.Function.Type (ForeignFunc (..))
128+
import Unison.Runtime.Referenced
128129
import Unison.Runtime.TypeTags (CTag (..), PackedTag (..), RTag (..), Tag (..), maskTags, packTags, unpackTags)
129130
import Unison.ShortHash (shortenTo)
130131
import Unison.Symbol (Symbol)
@@ -1609,28 +1610,6 @@ type ANFD v = Compose (ANFM v) (Directed ())
16091610
data GroupRef = GR Reference Word64
16101611
deriving (Show, Eq)
16111612

1612-
-- A value with optional optimization information for serialization.
1613-
-- The references are required for serialization V5, and are assumed
1614-
-- to be the only references used in the value _up to in-memory
1615-
-- uniqueness_.
1616-
--
1617-
-- This is parameterized so that it can be used with both Value and
1618-
-- Code.
1619-
--
1620-
-- Also note, the stored referenced might not be 'tight' in the sense
1621-
-- that they all actually occur in the value. Maintaining this
1622-
-- invariant together with actual canonicalization would be onerous
1623-
-- and isn't done at this time.
1624-
data Referenced a
1625-
= -- types, terms
1626-
WithRefs [Reference] [Reference] a
1627-
| Plain a
1628-
deriving (Show, Eq)
1629-
1630-
dereference :: Referenced a -> a
1631-
dereference (WithRefs _ _ x) = x
1632-
dereference (Plain x) = x
1633-
16341613
-- | A list of either unboxed or boxed values.
16351614
-- Each slot is one of unboxed or boxed but not both.
16361615
type ValList = [Value]

unison-runtime/src/Unison/Runtime/ANF/Serialize.hs

Lines changed: 44 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ module Unison.Runtime.ANF.Serialize where
77

88
import Control.Monad
99
import Control.Monad.Reader
10+
import Control.Monad.State.Strict (StateT (..))
1011
import Data.Bifunctor (bimap, first)
1112
import Data.Binary.Get (runGetOrFail)
1213
import Data.Binary.Get qualified as BGet
@@ -37,8 +38,10 @@ import Unison.Runtime.ANF.Optimize as ANF
3738
import Unison.Runtime.ANF.Serialize.CodeV4 qualified as CodeV4
3839
import Unison.Runtime.ANF.Serialize.Tags
3940
import Unison.Runtime.ANF.Serialize.ValueV5 qualified as ValueV5
41+
import Unison.Runtime.Canonicalizer qualified as C
4042
import Unison.Runtime.Exception
4143
import Unison.Runtime.Foreign.Function.Type (ForeignFunc)
44+
import Unison.Runtime.Referenced
4245
import Unison.Runtime.Serialize
4346
import Unison.Util.Text qualified as Util.Text
4447
import Unison.Var (Type (ANFBlank), Var (..))
@@ -964,27 +967,30 @@ deserializeCode bs = runGetS go bs
964967
-- Boolean argument determines whether ForeignFunc occurrences are
965968
-- allowed to be serialized. For interchange, this should be False.
966969
serializeCode :: Bool -> Referenced Code -> ByteString
967-
serializeCode fops (WithRefs tys tms co) =
968-
runPutS (putWord32be 4 *> CodeV4.putCodeWithHeader tys tms fops co)
969-
serializeCode fops (Plain co) = runPutS (putVersion *> putCode fops co)
970+
serializeCode fops (dereference -> co) =
971+
runPutS (putVersion *> putCode fops co)
970972
where
971973
putVersion = putWord32be codeVersion
972974

973975
serializeCodeWithVersion ::
974-
Word64 -> Bool -> Referenced Code -> Either String L.ByteString
975-
serializeCodeWithVersion v fops = \case
976-
WithRefs tys tms co
977-
| v == 4 ->
978-
Right . runPutL $
976+
Word64 -> Bool -> Referenced Code -> IO (Either String L.ByteString)
977+
serializeCodeWithVersion v fops rco
978+
| v == 4 =
979+
enreference rco >>= \(tys, tms, co) ->
980+
pure . Right . runPutL $
979981
putWord32be 4 *> CodeV4.putCodeWithHeader tys tms fops co
980-
rco
981-
| v == 3 ->
982-
Right . runPutL $
983-
putWord32be 3 *> putCode fops (dereference rco)
984-
| v == 4 ->
985-
Left "could not serialize plain code at v4"
986-
| otherwise ->
987-
Left $ "unsupported code serialization version: " ++ show v
982+
| v == 3 =
983+
pure . Right . runPutL $
984+
putWord32be 3 *> putCode fops (dereference rco)
985+
| otherwise =
986+
pure . Left $ "unsupported code serialization version: " ++ show v
987+
where
988+
enreference (WithRefs tys tms co) = pure (tys, tms, co)
989+
enreference (Plain co) =
990+
runStateT
991+
(canonicalizeRefs traverseCodeRefs co)
992+
(C.empty, [], [])
993+
>>= \(co, (_, tys, tms)) -> pure (tys, tms, co)
988994

989995
-- | Serializes a `SuperGroup` for rehashing.
990996
--
@@ -1042,14 +1048,28 @@ serializeValue (dereference -> v) =
10421048
where
10431049
putVersion = putWord32be valueVersion
10441050

1045-
serializeValueWithVersion :: Word64 -> Referenced Value -> L.ByteString
1046-
serializeValueWithVersion v = \case
1047-
WithRefs tys tms x
1048-
| v == 5 ->
1049-
runPutL $ putWord32be 5 *> ValueV5.putValueWithHeader tys tms x
1050-
rval
1051-
| n <- fromIntegral v ->
1052-
runPutL $ putWord32be n *> putValue (Transfer n) (dereference rval)
1051+
serializeValueWithVersion ::
1052+
Word64 -> Referenced Value -> IO L.ByteString
1053+
serializeValueWithVersion v rval
1054+
| v == 5 = case rval of
1055+
WithRefs tys tms x -> v5ser tys tms x
1056+
Plain x -> do
1057+
(x, (_, tys, tms)) <-
1058+
runStateT
1059+
(canonicalizeRefs traverseValueRefs x)
1060+
(C.empty, [], [])
1061+
v5ser tys tms x
1062+
| v < 5,
1063+
n <- fromIntegral v =
1064+
pure . runPutL $
1065+
putWord32be n
1066+
*> putValue (Transfer n) (dereference rval)
1067+
| otherwise =
1068+
die $ "Value.serialize.versioned: unrecognized version: " ++ show v
1069+
where
1070+
v5ser tys tms x =
1071+
pure . runPutL $
1072+
putWord32be 5 *> ValueV5.putValueWithHeader tys tms x
10531073

10541074
-- This serializer is used exclusively for hashing unison values.
10551075
-- For this reason, it doesn't prefix the string with the current

unison-runtime/src/Unison/Runtime/Foreign/Function.hs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ foreignCallHelper = \case
519519
pure . Bytes.fromArray $ ANF.serializeCode False co
520520
Code_serialize_versioned -> mkForeign $
521521
\(ver :: Word64, co :: ANF.Referenced ANF.Code) ->
522-
case ANF.serializeCodeWithVersion ver False co of
522+
ANF.serializeCodeWithVersion ver False co >>= \case
523523
Left err -> die err
524524
Right bs -> pure $ Bytes.fromLazyByteString bs
525525
Code_deserialize ->
@@ -536,7 +536,7 @@ foreignCallHelper = \case
536536
pure . Bytes.fromArray . ANF.serializeValue
537537
Value_serialize_versioned ->
538538
mkForeign $
539-
pure . Bytes.fromLazyByteString . uncurry ANF.serializeValueWithVersion
539+
fmap Bytes.fromLazyByteString . uncurry ANF.serializeValueWithVersion
540540
Value_deserialize ->
541541
mkForeign $
542542
pure . ANF.deserializeValue . Bytes.toLazyByteString

unison-runtime/src/Unison/Runtime/Machine.hs

Lines changed: 10 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ import Unison.Runtime.Foreign.Function
8787
import Unison.Runtime.MCode
8888
import Unison.Runtime.Machine.Primops
8989
import Unison.Runtime.Machine.Types
90+
import Unison.Runtime.Referenced
9091
import Unison.Runtime.Stack
9192
import Unison.Runtime.TypeTags qualified as TT
9293
import Unison.Symbol (Symbol)
@@ -1415,85 +1416,24 @@ type Reflect = StateT ReflectionState IO
14151416
emptyRS :: ReflectionState
14161417
emptyRS = RS HM.empty HM.empty C.empty [] []
14171418

1418-
type RTrav a =
1419-
forall f.
1420-
(Applicative f) =>
1421-
(Bool -> Reference -> f Reference) ->
1422-
(a -> f a)
1419+
mediate ::
1420+
StateT (C.Canonicalizer Reference, [Reference], [Reference]) IO a ->
1421+
Reflect a
1422+
mediate act = StateT \(RS sty stm canon tys tms) -> do
1423+
(v, (canon, tys, tms)) <- runStateT act (canon, tys, tms)
1424+
pure $ (v, RS sty stm canon tys tms)
1425+
{-# INLINE mediate #-}
14231426

14241427
canonicalizeReference :: Bool -> Reference -> Reflect Reference
1425-
canonicalizeReference isTy r = StateT \st@(RS _ _ canon tys tms) ->
1426-
C.categorize canon r >>= \case
1427-
C.Canonical -> pure (r, st)
1428-
C.Equivalent s canon -> (s,) <$> evaluate (st {_canon = canon})
1429-
C.Novel canon ->
1430-
(r,)
1431-
<$> evaluate
1432-
st
1433-
{ _canon = canon,
1434-
_tys = if isTy then r : tys else tys,
1435-
_tms = if isTy then tms else r : tms
1436-
}
1428+
canonicalizeReference isTy = mediate . canonicalizeRefs (\f -> f isTy)
14371429

14381430
canonicalizeReferent :: Referent -> Reflect Referent
14391431
canonicalizeReferent (Ref r) = Ref <$> canonicalizeReference False r
14401432
canonicalizeReferent (Con (ConstructorReference r i) j) =
14411433
flip Con j . flip ConstructorReference i <$> canonicalizeReference True r
14421434

14431435
canonicalizeReferenced :: RTrav a -> ANF.Referenced a -> Reflect a
1444-
canonicalizeReferenced trav = \case
1445-
-- no stored refs, have to traverse
1446-
ANF.Plain v -> trav h v
1447-
ANF.WithRefs tys tms v -> do
1448-
typs <- mapMaybe id <$> traverse (g True) tys
1449-
tmps <- mapMaybe id <$> traverse (g False) tms
1450-
1451-
ctys <- lift $ C.fromList typs
1452-
ctms <- lift $ C.fromList tmps
1453-
1454-
let f False r = C.findWithDefault r r ctms
1455-
f True r = C.findWithDefault r r ctys
1456-
1457-
if null typs && null tmps
1458-
then -- all references are already canonical
1459-
pure v
1460-
else lift $ trav f v
1461-
where
1462-
-- traversal function for plain values
1463-
g isTy r = StateT \st@(RS _ _ canon tys tms) ->
1464-
C.categorize canon r >>= \case
1465-
C.Canonical -> pure (Nothing, st)
1466-
C.Novel canon ->
1467-
(Nothing,)
1468-
<$> evaluate
1469-
st
1470-
{ _canon = canon,
1471-
_tys = if isTy then r : tys else tys,
1472-
_tms = if isTy then tms else r : tms
1473-
}
1474-
C.Equivalent s canon ->
1475-
(Just (r, s),) <$> evaluate (st {_canon = canon})
1476-
1477-
-- traversal function for remapping WithRefs values
1478-
h isTy r = StateT \st@(RS _ _ canon tys tms) ->
1479-
C.categorize canon r >>= \case
1480-
C.Canonical -> pure (r, st)
1481-
C.Novel canon ->
1482-
(r,)
1483-
<$> evaluate
1484-
if isTy
1485-
then
1486-
st
1487-
{ _canon = canon,
1488-
_tys = r : tys
1489-
}
1490-
else
1491-
st
1492-
{ _canon = canon,
1493-
_tms = r : tms
1494-
}
1495-
C.Equivalent r canon ->
1496-
(r,) <$> evaluate (st {_canon = canon})
1436+
canonicalizeReferenced trav x = mediate $ recanonicalizeRefs trav x
14971437
{-# INLINE canonicalizeReferenced #-}
14981438

14991439
reflectValue :: CCache -> Val -> IO (ANF.Referenced ANF.Value)
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
module Unison.Runtime.Referenced
2+
( Referenced (..),
3+
dereference,
4+
RTrav,
5+
Canonize,
6+
canonicalizeRefs,
7+
recanonicalizeRefs,
8+
)
9+
where
10+
11+
import Control.Monad.State.Strict
12+
import Data.Maybe (mapMaybe)
13+
import Unison.Reference
14+
import Unison.Runtime.Canonicalizer
15+
16+
-- A value with optional optimization information for serialization.
17+
-- The references are required for serialization V5, and are assumed
18+
-- to be the only references used in the value _up to in-memory
19+
-- uniqueness_.
20+
--
21+
-- This is parameterized so that it can be used with both Value and
22+
-- Code.
23+
--
24+
-- Also note, the stored referenced might not be 'tight' in the sense
25+
-- that they all actually occur in the value. Maintaining this
26+
-- invariant together with actual canonicalization would be onerous
27+
-- and isn't done at this time.
28+
data Referenced a
29+
= -- types, terms
30+
WithRefs [Reference] [Reference] a
31+
| Plain a
32+
deriving (Show, Eq)
33+
34+
dereference :: Referenced a -> a
35+
dereference (WithRefs _ _ x) = x
36+
dereference (Plain x) = x
37+
38+
type RTrav a =
39+
forall f.
40+
(Applicative f) =>
41+
(Bool -> Reference -> f Reference) ->
42+
(a -> f a)
43+
44+
type Canonize =
45+
StateT (Canonicalizer Reference, [Reference], [Reference]) IO
46+
47+
-- Given a reference traversal, canonicalizes the references in a
48+
-- value. The operation is presented as a state transformation, so
49+
-- that it can hook into a larger canonicalization procedure. The
50+
-- lists of canonical references of each sort are yielded as part of
51+
-- the state.
52+
canonicalizeRefs :: RTrav a -> a -> Canonize a
53+
canonicalizeRefs trav = trav h
54+
where
55+
h isTy r = StateT \st@(canon, tys, tms) ->
56+
categorize canon r >>= \case
57+
Canonical -> pure (r, st)
58+
Novel canon
59+
| isTy -> pure (r, (canon, r : tys, tms))
60+
| otherwise -> pure (r, (canon, tys, r : tms))
61+
Equivalent s canon -> pure (s, (canon, tys, tms))
62+
{-# INLINE canonicalizeRefs #-}
63+
64+
-- Given a `Referenced` value, this recanonicalizes the references in
65+
-- the wrapped value. The intention is for this to be hooked into a
66+
-- larger canonicalization procedure, so that already canonicalized
67+
-- values can be more efficiently brought in line with other values
68+
-- that are already canonicalized.
69+
--
70+
-- If the `Referenced` value is `Plain`, then all we can do is
71+
-- traverse it, canonicalizing the references. However, if it is
72+
-- tagged with canonical refs, we can see if they all match existing
73+
-- canonical refs. If so, we don't need to traverse the value. Even if
74+
-- not, we can traverse with marginally more efficient lookups.
75+
recanonicalizeRefs :: RTrav a -> Referenced a -> Canonize a
76+
recanonicalizeRefs trav = \case
77+
Plain v -> canonicalizeRefs trav v
78+
WithRefs tys tms v -> do
79+
typs <- mapMaybe id <$> traverse (g True) tys
80+
tmps <- mapMaybe id <$> traverse (g False) tms
81+
82+
ctys <- lift $ fromList typs
83+
ctms <- lift $ fromList tmps
84+
85+
let f False r = findWithDefault r r ctms
86+
f True r = findWithDefault r r ctys
87+
88+
if null typs && null tmps
89+
then pure v -- already canonical
90+
else lift $ trav f v
91+
where
92+
g isTy r = StateT \st@(canon, tys, tms) ->
93+
categorize canon r >>= \case
94+
Canonical -> pure (Nothing, st)
95+
Novel canon ->
96+
pure
97+
( Nothing,
98+
( canon,
99+
if isTy then r : tys else tys,
100+
if isTy then tms else r : tms
101+
)
102+
)
103+
Equivalent s canon -> pure (Just (r, s), (canon, tys, tms))
104+
{-# INLINE recanonicalizeRefs #-}

unison-runtime/unison-runtime.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ library
6666
Unison.Runtime.MCode
6767
Unison.Runtime.MCode.Serialize
6868
Unison.Runtime.Pattern
69+
Unison.Runtime.Referenced
6970
Unison.Runtime.Serialize
7071
Unison.Runtime.SparseVector
7172
Unison.Runtime.Stack

0 commit comments

Comments
 (0)