Skip to content

Commit ef86491

Browse files
authored
Add ADL/single-node-view of a full unixFS file. (#14)
* Add ADL/single-node-view for unixFS files.
1 parent 28f6929 commit ef86491

10 files changed

+736
-14
lines changed

data/builder/file_test.go

+39
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,14 @@ package builder
22

33
import (
44
"bytes"
5+
"context"
6+
"io"
57
"testing"
68

79
"github.com/ipfs/go-cid"
810
u "github.com/ipfs/go-ipfs-util"
11+
"github.com/ipfs/go-unixfsnode/file"
12+
dagpb "github.com/ipld/go-codec-dagpb"
913
"github.com/ipld/go-ipld-prime"
1014
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
1115
)
@@ -39,3 +43,38 @@ func TestBuildUnixFSFile(t *testing.T) {
3943
t.Fatal("expected top of file to be in store.")
4044
}
4145
}
46+
47+
func TestUnixFSFileRoundtrip(t *testing.T) {
48+
buf := make([]byte, 10*1024*1024)
49+
u.NewSeededRand(0xdeadbeef).Read(buf)
50+
r := bytes.NewReader(buf)
51+
52+
ls := cidlink.DefaultLinkSystem()
53+
storage := cidlink.Memory{}
54+
ls.StorageReadOpener = storage.OpenRead
55+
ls.StorageWriteOpener = storage.OpenWrite
56+
57+
f, _, err := BuildUnixFSFile(r, "", &ls)
58+
if err != nil {
59+
t.Fatal(err)
60+
}
61+
62+
// get back the root node substrate from the link at the top of the builder.
63+
fr, err := ls.Load(ipld.LinkContext{}, f, dagpb.Type.PBNode)
64+
if err != nil {
65+
t.Fatal(err)
66+
}
67+
68+
ufn, err := file.NewUnixFSFile(context.Background(), fr, &ls)
69+
if err != nil {
70+
t.Fatal(err)
71+
}
72+
// read back out the file.
73+
out, err := io.ReadAll(ufn)
74+
if err != nil {
75+
t.Fatal(err)
76+
}
77+
if !bytes.Equal(out, buf) {
78+
t.Fatal("Not equal")
79+
}
80+
}

file/file.go

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package file
2+
3+
import (
4+
"context"
5+
"io"
6+
7+
"github.com/ipld/go-ipld-prime"
8+
)
9+
10+
// NewUnixFSFile attempts to construct an ipld node from the base protobuf node representing the
11+
// root of a unixfs File.
12+
// It provides a `bytes` view over the file, along with access to io.Reader streaming access
13+
// to file data.
14+
func NewUnixFSFile(ctx context.Context, substrate ipld.Node, lsys *ipld.LinkSystem) (StreamableByteNode, error) {
15+
if substrate.Kind() == ipld.Kind_Bytes {
16+
// A raw / single-node file.
17+
return &singleNodeFile{substrate, 0}, nil
18+
}
19+
// see if it's got children.
20+
links, err := substrate.LookupByString("Links")
21+
if err != nil {
22+
return nil, err
23+
}
24+
if links.Length() == 0 {
25+
// no children.
26+
return newWrappedNode(substrate)
27+
}
28+
29+
return &shardNodeFile{
30+
ctx: ctx,
31+
lsys: lsys,
32+
substrate: substrate,
33+
done: false,
34+
rdr: nil}, nil
35+
}
36+
37+
// A StreamableByteNode is an ipld.Node that can be streamed over. It is guaranteed to have a Bytes type.
38+
type StreamableByteNode interface {
39+
ipld.Node
40+
io.Reader
41+
}
42+
43+
type singleNodeFile struct {
44+
ipld.Node
45+
offset int
46+
}
47+
48+
func (f *singleNodeFile) Read(p []byte) (int, error) {
49+
buf, err := f.Node.AsBytes()
50+
if err != nil {
51+
return 0, err
52+
}
53+
if f.offset >= len(buf) {
54+
return 0, io.EOF
55+
}
56+
n := copy(p, buf[f.offset:])
57+
f.offset += n
58+
return n, nil
59+
}

file/file_test.go

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package file_test
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"fmt"
7+
"io"
8+
"testing"
9+
10+
"github.com/ipfs/go-unixfsnode"
11+
"github.com/ipfs/go-unixfsnode/directory"
12+
"github.com/ipfs/go-unixfsnode/file"
13+
"github.com/ipld/go-car/v2/blockstore"
14+
dagpb "github.com/ipld/go-codec-dagpb"
15+
"github.com/ipld/go-ipld-prime"
16+
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
17+
)
18+
19+
func TestRootV0File(t *testing.T) {
20+
baseFile := "./fixtures/QmT78zSuBmuS4z925WZfrqQ1qHaJ56DQaTfyMUF7F8ff5o.car"
21+
root, ls := open(baseFile, t)
22+
file, err := file.NewUnixFSFile(context.Background(), root, ls)
23+
if err != nil {
24+
t.Fatal(err)
25+
}
26+
fc, err := file.AsBytes()
27+
if err != nil {
28+
t.Fatal(err)
29+
}
30+
if !bytes.Equal(fc, []byte("hello world\n")) {
31+
t.Errorf("file content does not match: %s", string(fc))
32+
}
33+
}
34+
35+
func TestNamedV0File(t *testing.T) {
36+
baseFile := "./fixtures/QmT8EC9sJq63SkDZ1mWLbWWyVV66PuqyHWpKkH4pcVyY4H.car"
37+
root, ls := open(baseFile, t)
38+
dir, err := unixfsnode.Reify(ipld.LinkContext{}, root, ls)
39+
if err != nil {
40+
t.Fatal(err)
41+
}
42+
dpbn := dir.(directory.UnixFSBasicDir)
43+
name, link := dpbn.Iterator().Next()
44+
if name.String() != "b.txt" {
45+
t.Fatal("unexpected filename")
46+
}
47+
fileNode, err := ls.Load(ipld.LinkContext{}, link.Link(), dagpb.Type.PBNode)
48+
if err != nil {
49+
t.Fatal(err)
50+
}
51+
file, err := file.NewUnixFSFile(context.Background(), fileNode, ls)
52+
if err != nil {
53+
t.Fatal(err)
54+
}
55+
fc, err := file.AsBytes()
56+
if err != nil {
57+
t.Fatal(err)
58+
}
59+
if !bytes.Equal(fc, []byte("hello world\n")) {
60+
t.Errorf("file content does not match: %s", string(fc))
61+
}
62+
}
63+
64+
func open(car string, t *testing.T) (ipld.Node, *ipld.LinkSystem) {
65+
baseStore, err := blockstore.OpenReadOnly(car)
66+
if err != nil {
67+
t.Fatal(err)
68+
}
69+
ls := cidlink.DefaultLinkSystem()
70+
ls.StorageReadOpener = func(_ ipld.LinkContext, l ipld.Link) (io.Reader, error) {
71+
cl, ok := l.(cidlink.Link)
72+
if !ok {
73+
return nil, fmt.Errorf("couldn't load link")
74+
}
75+
blk, err := baseStore.Get(cl.Cid)
76+
if err != nil {
77+
return nil, err
78+
}
79+
return bytes.NewBuffer(blk.RawData()), nil
80+
}
81+
carRoots, err := baseStore.Roots()
82+
if err != nil {
83+
t.Fatal(err)
84+
}
85+
root, err := ls.Load(ipld.LinkContext{}, cidlink.Link{Cid: carRoots[0]}, dagpb.Type.PBNode)
86+
if err != nil {
87+
t.Fatal(err)
88+
}
89+
return root, &ls
90+
}
Binary file not shown.
Binary file not shown.

file/shard.go

+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
package file
2+
3+
import (
4+
"context"
5+
"io"
6+
7+
dagpb "github.com/ipld/go-codec-dagpb"
8+
"github.com/ipld/go-ipld-prime"
9+
cidlink "github.com/ipld/go-ipld-prime/linking/cid"
10+
"github.com/ipld/go-ipld-prime/node/basicnode"
11+
"github.com/multiformats/go-multicodec"
12+
)
13+
14+
type shardNodeFile struct {
15+
ctx context.Context
16+
lsys *ipld.LinkSystem
17+
substrate ipld.Node
18+
done bool
19+
rdr io.Reader
20+
}
21+
22+
var _ ipld.Node = (*shardNodeFile)(nil)
23+
24+
func (s *shardNodeFile) Read(p []byte) (int, error) {
25+
if s.done {
26+
return 0, io.EOF
27+
}
28+
// collect the sub-nodes on first use
29+
if s.rdr == nil {
30+
links, err := s.substrate.LookupByString("Links")
31+
if err != nil {
32+
return 0, err
33+
}
34+
readers := make([]io.Reader, 0)
35+
lnki := links.ListIterator()
36+
for !lnki.Done() {
37+
_, lnk, err := lnki.Next()
38+
if err != nil {
39+
return 0, err
40+
}
41+
lnkhash, err := lnk.LookupByString("Hash")
42+
if err != nil {
43+
return 0, err
44+
}
45+
lnklnk, err := lnkhash.AsLink()
46+
if err != nil {
47+
return 0, err
48+
}
49+
target, err := s.lsys.Load(ipld.LinkContext{Ctx: s.ctx}, lnklnk, protoFor(lnklnk))
50+
if err != nil {
51+
return 0, err
52+
}
53+
54+
asFSNode, err := NewUnixFSFile(s.ctx, target, s.lsys)
55+
if err != nil {
56+
return 0, err
57+
}
58+
readers = append(readers, asFSNode)
59+
}
60+
s.rdr = io.MultiReader(readers...)
61+
}
62+
n, err := s.rdr.Read(p)
63+
if err == io.EOF {
64+
s.rdr = nil
65+
s.done = true
66+
}
67+
return n, err
68+
}
69+
70+
func protoFor(link ipld.Link) ipld.NodePrototype {
71+
if lc, ok := link.(cidlink.Link); ok {
72+
if lc.Cid.Prefix().Codec == uint64(multicodec.DagPb) {
73+
return dagpb.Type.PBNode
74+
}
75+
}
76+
return basicnode.Prototype.Any
77+
}
78+
79+
func (s *shardNodeFile) Kind() ipld.Kind {
80+
return ipld.Kind_Bytes
81+
}
82+
83+
func (s *shardNodeFile) AsBytes() ([]byte, error) {
84+
return io.ReadAll(s)
85+
}
86+
87+
func (s *shardNodeFile) AsBool() (bool, error) {
88+
return false, ipld.ErrWrongKind{TypeName: "bool", MethodName: "AsBool", AppropriateKind: ipld.KindSet_JustBytes}
89+
}
90+
91+
func (s *shardNodeFile) AsInt() (int64, error) {
92+
return 0, ipld.ErrWrongKind{TypeName: "int", MethodName: "AsInt", AppropriateKind: ipld.KindSet_JustBytes}
93+
}
94+
95+
func (s *shardNodeFile) AsFloat() (float64, error) {
96+
return 0, ipld.ErrWrongKind{TypeName: "float", MethodName: "AsFloat", AppropriateKind: ipld.KindSet_JustBytes}
97+
}
98+
99+
func (s *shardNodeFile) AsString() (string, error) {
100+
return "", ipld.ErrWrongKind{TypeName: "string", MethodName: "AsString", AppropriateKind: ipld.KindSet_JustBytes}
101+
}
102+
103+
func (s *shardNodeFile) AsLink() (ipld.Link, error) {
104+
return nil, ipld.ErrWrongKind{TypeName: "link", MethodName: "AsLink", AppropriateKind: ipld.KindSet_JustBytes}
105+
}
106+
107+
func (s *shardNodeFile) AsNode() (ipld.Node, error) {
108+
return nil, nil
109+
}
110+
111+
func (s *shardNodeFile) Size() int {
112+
return 0
113+
}
114+
115+
func (s *shardNodeFile) IsAbsent() bool {
116+
return false
117+
}
118+
119+
func (s *shardNodeFile) IsNull() bool {
120+
return s.substrate.IsNull()
121+
}
122+
123+
func (s *shardNodeFile) Length() int64 {
124+
return 0
125+
}
126+
127+
func (s *shardNodeFile) ListIterator() ipld.ListIterator {
128+
return nil
129+
}
130+
131+
func (s *shardNodeFile) MapIterator() ipld.MapIterator {
132+
return nil
133+
}
134+
135+
func (s *shardNodeFile) LookupByIndex(idx int64) (ipld.Node, error) {
136+
return nil, ipld.ErrWrongKind{}
137+
}
138+
139+
func (s *shardNodeFile) LookupByString(key string) (ipld.Node, error) {
140+
return nil, ipld.ErrWrongKind{}
141+
}
142+
143+
func (s *shardNodeFile) LookupByNode(key ipld.Node) (ipld.Node, error) {
144+
return nil, ipld.ErrWrongKind{}
145+
}
146+
147+
func (s *shardNodeFile) LookupBySegment(seg ipld.PathSegment) (ipld.Node, error) {
148+
return nil, ipld.ErrWrongKind{}
149+
}
150+
151+
// shardded files / nodes look like dagpb nodes.
152+
func (s *shardNodeFile) Prototype() ipld.NodePrototype {
153+
return dagpb.Type.PBNode
154+
}

file/wrapped.go

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package file
2+
3+
import (
4+
"github.com/ipfs/go-unixfsnode/data"
5+
"github.com/ipld/go-ipld-prime"
6+
"github.com/ipld/go-ipld-prime/node/basicnode"
7+
)
8+
9+
func newWrappedNode(substrate ipld.Node) (StreamableByteNode, error) {
10+
dataField, err := substrate.LookupByString("Data")
11+
if err != nil {
12+
return nil, err
13+
}
14+
// unpack as unixfs proto.
15+
dfb, err := dataField.AsBytes()
16+
if err != nil {
17+
return nil, err
18+
}
19+
ufd, err := data.DecodeUnixFSData(dfb)
20+
if err != nil {
21+
return nil, err
22+
}
23+
24+
if ufd.Data.Exists() {
25+
return &singleNodeFile{
26+
Node: ufd.Data.Must(),
27+
}, nil
28+
}
29+
30+
// an empty degenerate one.
31+
return &singleNodeFile{
32+
Node: basicnode.NewBytes(nil),
33+
}, nil
34+
}

0 commit comments

Comments
 (0)