filestore.go

// Copyright 2016 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package storage

import (
	"context"
	"io"
	"sort"
	"sync"

	"github.com/ethereum/go-ethereum/swarm/chunk"
	"github.com/ethereum/go-ethereum/swarm/storage/localstore"
)
/*
FileStore provides the client API entrypoints Store and Retrieve to store and
retrieve data. It can store anything that has a byte slice representation:
files, serialised objects, etc.

Storage: FileStore calls the Chunker to segment the input datastream of any
size into a Merkle-hashed tree of chunks. The key of the root block is
returned to the client.

Retrieval: given the key of the root block, the FileStore retrieves the block
chunks and reconstructs the original data, passing it back as a lazy reader.
A lazy reader is a reader with on-demand delayed processing: the chunks needed
to reconstruct a large file are only fetched and processed if that particular
part of the document is actually read.

As the chunker produces chunks, FileStore dispatches them to its own chunk
store implementation for storage or retrieval. A minimal usage sketch follows
NewFileStore below.
*/
const (
	defaultLDBCapacity                = 5000000 // capacity for LevelDB, by default 5*10^6*4096 bytes == 20GB
	defaultCacheCapacity              = 10000   // capacity for the in-memory chunk cache
	defaultChunkRequestsCacheCapacity = 5000000 // capacity for the container holding outgoing chunk requests; should match the LevelDB capacity
)
// FileStore is the client API for storing and retrieving data on top of a
// ChunkStore.
type FileStore struct {
	ChunkStore
	hashFunc SwarmHasher
	tags     *chunk.Tags
}
// FileStoreParams configures a FileStore.
type FileStoreParams struct {
	Hash string
}

// NewFileStoreParams returns FileStoreParams using the default hash function.
func NewFileStoreParams() *FileStoreParams {
	return &FileStoreParams{
		Hash: DefaultHash,
	}
}
// NewLocalFileStore is a helper for testing locally: it wires a FileStore to
// a localstore-backed, content-address-validating chunk store.
func NewLocalFileStore(datadir string, basekey []byte, tags *chunk.Tags) (*FileStore, error) {
	localStore, err := localstore.New(datadir, basekey, nil)
	if err != nil {
		return nil, err
	}
	return NewFileStore(chunk.NewValidatorStore(localStore, NewContentAddressValidator(MakeHashFunc(DefaultHash))), NewFileStoreParams(), tags), nil
}
// NewFileStore creates a FileStore on top of the given chunk store, using the
// hash function named in params.
func NewFileStore(store ChunkStore, params *FileStoreParams, tags *chunk.Tags) *FileStore {
	hashFunc := MakeHashFunc(params.Hash)
	return &FileStore{
		ChunkStore: store,
		hashFunc:   hashFunc,
		tags:       tags,
	}
}
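
// A minimal round-trip sketch (illustrative only, not part of the original
// file): it stores a payload and reads it back. It assumes chunk.NewTags
// exists as a zero-argument constructor and that *LazyChunkReader satisfies
// io.Reader, as the lazy-reader description above implies; imports of bytes,
// context and io are also assumed.
//
//	func roundTrip(datadir string, payload []byte) ([]byte, error) {
//		fileStore, err := NewLocalFileStore(datadir, make([]byte, 32), chunk.NewTags())
//		if err != nil {
//			return nil, err
//		}
//		ctx := context.Background()
//		addr, wait, err := fileStore.Store(ctx, bytes.NewReader(payload), int64(len(payload)), false)
//		if err != nil {
//			return nil, err
//		}
//		if err := wait(ctx); err != nil {
//			return nil, err
//		}
//		reader, _ := fileStore.Retrieve(ctx, addr)
//		out := make([]byte, len(payload))
//		_, err = io.ReadFull(reader, out)
//		return out, err
//	}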
// Retrieve is a public API. Main entry point for document retrieval directly.
// Used by the FS-aware API and httpaccess.
// Chunk retrieval blocks on netStore requests with a timeout, so the reader
// will report an error if retrieval of chunks within the requested range
// times out. It returns a lazy reader over the chunk data and whether the
// content was encrypted.
func (f *FileStore) Retrieve(ctx context.Context, addr Address) (reader *LazyChunkReader, isEncrypted bool) {
	// an encrypted reference carries the decryption key in addition to the
	// hash, so it is longer than the hash size
	isEncrypted = len(addr) > f.hashFunc().Size()
	tag, err := f.tags.GetFromContext(ctx)
	if err != nil {
		tag = chunk.NewTag(0, "ephemeral-retrieval-tag", 0)
	}
	getter := NewHasherStore(f.ChunkStore, f.hashFunc, isEncrypted, tag)
	reader = TreeJoin(ctx, addr, getter, 0)
	return
}
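
// Ranged-access sketch (illustrative; assumes *LazyChunkReader implements
// io.ReaderAt, consistent with the on-demand semantics described above):
// only the chunks covering the requested byte range are fetched.
//
//	reader, _ := fileStore.Retrieve(ctx, addr)
//	buf := make([]byte, 4096)
//	n, err := reader.ReadAt(buf, 1<<20) // read 4KB starting at offset 1MB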
// Store is a public API. Main entry point for document storage directly.
// Used by the FS-aware API and httpaccess.
func (f *FileStore) Store(ctx context.Context, data io.Reader, size int64, toEncrypt bool) (addr Address, wait func(context.Context) error, err error) {
	tag, err := f.tags.GetFromContext(ctx)
	if err != nil {
		// Some parts of the codebase, namely the manifest trie, do not store
		// the context of the original request nor the tag with the trie, so
		// recalculating the trie loses the tag uid. We therefore create an
		// ephemeral tag here for that purpose.
		tag = chunk.NewTag(0, "", 0)
		//return nil, nil, err
	}
	putter := NewHasherStore(f.ChunkStore, f.hashFunc, toEncrypt, tag)
	return PyramidSplit(ctx, data, putter, putter, tag)
}
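
// Note on the two-phase completion (a sketch of the intended call pattern,
// not authoritative): the root address is available once splitting finishes,
// while the returned wait function blocks until all chunks have been
// processed by the underlying store.
//
//	addr, wait, err := fileStore.Store(ctx, r, size, false)
//	if err == nil {
//		err = wait(ctx) // block until all chunks are stored
//	}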
// HashSize returns the size of the FileStore's configured hash function.
func (f *FileStore) HashSize() int {
	return f.hashFunc().Size()
}
// GetAllReferences is a public API. This endpoint returns all chunk hashes
// (only) for a given file.
func (f *FileStore) GetAllReferences(ctx context.Context, data io.Reader, toEncrypt bool) (addrs AddressCollection, err error) {
	// this tag is just a mock ephemeral tag since we don't want to save these results
	tag := chunk.NewTag(0, "ephemeral-tag", 0)
	// create a special kind of putter, which also collects the references
	putter := &hashExplorer{
		hasherStore: NewHasherStore(f.ChunkStore, f.hashFunc, toEncrypt, tag),
	}
	// the splitting still has to run in full: the chunk hashes only exist
	// once the chunks have been produced
	_, wait, err := PyramidSplit(ctx, data, putter, putter, tag)
	if err != nil {
		return nil, err
	}
	// wait for splitting to be complete and all chunks processed
	err = wait(ctx)
	if err != nil {
		return nil, err
	}
	// collect all references
	addrs = NewAddressCollection(0)
	for _, ref := range putter.references {
		addrs = append(addrs, Address(ref))
	}
	sort.Sort(addrs)
	return addrs, nil
}
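
// Usage sketch (illustrative; Address.Hex is assumed to exist on this
// package's Address type): list every chunk reference a file would produce.
//
//	addrs, err := fileStore.GetAllReferences(ctx, bytes.NewReader(payload), false)
//	if err == nil {
//		for _, a := range addrs {
//			fmt.Println(a.Hex())
//		}
//	}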
// hashExplorer is a special kind of putter which, in addition to performing
// the underlying store, collects the references of the chunks it puts.
type hashExplorer struct {
	*hasherStore
	references []Reference
	lock       sync.Mutex
}
// Put performs the underlying hasherStore.Put, then records the returned
// reference in the explorer's references slice.
func (he *hashExplorer) Put(ctx context.Context, chunkData ChunkData) (Reference, error) {
	// need to do the actual Put, which returns the reference
	ref, err := he.hasherStore.Put(ctx, chunkData)
	if err != nil {
		return nil, err
	}
	// store the reference internally; Put may be called concurrently by the
	// splitter, so guard the slice with the mutex
	he.lock.Lock()
	he.references = append(he.references, ref)
	he.lock.Unlock()
	return ref, nil
}