From f7a5c4af48b2f633160db88126793035bd9ba6c2 Mon Sep 17 00:00:00 2001 From: Stepan Usatiuk Date: Thu, 8 Jun 2023 14:35:59 +0200 Subject: [PATCH] a bit better but still not optimal --- src/commands/srcs/CommandMount.cpp | 3 +- src/commands/srcs/CommandRestore.cpp | 4 +- src/commands/srcs/CommandRun.cpp | 8 +-- src/fuse/includes/RepoFS.h | 19 ++---- src/fuse/srcs/RepoFS.cpp | 90 +++++++++++++--------------- src/repo/includes/objects/File.h | 4 +- src/repo/srcs/objects/File.cpp | 4 +- src/repo/srcs/objects/FileBuffer.cpp | 2 +- 8 files changed, 58 insertions(+), 76 deletions(-) diff --git a/src/commands/srcs/CommandMount.cpp b/src/commands/srcs/CommandMount.cpp index f0b8f37..0f5c65c 100644 --- a/src/commands/srcs/CommandMount.cpp +++ b/src/commands/srcs/CommandMount.cpp @@ -9,6 +9,5 @@ CommandMount::CommandMount() : Command("mount") { } void CommandMount::run(Context ctx) { - RepoFS rfs(ctx.repo, ctx.repo->getObjects(Object::ObjectType::Archive).begin()->second, "./hi"); - rfs.workerFn(); + RepoFS::start(ctx.repo, "./hi"); } diff --git a/src/commands/srcs/CommandRestore.cpp b/src/commands/srcs/CommandRestore.cpp index cd36429..f7de0a7 100644 --- a/src/commands/srcs/CommandRestore.cpp +++ b/src/commands/srcs/CommandRestore.cpp @@ -103,7 +103,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files return fullpath.u8string(); } if (file.fileType == File::Type::Symlink) { - auto dest = Serialize::deserialize(ctx.repo->getObject(file.chunks[0])); + auto dest = Serialize::deserialize(ctx.repo->getObject(file.chunks.at(0))); std::filesystem::create_symlink(std::filesystem::u8path(std::string{dest.data.begin(), dest.data.end()}), fullpath); callback(0, 0, 1); return fullpath.u8string(); @@ -113,7 +113,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files for (const auto cid: file.chunks) { if (Signals::shouldQuit) throw Exception("Quitting!"); - Chunk c = Serialize::deserialize(ctx.repo->getObject(cid)); + Chunk c = Serialize::deserialize(ctx.repo->getObject(cid.second)); if (!c.data.empty()) { ostream.rdbuf()->sputn(c.data.data(), c.data.size()); callback(c.data.size(), 0, 0); diff --git a/src/commands/srcs/CommandRun.cpp b/src/commands/srcs/CommandRun.cpp index 64ce5f6..96e37a9 100644 --- a/src/commands/srcs/CommandRun.cpp +++ b/src/commands/srcs/CommandRun.cpp @@ -188,7 +188,7 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) { auto contents = File::getFileContents(orig); Chunk c(ctx.repo->getId(), SHA::calculate(contents), contents); - File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.SHA, {c.id}, File::getFileType(orig)); + File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.SHA, {{0, c.id}}, File::getFileType(orig)); ctx.repo->putObject(c); ctx.repo->putObject(f); return f.id; @@ -202,7 +202,7 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co SHA fileHash; - std::vector fileChunks; + std::map fileChunks; unsigned long long size = 0; for (auto chunkp: *chunker) { @@ -210,7 +210,6 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co if (Signals::shouldQuit) break; Object::idType chunkId; - size += chunkp.second.size(); if (ctx.repo->getConfig().getStr("dedup") == "on" && ctx.repo->exists(Object::ObjectType::Chunk, chunkp.first)) { /// If the chunk already exists, reuse it chunkId = ctx.repo->getObjectId(Object::ObjectType::Chunk, chunkp.first); @@ -223,7 +222,8 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co ctx.repo->putObject(c); } fileHash.feedData(chunkp.second); - fileChunks.emplace_back(chunkId); + fileChunks.emplace(size, chunkId); + size += chunkp.second.size(); } /// We might have exited in the loop before, so we don't save an incomplete file diff --git a/src/fuse/includes/RepoFS.h b/src/fuse/includes/RepoFS.h index 75ae04c..324cb9e 100644 --- a/src/fuse/includes/RepoFS.h +++ b/src/fuse/includes/RepoFS.h @@ -5,6 +5,8 @@ #ifndef BACKUP_REPOFS_H #define BACKUP_REPOFS_H +#define FUSE_USE_VERSION 26 + #include #include "Repository.h" @@ -20,25 +22,12 @@ struct DirEntry { class RepoFS { public: - RepoFS(Repository *repo, Object::idType archiveId, std::string path); + static void start(Repository *repo, std::string path); - RepoFS &operator=(RepoFS rhs) = delete; - RepoFS(const RepoFS &orig) = delete; - - ~RepoFS(); - - void workerFn(); static inline DirEntry root; static inline Repository *repo; -private: - std::atomic stop = false;///< Stop flag - - Archive archive; - std::string path; - - - // std::thread thread;///< Worker thread + virtual ~RepoFS() = 0; }; diff --git a/src/fuse/srcs/RepoFS.cpp b/src/fuse/srcs/RepoFS.cpp index 7d08a75..b07c95e 100644 --- a/src/fuse/srcs/RepoFS.cpp +++ b/src/fuse/srcs/RepoFS.cpp @@ -2,50 +2,18 @@ // Created by Stepan Usatiuk on 07.06.2023. // + #include "../includes/RepoFS.h" -#define FUSE_USE_VERSION 26 - -#include +#include +#include #include #include #include -#include -#include #include "Serialize.h" #include "objects/Chunk.h" -RepoFS::RepoFS(Repository *repon, Object::idType archiveId, std::string path) : archive(Serialize::deserialize(repon->getObject(archiveId))), path(std::move(path)) { - RepoFS::repo = repon; - auto ars = repo->getObjects(Object::ObjectType::Archive); - for (auto const &r: ars) { - auto a = Serialize::deserialize(repon->getObject(r.second)); - for (auto const &f: a.files) { - auto file = Serialize::deserialize(repo->getObject(f)); - auto path = std::filesystem::u8path(file.name); - DirEntry *entry = &(root.children[r.first]); - entry->isFakeDir = true; - entry->name = a.name; - for (auto const &subp: path) { - entry = &entry->children[subp]; - } - entry->file.emplace(file); - entry->name = std::filesystem::u8path(file.name).filename().u8string(); - } - } - - // thread = std::thread(&RepoFS::workerFn, this); -} - -RepoFS::~RepoFS() { - // stop = true; - // thread.join(); -} - -static const char *hello_str = "Hello World!\n"; -static const char *hello_path = "/hello"; - DirEntry *getf(std::string path) { auto p = std::filesystem::relative(std::filesystem::u8path(path), "/"); DirEntry *entry = &RepoFS::root; @@ -127,18 +95,27 @@ static int rfsRead(const char *path, char *buf, size_t size, off_t offset, entry = getf(path); } catch (...) { return -ENOENT; } - std::vector data; - for (auto const &id: entry->file->chunks) { - auto ch = Serialize::deserialize(RepoFS::repo->getObject(id)); - data.insert(data.end(), ch.data.begin(), ch.data.end()); - } - - len = data.size(); + len = entry->file->bytes; if (offset < len) { if (offset + size > len) size = len - offset; - memcpy(buf, data.data() + offset, size); + + auto curchunk = entry->file->chunks.upper_bound(offset); + --curchunk; + if (curchunk == entry->file->chunks.end()) { + std::cerr << "OOOOOPS" << std::endl; + } + size_t curInBuf = 0; + size_t curInChunk = offset - curchunk->first; + while (curInBuf < size) { + auto chunk = Serialize::deserialize(RepoFS::repo->getObject(curchunk->second)); + size_t read = std::min((size_t) chunk.length - curInChunk, size - curInBuf); + memcpy(buf + curInBuf, chunk.data.data() + curInChunk, read); + curInBuf += read; + curInChunk = 0; + ++curchunk; + } } else size = 0; @@ -152,10 +129,27 @@ static struct fuse_operations rfsOps = { .read = rfsRead, }; -void RepoFS::workerFn() { - int argc = 3; - char *argv[] = {"", "-d", const_cast(path.c_str())}; +void RepoFS::start(Repository *repo, std::string path) { + RepoFS::repo = repo; + auto ars = repo->getObjects(Object::ObjectType::Archive); + for (auto const &r: ars) { + auto a = Serialize::deserialize(repo->getObject(r.second)); + for (auto const &f: a.files) { + auto file = Serialize::deserialize(repo->getObject(f)); + auto path = std::filesystem::u8path(file.name); + DirEntry *entry = &(root.children[r.first]); + entry->isFakeDir = true; + entry->name = a.name; + for (auto const &subp: path) { + entry = &entry->children[subp]; + } + entry->file.emplace(file); + entry->name = std::filesystem::u8path(file.name).filename().u8string(); + } + } + + + int argc = 5; + char *argv[] = {"", "-d", "-s", "-f", const_cast(path.c_str())}; std::cout << static_cast(fuse_main(argc, argv, &rfsOps, nullptr)); - // while (!stop) { - // } } diff --git a/src/repo/includes/objects/File.h b/src/repo/includes/objects/File.h index 6107b5e..7082c21 100644 --- a/src/repo/includes/objects/File.h +++ b/src/repo/includes/objects/File.h @@ -26,7 +26,7 @@ public: static inline const std::unordered_map TypeToStr{{Type::Normal, "normal"}, {Type::Symlink, "symlink"}, {Type::Directory, "directory"}}; - File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector chunks, Type fileType); + File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::map chunks, Type fileType); /// Deserialization constructor File(std::vector::const_iterator &in, const std::vector::const_iterator &end); @@ -69,7 +69,7 @@ public: /// List of the chunks in file /// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path - const std::vector chunks; + const std::map chunks; }; diff --git a/src/repo/srcs/objects/File.cpp b/src/repo/srcs/objects/File.cpp index f77ca1c..9d90baf 100644 --- a/src/repo/srcs/objects/File.cpp +++ b/src/repo/srcs/objects/File.cpp @@ -11,8 +11,8 @@ #include "../../../utils/includes/Exception.h" #include "../../includes/Serialize.h" -File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector chunks, Type fileType) - : Object(id, ObjectType::File), name(name), bytes(bytes), mtime(mtime), SHA(SHA), fileType(fileType), chunks(chunks) {} +File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::map chunks, Type fileType) + : Object(id, ObjectType::File), name(name), bytes(bytes), mtime(mtime), SHA(SHA), fileType(fileType), chunks(std::move(chunks)) {} File::File(std::vector::const_iterator &in, const std::vector::const_iterator &end) : Object(in, end), diff --git a/src/repo/srcs/objects/FileBuffer.cpp b/src/repo/srcs/objects/FileBuffer.cpp index 6e10e77..afde9b8 100644 --- a/src/repo/srcs/objects/FileBuffer.cpp +++ b/src/repo/srcs/objects/FileBuffer.cpp @@ -7,7 +7,7 @@ #include "../../includes/Serialize.h" FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId) : repo(repo), file(Serialize::deserialize(repo->getObject(fileId))), chunksQueue() { - for (auto const &id: file.chunks) chunksQueue.emplace(id); + for (auto const &id: file.chunks) chunksQueue.emplace(id.second); }; int FileBuffer::sync() {