a bit better but still not optimal

This commit is contained in:
2023-06-08 14:35:59 +02:00
parent f59950d852
commit f7a5c4af48
8 changed files with 58 additions and 76 deletions

View File

@@ -9,6 +9,5 @@ CommandMount::CommandMount() : Command("mount") {
}
/// Entry point of the "mount" command: exposes the repository through RepoFS at the hard-coded mount point "./hi".
// NOTE(review): this text appears to be a commit diff rendered without +/- markers, so the statements
// below likely mix the pre-commit body (construct a RepoFS, call workerFn) with the post-commit body
// (RepoFS::start) — confirm against the repository before treating all three statements as live code.
void CommandMount::run(Context ctx) {
// Builds a RepoFS for the first archive returned by getObjects(Archive) and runs its worker in this thread.
RepoFS rfs(ctx.repo, ctx.repo->getObjects(Object::ObjectType::Archive).begin()->second, "./hi");
rfs.workerFn();
// Static entry point: takes the whole repository rather than a single archive id.
RepoFS::start(ctx.repo, "./hi");
}

View File

@@ -103,7 +103,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files
return fullpath.u8string();
}
if (file.fileType == File::Type::Symlink) {
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks[0]));
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks.at(0)));
std::filesystem::create_symlink(std::filesystem::u8path(std::string{dest.data.begin(), dest.data.end()}), fullpath);
callback(0, 0, 1);
return fullpath.u8string();
@@ -113,7 +113,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files
for (const auto cid: file.chunks) {
if (Signals::shouldQuit) throw Exception("Quitting!");
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid));
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid.second));
if (!c.data.empty()) {
ostream.rdbuf()->sputn(c.data.data(), c.data.size());
callback(c.data.size(), 0, 0);

View File

@@ -188,7 +188,7 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) {
auto contents = File::getFileContents(orig);
Chunk c(ctx.repo->getId(), SHA::calculate(contents), contents);
File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.SHA, {c.id}, File::getFileType(orig));
File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.SHA, {{0, c.id}}, File::getFileType(orig));
ctx.repo->putObject(c);
ctx.repo->putObject(f);
return f.id;
@@ -202,7 +202,7 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
SHA fileHash;
std::vector<Object::idType> fileChunks;
std::map<size_t, Object::idType> fileChunks;
unsigned long long size = 0;
for (auto chunkp: *chunker) {
@@ -210,7 +210,6 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
if (Signals::shouldQuit) break;
Object::idType chunkId;
size += chunkp.second.size();
if (ctx.repo->getConfig().getStr("dedup") == "on" && ctx.repo->exists(Object::ObjectType::Chunk, chunkp.first)) {
/// If the chunk already exists, reuse it
chunkId = ctx.repo->getObjectId(Object::ObjectType::Chunk, chunkp.first);
@@ -223,7 +222,8 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
ctx.repo->putObject(c);
}
fileHash.feedData(chunkp.second);
fileChunks.emplace_back(chunkId);
fileChunks.emplace(size, chunkId);
size += chunkp.second.size();
}
/// We might have exited in the loop before, so we don't save an incomplete file

View File

@@ -5,6 +5,8 @@
#ifndef BACKUP_REPOFS_H
#define BACKUP_REPOFS_H
#define FUSE_USE_VERSION 26
#include <thread>
#include "Repository.h"
@@ -20,25 +22,12 @@ struct DirEntry {
/// Holder for the state shared with the FUSE callbacks (directory tree and repository pointer).
// NOTE(review): this declaration appears to come from a diff rendered without +/- markers: it lists both
// an instance API (constructor, workerFn, non-virtual destructor, private members) and a static API
// (start, pure-virtual destructor). Two destructors cannot coexist, so one set is presumably the removed
// version — confirm against the repository.
class RepoFS {
public:
/// Instance constructor taking a single archive id and a mount path.
RepoFS(Repository *repo, Object::idType archiveId, std::string path);
/// Static entry point taking the repository and the mount path.
static void start(Repository *repo, std::string path);
// Copying and assignment are disabled.
RepoFS &operator=(RepoFS rhs) = delete;
RepoFS(const RepoFS &orig) = delete;
~RepoFS();
/// Runs the FUSE main loop for the instance-based variant.
void workerFn();
/// Root of the in-memory directory tree; static so the free-function FUSE callbacks can reach it.
static inline DirEntry root;
/// Repository the FUSE callbacks read objects from.
static inline Repository *repo;
private:
std::atomic<bool> stop = false;///< Stop flag
Archive archive;
std::string path;
// std::thread thread;///< Worker thread
// Pure-virtual destructor: with this declaration the class cannot be instantiated.
virtual ~RepoFS() = 0;
};

View File

@@ -2,50 +2,18 @@
// Created by Stepan Usatiuk on 07.06.2023.
//
#include "../includes/RepoFS.h"
#define FUSE_USE_VERSION 26
#include <errno.h>
#include <cerrno>
#include <cstring>
#include <fcntl.h>
#include <fuse.h>
#include <iostream>
#include <stdio.h>
#include <string.h>
#include "Serialize.h"
#include "objects/Chunk.h"
/// Deserializes the archive `archiveId`, stores the mount path, publishes the repository pointer in
/// RepoFS::repo and fills RepoFS::root with one entry per file of every archive in the repository.
// NOTE(review): the same tree-building loop appears verbatim in RepoFS::start further down; this
// constructor is presumably the removed (pre-commit) variant — confirm against the repository.
RepoFS::RepoFS(Repository *repon, Object::idType archiveId, std::string path) : archive(Serialize::deserialize<Archive>(repon->getObject(archiveId))), path(std::move(path)) {
RepoFS::repo = repon;
auto ars = repo->getObjects(Object::ObjectType::Archive);
for (auto const &r: ars) {
auto a = Serialize::deserialize<Archive>(repon->getObject(r.second));
for (auto const &f: a.files) {
auto file = Serialize::deserialize<File>(repo->getObject(f));
// This local `path` (the file's path inside the archive) shadows the constructor parameter of the same name.
auto path = std::filesystem::u8path(file.name);
// Top-level entry for the archive, keyed by r.first and labelled with the archive name.
DirEntry *entry = &(root.children[r.first]);
entry->isFakeDir = true;
entry->name = a.name;
// Walk the path component by component; operator[] creates missing intermediate entries.
for (auto const &subp: path) {
entry = &entry->children[subp];
}
// The final entry carries the File object and is named after the last path component.
entry->file.emplace(file);
entry->name = std::filesystem::u8path(file.name).filename().u8string();
}
}
// thread = std::thread(&RepoFS::workerFn, this);
}
/// Destructor; currently does nothing because the stop-flag/join logic below is commented out.
RepoFS::~RepoFS() {
// stop = true;
// thread.join();
}
static const char *hello_str = "Hello World!\n";
static const char *hello_path = "/hello";
DirEntry *getf(std::string path) {
auto p = std::filesystem::relative(std::filesystem::u8path(path), "/");
DirEntry *entry = &RepoFS::root;
@@ -127,18 +95,27 @@ static int rfsRead(const char *path, char *buf, size_t size, off_t offset,
entry = getf(path);
} catch (...) { return -ENOENT; }
std::vector<char> data;
for (auto const &id: entry->file->chunks) {
auto ch = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(id));
data.insert(data.end(), ch.data.begin(), ch.data.end());
}
len = data.size();
len = entry->file->bytes;
if (offset < len) {
if (offset + size > len)
size = len - offset;
memcpy(buf, data.data() + offset, size);
auto curchunk = entry->file->chunks.upper_bound(offset);
--curchunk;
if (curchunk == entry->file->chunks.end()) {
std::cerr << "OOOOOPS" << std::endl;
}
size_t curInBuf = 0;
size_t curInChunk = offset - curchunk->first;
while (curInBuf < size) {
auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(curchunk->second));
size_t read = std::min((size_t) chunk.length - curInChunk, size - curInBuf);
memcpy(buf + curInBuf, chunk.data.data() + curInChunk, read);
curInBuf += read;
curInChunk = 0;
++curchunk;
}
} else
size = 0;
@@ -152,10 +129,27 @@ static struct fuse_operations rfsOps = {
.read = rfsRead,
};
void RepoFS::workerFn() {
int argc = 3;
char *argv[] = {"", "-d", const_cast<char *>(path.c_str())};
/// Builds the in-memory directory tree for every archive in the repository and runs the FUSE main loop.
///
/// Each archive that contains at least one file gets a synthetic directory under RepoFS::root
/// (keyed by the archive's key in getObjects(), labelled with the archive name); every file of the
/// archive is placed below it following the components of its stored name.
///
/// \param repo Repository to serve; also published through RepoFS::repo for the FUSE callbacks.
/// \param path Mount point passed to fuse_main as the last command-line argument.
///
/// The value returned by fuse_main is printed to stdout.
void RepoFS::start(Repository *repo, std::string path) {
    RepoFS::repo = repo;

    auto archives = repo->getObjects(Object::ObjectType::Archive);
    for (auto const &archiveRef: archives) {
        auto archive = Serialize::deserialize<Archive>(repo->getObject(archiveRef.second));
        for (auto const &fileId: archive.files) {
            auto file = Serialize::deserialize<File>(repo->getObject(fileId));
            // Distinct name so the mount-point parameter `path` is not shadowed.
            auto filePath = std::filesystem::u8path(file.name);

            // Looked up per file on purpose: an archive without files gets no directory entry.
            DirEntry *entry = &(root.children[archiveRef.first]);
            entry->isFakeDir = true;
            entry->name = archive.name;

            // operator[] creates the intermediate directory entries on demand.
            for (auto const &component: filePath) {
                entry = &entry->children[component];
            }
            entry->file.emplace(file);
            entry->name = filePath.filename().u8string();
        }
    }

    // fuse_main takes a mutable `char *argv[]`; string literals cannot legally initialise `char *`
    // since C++11, so every argument lives in its own writable buffer.
    char programName[] = "";
    char debugFlag[] = "-d";
    char singleThreadFlag[] = "-s";
    char foregroundFlag[] = "-f";
    // Trailing nullptr follows the usual argv convention and is not counted in argc.
    char *argv[] = {programName, debugFlag, singleThreadFlag, foregroundFlag, path.data(), nullptr};
    int argc = static_cast<int>(sizeof(argv) / sizeof(argv[0])) - 1;

    std::cout << static_cast<int>(fuse_main(argc, argv, &rfsOps, nullptr));
}

View File

@@ -26,7 +26,7 @@ public:
static inline const std::unordered_map<Type, std::string> TypeToStr{{Type::Normal, "normal"}, {Type::Symlink, "symlink"}, {Type::Directory, "directory"}};
File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector<idType> chunks, Type fileType);
File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::map<size_t, idType> chunks, Type fileType);
/// Deserialization constructor
File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
@@ -69,7 +69,7 @@ public:
/// List of the chunks in file
/// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path
const std::vector<idType> chunks;
const std::map<size_t, idType> chunks;
};

View File

@@ -11,8 +11,8 @@
#include "../../../utils/includes/Exception.h"
#include "../../includes/Serialize.h"
File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector<idType> chunks, Type fileType)
: Object(id, ObjectType::File), name(name), bytes(bytes), mtime(mtime), SHA(SHA), fileType(fileType), chunks(chunks) {}
/// Constructs a File object from already-known field values.
///
/// \param id       Object id, forwarded to the Object base together with ObjectType::File.
/// \param name     Stored name of the file.
/// \param bytes    Value for the `bytes` member.
/// \param mtime    Value for the `mtime` member.
/// \param SHA      Hash string for the file.
/// \param chunks   Map of the file's chunk ids.
/// \param fileType Kind of file (normal, symlink, directory).
///
/// The by-value string and map parameters are sinks, so all of them are moved into the members
/// instead of being copied a second time.
File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::map<size_t, idType> chunks, Type fileType)
    : Object(id, ObjectType::File), name(std::move(name)), bytes(bytes), mtime(mtime), SHA(std::move(SHA)), fileType(fileType), chunks(std::move(chunks)) {}
File::File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
: Object(in, end),

View File

@@ -7,7 +7,7 @@
#include "../../includes/Serialize.h"
/// Loads the File object `fileId` from `repo` and queues the ids of its chunks in file order.
// NOTE(review): the two loops below look like the removed and the added line of a diff rendered
// without +/- markers; only one of them is presumably live — confirm against the repository.
FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId) : repo(repo), file(Serialize::deserialize<File>(repo->getObject(fileId))), chunksQueue() {
// Variant for a plain sequence of chunk ids: each element is the id itself.
for (auto const &id: file.chunks) chunksQueue.emplace(id);
// Variant for File::chunks declared as std::map<size_t, idType>: each element is a (key, chunk id) pair, so only `.second` is queued.
for (auto const &id: file.chunks) chunksQueue.emplace(id.second);
};
int FileBuffer::sync() {