Compare commits

1 Commits

Author SHA1 Message Date
10d570f3ea Update cmake.yml
Fix Asan crash
2024-03-18 23:17:58 +01:00
24 changed files with 56 additions and 253 deletions

View File

@@ -19,10 +19,17 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v4
- run: apt-get update && apt-get install -y sudo - run: apt-get update && apt-get install -y sudo
if: env.ACT=='true' if: env.ACT=='true'
- name: Fix kernel mmap rnd bits
# ASan in LLVM 14, as shipped with Ubuntu 22.04, is incompatible with the
# high-entropy ASLR of the much newer kernels that GitHub runners use,
# which leads to random crashes: https://reviews.llvm.org/D148280
run: sudo sysctl vm.mmap_rnd_bits=28
if: env.ACT!='true'
- name: install everything - name: install everything
run: sudo apt-get update && sudo apt-get install -y fuse libfuse-dev cmake build-essential gcc g++ libssl-dev zlib1g-dev run: sudo apt-get update && sudo apt-get install -y fuse libfuse-dev cmake build-essential gcc g++ libssl-dev zlib1g-dev

View File

@@ -1,16 +1,6 @@
cmake_minimum_required(VERSION 3.18) cmake_minimum_required(VERSION 3.18)
add_library(commands add_library(commands srcs/CommandDiff.cpp srcs/CommandList.cpp srcs/CommandListFiles.cpp srcs/CommandRestore.cpp srcs/CommandRun.cpp srcs/CommandsCommon.cpp srcs/Diff.cpp srcs/CommandMount.cpp)
srcs/CommandDiff.cpp
srcs/CommandList.cpp
srcs/CommandListFiles.cpp
srcs/CommandRestore.cpp
srcs/CommandRun.cpp
srcs/CommandsCommon.cpp
srcs/Diff.cpp
srcs/CommandMount.cpp
srcs/CommandDelete.cpp
)
target_include_directories(commands PUBLIC includes) target_include_directories(commands PUBLIC includes)

View File

@@ -1,18 +0,0 @@
//
// Created by Stepan Usatiuk on 06.08.2023.
//
#ifndef BACKUP_COMMANDDELETE_H
#define BACKUP_COMMANDDELETE_H
#include "Command.h"
/// Command registered under the name "delete"
class CommandDelete : public Command {
public:
    /// Name used to select this command
    static constexpr std::string_view name{"delete"};

    CommandDelete();

    /// Runs the command against the repository held by \p ctx
    /// \param ctx Context of the invocation
    void run(Context ctx) override;
};
#endif//BACKUP_COMMANDDELETE_H

View File

@@ -1,15 +0,0 @@
//
// Created by Stepan Usatiuk on 06.08.2023.
//
#include "CommandDelete.h"
#include "CommandsCommon.h"
using namespace CommandsCommon;
/// Nothing to set up: the command keeps no state of its own
CommandDelete::CommandDelete() = default;
/// Asks the repository to delete the object whose id is stored in the "aid" config option
/// \param ctx Context providing the repository and its config
void CommandDelete::run(Context ctx) {
    const auto targetId = static_cast<unsigned long long>(ctx.repo->getConfig().getInt("aid"));
    ctx.repo->deleteObjects({targetId});
}

View File

@@ -38,11 +38,11 @@ void CommandDiff::run(Context ctx) {
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads")
: std::thread::hardware_concurrency()); : std::thread::hardware_concurrency());
auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive1)); auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1));
std::mutex filesLock; std::mutex filesLock;
std::map<std::filesystem::path, File> files;///< Files in the first archive std::map<std::filesystem::path, File> files;///< Files in the first archive
for (auto id: archiveO1.files) { for (auto id: archiveO1.files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id)); auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
auto path = std::filesystem::path(file.name); auto path = std::filesystem::path(file.name);
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path)) files.emplace(file.getKey(), std::move(file)); if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path)) files.emplace(file.getKey(), std::move(file));
} }
@@ -76,13 +76,13 @@ void CommandDiff::run(Context ctx) {
/// If a second archive is given, run the task for each of its files, otherwise use the "from" config option /// If a second archive is given, run the task for each of its files, otherwise use the "from" config option
if (ctx.repo->getConfig().exists("aid2")) { if (ctx.repo->getConfig().exists("aid2")) {
archiveO2.emplace( archiveO2.emplace(
Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2")))); Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
threadPool.push([&]() { threadPool.push([&]() {
for (auto id: archiveO2.value().files) { for (auto id: archiveO2.value().files) {
/// Exit when asked to /// Exit when asked to
if (Signals::shouldQuit) throw Exception("Quitting"); if (Signals::shouldQuit) throw Exception("Quitting");
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id)); auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::path(file.name))) if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::path(file.name)))
threadPool.push([&, file]() { processFile(ComparableFile{file, ctx.repo}); }); threadPool.push([&, file]() { processFile(ComparableFile{file, ctx.repo}); });
if (Signals::shouldQuit) break; if (Signals::shouldQuit) break;
@@ -111,10 +111,10 @@ void CommandDiff::run(Context ctx) {
if (ctx.repo->getConfig().exists("aid2")) { if (ctx.repo->getConfig().exists("aid2")) {
archiveO2.emplace( archiveO2.emplace(
Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2")))); Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
std::map<std::filesystem::path, File> files2;///< Files in the first archive std::map<std::filesystem::path, File> files2;///< Files in the first archive
for (auto id: archiveO2->files) { for (auto id: archiveO2->files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id)); auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
auto path = std::filesystem::path(file.name); auto path = std::filesystem::path(file.name);
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path)) if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
files2.emplace(file.getKey(), std::move(file)); files2.emplace(file.getKey(), std::move(file));

View File

@@ -13,9 +13,9 @@
CommandListFiles::CommandListFiles() : Command() {} CommandListFiles::CommandListFiles() : Command() {}
void CommandListFiles::run(Context ctx) { void CommandListFiles::run(Context ctx) {
auto archive = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid"))); auto archive = Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid")));
for (auto const &fid: archive.files) { for (auto const &fid: archive.files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid)); auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
std::cout << "Name: " << file.name << " type: " << File::TypeToStr.at(file.fileType) std::cout << "Name: " << file.name << " type: " << File::TypeToStr.at(file.fileType)
<< " size: " << BytesFormatter::formatStr(file.bytes) << std::endl; << " size: " << BytesFormatter::formatStr(file.bytes) << std::endl;
} }

View File

@@ -68,14 +68,14 @@ void CommandRestore::run(Context ctx) {
/// Add the main restore task /// Add the main restore task
threadPool.push([&, this]() { threadPool.push([&, this]() {
/// Get the archive and its file IDs /// Get the archive and its file IDs
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive)); auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archive));
std::vector<Object::idType> files = archiveO.files; std::vector<Object::idType> files = archiveO.files;
/// For each file... /// For each file...
for (const auto fid: files) { for (const auto fid: files) {
/// Stop when asked to /// Stop when asked to
if (Signals::shouldQuit) break; if (Signals::shouldQuit) break;
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid)); auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
filesToRestoreCount++; filesToRestoreCount++;
bytesToRestore += file.bytes; bytesToRestore += file.bytes;
/// Spawn a restore task /// Spawn a restore task
@@ -105,7 +105,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files
return fullpath.string(); return fullpath.string();
} }
if (file.fileType == File::Type::Symlink) { if (file.fileType == File::Type::Symlink) {
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(file.chunks.at(0))); auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks.at(0)));
std::filesystem::create_symlink(std::filesystem::path(std::string{dest.data.begin(), dest.data.end()}), std::filesystem::create_symlink(std::filesystem::path(std::string{dest.data.begin(), dest.data.end()}),
fullpath); fullpath);
callback(0, 0, 1); callback(0, 0, 1);
@@ -116,7 +116,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files
for (const auto cid: file.chunks) { for (const auto cid: file.chunks) {
if (Signals::shouldQuit) throw Exception("Quitting!"); if (Signals::shouldQuit) throw Exception("Quitting!");
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(cid.second)); Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid.second));
if (!c.data.empty()) { if (!c.data.empty()) {
ostream.rdbuf()->sputn(c.data.data(), c.data.size()); ostream.rdbuf()->sputn(c.data.data(), c.data.size());
callback(c.data.size(), 0, 0); callback(c.data.size(), 0, 0);

View File

@@ -89,7 +89,7 @@ void CommandRun::run(Context ctx) {
auto relPath = p.lexically_relative(from).string(); auto relPath = p.lexically_relative(from).string();
if (ctx.repo->exists(Object::ObjectType::File, relPath) != 0) { if (ctx.repo->exists(Object::ObjectType::File, relPath) != 0) {
File repoFile = Serialize::deserialize<File>(ctx.repo->getObjectRaw(Object::ObjectType::File, relPath)); File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(Object::ObjectType::File, relPath));
if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) { if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) {
addFile(repoFile.id); addFile(repoFile.id);
progress.print("Skipped: " + relPath, 1); progress.print("Skipped: " + relPath, 1);

View File

@@ -12,11 +12,11 @@ FilterContainer &FilterContainer::addFilter(std::unique_ptr<Filter> &&f) {
} }
std::vector<char> FilterContainer::filterWrite(std::vector<char> from) const { std::vector<char> FilterContainer::filterWrite(std::vector<char> from) const {
for (auto const &f: filters) from = std::move(f->filterWrite(std::move(from))); for (auto const &f: filters) from = f->filterWrite(std::move(from));
return from; return from;
} }
std::vector<char> FilterContainer::filterRead(std::vector<char> from) const { std::vector<char> FilterContainer::filterRead(std::vector<char> from) const {
for (auto f = filters.crbegin(); f != filters.crend(); f++) from = std::move((*f)->filterRead(std::move(from))); for (auto f = filters.crbegin(); f != filters.crend(); f++) from = (*f)->filterRead(std::move(from));
return from; return from;
} }

View File

@@ -101,7 +101,7 @@ static int rfsRead(const char *path, char *buf, size_t size, off_t offset, struc
size_t curInBuf = 0; size_t curInBuf = 0;
size_t curInChunk = offset - curchunk->first; size_t curInChunk = offset - curchunk->first;
while (curInBuf < size) { while (curInBuf < size) {
auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(curchunk->second)); auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(curchunk->second));
size_t read = std::min((size_t) chunk.length - curInChunk, size - curInBuf); size_t read = std::min((size_t) chunk.length - curInChunk, size - curInBuf);
memcpy(buf + curInBuf, chunk.data.data() + curInChunk, read); memcpy(buf + curInBuf, chunk.data.data() + curInChunk, read);
curInBuf += read; curInBuf += read;
@@ -121,7 +121,7 @@ static int rfsReadlink(const char *path, char *buf, size_t size) {
} catch (...) { return -ENOENT; } } catch (...) { return -ENOENT; }
if (entry->file->fileType != File::Type::Symlink) return -ENOENT; if (entry->file->fileType != File::Type::Symlink) return -ENOENT;
auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(entry->file->chunks.at(0))); auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(entry->file->chunks.at(0)));
strncpy(buf, dst.data.data(), std::min(dst.data.size(), size)); strncpy(buf, dst.data.data(), std::min(dst.data.size(), size));
return 0; return 0;
@@ -139,9 +139,9 @@ void RepoFS::start(Repository *repo, std::string path) {
RepoFS::repo = repo; RepoFS::repo = repo;
auto ars = repo->getObjects(Object::ObjectType::Archive); auto ars = repo->getObjects(Object::ObjectType::Archive);
for (auto const &r: ars) { for (auto const &r: ars) {
auto a = Serialize::deserialize<Archive>(repo->getObjectRaw(r.second)); auto a = Serialize::deserialize<Archive>(repo->getObject(r.second));
for (auto const &f: a.files) { for (auto const &f: a.files) {
auto file = Serialize::deserialize<File>(repo->getObjectRaw(f)); auto file = Serialize::deserialize<File>(repo->getObject(f));
auto path = std::filesystem::path(file.name); auto path = std::filesystem::path(file.name);
DirEntry *entry = root->children[std::to_string(a.id)].get() DirEntry *entry = root->children[std::to_string(a.id)].get()
? root->children[std::to_string(a.id)].get() ? root->children[std::to_string(a.id)].get()

View File

@@ -3,7 +3,6 @@
#include "BytesFormatter.h" #include "BytesFormatter.h"
#include "Command.h" #include "Command.h"
#include "CommandDelete.h"
#include "CommandDiff.h" #include "CommandDiff.h"
#include "CommandList.h" #include "CommandList.h"
#include "CommandListFiles.h" #include "CommandListFiles.h"
@@ -110,7 +109,6 @@ int main(int argc, char *argv[]) {
commands.emplace(CommandListFiles::name, std::make_unique<CommandListFiles>()); commands.emplace(CommandListFiles::name, std::make_unique<CommandListFiles>());
commands.emplace(CommandList::name, std::make_unique<CommandList>()); commands.emplace(CommandList::name, std::make_unique<CommandList>());
commands.emplace(CommandMount::name, std::make_unique<CommandMount>()); commands.emplace(CommandMount::name, std::make_unique<CommandMount>());
commands.emplace(CommandDelete::name, std::make_unique<CommandDelete>());
if (commands.count(opt) == 0) { if (commands.count(opt) == 0) {
std::cerr << "Unknown argument" << std::endl; std::cerr << "Unknown argument" << std::endl;

View File

@@ -35,11 +35,11 @@ public:
bool init() override; bool init() override;
bool flush() override; bool flush() override;
std::vector<char> getObjectRaw(Object::idType id) const override; std::vector<char> getObject(Object::idType id) const override;
bool putObject(const Object &obj) override; bool putObject(const Object &obj) override;
bool deleteObjects(const std::vector<Object::idType> &objs) override; bool deleteObject(const Object &obj) override;
std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const override; std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override;
Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override; Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override;
std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override; std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override;
@@ -104,8 +104,6 @@ private:
unsigned long long maxFileId = 1; ///< Largest ID of object storage file unsigned long long maxFileId = 1; ///< Largest ID of object storage file
std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem
std::unordered_map<Object::idType, std::set<Object::idType>>
fileToObjs;///< Used to locate Object%s in the filesystem
std::mutex writeCacheLock; ///< Write cache lock std::mutex writeCacheLock; ///< Write cache lock
std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data
@@ -118,12 +116,9 @@ private:
/// \param lockW Write cache lock /// \param lockW Write cache lock
void flushWriteCache(std::unique_lock<std::mutex> &&lockW); void flushWriteCache(std::unique_lock<std::mutex> &&lockW);
Object::idType largestUnusedId = 1; ///< Largest available objectID Object::idType largestUnusedId = 1;///< Largest available objectID
std::vector<Object::idType> unusedIds;///< Vector of unused IDs
std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>> std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>>
keyIndex;///< Maps Object%'s keys to their ID's keyIndex;///< Maps Object%'s keys to their ID's
std::unordered_map<Object::idType, uint64_t> refCounts;///< Count of references to an object per its id
}; };

View File

@@ -31,16 +31,9 @@ public:
/// All derived objects should implement this method /// All derived objects should implement this method
virtual std::string getKey() const = 0; virtual std::string getKey() const = 0;
/// Returns the ids of the objects that this object refers to
virtual const std::vector<idType> &getRefs() const;
const idType id; ///< Unique numerical of the object const idType id; ///< Unique numerical of the object
const ObjectType type;///< Type of the object const ObjectType type;///< Type of the object
static std::unique_ptr<Object> deserialize(const std::vector<char> &src);
static std::unique_ptr<Object> deserialize(std::vector<char>::const_iterator &in,
const std::vector<char>::const_iterator &end);
protected: protected:
/// Default constructor /// Default constructor
/// \param id Object ID /// \param id Object ID

View File

@@ -45,13 +45,7 @@ public:
/// \param id ID of object to return /// \param id ID of object to return
/// \return Serialized object /// \return Serialized object
/// \throws Exception on any error or if object doesn't exist /// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObjectRaw(Object::idType id) const = 0; virtual std::vector<char> getObject(Object::idType id) const = 0;
/// Returns the Object with id \p id
/// \param id ID of object to return
/// \return Deserialized object
/// \throws Exception on any error or if object doesn't exist
std::unique_ptr<Object> getObject(Object::idType id) const;
/// Adds the Object \p obj to the Repository /// Adds the Object \p obj to the Repository
/// \param obj Constant reference to the object /// \param obj Constant reference to the object
@@ -60,17 +54,17 @@ public:
virtual bool putObject(const Object &obj) = 0; virtual bool putObject(const Object &obj) = 0;
/// Deletes Object \p obj from the Repository /// Deletes Object \p obj from the Repository
/// \param obj Constant reference to the vector with ids of objects to delete /// \param obj Constant reference to the object
/// \return True if successful, False if it didn't exist /// \return True if successful, False if it didn't exist
/// \throws Exception on any error /// \throws Exception on any error
virtual bool deleteObjects(const std::vector<Object::idType> &objs) = 0; virtual bool deleteObject(const Object &obj) = 0;
/// Returns the Object of type \p type and with key \p key /// Returns the Object of type \p type and with key \p key
/// \param type Type of the object /// \param type Type of the object
/// \param key Constant reference to the key of the object /// \param key Constant reference to the key of the object
/// \return Serialized object /// \return Serialized object
/// \throws Exception on any error or if object doesn't exist /// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const = 0; virtual std::vector<char> getObject(Object::ObjectType type, const std::string &key) const = 0;
/// Returns the id of an Object of type \p type and with key \p key /// Returns the id of an Object of type \p type and with key \p key
/// \param type Type of the object /// \param type Type of the object

View File

@@ -10,7 +10,7 @@
#include "../Object.h" #include "../Object.h"
/// Object representing a backup /// Object representing a backup
class Archive final : public Object { class Archive : public Object {
public: public:
Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files); Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files);
@@ -22,9 +22,6 @@ public:
/// Returns the name of the archive /// Returns the name of the archive
std::string getKey() const override; std::string getKey() const override;
/// Returns the files in this archive
const std::vector<Object::idType> &getRefs() const override;
const std::string name; ///< Archive name const std::string name; ///< Archive name
const unsigned long long mtime; ///< Time of creation const unsigned long long mtime; ///< Time of creation
const std::vector<idType> files;///< List of ids of File objects in the Archive const std::vector<idType> files;///< List of ids of File objects in the Archive

View File

@@ -11,7 +11,7 @@
#include "../Object.h" #include "../Object.h"
/// Object representing a part of a File /// Object representing a part of a File
class Chunk final : public Object { class Chunk : public Object {
public: public:
Chunk(idType id, std::string, std::vector<char> data); Chunk(idType id, std::string, std::vector<char> data);

View File

@@ -15,7 +15,7 @@
#include "../Object.h" #include "../Object.h"
/// Object representing a saved file /// Object representing a saved file
class File final : public Object { class File : public Object {
public: public:
enum class Type { Normal, Symlink, Directory, END }; enum class Type { Normal, Symlink, Directory, END };
@@ -68,12 +68,6 @@ public:
/// List of the chunks in file /// List of the chunks in file
/// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path /// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path
const std::map<size_t, idType> chunks; const std::map<size_t, idType> chunks;
const std::vector<idType> &getRefs() const override;
private:
void makeChunksList() const;
mutable std::optional<std::vector<idType>> chunksList{std::nullopt};
}; };

View File

@@ -5,10 +5,8 @@
#include "FileRepository.h" #include "FileRepository.h"
#include <exception> #include <exception>
#include <iostream>
#include <iterator> #include <iterator>
#include <mutex> #include <mutex>
#include <queue>
#include "CheckFilter.h" #include "CheckFilter.h"
#include "FilterFactory.h" #include "FilterFactory.h"
@@ -47,9 +45,6 @@ bool FileRepository::open() {
std::tie(keyIndex, largestUnusedId) = std::tie(keyIndex, largestUnusedId) =
Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>( Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>(
filters.filterRead(readFile(root / "index"))); filters.filterRead(readFile(root / "index")));
refCounts = Serialize::deserialize<decltype(refCounts)>(filters.filterRead(readFile(root / "refcounts")));
unusedIds = Serialize::deserialize<decltype(unusedIds)>(filters.filterRead(readFile(root / "unusedIds")));
fileToObjs = Serialize::deserialize<decltype(fileToObjs)>(filters.filterRead(readFile(root / "fileToObjs")));
} catch (const std::exception &e) { } catch (const std::exception &e) {
ready = false; ready = false;
throw; throw;
@@ -84,13 +79,10 @@ FileRepository::~FileRepository() {
writeFile(root / "offsets", filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex)))); writeFile(root / "offsets", filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex))));
writeFile(root / "index", filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId)))); writeFile(root / "index", filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId))));
writeFile(root / "unusedIds", filters.filterWrite(Serialize::serialize(unusedIds)));
writeFile(root / "refcounts", filters.filterWrite(Serialize::serialize(refCounts)));
writeFile(root / "fileToObjs", filters.filterWrite(Serialize::serialize(fileToObjs)));
} }
} }
std::vector<char> FileRepository::getObjectRaw(Object::idType id) const { std::vector<char> FileRepository::getObject(Object::idType id) const {
if (!ready) throw Exception("Tried working with uninitialized repo!"); if (!ready) throw Exception("Tried working with uninitialized repo!");
std::unique_lock lock(repoLock); std::unique_lock lock(repoLock);
@@ -141,7 +133,6 @@ void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) {
{ {
std::lock_guard lockI(repoLock); std::lock_guard lockI(repoLock);
offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size())); offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size()));
fileToObjs[currentFileId].emplace(i.first);
} }
offset += i.second.size(); offset += i.second.size();
ofstream.rdbuf()->sputn(i.second.data(), i.second.size()); ofstream.rdbuf()->sputn(i.second.data(), i.second.size());
@@ -153,81 +144,14 @@ bool FileRepository::putObject(const Object &obj) {
{ {
std::lock_guard lock(repoLock); std::lock_guard lock(repoLock);
keyIndex[obj.type][obj.getKey()] = obj.id; keyIndex[obj.type][obj.getKey()] = obj.id;
for (auto const &i: obj.getRefs()) refCounts[i]++;
} }
writeObject(obj); writeObject(obj);
return true; return true;
} }
bool FileRepository::deleteObjects(const std::vector<Object::idType> &objs) { bool FileRepository::deleteObject(const Object &obj) {
if (!ready) throw Exception("Tried working with uninitialized repo!"); if (!ready) throw Exception("Tried working with uninitialized repo!");
throw Exception("Deletion not implemented!");
std::queue<Object::idType> toVisit;
std::set<Object::idType> toDelete;
for (auto const &o: objs) {
toVisit.emplace(o);
toDelete.emplace(o);
}
std::cout << "Scanning for objects" << std::endl;
while (!toVisit.empty()) {
auto o = toVisit.back();
toVisit.pop();
auto obj = getObject(o);
for (const auto &id: obj->getRefs()) {
std::unique_lock lock(repoLock);
refCounts[id]--;
if (refCounts.at(id) == 0) {
toDelete.emplace(id);
toVisit.emplace(id);
refCounts.erase(id);
}
}
}
std::cout << "Found " << toDelete.size() << " to delete " << std::endl;
std::unordered_map<uint64_t, Object::idType> fileToObj;
std::set<uint64_t> touchedFiles;
for (auto const &id: toDelete) {
fileToObj.emplace(offsetIndex.at(id).fileId, id);
touchedFiles.emplace(offsetIndex.at(id).fileId);
}
std::cout << "Will rewrite " << touchedFiles.size() << " files" << std::endl;
for (auto const &f: touchedFiles) {
std::cout << "Rewriting file " << f << std::endl;
const auto &objs = fileToObjs.at(f);
std::vector<std::unique_ptr<Object>> objects;
for (auto const &o: objs) {
auto obj = getObject(o);
{
std::unique_lock lock(repoLock);
offsetIndex.erase(o);
}
if (toDelete.find(o) == toDelete.end()) putObject(*obj);
}
{
std::unique_lock lock(repoLock);
fileToObjs.erase(f);
}
std::filesystem::remove(root / std::to_string(f));
}
{
std::unique_lock lock(repoLock);
for (auto const &id: toDelete) {
unusedIds.emplace_back(id);
// FIXME: this is a bit inefficient
for (auto &m: keyIndex) erase_if(m.second, [&](const auto &t) { return toDelete.contains(t.second); });
}
}
return true;
} }
std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset, std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset,
@@ -264,8 +188,8 @@ bool FileRepository::writeFile(const std::filesystem::path &file, const std::vec
return true; return true;
} }
std::vector<char> FileRepository::getObjectRaw(Object::ObjectType type, const std::string &key) const { std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const {
return getObjectRaw(getObjectId(type, key)); return getObject(getObjectId(type, key));
} }
Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const { Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const {
@@ -290,11 +214,6 @@ bool FileRepository::exists(Object::ObjectType type, const std::string &key) con
Object::idType FileRepository::getId() { Object::idType FileRepository::getId() {
std::lock_guard lock(repoLock); std::lock_guard lock(repoLock);
if (!unusedIds.empty()) {
auto ret = unusedIds.back();
unusedIds.pop_back();
return ret;
}
return largestUnusedId++; return largestUnusedId++;
} }

View File

@@ -6,10 +6,6 @@
#include "Serialize.h" #include "Serialize.h"
#include "objects/Archive.h"
#include "objects/Chunk.h"
#include "objects/File.h"
Object::Object(idType id, ObjectType type) : id(id), type(type) {} Object::Object(idType id, ObjectType type) : id(id), type(type) {}
Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end) Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
@@ -21,32 +17,3 @@ void Object::serialize(std::vector<char> &out) const {
} }
Object::~Object() = default; Object::~Object() = default;
/// Shared empty list handed out by the default getRefs()
static std::vector<Object::idType> emptyRef{};

/// Default implementation: returns a reference to an empty list of ids
const std::vector<Object::idType> &Object::getRefs() const {
    return emptyRef;
}
/// Reconstructs a concrete Object from its serialized form
/// \param in  Iterator to the first byte of the serialized object; it is handed to the id and type readers
/// \param end Iterator marking the end of the readable data
/// \return Owning pointer to the deserialized Archive, File or Chunk
/// \throws Exception if the stored type is not Archive, File or Chunk
std::unique_ptr<Object> Object::deserialize(std::vector<char>::const_iterator &in,
                                            const std::vector<char>::const_iterator &end) {
    // The concrete deserializers receive an iterator that still points at the start of the object
    auto objectStart = in;
    // The id is read only to reach the type field that follows it
    [[maybe_unused]] const auto id = Serialize::deserialize<idType>(in, end);
    const auto type = Serialize::deserialize<ObjectType>(in, end);
    // NOTE(review): `in` is not updated from objectStart afterwards - confirm that callers do not
    // rely on it pointing past the whole object
    switch (type) {
        case ObjectType::Archive:
            return std::make_unique<Archive>(Serialize::deserialize<Archive>(objectStart, end));
        case ObjectType::File:
            return std::make_unique<File>(Serialize::deserialize<File>(objectStart, end));
        case ObjectType::Chunk:
            return std::make_unique<Chunk>(Serialize::deserialize<Chunk>(objectStart, end));
        case ObjectType::END:
        default:
            // No concrete class exists for these values. The END case used to leave the switch and
            // let the function end without a return value, which is undefined behavior.
            break;
    }
    throw Exception("Bad object!");
}
std::unique_ptr<Object> Object::deserialize(const std::vector<char> &src) {
auto srcIterator = src.cbegin();
return deserialize(srcIterator, src.end());
}

View File

@@ -8,7 +8,3 @@ Repository::~Repository() = default;
Repository::Repository(Config config) : config(std::move(config)) {} Repository::Repository(Config config) : config(std::move(config)) {}
const Config &Repository::getConfig() const { return config; } const Config &Repository::getConfig() const { return config; }
/// Fetches the raw bytes of the object with id \p id and deserializes them
/// \param id ID of the object to return
/// \return Owning pointer to the deserialized object
std::unique_ptr<Object> Repository::getObject(Object::idType id) const {
    const std::vector<char> serialized = getObjectRaw(id);
    return Object::deserialize(serialized);
}

View File

@@ -28,5 +28,3 @@ void Archive::serialize(std::vector<char> &out) const {
} }
std::string Archive::getKey() const { return name; } std::string Archive::getKey() const { return name; }
const std::vector<Object::idType> &Archive::getRefs() const { return files; }

View File

@@ -83,16 +83,3 @@ unsigned long long File::getFileSize(const std::filesystem::path &p) {
else else
return getFileContents(p).size(); return getFileContents(p).size();
} }
/// Fills the cached chunksList with the ids stored in the chunks map, unless it is already filled
void File::makeChunksList() const {
    if (chunksList.has_value()) return;

    auto &ids = chunksList.emplace();
    ids.reserve(chunks.size());
    // Only the mapped ids are kept; the map keys are not part of the list
    for (const auto &entry: chunks) ids.emplace_back(entry.second);
}
/// Returns the ids of the chunks of this file, building the cached list on first use
const std::vector<Object::idType> &File::getRefs() const {
    const bool alreadyBuilt = chunksList.has_value();
    if (!alreadyBuilt) {
        makeChunksList();
    }
    return *chunksList;
}

View File

@@ -7,7 +7,7 @@
#include "Serialize.h" #include "Serialize.h"
FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId) FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId)
: repo(repo), file(Serialize::deserialize<File>(repo->getObjectRaw(fileId))), chunksQueue() { : repo(repo), file(Serialize::deserialize<File>(repo->getObject(fileId))), chunksQueue() {
for (auto const &id: file.chunks) chunksQueue.emplace(id.second); for (auto const &id: file.chunks) chunksQueue.emplace(id.second);
}; };
@@ -35,7 +35,7 @@ int FileBuffer::underflow() {
if (getBuf.empty() || curGetBufPos == getBuf.size()) { if (getBuf.empty() || curGetBufPos == getBuf.size()) {
if (chunksQueue.empty()) return traits_type::eof(); if (chunksQueue.empty()) return traits_type::eof();
else { else {
auto chunk = Serialize::deserialize<Chunk>(repo->getObjectRaw(chunksQueue.front())); auto chunk = Serialize::deserialize<Chunk>(repo->getObject(chunksQueue.front()));
getBuf = chunk.data; getBuf = chunk.data;
chunksQueue.pop(); chunksQueue.pop();
curGetBufPos = 0; curGetBufPos = 0;

View File

@@ -48,8 +48,8 @@ TEST(FileRepository, Deserialize) {
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666); ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666);
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777); ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777);
auto o1o = repo.getObjectRaw(666); auto o1o = repo.getObject(666);
auto o2o = repo.getObjectRaw(777); auto o2o = repo.getObject(777);
auto o1ob = o1o.cbegin(); auto o1ob = o1o.cbegin();
auto o2ob = o2o.cbegin(); auto o2ob = o2o.cbegin();
@@ -126,7 +126,7 @@ TEST(FileRepository, Filters) {
try { try {
auto o1o = repo.getObjectRaw(666); auto o1o = repo.getObject(666);
auto o1ob = o1o.cbegin(); auto o1ob = o1o.cbegin();
Chunk o1(o1ob, o1o.cend()); Chunk o1(o1ob, o1o.cend());
@@ -134,7 +134,7 @@ TEST(FileRepository, Filters) {
} catch (...) {} } catch (...) {}
try { try {
auto o2o = repo.getObjectRaw(777); auto o2o = repo.getObject(777);
auto o2ob = o2o.cbegin(); auto o2ob = o2o.cbegin();
Chunk o2(o2ob, o2o.cend()); Chunk o2(o2ob, o2o.cend());
@@ -163,8 +163,8 @@ TEST(FileRepository, Filters) {
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666); ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666);
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777); ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777);
auto o1o = repo.getObjectRaw(666); auto o1o = repo.getObject(666);
auto o2o = repo.getObjectRaw(777); auto o2o = repo.getObject(777);
auto o1ob = o1o.cbegin(); auto o1ob = o1o.cbegin();
auto o2ob = o2o.cbegin(); auto o2ob = o2o.cbegin();
@@ -192,6 +192,7 @@ TEST(FileRepository, Filters) {
} }
TEST(FileRepository, IDsDisabled) { TEST(FileRepository, IDsDisabled) {
GTEST_SKIP();
Cleaner c({"IDS/testrepo"}); Cleaner c({"IDS/testrepo"});
{ {
Config conf; Config conf;
@@ -223,8 +224,8 @@ TEST(FileRepository, IDsDisabled) {
conf.add("repo", "IDS/testrepo"); conf.add("repo", "IDS/testrepo");
FileRepository repo(conf); FileRepository repo(conf);
repo.open(); repo.open();
auto o1o = repo.getObjectRaw(1); auto o1o = repo.getObject(1);
auto o2o = repo.getObjectRaw(2); auto o2o = repo.getObject(2);
auto o1ob = o1o.cbegin(); auto o1ob = o1o.cbegin();
auto o2ob = o2o.cbegin(); auto o2ob = o2o.cbegin();
@@ -256,7 +257,7 @@ TEST(FileRepository, IDsDisabled) {
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 2); ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 2);
repo.deleteObjects({o1.id}); repo.deleteObject(o1);
} }
{ {
Config conf; Config conf;