mirror of
https://github.com/usatiuk/backup.git
synced 2025-10-26 17:37:47 +01:00
Compare commits
5 Commits
10d570f3ea
...
cleaning
| Author | SHA1 | Date | |
|---|---|---|---|
| 8c1e9fe4bf | |||
| 1dad7d1501 | |||
| 0b2040603e | |||
| 476904b5aa | |||
| 061e164c6a |
9
.github/workflows/cmake.yml
vendored
9
.github/workflows/cmake.yml
vendored
@@ -19,17 +19,10 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- run: apt-get update && apt-get install -y sudo
|
||||
if: env.ACT=='true'
|
||||
|
||||
- name: Fix kernel mmap rnd bits
|
||||
# Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
|
||||
# high-entropy ASLR in much newer kernels that GitHub runners are
|
||||
# using leading to random crashes: https://reviews.llvm.org/D148280
|
||||
run: sudo sysctl vm.mmap_rnd_bits=28
|
||||
if: env.ACT!='true'
|
||||
|
||||
- name: install everything
|
||||
run: sudo apt-get update && sudo apt-get install -y fuse libfuse-dev cmake build-essential gcc g++ libssl-dev zlib1g-dev
|
||||
|
||||
@@ -1,6 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
add_library(commands srcs/CommandDiff.cpp srcs/CommandList.cpp srcs/CommandListFiles.cpp srcs/CommandRestore.cpp srcs/CommandRun.cpp srcs/CommandsCommon.cpp srcs/Diff.cpp srcs/CommandMount.cpp)
|
||||
add_library(commands
|
||||
srcs/CommandDiff.cpp
|
||||
srcs/CommandList.cpp
|
||||
srcs/CommandListFiles.cpp
|
||||
srcs/CommandRestore.cpp
|
||||
srcs/CommandRun.cpp
|
||||
srcs/CommandsCommon.cpp
|
||||
srcs/Diff.cpp
|
||||
srcs/CommandMount.cpp
|
||||
srcs/CommandDelete.cpp
|
||||
)
|
||||
|
||||
target_include_directories(commands PUBLIC includes)
|
||||
|
||||
|
||||
18
src/commands/includes/CommandDelete.h
Normal file
18
src/commands/includes/CommandDelete.h
Normal file
@@ -0,0 +1,18 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 06.08.2023.
|
||||
//
|
||||
|
||||
#ifndef BACKUP_COMMANDDELETE_H
|
||||
#define BACKUP_COMMANDDELETE_H
|
||||
|
||||
#include "Command.h"
|
||||
|
||||
class CommandDelete : public Command {
|
||||
public:
|
||||
CommandDelete();
|
||||
void run(Context ctx) override;
|
||||
static constexpr std::string_view name{"delete"};
|
||||
};
|
||||
|
||||
|
||||
#endif//BACKUP_COMMANDDELETE_H
|
||||
15
src/commands/srcs/CommandDelete.cpp
Normal file
15
src/commands/srcs/CommandDelete.cpp
Normal file
@@ -0,0 +1,15 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 06.08.2023.
|
||||
//
|
||||
|
||||
#include "CommandDelete.h"
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
using namespace CommandsCommon;
|
||||
|
||||
CommandDelete::CommandDelete() {}
|
||||
|
||||
/// Deletes the object whose id is stored in the "aid" config option
/// \param ctx Command context; its repository performs the deletion
void CommandDelete::run(Context ctx) {
    // Read the id once, then hand it over as a single-element list
    const auto targetId = static_cast<unsigned long long>(ctx.repo->getConfig().getInt("aid"));
    ctx.repo->deleteObjects({targetId});
}
|
||||
@@ -38,11 +38,11 @@ void CommandDiff::run(Context ctx) {
|
||||
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads")
|
||||
: std::thread::hardware_concurrency());
|
||||
|
||||
auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1));
|
||||
auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive1));
|
||||
std::mutex filesLock;
|
||||
std::map<std::filesystem::path, File> files;///< Files in the first archive
|
||||
for (auto id: archiveO1.files) {
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id));
|
||||
auto path = std::filesystem::path(file.name);
|
||||
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path)) files.emplace(file.getKey(), std::move(file));
|
||||
}
|
||||
@@ -76,13 +76,13 @@ void CommandDiff::run(Context ctx) {
|
||||
/// If a second archive is given, run the task for each of its files, otherwise use the "from" config option
|
||||
if (ctx.repo->getConfig().exists("aid2")) {
|
||||
archiveO2.emplace(
|
||||
Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
|
||||
Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2"))));
|
||||
|
||||
threadPool.push([&]() {
|
||||
for (auto id: archiveO2.value().files) {
|
||||
/// Exit when asked to
|
||||
if (Signals::shouldQuit) throw Exception("Quitting");
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id));
|
||||
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::path(file.name)))
|
||||
threadPool.push([&, file]() { processFile(ComparableFile{file, ctx.repo}); });
|
||||
if (Signals::shouldQuit) break;
|
||||
@@ -111,10 +111,10 @@ void CommandDiff::run(Context ctx) {
|
||||
|
||||
if (ctx.repo->getConfig().exists("aid2")) {
|
||||
archiveO2.emplace(
|
||||
Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
|
||||
Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2"))));
|
||||
std::map<std::filesystem::path, File> files2;///< Files in the first archive
|
||||
for (auto id: archiveO2->files) {
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id));
|
||||
auto path = std::filesystem::path(file.name);
|
||||
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
|
||||
files2.emplace(file.getKey(), std::move(file));
|
||||
|
||||
@@ -13,9 +13,9 @@
|
||||
CommandListFiles::CommandListFiles() : Command() {}
|
||||
|
||||
void CommandListFiles::run(Context ctx) {
|
||||
auto archive = Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid")));
|
||||
auto archive = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid")));
|
||||
for (auto const &fid: archive.files) {
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid));
|
||||
std::cout << "Name: " << file.name << " type: " << File::TypeToStr.at(file.fileType)
|
||||
<< " size: " << BytesFormatter::formatStr(file.bytes) << std::endl;
|
||||
}
|
||||
|
||||
@@ -68,14 +68,14 @@ void CommandRestore::run(Context ctx) {
|
||||
/// Add the main restore task
|
||||
threadPool.push([&, this]() {
|
||||
/// Get the archive and its file IDs
|
||||
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archive));
|
||||
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive));
|
||||
std::vector<Object::idType> files = archiveO.files;
|
||||
/// For each file...
|
||||
for (const auto fid: files) {
|
||||
/// Stop when asked to
|
||||
if (Signals::shouldQuit) break;
|
||||
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
|
||||
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid));
|
||||
filesToRestoreCount++;
|
||||
bytesToRestore += file.bytes;
|
||||
/// Spawn a restore task
|
||||
@@ -105,7 +105,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files
|
||||
return fullpath.string();
|
||||
}
|
||||
if (file.fileType == File::Type::Symlink) {
|
||||
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks.at(0)));
|
||||
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(file.chunks.at(0)));
|
||||
std::filesystem::create_symlink(std::filesystem::path(std::string{dest.data.begin(), dest.data.end()}),
|
||||
fullpath);
|
||||
callback(0, 0, 1);
|
||||
@@ -116,7 +116,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files
|
||||
for (const auto cid: file.chunks) {
|
||||
if (Signals::shouldQuit) throw Exception("Quitting!");
|
||||
|
||||
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid.second));
|
||||
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(cid.second));
|
||||
if (!c.data.empty()) {
|
||||
ostream.rdbuf()->sputn(c.data.data(), c.data.size());
|
||||
callback(c.data.size(), 0, 0);
|
||||
|
||||
@@ -89,7 +89,7 @@ void CommandRun::run(Context ctx) {
|
||||
auto relPath = p.lexically_relative(from).string();
|
||||
|
||||
if (ctx.repo->exists(Object::ObjectType::File, relPath) != 0) {
|
||||
File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(Object::ObjectType::File, relPath));
|
||||
File repoFile = Serialize::deserialize<File>(ctx.repo->getObjectRaw(Object::ObjectType::File, relPath));
|
||||
if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) {
|
||||
addFile(repoFile.id);
|
||||
progress.print("Skipped: " + relPath, 1);
|
||||
|
||||
@@ -12,11 +12,11 @@ FilterContainer &FilterContainer::addFilter(std::unique_ptr<Filter> &&f) {
|
||||
}
|
||||
|
||||
std::vector<char> FilterContainer::filterWrite(std::vector<char> from) const {
|
||||
for (auto const &f: filters) from = f->filterWrite(std::move(from));
|
||||
for (auto const &f: filters) from = std::move(f->filterWrite(std::move(from)));
|
||||
return from;
|
||||
}
|
||||
|
||||
/// Passes \p from through filterRead of every stored filter, in reverse storage order
/// \param from Data to filter; taken by value so it can be moved along the chain
/// \return     Result of the last applied filter, or \p from itself when there are no filters
std::vector<char> FilterContainer::filterRead(std::vector<char> from) const {
    // The call result is assigned directly: wrapping it in std::move adds nothing
    // NOTE(review): assumes Filter::filterRead returns by value, like this method does - confirm
    for (auto f = filters.crbegin(); f != filters.crend(); ++f) from = (*f)->filterRead(std::move(from));
    return from;
}
|
||||
|
||||
@@ -101,7 +101,7 @@ static int rfsRead(const char *path, char *buf, size_t size, off_t offset, struc
|
||||
size_t curInBuf = 0;
|
||||
size_t curInChunk = offset - curchunk->first;
|
||||
while (curInBuf < size) {
|
||||
auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(curchunk->second));
|
||||
auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(curchunk->second));
|
||||
size_t read = std::min((size_t) chunk.length - curInChunk, size - curInBuf);
|
||||
memcpy(buf + curInBuf, chunk.data.data() + curInChunk, read);
|
||||
curInBuf += read;
|
||||
@@ -121,7 +121,7 @@ static int rfsReadlink(const char *path, char *buf, size_t size) {
|
||||
} catch (...) { return -ENOENT; }
|
||||
|
||||
if (entry->file->fileType != File::Type::Symlink) return -ENOENT;
|
||||
auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(entry->file->chunks.at(0)));
|
||||
auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(entry->file->chunks.at(0)));
|
||||
strncpy(buf, dst.data.data(), std::min(dst.data.size(), size));
|
||||
|
||||
return 0;
|
||||
@@ -139,9 +139,9 @@ void RepoFS::start(Repository *repo, std::string path) {
|
||||
RepoFS::repo = repo;
|
||||
auto ars = repo->getObjects(Object::ObjectType::Archive);
|
||||
for (auto const &r: ars) {
|
||||
auto a = Serialize::deserialize<Archive>(repo->getObject(r.second));
|
||||
auto a = Serialize::deserialize<Archive>(repo->getObjectRaw(r.second));
|
||||
for (auto const &f: a.files) {
|
||||
auto file = Serialize::deserialize<File>(repo->getObject(f));
|
||||
auto file = Serialize::deserialize<File>(repo->getObjectRaw(f));
|
||||
auto path = std::filesystem::path(file.name);
|
||||
DirEntry *entry = root->children[std::to_string(a.id)].get()
|
||||
? root->children[std::to_string(a.id)].get()
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#include "BytesFormatter.h"
|
||||
#include "Command.h"
|
||||
#include "CommandDelete.h"
|
||||
#include "CommandDiff.h"
|
||||
#include "CommandList.h"
|
||||
#include "CommandListFiles.h"
|
||||
@@ -109,6 +110,7 @@ int main(int argc, char *argv[]) {
|
||||
commands.emplace(CommandListFiles::name, std::make_unique<CommandListFiles>());
|
||||
commands.emplace(CommandList::name, std::make_unique<CommandList>());
|
||||
commands.emplace(CommandMount::name, std::make_unique<CommandMount>());
|
||||
commands.emplace(CommandDelete::name, std::make_unique<CommandDelete>());
|
||||
|
||||
if (commands.count(opt) == 0) {
|
||||
std::cerr << "Unknown argument" << std::endl;
|
||||
|
||||
@@ -35,11 +35,11 @@ public:
|
||||
bool init() override;
|
||||
bool flush() override;
|
||||
|
||||
std::vector<char> getObject(Object::idType id) const override;
|
||||
std::vector<char> getObjectRaw(Object::idType id) const override;
|
||||
bool putObject(const Object &obj) override;
|
||||
bool deleteObject(const Object &obj) override;
|
||||
bool deleteObjects(const std::vector<Object::idType> &objs) override;
|
||||
|
||||
std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override;
|
||||
std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const override;
|
||||
Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override;
|
||||
std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override;
|
||||
|
||||
@@ -104,6 +104,8 @@ private:
|
||||
|
||||
unsigned long long maxFileId = 1; ///< Largest ID of object storage file
|
||||
std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem
|
||||
std::unordered_map<Object::idType, std::set<Object::idType>>
|
||||
fileToObjs;///< Used to locate Object%s in the filesystem
|
||||
|
||||
std::mutex writeCacheLock; ///< Write cache lock
|
||||
std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data
|
||||
@@ -116,9 +118,12 @@ private:
|
||||
/// \param lockW Write cache lock
|
||||
void flushWriteCache(std::unique_lock<std::mutex> &&lockW);
|
||||
|
||||
Object::idType largestUnusedId = 1;///< Largest available objectID
|
||||
Object::idType largestUnusedId = 1; ///< Largest available objectID
|
||||
std::vector<Object::idType> unusedIds;///< Vector of unused IDs
|
||||
std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>>
|
||||
keyIndex;///< Maps Object%'s keys to their ID's
|
||||
|
||||
std::unordered_map<Object::idType, uint64_t> refCounts;///< Count of references to an object per its id
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -31,9 +31,16 @@ public:
|
||||
/// All derived objects should implement this method
|
||||
virtual std::string getKey() const = 0;
|
||||
|
||||
/// Returns the ids of the objects that this object refers to
|
||||
virtual const std::vector<idType> &getRefs() const;
|
||||
|
||||
const idType id; ///< Unique numerical of the object
|
||||
const ObjectType type;///< Type of the object
|
||||
|
||||
static std::unique_ptr<Object> deserialize(const std::vector<char> &src);
|
||||
static std::unique_ptr<Object> deserialize(std::vector<char>::const_iterator &in,
|
||||
const std::vector<char>::const_iterator &end);
|
||||
|
||||
protected:
|
||||
/// Default constructor
|
||||
/// \param id Object ID
|
||||
|
||||
@@ -45,7 +45,13 @@ public:
|
||||
/// \param id ID of object to return
|
||||
/// \return Serialized object
|
||||
/// \throws Exception on any error or if object doesn't exist
|
||||
virtual std::vector<char> getObject(Object::idType id) const = 0;
|
||||
virtual std::vector<char> getObjectRaw(Object::idType id) const = 0;
|
||||
|
||||
/// Returns the Object with id \p id
|
||||
/// \param id ID of object to return
|
||||
/// \return Deserialized object
|
||||
/// \throws Exception on any error or if object doesn't exist
|
||||
std::unique_ptr<Object> getObject(Object::idType id) const;
|
||||
|
||||
/// Adds the Object \p obj to the Repository
|
||||
/// \param obj Constant reference to the object
|
||||
@@ -54,17 +60,17 @@ public:
|
||||
virtual bool putObject(const Object &obj) = 0;
|
||||
|
||||
/// Deletes the Objects with the given ids from the Repository
|
||||
/// \param obj Constant reference to the object
|
||||
/// \param objs Constant reference to the vector with ids of objects to delete
|
||||
/// \return True if successful, False if it didn't exist
|
||||
/// \throws Exception on any error
|
||||
virtual bool deleteObject(const Object &obj) = 0;
|
||||
virtual bool deleteObjects(const std::vector<Object::idType> &objs) = 0;
|
||||
|
||||
/// Returns the Object of type \p type and with key \p key
|
||||
/// \param type Type of the object
|
||||
/// \param key Constant reference to the key of the object
|
||||
/// \return Serialized object
|
||||
/// \throws Exception on any error or if object doesn't exist
|
||||
virtual std::vector<char> getObject(Object::ObjectType type, const std::string &key) const = 0;
|
||||
virtual std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const = 0;
|
||||
|
||||
/// Returns the id of an Object of type \p type and with key \p key
|
||||
/// \param type Type of the object
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#include "../Object.h"
|
||||
|
||||
/// Object representing a backup
|
||||
class Archive : public Object {
|
||||
class Archive final : public Object {
|
||||
public:
|
||||
Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files);
|
||||
|
||||
@@ -22,6 +22,9 @@ public:
|
||||
/// Returns the name of the archive
|
||||
std::string getKey() const override;
|
||||
|
||||
/// Returns the files in this archive
|
||||
const std::vector<Object::idType> &getRefs() const override;
|
||||
|
||||
const std::string name; ///< Archive name
|
||||
const unsigned long long mtime; ///< Time of creation
|
||||
const std::vector<idType> files;///< List of ids of File objects in the Archive
|
||||
|
||||
@@ -11,7 +11,7 @@
|
||||
#include "../Object.h"
|
||||
|
||||
/// Object representing a part of a File
|
||||
class Chunk : public Object {
|
||||
class Chunk final : public Object {
|
||||
public:
|
||||
Chunk(idType id, std::string, std::vector<char> data);
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
#include "../Object.h"
|
||||
|
||||
/// Object representing a saved file
|
||||
class File : public Object {
|
||||
class File final : public Object {
|
||||
public:
|
||||
enum class Type { Normal, Symlink, Directory, END };
|
||||
|
||||
@@ -68,6 +68,12 @@ public:
|
||||
/// List of the chunks in file
|
||||
/// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path
|
||||
const std::map<size_t, idType> chunks;
|
||||
|
||||
const std::vector<idType> &getRefs() const override;
|
||||
|
||||
private:
|
||||
void makeChunksList() const;
|
||||
mutable std::optional<std::vector<idType>> chunksList{std::nullopt};
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -5,8 +5,10 @@
|
||||
#include "FileRepository.h"
|
||||
|
||||
#include <exception>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
|
||||
#include "CheckFilter.h"
|
||||
#include "FilterFactory.h"
|
||||
@@ -45,6 +47,9 @@ bool FileRepository::open() {
|
||||
std::tie(keyIndex, largestUnusedId) =
|
||||
Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>(
|
||||
filters.filterRead(readFile(root / "index")));
|
||||
refCounts = Serialize::deserialize<decltype(refCounts)>(filters.filterRead(readFile(root / "refcounts")));
|
||||
unusedIds = Serialize::deserialize<decltype(unusedIds)>(filters.filterRead(readFile(root / "unusedIds")));
|
||||
fileToObjs = Serialize::deserialize<decltype(fileToObjs)>(filters.filterRead(readFile(root / "fileToObjs")));
|
||||
} catch (const std::exception &e) {
|
||||
ready = false;
|
||||
throw;
|
||||
@@ -79,10 +84,13 @@ FileRepository::~FileRepository() {
|
||||
|
||||
writeFile(root / "offsets", filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex))));
|
||||
writeFile(root / "index", filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId))));
|
||||
writeFile(root / "unusedIds", filters.filterWrite(Serialize::serialize(unusedIds)));
|
||||
writeFile(root / "refcounts", filters.filterWrite(Serialize::serialize(refCounts)));
|
||||
writeFile(root / "fileToObjs", filters.filterWrite(Serialize::serialize(fileToObjs)));
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<char> FileRepository::getObject(Object::idType id) const {
|
||||
std::vector<char> FileRepository::getObjectRaw(Object::idType id) const {
|
||||
if (!ready) throw Exception("Tried working with uninitialized repo!");
|
||||
|
||||
std::unique_lock lock(repoLock);
|
||||
@@ -133,6 +141,7 @@ void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) {
|
||||
{
|
||||
std::lock_guard lockI(repoLock);
|
||||
offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size()));
|
||||
fileToObjs[currentFileId].emplace(i.first);
|
||||
}
|
||||
offset += i.second.size();
|
||||
ofstream.rdbuf()->sputn(i.second.data(), i.second.size());
|
||||
@@ -144,14 +153,81 @@ bool FileRepository::putObject(const Object &obj) {
|
||||
{
|
||||
std::lock_guard lock(repoLock);
|
||||
keyIndex[obj.type][obj.getKey()] = obj.id;
|
||||
for (auto const &i: obj.getRefs()) refCounts[i]++;
|
||||
}
|
||||
writeObject(obj);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FileRepository::deleteObject(const Object &obj) {
|
||||
/// Deletes the objects with ids \p objs and every object whose reference count
/// drops to zero as a consequence
///
/// Works in three steps: walk the references of the objects to delete and
/// decrement their reference counts, rewrite every storage file that holds at
/// least one deleted object (survivors are put back, then the file is removed),
/// and finally release the deleted ids and purge them from the key index.
/// \param objs Ids of the objects to delete
/// \return     Always true
/// \throws     Exception if the repository is not ready; lookups of unknown ids also throw
bool FileRepository::deleteObjects(const std::vector<Object::idType> &objs) {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    std::queue<Object::idType> toVisit;
    std::set<Object::idType> toDelete;

    for (auto const &o: objs) {
        toVisit.emplace(o);
        toDelete.emplace(o);
    }

    std::cout << "Scanning for objects" << std::endl;

    while (!toVisit.empty()) {
        // pop() removes the FRONT of the queue, so that is the element to process;
        // reading back() here would skip queued ids and visit others twice
        auto o = toVisit.front();
        toVisit.pop();

        auto obj = getObject(o);
        for (const auto &id: obj->getRefs()) {
            std::unique_lock lock(repoLock);
            refCounts[id]--;
            // Nothing refers to this object anymore: delete it and follow its references too
            if (refCounts.at(id) == 0) {
                toDelete.emplace(id);
                toVisit.emplace(id);
                refCounts.erase(id);
            }
        }
    }

    std::cout << "Found " << toDelete.size() << " to delete " << std::endl;

    // Storage files that contain at least one object to delete
    std::set<uint64_t> touchedFiles;
    for (auto const &id: toDelete) touchedFiles.emplace(offsetIndex.at(id).fileId);

    std::cout << "Will rewrite " << touchedFiles.size() << " files" << std::endl;

    for (auto const &f: touchedFiles) {
        std::cout << "Rewriting file " << f << std::endl;
        const auto &fileObjs = fileToObjs.at(f);
        for (auto const &o: fileObjs) {
            auto obj = getObject(o);
            {
                std::unique_lock lock(repoLock);
                offsetIndex.erase(o);
            }
            // Survivors are stored again; deleted objects are simply not written back
            // NOTE(review): putObject also increments refCounts for the object's refs -
            // confirm that re-putting survivors does not count their references twice
            if (toDelete.find(o) == toDelete.end()) putObject(*obj);
        }
        {
            std::unique_lock lock(repoLock);
            fileToObjs.erase(f);
        }
        std::filesystem::remove(root / std::to_string(f));
    }

    {
        std::unique_lock lock(repoLock);
        for (auto const &id: toDelete) unusedIds.emplace_back(id);
        // One pass per key map is enough: the predicate only depends on toDelete
        for (auto &m: keyIndex) erase_if(m.second, [&](const auto &t) { return toDelete.contains(t.second); });
    }
    return true;
}
|
||||
|
||||
std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset,
|
||||
@@ -188,8 +264,8 @@ bool FileRepository::writeFile(const std::filesystem::path &file, const std::vec
|
||||
return true;
|
||||
}
|
||||
|
||||
std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const {
|
||||
return getObject(getObjectId(type, key));
|
||||
/// Returns the serialized object of type \p type stored under key \p key
/// \param type Type of the object
/// \param key  Key of the object
/// \return     Serialized object, as returned by the id-based getObjectRaw
std::vector<char> FileRepository::getObjectRaw(Object::ObjectType type, const std::string &key) const {
    // Resolve the key to an id first, then reuse the id-based lookup
    const Object::idType resolvedId = getObjectId(type, key);
    return getObjectRaw(resolvedId);
}
|
||||
|
||||
Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const {
|
||||
@@ -214,6 +290,11 @@ bool FileRepository::exists(Object::ObjectType type, const std::string &key) con
|
||||
|
||||
/// Hands out an object id
/// \return The last entry of unusedIds if there is one (it is removed from the list),
///         otherwise the current largestUnusedId, which is then incremented
Object::idType FileRepository::getId() {
    std::lock_guard lock(repoLock);

    // No released id to recycle: take a fresh one
    if (unusedIds.empty()) return largestUnusedId++;

    const auto recycled = unusedIds.back();
    unusedIds.pop_back();
    return recycled;
}
|
||||
|
||||
|
||||
@@ -6,6 +6,10 @@
|
||||
|
||||
#include "Serialize.h"
|
||||
|
||||
#include "objects/Archive.h"
|
||||
#include "objects/Chunk.h"
|
||||
#include "objects/File.h"
|
||||
|
||||
Object::Object(idType id, ObjectType type) : id(id), type(type) {}
|
||||
|
||||
Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
|
||||
@@ -17,3 +21,32 @@ void Object::serialize(std::vector<char> &out) const {
|
||||
}
|
||||
|
||||
Object::~Object() = default;
|
||||
|
||||
static std::vector<Object::idType> emptyRef{};
|
||||
|
||||
const std::vector<Object::idType> &Object::getRefs() const { return emptyRef; }
|
||||
|
||||
std::unique_ptr<Object> Object::deserialize(std::vector<char>::const_iterator &in,
|
||||
const std::vector<char>::const_iterator &end) {
|
||||
auto inCpy = in;
|
||||
auto id = Serialize::deserialize<idType>(in, end);
|
||||
auto type = Serialize::deserialize<ObjectType>(in, end);
|
||||
|
||||
switch (type) {
|
||||
case ObjectType::Archive:
|
||||
return std::make_unique<Archive>(Serialize::deserialize<Archive>(inCpy, end));
|
||||
case ObjectType::File:
|
||||
return std::make_unique<File>(Serialize::deserialize<File>(inCpy, end));
|
||||
case ObjectType::Chunk:
|
||||
return std::make_unique<Chunk>(Serialize::deserialize<Chunk>(inCpy, end));
|
||||
case ObjectType::END:
|
||||
break;
|
||||
default:
|
||||
throw Exception("Bad object!");
|
||||
}
|
||||
}
|
||||
|
||||
std::unique_ptr<Object> Object::deserialize(const std::vector<char> &src) {
|
||||
auto srcIterator = src.cbegin();
|
||||
return deserialize(srcIterator, src.end());
|
||||
}
|
||||
|
||||
@@ -8,3 +8,7 @@ Repository::~Repository() = default;
|
||||
Repository::Repository(Config config) : config(std::move(config)) {}
|
||||
|
||||
const Config &Repository::getConfig() const { return config; }
|
||||
|
||||
/// Returns the deserialized Object with id \p id
/// \param id Id of the object to return
/// \return   Object built by Object::deserialize from the bytes returned by getObjectRaw
std::unique_ptr<Object> Repository::getObject(Object::idType id) const {
    const std::vector<char> raw = getObjectRaw(id);
    return Object::deserialize(raw);
}
|
||||
|
||||
@@ -28,3 +28,5 @@ void Archive::serialize(std::vector<char> &out) const {
|
||||
}
|
||||
|
||||
std::string Archive::getKey() const { return name; }
|
||||
|
||||
const std::vector<Object::idType> &Archive::getRefs() const { return files; }
|
||||
|
||||
@@ -83,3 +83,16 @@ unsigned long long File::getFileSize(const std::filesystem::path &p) {
|
||||
else
|
||||
return getFileContents(p).size();
|
||||
}
|
||||
|
||||
/// Fills chunksList with the ids stored in chunks, in the map's iteration order
/// Does nothing if the list has already been built
void File::makeChunksList() const {
    if (chunksList.has_value()) return;

    auto &ids = chunksList.emplace();
    ids.reserve(chunks.size());
    for (const auto &entry: chunks) ids.emplace_back(entry.second);
}
|
||||
|
||||
const std::vector<Object::idType> &File::getRefs() const {
|
||||
if (!chunksList) makeChunksList();
|
||||
return *chunksList;
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
#include "Serialize.h"
|
||||
|
||||
FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId)
|
||||
: repo(repo), file(Serialize::deserialize<File>(repo->getObject(fileId))), chunksQueue() {
|
||||
: repo(repo), file(Serialize::deserialize<File>(repo->getObjectRaw(fileId))), chunksQueue() {
|
||||
for (auto const &id: file.chunks) chunksQueue.emplace(id.second);
|
||||
};
|
||||
|
||||
@@ -35,7 +35,7 @@ int FileBuffer::underflow() {
|
||||
if (getBuf.empty() || curGetBufPos == getBuf.size()) {
|
||||
if (chunksQueue.empty()) return traits_type::eof();
|
||||
else {
|
||||
auto chunk = Serialize::deserialize<Chunk>(repo->getObject(chunksQueue.front()));
|
||||
auto chunk = Serialize::deserialize<Chunk>(repo->getObjectRaw(chunksQueue.front()));
|
||||
getBuf = chunk.data;
|
||||
chunksQueue.pop();
|
||||
curGetBufPos = 0;
|
||||
|
||||
@@ -48,8 +48,8 @@ TEST(FileRepository, Deserialize) {
|
||||
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666);
|
||||
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777);
|
||||
|
||||
auto o1o = repo.getObject(666);
|
||||
auto o2o = repo.getObject(777);
|
||||
auto o1o = repo.getObjectRaw(666);
|
||||
auto o2o = repo.getObjectRaw(777);
|
||||
|
||||
auto o1ob = o1o.cbegin();
|
||||
auto o2ob = o2o.cbegin();
|
||||
@@ -126,7 +126,7 @@ TEST(FileRepository, Filters) {
|
||||
|
||||
|
||||
try {
|
||||
auto o1o = repo.getObject(666);
|
||||
auto o1o = repo.getObjectRaw(666);
|
||||
auto o1ob = o1o.cbegin();
|
||||
|
||||
Chunk o1(o1ob, o1o.cend());
|
||||
@@ -134,7 +134,7 @@ TEST(FileRepository, Filters) {
|
||||
} catch (...) {}
|
||||
|
||||
try {
|
||||
auto o2o = repo.getObject(777);
|
||||
auto o2o = repo.getObjectRaw(777);
|
||||
auto o2ob = o2o.cbegin();
|
||||
|
||||
Chunk o2(o2ob, o2o.cend());
|
||||
@@ -163,8 +163,8 @@ TEST(FileRepository, Filters) {
|
||||
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666);
|
||||
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777);
|
||||
|
||||
auto o1o = repo.getObject(666);
|
||||
auto o2o = repo.getObject(777);
|
||||
auto o1o = repo.getObjectRaw(666);
|
||||
auto o2o = repo.getObjectRaw(777);
|
||||
|
||||
auto o1ob = o1o.cbegin();
|
||||
auto o2ob = o2o.cbegin();
|
||||
@@ -192,7 +192,6 @@ TEST(FileRepository, Filters) {
|
||||
}
|
||||
|
||||
TEST(FileRepository, IDsDisabled) {
|
||||
GTEST_SKIP();
|
||||
Cleaner c({"IDS/testrepo"});
|
||||
{
|
||||
Config conf;
|
||||
@@ -224,8 +223,8 @@ TEST(FileRepository, IDsDisabled) {
|
||||
conf.add("repo", "IDS/testrepo");
|
||||
FileRepository repo(conf);
|
||||
repo.open();
|
||||
auto o1o = repo.getObject(1);
|
||||
auto o2o = repo.getObject(2);
|
||||
auto o1o = repo.getObjectRaw(1);
|
||||
auto o2o = repo.getObjectRaw(2);
|
||||
|
||||
auto o1ob = o1o.cbegin();
|
||||
auto o2ob = o2o.cbegin();
|
||||
@@ -257,7 +256,7 @@ TEST(FileRepository, IDsDisabled) {
|
||||
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 2);
|
||||
|
||||
|
||||
repo.deleteObject(o1);
|
||||
repo.deleteObjects({o1.id});
|
||||
}
|
||||
{
|
||||
Config conf;
|
||||
|
||||
Reference in New Issue
Block a user