Compare commits

..

4 Commits

Author SHA1 Message Date
763e5e5515 draft 2024-03-18 08:57:15 +01:00
e82bb93976 getRefs on objects 2024-03-17 23:08:57 +01:00
c2ca48a4c8 getObject => getObjectRaw 2024-03-17 23:03:13 +01:00
83bbc00303 Make objects final
Might be a slight optimization
2024-03-17 22:20:06 +01:00
28 changed files with 282 additions and 103 deletions

View File

@@ -19,17 +19,10 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
- run: apt-get update && apt-get install -y sudo
if: env.ACT=='true'
- name: Fix kernel mmap rnd bits
# Asan in llvm 14 provided in ubuntu 22.04 is incompatible with
# high-entropy ASLR in much newer kernels that GitHub runners are
# using leading to random crashes: https://reviews.llvm.org/D148280
run: sudo sysctl vm.mmap_rnd_bits=28
if: env.ACT!='true'
- name: install everything
run: sudo apt-get update && sudo apt-get install -y fuse libfuse-dev cmake build-essential gcc g++ libssl-dev zlib1g-dev

7
.idea/misc.xml generated
View File

@@ -1,11 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
<component name="CidrRootsConfiguration">
<excludeRoots>
<file path="$PROJECT_DIR$/cmake-build-debug" />
<file path="$PROJECT_DIR$/cmake-build-relwithdebinfo" />
<file path="$PROJECT_DIR$/cmake-build-relwithsan" />
</excludeRoots>
</component>
</project>

View File

@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.18)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
add_subdirectory(change_detectors)

View File

@@ -16,13 +16,13 @@ ComparableFile::ComparableFile(const File &file, const Repository *repo)
contents([file, repo]() { return std::make_unique<FileBuffer>(repo, file.id); }) {}
ComparableFile::ComparableFile(const std::filesystem::path &p, const std::filesystem::path &base)
: path(p.lexically_relative(base).string()), type(File::getFileType(p)), bytes(File::getFileSize(p)),
: path(p.lexically_relative(base).u8string()), type(File::getFileType(p)), bytes(File::getFileSize(p)),
mtime(File::getFileMtime(p)),
contents([p, path = this->path, type = this->type]() -> std::unique_ptr<std::streambuf> {
if (type == File::Type::Normal) {
auto fb = std::make_unique<std::filebuf>();
fb->open(p, std::ios::in | std::ios::binary);
if (!fb->is_open()) throw Exception("Can't open " + p.string() + " for reading!");
if (!fb->is_open()) throw Exception("Can't open " + p.u8string() + " for reading!");
return fb;
}

View File

@@ -1,6 +1,16 @@
cmake_minimum_required(VERSION 3.18)
add_library(commands srcs/CommandDiff.cpp srcs/CommandList.cpp srcs/CommandListFiles.cpp srcs/CommandRestore.cpp srcs/CommandRun.cpp srcs/CommandsCommon.cpp srcs/Diff.cpp srcs/CommandMount.cpp)
add_library(commands
srcs/CommandDiff.cpp
srcs/CommandList.cpp
srcs/CommandListFiles.cpp
srcs/CommandRestore.cpp
srcs/CommandRun.cpp
srcs/CommandsCommon.cpp
srcs/Diff.cpp
srcs/CommandMount.cpp
srcs/CommandDelete.cpp
)
target_include_directories(commands PUBLIC includes)

View File

@@ -0,0 +1,18 @@
//
// Created by Stepan Usatiuk on 06.08.2023.
//
#ifndef BACKUP_COMMANDDELETE_H
#define BACKUP_COMMANDDELETE_H
#include "Command.h"
/// Command that deletes an object from the repository
class CommandDelete : public Command {
public:
    /// Name under which the command is registered and selected on the command line
    static constexpr std::string_view name = "delete";

    CommandDelete();

    /// Asks the repository to delete the object whose id is given by the "aid" config option
    /// \param ctx Context providing the repository to operate on
    void run(Context ctx) override;
};
#endif//BACKUP_COMMANDDELETE_H

View File

@@ -0,0 +1,15 @@
//
// Created by Stepan Usatiuk on 06.08.2023.
//
#include "CommandDelete.h"
#include "CommandsCommon.h"
using namespace CommandsCommon;
/// Nothing to set up beyond the base Command
CommandDelete::CommandDelete() : Command() {}
/// Deletes the object whose id is given by the "aid" config option
/// \param ctx Context providing the repository and its config
void CommandDelete::run(Context ctx) {
    const auto targetId = static_cast<unsigned long long>(ctx.repo->getConfig().getInt("aid"));
    ctx.repo->deleteObjects({targetId});
}

View File

@@ -38,12 +38,12 @@ void CommandDiff::run(Context ctx) {
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads")
: std::thread::hardware_concurrency());
auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1));
auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive1));
std::mutex filesLock;
std::map<std::filesystem::path, File> files;///< Files in the first archive
for (auto id: archiveO1.files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
auto path = std::filesystem::path(file.name);
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id));
auto path = std::filesystem::u8path(file.name);
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path)) files.emplace(file.getKey(), std::move(file));
}
@@ -76,14 +76,14 @@ void CommandDiff::run(Context ctx) {
/// If a second archive is given, run the task for each of its files, otherwise use the "from" config option
if (ctx.repo->getConfig().exists("aid2")) {
archiveO2.emplace(
Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2"))));
threadPool.push([&]() {
for (auto id: archiveO2.value().files) {
/// Exit when asked to
if (Signals::shouldQuit) throw Exception("Quitting");
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::path(file.name)))
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id));
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::u8path(file.name)))
threadPool.push([&, file]() { processFile(ComparableFile{file, ctx.repo}); });
if (Signals::shouldQuit) break;
}
@@ -111,11 +111,11 @@ void CommandDiff::run(Context ctx) {
if (ctx.repo->getConfig().exists("aid2")) {
archiveO2.emplace(
Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2"))));
std::map<std::filesystem::path, File> files2;///< Files in the first archive
for (auto id: archiveO2->files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
auto path = std::filesystem::path(file.name);
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id));
auto path = std::filesystem::u8path(file.name);
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
files2.emplace(file.getKey(), std::move(file));
}
@@ -144,5 +144,5 @@ void CommandDiff::run(Context ctx) {
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
if (diffMode == "normal")
for (auto const &s: files) { ctx.logger->write(s.first.string() + " is removed\n", 0); }
for (auto const &s: files) { ctx.logger->write(s.first.u8string() + " is removed\n", 0); }
}

View File

@@ -13,9 +13,9 @@
CommandListFiles::CommandListFiles() : Command() {}
void CommandListFiles::run(Context ctx) {
auto archive = Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid")));
auto archive = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid")));
for (auto const &fid: archive.files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid));
std::cout << "Name: " << file.name << " type: " << File::TypeToStr.at(file.fileType)
<< " size: " << BytesFormatter::formatStr(file.bytes) << std::endl;
}

View File

@@ -24,7 +24,7 @@ CommandRestore::CommandRestore() : Command() {}
void CommandRestore::run(Context ctx) {
Object::idType archive = ctx.repo->getConfig().getInt("aid");
std::filesystem::path to = std::filesystem::path(ctx.repo->getConfig().getStr("to"));
std::filesystem::path to = std::filesystem::u8path(ctx.repo->getConfig().getStr("to"));
std::atomic<unsigned long long> filesToRestoreCount = 0;
std::atomic<unsigned long long> bytesToRestore = 0;
@@ -68,14 +68,14 @@ void CommandRestore::run(Context ctx) {
/// Add the main restore task
threadPool.push([&, this]() {
/// Get the archive and its file IDs
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archive));
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive));
std::vector<Object::idType> files = archiveO.files;
/// For each file...
for (const auto fid: files) {
/// Stop when asked to
if (Signals::shouldQuit) break;
auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid));
filesToRestoreCount++;
bytesToRestore += file.bytes;
/// Spawn a restore task
@@ -95,28 +95,28 @@ void CommandRestore::run(Context ctx) {
std::string CommandRestore::backupRestoreFile(const File &file, const std::filesystem::path &baseDir,
workerStatsFunction &callback, Context ctx) {
auto fullpath = baseDir / std::filesystem::path(file.name);
auto fullpath = baseDir / std::filesystem::u8path(file.name);
std::filesystem::create_directories(fullpath.parent_path());
if (file.fileType == File::Type::Directory) {
std::filesystem::create_directory(fullpath);
callback(0, 0, 1);
return fullpath.string();
return fullpath.u8string();
}
if (file.fileType == File::Type::Symlink) {
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks.at(0)));
std::filesystem::create_symlink(std::filesystem::path(std::string{dest.data.begin(), dest.data.end()}),
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(file.chunks.at(0)));
std::filesystem::create_symlink(std::filesystem::u8path(std::string{dest.data.begin(), dest.data.end()}),
fullpath);
callback(0, 0, 1);
return fullpath.string();
return fullpath.u8string();
}
std::ofstream ostream(fullpath, std::ios::binary | std::ios::out | std::ios::trunc);
for (const auto cid: file.chunks) {
if (Signals::shouldQuit) throw Exception("Quitting!");
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid.second));
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(cid.second));
if (!c.data.empty()) {
ostream.rdbuf()->sputn(c.data.data(), c.data.size());
callback(c.data.size(), 0, 0);
@@ -124,5 +124,5 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files
}
callback(0, 0, 1);
return fullpath.string();
return fullpath.u8string();
}

View File

@@ -79,17 +79,17 @@ void CommandRun::run(Context ctx) {
File::getFileType(absPath) == File::Type::Normal ? std::filesystem::file_size(absPath) : 0;
runnerStats.filesToSaveCount++;
threadPool.push([&, relPath, absPath]() {
addFile(backupChunkFile(absPath, relPath.string(), workerCallback, ctx));
progress.print("Copied: " + relPath.string(), 1);
addFile(backupChunkFile(absPath, relPath.u8string(), workerCallback, ctx));
progress.print("Copied: " + relPath.u8string(), 1);
});
};
/// Task to process an individual file in the backup
std::function<void(std::filesystem::path)> processFile = [&, this](const std::filesystem::path &p) {
auto relPath = p.lexically_relative(from).string();
auto relPath = p.lexically_relative(from).u8string();
if (ctx.repo->exists(Object::ObjectType::File, relPath) != 0) {
File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(Object::ObjectType::File, relPath));
File repoFile = Serialize::deserialize<File>(ctx.repo->getObjectRaw(Object::ObjectType::File, relPath));
if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) {
addFile(repoFile.id);
progress.print("Skipped: " + relPath, 1);
@@ -144,10 +144,10 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
ctx.repo->putObject(f);
return f.id;
}
if (!std::filesystem::is_regular_file(orig)) throw Exception(orig.string() + "is a special file, not saving");
if (!std::filesystem::is_regular_file(orig)) throw Exception(orig.u8string() + "is a special file, not saving");
std::ifstream ifstream(orig, std::ios::in | std::ios::binary);
if (!ifstream) throw Exception("Couldn't open " + orig.string() + " for reading");
if (!ifstream) throw Exception("Couldn't open " + orig.u8string() + " for reading");
std::unique_ptr<Chunker> chunker = ChunkerFactory::getChunker(ctx.repo->getConfig(), ifstream.rdbuf());
SHA fileHash;
@@ -179,7 +179,7 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
/// We might have exited in the loop before, so we don't save an incomplete file
if (Signals::shouldQuit) throw Exception("Quitting!");
if (size != File::getFileSize(orig)) {
throw Exception("Something really bad happened or file " + orig.string() + " changed during backup");
throw Exception("Something really bad happened or file " + orig.u8string() + " changed during backup");
}
File f(ctx.repo->getId(), saveAs, size, File::getFileMtime(orig), fileHash.getHash(), fileChunks,
File::getFileType(orig));

View File

@@ -18,16 +18,16 @@ void CommandsCommon::workerCallback(unsigned long long int bytesWritten, unsigne
}
/// Checks whether \p prefix is a prefix of \p p
///
/// The comparison is done on the UTF-8 string form of both paths, character by character,
/// so "/a/b" is also considered a prefix of "/a/bc".
/// \param prefix Path that should be at the start of \p p
/// \param p      Path to check
/// \return True if the string form of \p p starts with the string form of \p prefix
bool CommandsCommon::isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p) {
    // Convert once: u8string() builds a new string on every call
    const auto prefixStr = prefix.u8string();
    const auto pathStr = p.u8string();

    if (prefixStr.size() > pathStr.size()) return false;
    return pathStr.compare(0, prefixStr.size(), prefixStr) == 0;
}
void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore,
const std::function<void(std::function<void()>)> &spawner,
std::function<void(std::filesystem::directory_entry)> processFile) {
if (!std::filesystem::is_directory(dir)) throw Exception(dir.string() + " is not a directory!");
if (!std::filesystem::is_directory(dir)) throw Exception(dir.u8string() + " is not a directory!");
/// Don't process the directory if it has a ".nobackup" file
if (std::filesystem::exists(dir / ".nobackup")) return;
@@ -47,7 +47,7 @@ void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std:
/// Don't process the entry if it matches any of the ignore rules
if (std::any_of(ignore.begin(), ignore.end(), [dirEntry](auto pred) {
std::smatch m;
auto s = dirEntry.path().filename().string();
auto s = dirEntry.path().filename().u8string();
return std::regex_match(s, m, std::regex(pred));
}))
continue;

View File

@@ -15,7 +15,7 @@
#include "objects/Chunk.h"
DirEntry *getf(std::string path) {
auto p = std::filesystem::relative(std::filesystem::path(path), "/");
auto p = std::filesystem::relative(std::filesystem::u8path(path), "/");
DirEntry *entry = RepoFS::root.get();
if (p != ".")
for (auto const &subp: p) { entry = entry->children.at(subp).get(); }
@@ -101,7 +101,7 @@ static int rfsRead(const char *path, char *buf, size_t size, off_t offset, struc
size_t curInBuf = 0;
size_t curInChunk = offset - curchunk->first;
while (curInBuf < size) {
auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(curchunk->second));
auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(curchunk->second));
size_t read = std::min((size_t) chunk.length - curInChunk, size - curInBuf);
memcpy(buf + curInBuf, chunk.data.data() + curInChunk, read);
curInBuf += read;
@@ -121,7 +121,7 @@ static int rfsReadlink(const char *path, char *buf, size_t size) {
} catch (...) { return -ENOENT; }
if (entry->file->fileType != File::Type::Symlink) return -ENOENT;
auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(entry->file->chunks.at(0)));
auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(entry->file->chunks.at(0)));
strncpy(buf, dst.data.data(), std::min(dst.data.size(), size));
return 0;
@@ -139,10 +139,10 @@ void RepoFS::start(Repository *repo, std::string path) {
RepoFS::repo = repo;
auto ars = repo->getObjects(Object::ObjectType::Archive);
for (auto const &r: ars) {
auto a = Serialize::deserialize<Archive>(repo->getObject(r.second));
auto a = Serialize::deserialize<Archive>(repo->getObjectRaw(r.second));
for (auto const &f: a.files) {
auto file = Serialize::deserialize<File>(repo->getObject(f));
auto path = std::filesystem::path(file.name);
auto file = Serialize::deserialize<File>(repo->getObjectRaw(f));
auto path = std::filesystem::u8path(file.name);
DirEntry *entry = root->children[std::to_string(a.id)].get()
? root->children[std::to_string(a.id)].get()
: (root->children[std::to_string(a.id)] = std::make_unique<DirEntry>()).get();
@@ -153,7 +153,7 @@ void RepoFS::start(Repository *repo, std::string path) {
: (entry->children[subp] = std::make_unique<DirEntry>()).get();
}
entry->file.emplace(file);
entry->name = std::filesystem::path(file.name).filename().string();
entry->name = std::filesystem::u8path(file.name).filename().u8string();
}
}

View File

@@ -3,6 +3,7 @@
#include "BytesFormatter.h"
#include "Command.h"
#include "CommandDelete.h"
#include "CommandDiff.h"
#include "CommandList.h"
#include "CommandListFiles.h"
@@ -109,6 +110,7 @@ int main(int argc, char *argv[]) {
commands.emplace(CommandListFiles::name, std::make_unique<CommandListFiles>());
commands.emplace(CommandList::name, std::make_unique<CommandList>());
commands.emplace(CommandMount::name, std::make_unique<CommandMount>());
commands.emplace(CommandDelete::name, std::make_unique<CommandDelete>());
if (commands.count(opt) == 0) {
std::cerr << "Unknown argument" << std::endl;

View File

@@ -35,11 +35,11 @@ public:
bool init() override;
bool flush() override;
std::vector<char> getObject(Object::idType id) const override;
std::vector<char> getObjectRaw(Object::idType id) const override;
bool putObject(const Object &obj) override;
bool deleteObject(const Object &obj) override;
bool deleteObjects(const std::vector<Object::idType> &objs) override;
std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override;
std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const override;
Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override;
std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override;
@@ -104,6 +104,8 @@ private:
unsigned long long maxFileId = 1; ///< Largest ID of object storage file
std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem
std::unordered_map<Object::idType, std::set<Object::idType>>
fileToObjs;///< Maps the id of an object storage file to the ids of the Object%s written to it
std::mutex writeCacheLock; ///< Write cache lock
std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data
@@ -116,9 +118,12 @@ private:
/// \param lockW Write cache lock
void flushWriteCache(std::unique_lock<std::mutex> &&lockW);
Object::idType largestUnusedId = 1;///< Largest available objectID
Object::idType largestUnusedId = 1; ///< Largest available objectID
std::vector<Object::idType> unusedIds;///< Vector of unused IDs
std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>>
keyIndex;///< Maps Object%'s keys to their ID's
std::unordered_map<Object::idType, uint64_t> refCounts;///< Count of references to an object per its id
};

View File

@@ -31,9 +31,16 @@ public:
/// All derived objects should implement this method
virtual std::string getKey() const = 0;
/// Returns the ids of the objects that this object refers to
virtual const std::vector<idType> &getRefs() const;
const idType id; ///< Unique numerical of the object
const ObjectType type;///< Type of the object
/// Deserializes an object of any concrete type (Archive, File or Chunk) from \p src
/// \param src Serialized object
/// \return The deserialized object
static std::unique_ptr<Object> deserialize(const std::vector<char> &src);
/// Deserializes an object of any concrete type (Archive, File or Chunk)
/// The concrete type is chosen from the object type stored in the serialized data
/// \param in  Iterator to the start of the serialized object
/// \param end End of the serialized data
/// \return The deserialized object
static std::unique_ptr<Object> deserialize(std::vector<char>::const_iterator &in,
const std::vector<char>::const_iterator &end);
protected:
/// Default constructor
/// \param id Object ID

View File

@@ -45,7 +45,13 @@ public:
/// \param id ID of object to return
/// \return Serialized object
/// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObject(Object::idType id) const = 0;
virtual std::vector<char> getObjectRaw(Object::idType id) const = 0;
/// Returns the Object with id \p id
/// Fetches the serialized data with getObjectRaw() and deserializes it into the matching concrete type
/// \param id ID of object to return
/// \return Deserialized object
/// \throws Exception on any error or if object doesn't exist
std::unique_ptr<Object> getObject(Object::idType id) const;
/// Adds the Object \p obj to the Repository
/// \param obj Constant reference to the object
@@ -54,17 +60,17 @@ public:
virtual bool putObject(const Object &obj) = 0;
/// Deletes Object \p obj from the Repository
/// \param obj Constant reference to the object
/// \param objs Constant reference to the vector with ids of objects to delete
/// \return True if successful, False if it didn't exist
/// \throws Exception on any error
virtual bool deleteObject(const Object &obj) = 0;
virtual bool deleteObjects(const std::vector<Object::idType> &objs) = 0;
/// Returns the Object of type \p type and with key \p key
/// \param type Type of the object
/// \param key Constant reference to the key of the object
/// \return Serialized object
/// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObject(Object::ObjectType type, const std::string &key) const = 0;
virtual std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const = 0;
/// Returns the id of an Object of type \p type and with key \p key
/// \param type Type of the object

View File

@@ -10,7 +10,7 @@
#include "../Object.h"
/// Object representing a backup
class Archive : public Object {
class Archive final : public Object {
public:
Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files);
@@ -22,6 +22,9 @@ public:
/// Returns the name of the archive
std::string getKey() const override;
/// Returns the files in this archive
const std::vector<Object::idType> &getRefs() const override;
const std::string name; ///< Archive name
const unsigned long long mtime; ///< Time of creation
const std::vector<idType> files;///< List of ids of File objects in the Archive

View File

@@ -11,7 +11,7 @@
#include "../Object.h"
/// Object representing a part of a File
class Chunk : public Object {
class Chunk final : public Object {
public:
Chunk(idType id, std::string, std::vector<char> data);

View File

@@ -15,7 +15,7 @@
#include "../Object.h"
/// Object representing a saved file
class File : public Object {
class File final : public Object {
public:
enum class Type { Normal, Symlink, Directory, END };
@@ -68,6 +68,12 @@ public:
/// List of the chunks in file
/// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path
const std::map<size_t, idType> chunks;
/// Returns the ids of the chunks of this file, in the iteration order of the chunks map
const std::vector<idType> &getRefs() const override;
private:
/// Fills chunksList with the ids from chunks, does nothing if it is already filled
void makeChunksList() const;
/// Lazily built list of the chunk ids, backs the reference returned by getRefs()
mutable std::optional<std::vector<idType>> chunksList{std::nullopt};
};

View File

@@ -5,8 +5,10 @@
#include "FileRepository.h"
#include <exception>
#include <iostream>
#include <iterator>
#include <mutex>
#include <queue>
#include "CheckFilter.h"
#include "FilterFactory.h"
@@ -45,6 +47,9 @@ bool FileRepository::open() {
std::tie(keyIndex, largestUnusedId) =
Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>(
filters.filterRead(readFile(root / "index")));
refCounts = Serialize::deserialize<decltype(refCounts)>(filters.filterRead(readFile(root / "refcounts")));
unusedIds = Serialize::deserialize<decltype(unusedIds)>(filters.filterRead(readFile(root / "unusedIds")));
fileToObjs = Serialize::deserialize<decltype(fileToObjs)>(filters.filterRead(readFile(root / "fileToObjs")));
} catch (const std::exception &e) {
ready = false;
throw;
@@ -58,7 +63,7 @@ bool FileRepository::init() {
if (exists()) throw Exception("Trying to initialize already existing repository!");
if (!std::filesystem::is_directory(root) && !std::filesystem::create_directories(root))
throw Exception("Can't create directory " + root.string());
throw Exception("Can't create directory " + root.u8string());
writeFile(root / "info", CheckFilter::filterWriteStatic(Serialize::serialize(config)));
@@ -79,10 +84,13 @@ FileRepository::~FileRepository() {
writeFile(root / "offsets", filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex))));
writeFile(root / "index", filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId))));
writeFile(root / "unusedIds", filters.filterWrite(Serialize::serialize(unusedIds)));
writeFile(root / "refcounts", filters.filterWrite(Serialize::serialize(refCounts)));
writeFile(root / "fileToObjs", filters.filterWrite(Serialize::serialize(fileToObjs)));
}
}
std::vector<char> FileRepository::getObject(Object::idType id) const {
std::vector<char> FileRepository::getObjectRaw(Object::idType id) const {
if (!ready) throw Exception("Tried working with uninitialized repo!");
std::unique_lock lock(repoLock);
@@ -133,6 +141,7 @@ void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) {
{
std::lock_guard lockI(repoLock);
offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size()));
fileToObjs[currentFileId].emplace(i.first);
}
offset += i.second.size();
ofstream.rdbuf()->sputn(i.second.data(), i.second.size());
@@ -144,36 +153,96 @@ bool FileRepository::putObject(const Object &obj) {
{
std::lock_guard lock(repoLock);
keyIndex[obj.type][obj.getKey()] = obj.id;
for (auto const &i: obj.getRefs()) refCounts[i]++;
}
writeObject(obj);
return true;
}
bool FileRepository::deleteObject(const Object &obj) {
bool FileRepository::deleteObjects(const std::vector<Object::idType> &objs) {
if (!ready) throw Exception("Tried working with uninitialized repo!");
throw Exception("Deletion not implemented!");
std::queue<Object::idType> toVisit;
std::set<Object::idType> toDelete;
for (auto const &o: objs) {
toVisit.emplace(o);
toDelete.emplace(o);
}
std::cout << "Scanning for objects" << std::endl;
while (!toVisit.empty()) {
auto o = toVisit.back();
toVisit.pop();
auto obj = getObject(o);
for (const auto &id: obj->getRefs()) {
std::unique_lock lock(repoLock);
refCounts[id]--;
if (refCounts.at(id) == 0) {
toDelete.emplace(id);
toVisit.emplace(id);
refCounts.erase(id);
}
}
}
std::cout << "Found " << toDelete.size() << " to delete " << std::endl;
std::unordered_map<uint64_t, Object::idType> fileToObj;
std::set<uint64_t> touchedFiles;
for (auto const &id: toDelete) {
fileToObj.emplace(offsetIndex.at(id).fileId, id);
touchedFiles.emplace(offsetIndex.at(id).fileId);
}
std::cout << "Will rewrite " << touchedFiles.size() << " files" << std::endl;
for (auto const &f: touchedFiles) {
std::cout << "Rewriting file " << f << std::endl;
const auto &objs = fileToObjs.at(f);
std::vector<std::unique_ptr<Object>> objects;
for (auto const &o: objs) {
auto obj = getObject(o);
{
std::unique_lock lock(repoLock);
offsetIndex.erase(o);
}
if (toDelete.find(o) == toDelete.end()) putObject(*obj);
}
{
std::unique_lock lock(repoLock);
fileToObjs.erase(f);
}
std::filesystem::remove(root / std::to_string(f));
}
return true;
}
std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset,
unsigned long long size) const {
if (size > absoluteMaxFileLimit)
throw Exception("Tried to read " + std::to_string(size) + " bytes from " + file.string() +
throw Exception("Tried to read " + std::to_string(size) + " bytes from " + file.u8string() +
" which is more than absoluteMaxFileLimit");
std::ifstream ifstream(file, std::ios::binary | std::ios::in);
if (!ifstream.is_open()) throw Exception("Can't open file " + file.string() + " for reading!");
if (!ifstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for reading!");
std::vector<char> buf(size);
if (ifstream.rdbuf()->pubseekpos(offset) == std::streampos(std::streamoff(-1)))
throw Exception("Unexpected end of file " + file.string());
if (ifstream.rdbuf()->sgetn(buf.data(), size) != size) throw Exception("Unexpected end of file " + file.string());
throw Exception("Unexpected end of file " + file.u8string());
if (ifstream.rdbuf()->sgetn(buf.data(), size) != size) throw Exception("Unexpected end of file " + file.u8string());
return buf;
}
std::vector<char> FileRepository::readFile(const std::filesystem::path &file) const {
if (!std::filesystem::is_regular_file(file)) throw Exception("File " + file.string() + " is not a regular file!");
if (!std::filesystem::is_regular_file(file)) throw Exception("File " + file.u8string() + " is not a regular file!");
auto fileSize = std::filesystem::file_size(file);
if (fileSize == 0) return {};
return readFile(file, 0, fileSize);
@@ -181,15 +250,15 @@ std::vector<char> FileRepository::readFile(const std::filesystem::path &file) co
bool FileRepository::writeFile(const std::filesystem::path &file, const std::vector<char> &data) {
std::ofstream ofstream(file, std::ios::binary | std::ios::trunc | std::ios::out);
if (!ofstream.is_open()) throw Exception("Can't open file " + file.string() + " for writing!");
if (!ofstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for writing!");
if (ofstream.rdbuf()->sputn(data.data(), data.size()) != data.size())
throw Exception("Couldn't write all the data for " + file.string());
throw Exception("Couldn't write all the data for " + file.u8string());
return true;
}
std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const {
return getObject(getObjectId(type, key));
std::vector<char> FileRepository::getObjectRaw(Object::ObjectType type, const std::string &key) const {
return getObjectRaw(getObjectId(type, key));
}
Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const {

View File

@@ -6,6 +6,10 @@
#include "Serialize.h"
#include "objects/Archive.h"
#include "objects/Chunk.h"
#include "objects/File.h"
Object::Object(idType id, ObjectType type) : id(id), type(type) {}
Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
@@ -17,3 +21,32 @@ void Object::serialize(std::vector<char> &out) const {
}
Object::~Object() = default;
/// Shared empty list handed out by objects that don't refer to anything; const so that no one can
/// modify what every such object returns
static const std::vector<Object::idType> emptyRef{};

/// Default implementation: the object refers to no other objects
/// \return Reference to an empty list
const std::vector<Object::idType> &Object::getRefs() const { return emptyRef; }
/// Deserializes a single object of any concrete type
///
/// Peeks at the serialized header (id, then type) to pick the concrete class, then deserializes
/// the whole object starting from \p in.
/// \param in  Iterator to the start of the serialized object, advanced by the concrete deserializer
/// \param end End of the serialized data
/// \return The deserialized Archive, File or Chunk
/// \throws Exception if the stored type is not one of the concrete object types
std::unique_ptr<Object> Object::deserialize(std::vector<char>::const_iterator &in,
                                            const std::vector<char>::const_iterator &end) {
    // The header is read from a copy, so that the concrete deserializer gets the object from its start
    auto header = in;
    [[maybe_unused]] const auto id = Serialize::deserialize<idType>(header, end);
    const auto type = Serialize::deserialize<ObjectType>(header, end);

    switch (type) {
        case ObjectType::Archive:
            return std::make_unique<Archive>(Serialize::deserialize<Archive>(in, end));
        case ObjectType::File:
            return std::make_unique<File>(Serialize::deserialize<File>(in, end));
        case ObjectType::Chunk:
            return std::make_unique<Chunk>(Serialize::deserialize<Chunk>(in, end));
        case ObjectType::END:
        default:
            // END is handled like any other unknown type, never flow off the end without returning
            throw Exception("Bad object!");
    }
}
/// Deserializes an object of any concrete type from the whole of \p src
/// \param src Serialized object
/// \return The deserialized object
std::unique_ptr<Object> Object::deserialize(const std::vector<char> &src) {
    auto cursor = src.cbegin();
    const auto finish = src.cend();
    return deserialize(cursor, finish);
}

View File

@@ -8,3 +8,7 @@ Repository::~Repository() = default;
Repository::Repository(Config config) : config(std::move(config)) {}
const Config &Repository::getConfig() const { return config; }
/// Fetches the serialized object with id \p id and deserializes it into its concrete type
/// \param id ID of object to return
/// \return The deserialized object
std::unique_ptr<Object> Repository::getObject(Object::idType id) const {
    const std::vector<char> raw = getObjectRaw(id);
    return Object::deserialize(raw);
}

View File

@@ -28,3 +28,5 @@ void Archive::serialize(std::vector<char> &out) const {
}
std::string Archive::getKey() const { return name; }
const std::vector<Object::idType> &Archive::getRefs() const { return files; }

View File

@@ -42,20 +42,20 @@ File::Type File::getFileType(const std::filesystem::path &p) {
if (std::filesystem::is_symlink(p)) return Type::Symlink;
if (std::filesystem::is_directory(p)) return Type::Directory;
if (std::filesystem::is_regular_file(p)) return Type::Normal;
throw Exception("Unsupported file type! " + p.string());
throw Exception("Unsupported file type! " + p.u8string());
}
/// Returns the stored "contents" for a filesystem entry that is not a regular file.
///  - Regular files are rejected with an Exception.
///  - Directories yield an empty buffer.
///  - Symlinks yield the link target followed by a terminating '\0'.
/// Anything else ends in the fallback Exception at the bottom.
/// NOTE(review): the adjacent string()/u8string() statement pairs below look like
/// the two sides of a diff rendered together — only one of each pair should
/// remain in the real source file.
std::vector<char> File::getFileContents(const std::filesystem::path &p) {
auto type = getFileType(p);
// Regular files are not handled by this function.
if (type == Type::Normal) throw Exception(p.string() + " is a normal file!");
if (type == Type::Normal) throw Exception(p.u8string() + " is a normal file!");
// A directory has no contents to record.
if (type == Type::Directory) { return {}; }
if (type == Type::Symlink) {
// The link target is copied into a byte buffer and explicitly null-terminated.
auto target = std::filesystem::read_symlink(p).string();
auto target = std::filesystem::read_symlink(p).u8string();
std::vector<char> target_null_term = {target.begin(), target.end()};
target_null_term.emplace_back('\0');
return target_null_term;
}
// Reached only when the type matched none of the branches above.
throw Exception("Error with file " + p.string());
throw Exception("Error with file " + p.u8string());
}
unsigned long long File::getFileMtime(const std::filesystem::path &p) {
@@ -65,16 +65,16 @@ unsigned long long File::getFileMtime(const std::filesystem::path &p) {
std::chrono::duration_cast<std::chrono::seconds>(std::filesystem::last_write_time(p).time_since_epoch())
.count());
else if (type == Type::Symlink) {
auto path = p.string();
auto path = p.u8string();
struct stat sb;
if (lstat(path.c_str(), &sb) != 0) throw Exception("Error reading mtime for " + p.string());
if (lstat(path.c_str(), &sb) != 0) throw Exception("Error reading mtime for " + p.u8string());
#ifdef __APPLE__
return sb.st_mtimespec.tv_sec;
#else
return sb.st_mtime;
#endif
}
throw Exception("Error with file " + p.string());
throw Exception("Error with file " + p.u8string());
}
unsigned long long File::getFileSize(const std::filesystem::path &p) {
@@ -83,3 +83,16 @@ unsigned long long File::getFileSize(const std::filesystem::path &p) {
else
return getFileContents(p).size();
}
/// Fills the cached `chunksList` with the ids stored in the `chunks` map.
/// Does nothing when the cache has already been built.
void File::makeChunksList() const {
    // Already populated by an earlier call.
    if (chunksList) { return; }

    chunksList.emplace();
    chunksList->reserve(chunks.size());
    for (const auto &entry : chunks) {
        // Only the mapped value (the chunk's object id) is collected.
        chunksList->emplace_back(entry.second);
    }
}
/// Returns the ids of the chunks this file refers to.
/// The list is built on first use and served from the cache afterwards.
const std::vector<Object::idType> &File::getRefs() const {
    if (!chunksList) {
        makeChunksList();
    }
    const auto &refs = *chunksList;
    return refs;
}

View File

@@ -7,7 +7,7 @@
#include "Serialize.h"
/// Creates a buffer over the File object `fileId` stored in `repo`.
/// The File is deserialized from the repository and the id of every entry in
/// `file.chunks` is queued in `chunksQueue`, in the container's iteration order.
/// NOTE(review): the two initializer-list lines below (getObject / getObjectRaw)
/// look like the two sides of a diff rendered together — only one should remain
/// in the real source file.
FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId)
: repo(repo), file(Serialize::deserialize<File>(repo->getObject(fileId))), chunksQueue() {
: repo(repo), file(Serialize::deserialize<File>(repo->getObjectRaw(fileId))), chunksQueue() {
// Queue the mapped value (the chunk id) of every chunks entry.
for (auto const &id: file.chunks) chunksQueue.emplace(id.second);
};
@@ -35,7 +35,7 @@ int FileBuffer::underflow() {
if (getBuf.empty() || curGetBufPos == getBuf.size()) {
if (chunksQueue.empty()) return traits_type::eof();
else {
auto chunk = Serialize::deserialize<Chunk>(repo->getObject(chunksQueue.front()));
auto chunk = Serialize::deserialize<Chunk>(repo->getObjectRaw(chunksQueue.front()));
getBuf = chunk.data;
chunksQueue.pop();
curGetBufPos = 0;

View File

@@ -1,7 +1,7 @@
cmake_minimum_required(VERSION 3.18)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")

View File

@@ -48,8 +48,8 @@ TEST(FileRepository, Deserialize) {
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666);
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777);
auto o1o = repo.getObject(666);
auto o2o = repo.getObject(777);
auto o1o = repo.getObjectRaw(666);
auto o2o = repo.getObjectRaw(777);
auto o1ob = o1o.cbegin();
auto o2ob = o2o.cbegin();
@@ -126,7 +126,7 @@ TEST(FileRepository, Filters) {
try {
auto o1o = repo.getObject(666);
auto o1o = repo.getObjectRaw(666);
auto o1ob = o1o.cbegin();
Chunk o1(o1ob, o1o.cend());
@@ -134,7 +134,7 @@ TEST(FileRepository, Filters) {
} catch (...) {}
try {
auto o2o = repo.getObject(777);
auto o2o = repo.getObjectRaw(777);
auto o2ob = o2o.cbegin();
Chunk o2(o2ob, o2o.cend());
@@ -163,8 +163,8 @@ TEST(FileRepository, Filters) {
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666);
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777);
auto o1o = repo.getObject(666);
auto o2o = repo.getObject(777);
auto o1o = repo.getObjectRaw(666);
auto o2o = repo.getObjectRaw(777);
auto o1ob = o1o.cbegin();
auto o2ob = o2o.cbegin();
@@ -224,8 +224,8 @@ TEST(FileRepository, IDsDisabled) {
conf.add("repo", "IDS/testrepo");
FileRepository repo(conf);
repo.open();
auto o1o = repo.getObject(1);
auto o2o = repo.getObject(2);
auto o1o = repo.getObjectRaw(1);
auto o2o = repo.getObjectRaw(2);
auto o1ob = o1o.cbegin();
auto o2ob = o2o.cbegin();
@@ -257,7 +257,7 @@ TEST(FileRepository, IDsDisabled) {
ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 2);
repo.deleteObject(o1);
repo.deleteObjects({o1.id});
}
{
Config conf;