This commit is contained in:
2023-06-02 12:51:08 +02:00
commit 0e355fbe42
142 changed files with 10281 additions and 0 deletions

246
src/repo/FileRepository.cpp Normal file
View File

@@ -0,0 +1,246 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "FileRepository.h"
#include <exception>
#include <iterator>
#include <mutex>
#include "../filters/CheckFilter.h"
#include "../filters/FilterFactory.h"
#include "Object.h"
#include "Serialize.h"
/// Constructs a FileRepository from \p config
/// The base class takes ownership of \p config first, so every later initializer
/// has to read from this->config: the parameter itself has already been moved from.
/// \param config Config to use; `repo` is the root directory, `repo-target` the write cache size in MB
FileRepository::FileRepository(Config config)
    : Repository(std::move(config)),
      root(std::filesystem::path(this->config.getStr("repo"))),
      writeCacheMax(this->config.getInt("repo-target") * 1024 * 1024) {}
/// Reports whether the root is a directory that contains an `info` entry
bool FileRepository::exists() {
    if (!std::filesystem::is_directory(root)) return false;
    return std::filesystem::exists(root / "info");
}
/// Writes everything currently held in the write cache to the storage
/// \return True
bool FileRepository::flush() {
    // flushWriteCache expects the cache lock to be held and releases it itself
    std::unique_lock cacheGuard(writeCacheLock);
    flushWriteCache(std::move(cacheGuard));
    return true;
}
bool FileRepository::open() {
if (!exists()) throw Exception("Repository doesn't exist!");
auto readConf = Serialize::deserialize<Config>(CheckFilter::filterReadStatic(readFile(root / "info")));
std::swap(config, readConf);
config.merge(readConf);
if (config.getStr("compression") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("compression"), config));
if (config.getStr("encryption") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("encryption"), config));
filters.addFilter(FilterFactory::makeFilter("crc", config));
ready = true;
try {
std::tie(maxFileId, offsetIndex) = Serialize::deserialize<std::pair<decltype(maxFileId), decltype(offsetIndex)>>(filters.filterRead(readFile(root / "offsets")));
std::tie(keyIndex, largestUnusedId) = Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>(filters.filterRead(readFile(root / "index")));
} catch (const std::exception &e) {
ready = false;
throw;
}
return true;
}
/// Creates a new repository: makes the root directory, writes `info` and sets up the filters
/// \return True
/// \throws Exception if already initialized, if the repository already exists, or if the root can't be created
bool FileRepository::init() {
    if (ready) throw Exception("Trying to initialize already initialized repository!");
    if (exists()) throw Exception("Trying to initialize already existing repository!");

    // Only try to create the directory when it is not there yet
    const bool rootUsable = std::filesystem::is_directory(root) || std::filesystem::create_directories(root);
    if (!rootUsable) {
        throw Exception("Can't create directory " + root.u8string());
    }

    writeFile(root / "info", CheckFilter::filterWriteStatic(Serialize::serialize(config)));

    if (config.getStr("compression") != "none") {
        filters.addFilter(FilterFactory::makeFilter(config.getStr("compression"), config));
    }
    if (config.getStr("encryption") != "none") {
        filters.addFilter(FilterFactory::makeFilter(config.getStr("encryption"), config));
    }
    filters.addFilter(FilterFactory::makeFilter("crc", config));

    ready = true;
    return true;
}
/// Flushes the write cache and stores the `offsets` and `index` files, if the repository was ready
// NOTE(review): writeFile can throw; an exception escaping a destructor ends the program — confirm this is acceptable
FileRepository::~FileRepository() {
    if (!ready) return;
    ready = false;

    std::unique_lock cacheGuard(writeCacheLock);
    flushWriteCache(std::move(cacheGuard));

    const auto offsetsBlob = filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex)));
    writeFile(root / "offsets", offsetsBlob);

    const auto indexBlob = filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId)));
    writeFile(root / "index", indexBlob);
}
/// Reads the serialized object with id \p id from its storage file
/// \throws Exception if the repository isn't ready or the id is not in the offset index
std::vector<char> FileRepository::getObject(Object::idType id) const {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    // Copy the entry out under the lock, the file itself is read without holding it
    const OffsetEntry location = [&] {
        std::lock_guard guard(repoLock);
        const auto found = offsetIndex.find(id);
        if (found == offsetIndex.end())
            throw Exception("Object with id " + std::to_string(id) + " doesn't exist!");
        return found->second;
    }();

    return filters.filterRead(readFile(root / std::to_string(location.fileId), location.offset, location.length));
}
/// Serializes and filters \p obj, then stores the result in the write cache
/// \return True
/// \throws Exception if the repository isn't ready
bool FileRepository::writeObject(const Object &obj) {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    // The expensive part (serialization and filtering) is done before taking the lock
    auto payload = filters.filterWrite(Serialize::serialize(obj));

    std::unique_lock cacheGuard(writeCacheLock);
    writeCacheSize += payload.size();
    writeCache[obj.id] = std::move(payload);

    // Once the target file size is reached the cache is written out
    if (writeCacheSize >= writeCacheMax) flushWriteCache(std::move(cacheGuard));
    return true;
}
void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) {
if (writeCache.empty()) {
lockW.unlock();
return;
}
// Swap the cache for a new one and unlock the mutex so other threads can continue working
decltype(writeCache) objs;
std::swap(writeCache, objs);
writeCacheSize = 0;
decltype(maxFileId) currentFileId;
{
std::lock_guard lockI(repoLock);
currentFileId = maxFileId;
maxFileId++;
}
lockW.unlock();
unsigned long long offset = 0;
std::ofstream ofstream(root / std::to_string(currentFileId), std::ios::binary | std::ios::trunc | std::ios::out);
for (auto &i: objs) {
{
std::lock_guard lockI(repoLock);
offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size()));
}
offset += i.second.size();
ofstream.rdbuf()->sputn(i.second.data(), i.second.size());
}
}
/// Registers \p obj in the key index and hands it over to the write cache
/// \return True
bool FileRepository::putObject(const Object &obj) {
    {
        // The index is updated first, the data is written afterwards
        std::lock_guard guard(repoLock);
        auto &idsByKey = keyIndex[obj.type];
        idsByKey[obj.getKey()] = obj.id;
    }
    writeObject(obj);
    return true;
}
/// Deletion is not supported by FileRepository, this always throws
/// \throws Exception always: a different message is used when the repository isn't ready
bool FileRepository::deleteObject(const Object &obj) {
    if (ready) {
        throw Exception("Deletion not implemented!");
    }
    throw Exception("Tried working with uninitialized repo!");
}
/// Reads exactly \p size bytes of \p file starting at \p offset
/// \throws Exception if \p size exceeds absoluteMaxFileLimit, the file can't be opened,
///         the seek fails or fewer than \p size bytes could be read
std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const {
    if (size > absoluteMaxFileLimit) {
        throw Exception("Tried to read " + std::to_string(size) +
                        " bytes from " + file.u8string() +
                        " which is more than absoluteMaxFileLimit");
    }

    std::ifstream ifstream(file, std::ios::binary | std::ios::in);
    if (!ifstream.is_open()) {
        throw Exception("Can't open file " + file.u8string() + " for reading!");
    }

    std::vector<char> buf(size);
    auto *const raw = ifstream.rdbuf();

    // pubseekpos signals failure by returning position -1
    const bool seekFailed = raw->pubseekpos(offset) == std::streampos(std::streamoff(-1));
    if (seekFailed) {
        throw Exception("Unexpected end of file " + file.u8string());
    }
    if (raw->sgetn(buf.data(), size) != size) {
        throw Exception("Unexpected end of file " + file.u8string());
    }
    return buf;
}
/// Reads the whole regular file \p file; an empty file gives an empty vector
/// \throws Exception if \p file is not a regular file, or on any read error
std::vector<char> FileRepository::readFile(const std::filesystem::path &file) const {
    if (!std::filesystem::is_regular_file(file)) {
        throw Exception("File " + file.u8string() + " is not a regular file!");
    }
    const auto fileSize = std::filesystem::file_size(file);
    return fileSize == 0 ? std::vector<char>{} : readFile(file, 0, fileSize);
}
/// Replaces the contents of \p file with \p data
/// \param file Path of the file, it is truncated if it exists
/// \param data Bytes to write
/// \return True
/// \throws Exception if the file can't be opened or the data can't be written completely
bool FileRepository::writeFile(const std::filesystem::path &file, const std::vector<char> &data) {
    std::ofstream ofstream(file, std::ios::binary | std::ios::trunc | std::ios::out);
    if (!ofstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for writing!");

    const auto expected = static_cast<std::streamsize>(data.size());
    if (ofstream.rdbuf()->sputn(data.data(), expected) != expected)
        throw Exception("Couldn't write all the data for " + file.u8string());

    // sputn may leave the data in the stream buffer. Closing explicitly makes a failed
    // flush visible through failbit instead of it being dropped by the destructor
    ofstream.close();
    if (ofstream.fail())
        throw Exception("Couldn't write all the data for " + file.u8string());
    return true;
}
/// Looks up the id for \p type and \p key and returns the serialized object with that id
std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const {
    const Object::idType id = getObjectId(type, key);
    return getObject(id);
}
/// Returns the id stored in the key index for \p type and \p key
/// \throws Exception if there is no entry for \p type at all
// NOTE(review): a missing key is reported by .at(), i.e. as std::out_of_range and not as Exception
Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const {
    std::lock_guard guard(repoLock);
    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) throw Exception("No objects of requested type!");
    return ofType->second.at(key);
}
/// Returns a copy of all <key, id> pairs known for \p type, empty if there are none
std::vector<std::pair<std::string, Object::idType>> FileRepository::getObjects(Object::ObjectType type) const {
    std::lock_guard guard(repoLock);
    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) return {};
    return std::vector<std::pair<std::string, Object::idType>>(ofType->second.begin(), ofType->second.end());
}
/// Reports whether the key index has an entry for \p type and \p key
bool FileRepository::exists(Object::ObjectType type, const std::string &key) const {
    std::lock_guard guard(repoLock);
    const auto ofType = keyIndex.find(type);
    return ofType != keyIndex.end() && ofType->second.count(key) > 0;
}
/// Hands out the next unused object id and advances the counter
Object::idType FileRepository::getId() {
    std::lock_guard guard(repoLock);
    const Object::idType issued = largestUnusedId;
    ++largestUnusedId;
    return issued;
}
/// Deserialization constructor: reads fileId, offset and length, in this order, advancing \p in
FileRepository::OffsetEntry::OffsetEntry(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : fileId(Serialize::deserialize<unsigned long long>(in, end)),
      offset(Serialize::deserialize<unsigned long long>(in, end)),
      length(Serialize::deserialize<unsigned long long>(in, end)) {}
void FileRepository::OffsetEntry::serialize(std::vector<char> &out) const {
Serialize::serialize(fileId, out);
Serialize::serialize(offset, out);
Serialize::serialize(length, out);
}
/// Constructs an entry from its three fields
FileRepository::OffsetEntry::OffsetEntry(unsigned long long fileId, unsigned long long offset, unsigned long long length)
    : fileId(fileId),
      offset(offset),
      length(length) {}
/// Drops every key index entry of type \p type
/// \param type Type of the objects
/// \return True
bool FileRepository::clearCache(Object::ObjectType type) {
    // keyIndex is shared with the other accessors, all of which take repoLock
    std::lock_guard lock(repoLock);
    keyIndex[type] = {};
    return true;
}
/// Adds \p obj to the key index without writing anything to the storage
/// \param obj Constant reference to the object, it must already be in the offset index
/// \return True
/// \throws Exception if the object's id is not in the offset index
bool FileRepository::addToCache(const Object &obj) {
    // Check and insert under a single lock, there is no reason to release it in between
    std::lock_guard lock(repoLock);
    if (offsetIndex.count(obj.id) == 0)
        throw Exception("Object with id " + std::to_string(obj.id) + " doesn't exist!");
    keyIndex[obj.type][obj.getKey()] = obj.id;
    return true;
}

125
src/repo/FileRepository.h Normal file
View File

@@ -0,0 +1,125 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_FILEREPOSITORY_H
#define SEMBACKUP_FILEREPOSITORY_H
#include <filesystem>
#include <fstream>
#include <map>
#include <mutex>
#include "Object.h"
#include "Repository.h"
/// Repository implementation in the local filesystem
/**
* `repo` Config value is used as the root directory
* Objects are stored concatenated in files with approximate size of `repo-target` MB (from Config)
* The object key/object id index is stored as a hash map, in an `index` file outside of the object storage structure
* Hints for the location of objects inside of files are also stored as a hash map in the `offsets` file
* Config is stored in the `info` file, merged with the supplied Config on open()
*
* Thread safe, approx. max memory usage is `number of threads` * `repo-target`,
* as every thread can be flushing its write cache at the same time
*/
class FileRepository final : public Repository {
public:
/// Constructs a new FileRepository
/// \param config Config to use
FileRepository(Config config);
/// Checks whether the root is a directory that contains an `info` file
bool exists() override;
/// Opens an existing repository: reads `info`, sets up the filters and loads `offsets` and `index`
bool open() override;
/// Initializes a new repository: creates the root if needed and writes `info`
bool init() override;
/// Writes the current write cache to the storage
bool flush() override;
std::vector<char> getObject(Object::idType id) const override;
bool putObject(const Object &obj) override;
/// Not implemented, always throws
bool deleteObject(const Object &obj) override;
std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override;
Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override;
std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override;
bool clearCache(Object::ObjectType type) override;
bool addToCache(const Object &obj) override;
bool exists(Object::ObjectType type, const std::string &key) const override;
Object::idType getId() override;
/// FileRepository destructor
/// Flushes write cache, and writes the metadata
~FileRepository() override;
FileRepository(const FileRepository &r) = delete;
FileRepository &operator=(const FileRepository &r) = delete;
private:
const std::filesystem::path root;///< Root of the repository in the filesystem
/// Puts the Object raw data into write cache
bool writeObject(const Object &obj);
bool ready = false;///< Indicates whether the FileRepository was open or initialized
/// Reads the file and returns its raw data
/// \param file Constant reference to the absolute path of the file
/// \return Vector of bytes of the file
std::vector<char> readFile(const std::filesystem::path &file) const;
/// Reads the \p size bytes of the file from \p offset and returns its raw data
/// \param file Constant reference to the absolute path of the file
/// \param offset First byte of the file to read
/// \param size Amount of bytes to read (no more than absoluteMaxFileLimit)
/// \return Vector of bytes of the file
/// \throws Exception on any error, or when absoluteMaxFileLimit is reached
std::vector<char> readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const;
static constexpr unsigned long long absoluteMaxFileLimit{4ULL * 1024 * 1024 * 1024};///<Max file read size (4GB)
/// Writes \p data to \p file
/// \param file Constant reference to the absolute path of the file
/// \param data Constant reference to the vector of bytes to write
/// \return True
/// \throws Exception on any error
bool writeFile(const std::filesystem::path &file, const std::vector<char> &data);
mutable std::mutex repoLock;///< Lock for any operations on the Repository
/// Helper struct to store the location of objects in the filesystem
struct OffsetEntry {
unsigned long long fileId;///< ID of file where the object is located
unsigned long long offset;///< Offset in the file where the object starts
unsigned long long length;///< Length of the object
/// Signals the Serialization template to use OffsetEntry's own serialization/deserialization
using serializable = std::true_type;
/// Constructs an entry from its fields
OffsetEntry(unsigned long long fileId, unsigned long long offset, unsigned long long length);
/// Deserialization constructor
OffsetEntry(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// Serializes the entry to \p out
void serialize(std::vector<char> &out) const;
};
unsigned long long maxFileId = 1; ///< ID that the next object storage file will get
std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem
std::mutex writeCacheLock; ///< Write cache lock
std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data
unsigned long long writeCacheSize = 0; ///< Current byte size of the write cache
const unsigned long long writeCacheMax; ///< Target size of the write cache, it is automatically flushed after this is reached
/// Flushes the write cache
/// Expects the cache lock to be held already, swaps the cache with an empty one and unlocks it
/// \param lockW Write cache lock, held by the caller
void flushWriteCache(std::unique_lock<std::mutex> &&lockW);
Object::idType largestUnusedId = 1; ///< Next object ID that getId() will hand out
std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>> keyIndex;///< Maps Object%'s keys to their ID's
};
#endif//SEMBACKUP_FILEREPOSITORY_H

21
src/repo/Object.cpp Normal file
View File

@@ -0,0 +1,21 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Object.h"
#include "Serialize.h"
Object::Object(idType id, ObjectType type) : id(id), type(type) {}
/// Deserialization constructor: reads the id and then the type, advancing \p in past both
/// The read order has to match Object::serialize
Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
: id(Serialize::deserialize<idType>(in, end)),
type(Serialize::deserialize<ObjectType>(in, end)) {
}
/// Appends the id and then the type to \p out
/// Derived classes call this first and append their own fields afterwards
void Object::serialize(std::vector<char> &out) const {
Serialize::serialize(id, out);
Serialize::serialize(type, out);
}
// The destructor is declared pure virtual in Object.h, it still needs a definition
// because destructors of derived classes call it
Object::~Object() = default;

53
src/repo/Object.h Normal file
View File

@@ -0,0 +1,53 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_OBJECT_H
#define SEMBACKUP_OBJECT_H
#include <cstdint>
#include <string>
#include <vector>
/// Base class for objects in the Repository
/**
* Every object has a unique id, and is also indexed by a type-key pair in the Repository cache
*/
class Object {
public:
using idType = uint64_t;///< Type alias for Object%'s ID
/// Kinds of objects; END is not a real type, it marks the end of the valid range
/// and is used by the deserializer to reject out of range values
enum class ObjectType {
Archive,
File,
Chunk,
END
};
/// Serializes the object to \p out
virtual void serialize(std::vector<char> &out) const;
/// Signals the Serialization template to use Object's serialization/deserialization facilities
using serializable = std::true_type;
/// Pure virtual destructor (defined in Object.cpp), makes Object abstract so no instance of it can be created
virtual ~Object() = 0;
/// Pure virtual function that returns the key by which the object will be indexed in the Repository cache
/// All derived objects should implement this method
virtual std::string getKey() const = 0;
const idType id; ///< Unique numerical id of the object
const ObjectType type;///< Type of the object
protected:
/// Constructs the object from its id and type
/// \param id Object ID
/// \param type Object type
Object(idType id, ObjectType type);
/// Deserialization constructor
/// \param in Iterator to the first byte of the object, advanced past the bytes that were read
/// \param end End iterator of the source container
Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
};
#endif//SEMBACKUP_OBJECT_H

12
src/repo/Repository.cpp Normal file
View File

@@ -0,0 +1,12 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Repository.h"
// Out-of-line definition of the virtual destructor declared in Repository.h
Repository::~Repository() = default;
// Takes \p config by value and moves it into the member, the caller's copy is not referenced afterwards
Repository::Repository(Config config) : config(std::move(config)) {}
/// Gives read-only access to the Config of this Repository
const Config &Repository::getConfig() const {
    return this->config;
}

122
src/repo/Repository.h Normal file
View File

@@ -0,0 +1,122 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_REPOSITORY_H
#define SEMBACKUP_REPOSITORY_H
#include <filesystem>
#include <mutex>
#include <set>
#include "../Config.h"
#include "../filters/FilterContainer.h"
#include "Object.h"
/// Abstract base class for an Object Repository
/**
* So far only FileRepository exists, and probably this interface is too inflexible
* to be easily used for creating other (database, object storage...) repositories,
* but it should be possible with some refactoring
*/
class Repository {
public:
/// Checks if a repository already exists
/// \return True if exists, False otherwise
virtual bool exists() = 0;
/// Tries to open the Repository
/// \return True
/// \throws Exception on any error
virtual bool open() = 0;
/// Tries to initialize the Repository
/// \return True
/// \throws Exception on any error, including if the Repository is already initialized
virtual bool init() = 0;
/// Tries to flush the Repository write cache
/// \return True
/// \throws Exception on any error
virtual bool flush() = 0;
/// Returns the serialized Object with id \p id
/// \param id ID of object to return
/// \return Serialized object
/// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObject(Object::idType id) const = 0;
/// Adds the Object \p obj to the Repository
/// \param obj Constant reference to the object
/// \return True
/// \throws Exception on any error
virtual bool putObject(const Object &obj) = 0;
/// Deletes Object \p obj from the Repository
/// \param obj Constant reference to the object
/// \return True if successful, False if it didn't exist
/// \throws Exception on any error
virtual bool deleteObject(const Object &obj) = 0;
/// Returns the Object of type \p type and with key \p key
/// \param type Type of the object
/// \param key Constant reference to the key of the object
/// \return Serialized object
/// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObject(Object::ObjectType type, const std::string &key) const = 0;
/// Returns the id of an Object of type \p type and with key \p key
/// \param type Type of the object
/// \param key Constant reference to the key of the object
/// \return ID of the object
/// \throws Exception on any error or if object doesn't exist
virtual Object::idType getObjectId(Object::ObjectType type, const std::string &key) const = 0;
/// Returns the list of Objects of type \p type
/// \param type Type of the object
/// \return Vector of pairs <key of object, id of object>
/// \throws Exception on any error
virtual std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const = 0;
/// Returns whether Object of type \p type and with key \p key exists
/// \param type Type of the object
/// \param key Constant reference to the key of the object
/// \return True if exists, False otherwise
/// \throws Exception on any error
virtual bool exists(Object::ObjectType type, const std::string &key) const = 0;
/// Erases all the cache entries of object type \p type
/// \param type Type of the objects
/// \return True
virtual bool clearCache(Object::ObjectType type) = 0;
/// Adds the object to the cache, but doesn't change it on disk otherwise
/// \param obj Constant reference to the object
/// \return True
/// \throws Exception on any error, or if the object doesn't exist
virtual bool addToCache(const Object &obj) = 0;
/// Returns the next available object id
virtual Object::idType getId() = 0;
/// Returns the const reference to Config object used for this Repository
const Config &getConfig() const;
/// Default virtual destructor
virtual ~Repository();
/// Repositories are not copyable
Repository(const Repository &r) = delete;
/// Repositories are not copy-assignable
Repository &operator=(const Repository &r) = delete;
protected:
/// Base Repository class constructor
/// \param config Config to use, it is moved into the Repository
Repository(Config config);
Config config; ///< Config of this Repository
FilterContainer filters;///< Container of IO filters used to transform Objects when writing/reading to/from storage
};
#endif//SEMBACKUP_REPOSITORY_H

208
src/repo/Serialize.h Normal file
View File

@@ -0,0 +1,208 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_SERIALIZE_H
#define SEMBACKUP_SERIALIZE_H
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>
#ifdef __APPLE__
#include <machine/endian.h>
#define htobe64(x) htonll(x)
#define be64toh(x) ntohll(x)
#else
#include <endian.h>
#endif
#include "../Exception.h"
/// Serialization library
/**
* To serialize the objects in Repository, we have to handle a couple of cases:
* 1. Serializing integers (object ids, etc...)
* 2. Serializing enums (object types)
* 3. Serializing char vectors and strings
* 4. Serializing other STL containers (which also requires serializing pairs)
* 5. Serializing custom structs (including the objects themselves)
*
* With this library it is possible to do all of that.
* Integers wider than one byte are always written as 8 big-endian bytes (via htobe64/be64toh), so the format doesn't depend on the byte order of the host or on the width of the integer type
*
*/
namespace Serialize {
/// Detects pair-like types: anything that has both a `first` and a `second` member
template<typename, typename = void, typename = void>
struct is_pair : std::bool_constant<false> {};

template<typename P>
struct is_pair<P,
               std::void_t<decltype(std::declval<P>().first)>,
               std::void_t<decltype(std::declval<P>().second)>>
    : std::bool_constant<true> {};
/// Detects whether a default-constructed \p T accepts `emplace_back` with a value of type \p V
template<typename, typename, typename = void>
struct has_emplace_back : std::bool_constant<false> {};

template<typename T, typename V>
struct has_emplace_back<T, V,
                        std::void_t<decltype(T().emplace_back(std::declval<V>()))>>
    : std::bool_constant<true> {};
/// Checks if the object has the `serializable` member type
/// In that case, its serialization will be delegated to its .serialize() method,
/// and deserialization to its T(char vector iterator in, const char vector iterator end) constructor,
/// similar to Serialize::deserialize
template<typename, typename = void, typename = void>
struct serializable : std::bool_constant<false> {};

template<typename T>
struct serializable<T, std::void_t<decltype(T::serializable::value)>>
    : std::bool_constant<true> {};
/// Deserializes object of type \p T starting from the first byte \p in, advances the iterator past the end of object
/// \tparam T Type to deserialize
/// \param in Iterator to the first byte of the object
/// \param end End iterator of source container
/// \return Deserialized value
/// \throws Exception if the data ends too early or is malformed
template<typename T>
static T deserialize(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// Serializes object of type \p T into vector \p out
/// \tparam T Type to serialize
/// \param what Constant reference to the serialized object
/// \param out Reference to output vector, the data is appended to it
template<typename T>
static void serialize(const T &what, std::vector<char> &out);
/// Serializes the object of type \p T and returns the resulting vector
/// \tparam T Type to serialize
/// \param o Constant reference to the serialized object
/// \return Serialized data
template<typename T>
static std::vector<char> serialize(const T &o);
/// Deserializes object of type \p T from input vector \p from
/// \tparam T Type to deserialize
/// \param from Constant reference to the serialized object
/// \return Deserialized value
template<typename T>
static T deserialize(const std::vector<char> &from);
// Reads one value of type T at `in` and leaves `in` just past it.
// The branch is picked at compile time, in this order: self-serializable types, pairs,
// enums, single-byte types, other integral types, and finally containers.
// Throws Exception when there is no data left, when the data is too short or when
// a marker/enum value is not what is expected.
template<typename T>
T deserialize(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end) {
// Every value takes at least one byte, so being at the end already is an error
if (in >= end) throw Exception("Unexpected end of object!");
if constexpr (serializable<T>::value) {
// If the object declares itself as serializable, call its constructor with in and end
return T(in, end);
} else if constexpr (is_pair<T>::value) {
// If the object is pair, deserialize the first and second element and return the pair
// const is stripped so that map value types (pair<const K, V>) can be read too
using KT = typename std::remove_const<decltype(T::first)>::type;
using VT = typename std::remove_const<decltype(T::second)>::type;
auto K = deserialize<KT>(in, end);
auto V = deserialize<VT>(in, end);
return T(std::move(K), std::move(V));
} else if constexpr (std::is_enum<T>::value) {
// If the object is an enum, deserialize an int and cast it to the enum
// The enum has to provide an END enumerator, values at or past it are rejected
auto tmp = deserialize<uint32_t>(in, end);
if (tmp >= 0 && tmp < static_cast<uint32_t>(T::END))
return static_cast<T>(tmp);
else
throw Exception("Enum out of range!");
} else if constexpr (sizeof(T) == 1) {
// If it's a single byte, just copy it
if (std::distance(in, end) < sizeof(T))
throw Exception("Unexpected end of object!");
return *(in++);
} else if constexpr (std::is_integral<T>::value) {
uint64_t tmp;
static_assert(sizeof(tmp) == 8);
// If the object is a number, copy it byte-by-byte
// It always takes 8 bytes in big-endian order, whatever the size of T is
if (std::distance(in, end) < sizeof(tmp))
throw Exception("Unexpected end of object!");
std::copy(in, in + sizeof(tmp), reinterpret_cast<char *>(&tmp));
in += sizeof(tmp);
return static_cast<T>(be64toh(tmp));
} else {
// Otherwise we treat it as a container, in format of <number of elements>b<elements>e
size_t size = deserialize<size_t>(in, end);
char b = deserialize<char>(in, end);
if (b != 'b') throw Exception("Error deserializing!");
T out;
if constexpr (sizeof(typename T::value_type) == 1) {
// Optimization for char vectors
// The size is checked against the remaining data before anything is copied
if (std::distance(in, end) < size)
throw Exception("Unexpected end of object!");
out.insert(out.end(), in, in + size);
in += size;
} else
for (size_t i = 0; i < size; i++) {
using V = typename T::value_type;
V v = deserialize<V>(in, end);
// Try either emplace_back or emplace if it doesn't exist
if constexpr (has_emplace_back<T, V>::value)
out.emplace_back(std::move(v));
else
out.emplace(std::move(v));
}
b = deserialize<char>(in, end);
if (b != 'e') throw Exception("Error deserializing!");
return out;
}
}
// Appends the serialized form of `what` to `out`.
// The branch is picked at compile time and mirrors the one deserialize uses for the same T.
template<typename T>
void serialize(const T &what, std::vector<char> &out) {
    if constexpr (serializable<T>::value) {
        // The type knows how to serialize itself
        what.serialize(out);
    } else if constexpr (is_pair<T>::value) {
        // A pair is just its two elements, one after another
        serialize(what.first, out);
        serialize(what.second, out);
    } else if constexpr (std::is_enum_v<T>) {
        // Enums are stored as their numeric value
        const auto asNumber = static_cast<uint32_t>(what);
        serialize(asNumber, out);
    } else if constexpr (sizeof(T) == 1) {
        // Single bytes are stored as they are
        out.emplace_back(what);
    } else if constexpr (std::is_integral_v<T>) {
        // Every other integer is widened to 64 bits and stored big-endian
        static_assert(sizeof(uint64_t) == 8);
        const uint64_t bigEndian = htobe64(static_cast<uint64_t>(what));
        const auto *const bytes = reinterpret_cast<const char *>(&bigEndian);
        out.insert(out.end(), bytes, bytes + sizeof(bigEndian));
    } else {
        // Anything else is a container: <number of elements>b<elements>e
        serialize(what.size(), out);
        serialize('b', out);
        if constexpr (sizeof(typename T::value_type) == 1) {
            // Byte containers are copied in one go
            out.insert(out.end(), what.begin(), what.end());
        } else {
            for (auto const &element: what) serialize(element, out);
        }
        serialize('e', out);
    }
}
// Convenience overload: serializes `o` into a fresh vector and returns it
template<typename T>
std::vector<char> serialize(const T &o) {
    std::vector<char> bytes;
    serialize(o, bytes);
    return bytes;
}
// Convenience overload: deserializes a T that starts at the beginning of `from`
template<typename T>
T deserialize(const std::vector<char> &from) {
    // The iterator overload needs an lvalue it can advance
    std::vector<char>::const_iterator cursor = from.cbegin();
    return deserialize<T>(cursor, from.cend());
}
}// namespace Serialize
#endif//SEMBACKUP_SERIALIZE_H

View File

@@ -0,0 +1,35 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Archive.h"
#include "../../Exception.h"
#include "../Serialize.h"
/// Constructs an Archive from its fields
/// \param id    Object ID
/// \param name  Archive name, also used as the key
/// \param mtime Time of creation
/// \param files Ids of the File objects in the Archive
/// \param full  Whether this is a full archive
Archive::Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files, bool full)
    : Object(id, ObjectType::Archive),
      // name and files were taken by value, so they are moved instead of being copied once more
      name(std::move(name)),
      mtime(mtime),
      files(std::move(files)),
      isFull(full) {}
/// Deserialization constructor
/// Reads the Object part, then name, mtime, files and isFull, and finally the recorded
/// number of files, which has to match the number of files that were read
/// \throws Exception if the stored type is not Archive or the file counts differ
Archive::Archive(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      name(Serialize::deserialize<std::string>(in, end)),
      mtime(Serialize::deserialize<unsigned long long>(in, end)),
      files(Serialize::deserialize<std::vector<idType>>(in, end)),
      isFull(Serialize::deserialize<bool>(in, end)) {
    if (type != ObjectType::Archive) {
        throw Exception("Type mismatch for Archive!");
    }
    const auto recordedCount = Serialize::deserialize<std::vector<idType>::size_type>(in, end);
    if (recordedCount != files.size()) {
        throw Exception("Number of files recorded doesn't match the number of files read!");
    }
}
/// Appends the Archive to \p out
/// The field order has to match the deserialization constructor
void Archive::serialize(std::vector<char> &out) const {
Object::serialize(out);
Serialize::serialize(name, out);
Serialize::serialize(mtime, out);
Serialize::serialize(files, out);
Serialize::serialize(isFull, out);
// The number of files is stored once more at the end, it is compared against the list when reading
Serialize::serialize(files.size(), out);
}
/// Archives are indexed by their name
std::string Archive::getKey() const {
return name;
}

View File

@@ -0,0 +1,32 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_ARCHIVE_H
#define SEMBACKUP_ARCHIVE_H
#include <array>
#include "../Object.h"
/// Object representing a backup
class Archive : public Object {
public:
/// Constructs an Archive from its fields
/// \param id    Object ID
/// \param name  Archive name
/// \param mtime Time of creation
/// \param files Ids of the File objects in the Archive
/// \param full  Whether this is a full archive
Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files, bool full = false);
/// Deserialization constructor
Archive(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// \copydoc Object::serialize
void serialize(std::vector<char> &out) const override;
/// Returns the name of the archive
std::string getKey() const override;
const std::string name; ///< Archive name
const unsigned long long mtime; ///< Time of creation
const std::vector<idType> files;///< List of ids of File objects in the Archive
const bool isFull = false; ///< Whether this was a full archive
};
#endif//SEMBACKUP_ARCHIVE_H

View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Chunk.h"
#include "../../Exception.h"
#include "../Serialize.h"
/// Constructs a Chunk from its hash and data
/// \param id   Object ID
/// \param md5  MD5 hash of the chunk, also used as the key
/// \param data Raw chunk data
Chunk::Chunk(idType id, std::string md5, std::vector<char> data)
    : Object(id, ObjectType::Chunk),
      // Listed in declaration order (md5, data, length), which is the order members are
      // really initialized in; length relies on this->data being ready
      md5(std::move(md5)),
      data(std::move(data)),
      length(this->data.size()) {}
/// Deserialization constructor
/// Reads the Object part, then md5, data and the recorded length
/// \throws Exception if the stored type is not Chunk or the recorded length differs from the data size
Chunk::Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      md5(Serialize::deserialize<std::string>(in, end)),
      data(Serialize::deserialize<std::vector<char>>(in, end)),
      length(Serialize::deserialize<unsigned long long>(in, end)) {
    if (type != ObjectType::Chunk) {
        throw Exception("Type mismatch for Chunk!");
    }
    if (data.size() != length) {
        throw Exception("Recorded length and actual length don't match for Chunk!");
    }
}
/// Appends the Chunk to \p out
/// The field order has to match the deserialization constructor
void Chunk::serialize(std::vector<char> &out) const {
Object::serialize(out);
Serialize::serialize(md5, out);
Serialize::serialize(data, out);
// The length is stored in addition to the data, it is compared against the data size when reading
Serialize::serialize(length, out);
}
/// Chunks are indexed by their MD5 hash
std::string Chunk::getKey() const {
return md5;
}

33
src/repo/objects/Chunk.h Normal file
View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_CHUNK_H
#define SEMBACKUP_CHUNK_H
#include <array>
#include <vector>
#include "../Object.h"
/// Object representing a part of a File
class Chunk : public Object {
public:
/// Constructs a Chunk
/// The unnamed string parameter is the MD5 hash of the chunk, \p data is its raw data;
/// the length is taken from the size of the data
Chunk(idType id, std::string, std::vector<char> data);
/// Deserialization constructor
Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// \copydoc Object::serialize
void serialize(std::vector<char> &out) const override;
/// Returns the MD5 of the chunk
std::string getKey() const override;
const std::string md5; ///< MD5 hash of the chunk
const std::vector<char> data; ///< Raw chunk data
const unsigned long long length;///< Size of chunk in bytes
};
#endif//SEMBACKUP_CHUNK_H

84
src/repo/objects/File.cpp Normal file
View File

@@ -0,0 +1,84 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "File.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <utility>
#include "../../Exception.h"
#include "../Serialize.h"
/// Creates a File object from already known metadata
/// \param id       ID of the object
/// \param name     Name of the file, returned by getKey()
/// \param bytes    Size of the file in bytes
/// \param mtime    Last modification time as timestamp
/// \param md5      Hash of the file
/// \param chunks   IDs of the chunks making up the file
/// \param fileType Type of the file
// name, md5 and chunks are taken by value, so they are moved into the members
// instead of being copied a second time
File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType)
    : Object(id, ObjectType::File),
      name(std::move(name)),
      bytes(bytes),
      mtime(mtime),
      md5(std::move(md5)),
      fileType(fileType),
      chunks(std::move(chunks)) {}
/// Deserialization constructor: reads the Object base, then name, bytes, mtime, md5, fileType and chunks
/// \param in  Iterator into the serialized bytes, handed by reference to every deserialize call
/// \param end Iterator marking the end of the serialized bytes
/// \throws Exception if the stored object type is not File
File::File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      name(Serialize::deserialize<std::remove_const_t<decltype(name)>>(in, end)),
      bytes(Serialize::deserialize<std::remove_const_t<decltype(bytes)>>(in, end)),
      mtime(Serialize::deserialize<std::remove_const_t<decltype(mtime)>>(in, end)),
      md5(Serialize::deserialize<std::remove_const_t<decltype(md5)>>(in, end)),
      fileType(Serialize::deserialize<std::remove_const_t<decltype(fileType)>>(in, end)),
      chunks(Serialize::deserialize<std::remove_const_t<decltype(chunks)>>(in, end)) {
    const bool isFile = (type == ObjectType::File);
    if (!isFile) {
        throw Exception("Type mismatch for File!");
    }
}
/// Appends the serialized File to \p out
/// \param out Byte vector receiving the Object base followed by the File members
void File::serialize(std::vector<char> &out) const {
    // Base part first, then the members in the same order the deserialization constructor reads them
    Object::serialize(out);

    Serialize::serialize(name, out);
    Serialize::serialize(bytes, out);
    Serialize::serialize(mtime, out);
    Serialize::serialize(md5, out);
    Serialize::serialize(fileType, out);
    Serialize::serialize(chunks, out);
}
/// Returns the key of the file, which is its stored name
std::string File::getKey() const {
    return name;
}
/// Determines the Type of a filesystem entry
/// \param p Path of the entry to inspect
/// \return Symlink, Directory or Normal
/// \throws Exception if the entry is none of the three supported kinds
File::Type File::getFileType(const std::filesystem::path &p) {
    namespace fs = std::filesystem;

    // The symlink check has to come first: is_directory/is_regular_file follow symlinks
    if (fs::is_symlink(p)) {
        return Type::Symlink;
    }
    if (fs::is_directory(p)) {
        return Type::Directory;
    }
    if (fs::is_regular_file(p)) {
        return Type::Normal;
    }

    throw Exception("Unsupported file type! " + p.u8string());
}
/// Returns the "contents" of a non-regular filesystem entry
/// \param p Path of the entry
/// \return Empty vector for a directory, the bytes of the link target for a symlink
/// \throws Exception if the entry is a regular file, or its type is none of the handled ones
std::vector<char> File::getFileContents(const std::filesystem::path &p) {
    switch (getFileType(p)) {
        case Type::Normal:
            throw Exception(p.u8string() + " is a normal file!");

        case Type::Directory:
            return {};

        case Type::Symlink: {
            const auto linkTarget = std::filesystem::read_symlink(p).u8string();
            return std::vector<char>(linkTarget.begin(), linkTarget.end());
        }

        default:
            break;
    }

    throw Exception("Error with file " + p.u8string());
}
unsigned long long File::getFileMtime(const std::filesystem::path &p) {
auto type = getFileType(p);
if (type == Type::Normal || type == Type::Directory)
return static_cast<const unsigned long long int>(std::chrono::duration_cast<std::chrono::seconds>(std::filesystem::last_write_time(p).time_since_epoch()).count());
else if (type == Type::Symlink) {
auto path = p.u8string();
struct stat sb;
if (lstat(path.c_str(), &sb) != 0) throw Exception("Error reading mtime for " + p.u8string());
#ifdef __APPLE__
return sb.st_mtimespec.tv_sec;
#else
return sb.st_mtime;
#endif
}
throw Exception("Error with file " + p.u8string());
}
/// Returns the size of a filesystem entry
/// \param p Path of the entry
/// \return file_size for a regular file, otherwise the size of getFileContents(p)
/// \throws Exception on any error raised by getFileType or getFileContents
unsigned long long File::getFileSize(const std::filesystem::path &p) {
    const bool isRegular = (getFileType(p) == Type::Normal);
    if (!isRegular) {
        // Directories and symlinks have no data of their own, their "contents" define the size
        return getFileContents(p).size();
    }
    return std::filesystem::file_size(p);
}

76
src/repo/objects/File.h Normal file
View File

@@ -0,0 +1,76 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_FILE_H
#define SEMBACKUP_FILE_H
#include <array>
#include <filesystem>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "../Object.h"
/// Object representing a saved file
class File : public Object {
public:
    /// Kinds of filesystem entries a File can describe
    enum class Type {
        Normal,
        Symlink,
        Directory,
        END
    };

    /// Printable names for Normal, Symlink and Directory (END has no entry)
    static inline const std::unordered_map<Type, std::string> TypeToStr{{Type::Normal, "normal"}, {Type::Symlink, "symlink"}, {Type::Directory, "directory"}};

    /// Creates a File from already known metadata
    File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType);

    /// Deserialization constructor
    File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);

    /// \copydoc Object::serialize
    void serialize(std::vector<char> &out) const override;

    /// Returns the file relative path as key
    std::string getKey() const override;

    /// Helper static function to return Type for any file in the filesystem
    /// \param p Constant reference to the absolute path of the file
    /// \return Type of the file
    /// \throws Exception if the file is neither a symlink, a directory nor a regular file
    static Type getFileType(const std::filesystem::path &p);

    /// Helper static function to return "contents" for non-regular files in the filesystem
    /// \param p Constant reference to the absolute path of the file
    /// \return File contents (for a symlink - its destination, for a directory - empty)
    /// \throws Exception on any error, or if the file is regular
    static std::vector<char> getFileContents(const std::filesystem::path &p);

    /// Helper static function to return modification time for files in the filesystem
    /// \param p Constant reference to the absolute path of the file
    /// \return File last modification time
    /// \throws Exception on any error
    static unsigned long long getFileMtime(const std::filesystem::path &p);

    /// Helper static function to return file size for files in the filesystem
    /// \param p Constant reference to the absolute path of the file
    /// \return File size
    /// \throws Exception on any error
    static unsigned long long getFileSize(const std::filesystem::path &p);

    const std::string name;        ///< Relative path to backup root, as UTF-8 string
    const unsigned long long bytes;///< Number of bytes in the file
    const unsigned long long mtime;///< Last modification time as timestamp
    const std::string md5;         ///< Hash of the file
    const Type fileType;           ///< File type

    /// List of the chunks in the file
    /// A Normal file has regular chunks as its contents, for a Directory the list is empty,
    /// a Symlink has a chunk with its target path
    const std::vector<idType> chunks;
};

View File

@@ -0,0 +1,51 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#include "FileBuffer.h"
#include "../Serialize.h"
/// Creates a FileBuffer: loads the File object with the given ID and queues all of its chunk IDs
/// \param repo   Pointer to the backing Repository, stored and used for every later chunk load
/// \param fileId ID of the File object to read
FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId)
    : repo(repo),
      file(Serialize::deserialize<File>(repo->getObject(fileId))) {
    // chunksQueue starts out default-constructed (empty); fill it in file order
    for (const auto &chunkId: file.chunks) {
        chunksQueue.emplace(chunkId);
    }
}
/// Performs no work and always reports success
/// \return 0
int FileBuffer::sync() {
    constexpr int success = 0;
    return success;
}
std::streamsize FileBuffer::xsgetn(char *s, std::streamsize countr) {
if (underflow() == std::char_traits<char>::eof()) return 0;
for (int i = 0; i < countr; i++) {
auto c = uflow();
if (c != traits_type::eof()) {
s[i] = traits_type::to_char_type(c);
} else
return i;
}
return countr;
}
/// Returns the current byte and moves the read position past it
/// \return The byte as int, or traits_type::eof() when no data is left (position is not moved then)
int FileBuffer::uflow() {
    const auto current = underflow();
    if (current == traits_type::eof()) {
        return current;
    }
    ++curGetBufPos;
    return current;
}
/// Returns the current byte without consuming it, loading further chunks when needed
/// \return The byte as int, or traits_type::eof() once the loaded chunk is used up
///         and no chunks are left in the queue
int FileBuffer::underflow() {
    // Keep loading until there is a byte to return: a chunk with empty data must not be
    // reported as end of file while further chunks are still queued
    while (curGetBufPos >= getBuf.size()) {
        if (chunksQueue.empty()) {
            return traits_type::eof();
        }

        auto chunk = Serialize::deserialize<Chunk>(repo->getObject(chunksQueue.front()));
        // Chunk::data is const, so this is a copy
        getBuf = chunk.data;
        // The ID is removed only after the chunk was loaded successfully
        chunksQueue.pop();
        curGetBufPos = 0;
    }

    return traits_type::to_int_type(getBuf[curGetBufPos]);
}

View File

@@ -0,0 +1,43 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifndef SEMBACKUP_FILEBUFFER_H
#define SEMBACKUP_FILEBUFFER_H
#include <queue>
#include <streambuf>
#include "../Repository.h"
#include "Chunk.h"
#include "File.h"
/// Streambuf implementation to read files from a File in a Repository
class FileBuffer : public std::streambuf {
public:
    /// Creates a FileBuffer instance
    /// \param repo   Constant pointer to the backing Repository, should be available during the entire lifetime
    /// \param fileId ID of a file to "open"
    FileBuffer(const Repository *repo, Object::idType fileId);

protected:
    /// Returns the current byte without consuming it, or eof when no data is left
    int underflow() override;

    /// Returns the current byte and advances past it, or eof when no data is left
    int uflow() override;

    /// Reads up to \p count bytes into \p s and returns how many were read
    std::streamsize xsgetn(char *s, std::streamsize count) override;

    /// Always returns 0
    int sync() override;

private:
    // Note: the declaration order below is also the initialization order,
    // file is initialized after repo
    std::vector<char> getBuf;              ///< Currently loaded chunk
    size_t curGetBufPos = 0;               ///< Currently pointed to byte in the loaded chunk
    const Repository *repo;                ///< Pointer to the backing repository
    File file;                             ///< Backing file
    std::queue<Object::idType> chunksQueue;///< Chunks of file that weren't read yet
};
#endif//SEMBACKUP_FILEBUFFER_H