mirror of
https://github.com/usatiuk/backup.git
synced 2025-10-26 17:37:47 +01:00
init
This commit is contained in:
37
src/BytesFormatter.cpp
Normal file
37
src/BytesFormatter.cpp
Normal file
@@ -0,0 +1,37 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 13.05.2023.
|
||||
//
|
||||
|
||||
#include "BytesFormatter.h"
|
||||
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
/// Scales \p bytes to the largest binary unit it reaches and formats it.
/// Values of at least 1 KiB are printed with two decimals and the unit
/// KiB/MiB/GiB/TiB; smaller values are printed as a whole number of "Bytes".
/// \param bytes Number of bytes
/// \return BytesFormat holding the formatted number and the unit string
BytesFormatter::BytesFormat BytesFormatter::format(unsigned long long int bytes) {
    // Thresholds are computed in unsigned long long: with `unsigned long` the
    // TiB product (2^40) wraps on platforms where that type is 32 bits wide.
    constexpr unsigned long long KiB = 1024ULL;
    constexpr unsigned long long MiB = KiB * 1024ULL;
    constexpr unsigned long long GiB = MiB * 1024ULL;
    constexpr unsigned long long TiB = GiB * 1024ULL;

    std::stringstream outNum;
    outNum << std::fixed << std::setprecision(2);

    // `>=` so that an exact unit boundary is shown in that unit
    // (1048576 -> "1.00 MiB", not "1024.00 KiB").
    if (bytes >= TiB) {
        outNum << static_cast<double>(bytes) / static_cast<double>(TiB);
        return {outNum.str(), "TiB"};
    }
    if (bytes >= GiB) {
        outNum << static_cast<double>(bytes) / static_cast<double>(GiB);
        return {outNum.str(), "GiB"};
    }
    if (bytes >= MiB) {
        outNum << static_cast<double>(bytes) / static_cast<double>(MiB);
        return {outNum.str(), "MiB"};
    }
    if (bytes >= KiB) {
        outNum << static_cast<double>(bytes) / static_cast<double>(KiB);
        return {outNum.str(), "KiB"};
    }

    // Integer insertion is not affected by std::fixed/std::setprecision.
    outNum << bytes;
    return {outNum.str(), "Bytes"};
}
|
||||
|
||||
std::string BytesFormatter::formatStr(unsigned long long int bytes) {
|
||||
auto fmt = format(bytes);
|
||||
return fmt.number + " " + fmt.prefix;
|
||||
}
|
||||
31
src/BytesFormatter.h
Normal file
31
src/BytesFormatter.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 13.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_BYTESFORMATTER_H
|
||||
#define SEMBACKUP_BYTESFORMATTER_H
|
||||
|
||||
#include <string>
|
||||
|
||||
/// Helper that renders a byte count as a scaled number plus a unit
class BytesFormatter {
public:
    /// Result of formatting: the numeric text and the unit, kept separate
    struct BytesFormat {
        std::string number; ///< Textual number part of the value
        std::string prefix; ///< Unit of measure the number is expressed in
    };

    /// Splits \p bytes into a scaled number and its unit
    /// \param bytes Number of bytes
    /// \return BytesFormat with both parts filled in
    static BytesFormat format(unsigned long long bytes);

    /// Renders \p bytes as one string
    /// \param bytes Number of bytes
    /// \return The scaled number and the unit of measure, separated by a space
    static std::string formatStr(unsigned long long bytes);
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_BYTESFORMATTER_H
|
||||
81
src/Config.cpp
Normal file
81
src/Config.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 01.05.2023.
|
||||
//
|
||||
|
||||
#include "Config.h"
|
||||
#include "Exception.h"
|
||||
#include "repo/Serialize.h"
|
||||
#include <sstream>
|
||||
|
||||
/// Stores value \p v under key \p k.
/// Throws Exception when \p k is not in `keys`, when \p k already holds a
/// different value, or when an INT key's value is rejected by std::stoi.
/// Re-adding an identical value is a no-op.
Config &Config::add(const std::string &k, const std::string &v) {
    const auto spec = keys.find(k);
    if (spec == keys.end()) throw Exception("Unknown key " + k);

    const auto current = data.find(k);
    if (current != data.end()) {
        if (current->second != v) throw Exception("Trying to rewrite config!");
        return *this;
    }

    // Only INT keys are validated; STRING and LIST values are stored as given.
    if (spec->second.type == KeyType::INT) {
        try {
            std::stoi(v);
        } catch (...) {
            throw Exception("Can't convert " + k + " to integer!");
        }
    }

    data.emplace(k, v);
    return *this;
}
|
||||
|
||||
int Config::getInt(const std::string &k) const {
|
||||
return std::stoi(getStr(k));
|
||||
}
|
||||
|
||||
std::vector<std::string> Config::getList(const std::string &k) const {
|
||||
std::vector<std::string> out;
|
||||
std::string next;
|
||||
std::stringstream inss(getStr(k));
|
||||
while (std::getline(inss, next, ',')) {
|
||||
if (next != "")
|
||||
out.emplace_back(next);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
std::string Config::getStr(const std::string &k) const {
|
||||
if (data.count(k) > 0) return data.at(k);
|
||||
else if (keys.at(k).defaultval.has_value())
|
||||
return keys.at(k).defaultval.value();
|
||||
throw Exception("Option " + k + " not specified and no default value exists!");
|
||||
}
|
||||
|
||||
bool Config::exists(const std::string &k) const {
|
||||
return (data.count(k) > 0) || (keys.at(k).defaultval.has_value());
|
||||
}
|
||||
|
||||
/// Compiler-generated default constructor; the value map starts out empty
Config::Config() = default;
|
||||
|
||||
/// Deserialization constructor: fills the value map with what
/// Serialize::deserialize reads from the range starting at \p in and bounded by \p end.
Config::Config(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : data(Serialize::deserialize<decltype(data)>(in, end)) {}
|
||||
|
||||
void Config::serialize(std::vector<char> &out) const {
|
||||
std::vector<decltype(data)::value_type> temp;
|
||||
for (const auto &d: data) {
|
||||
if (keys.at(d.first).remember) {
|
||||
temp.emplace_back(d);
|
||||
}
|
||||
}
|
||||
Serialize::serialize(temp, out);
|
||||
}
|
||||
|
||||
void Config::merge(const Config &config) {
|
||||
for (const auto &d: config.data) {
|
||||
add(d.first, d.second);
|
||||
}
|
||||
}
|
||||
117
src/Config.h
Normal file
117
src/Config.h
Normal file
@@ -0,0 +1,117 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 01.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CONFIG_H
|
||||
#define SEMBACKUP_CONFIG_H
|
||||
|
||||
#include <optional>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
/// Utility class to manage configuration
/**
 * Stores string key/value pairs and exposes the static `keys` table that
 * describes every recognised key (default value, type, persistence flag and
 * help text).
 * Serializable; only the keys with the remember option set in `keys` are written.
 */
class Config {
public:
    /// Constructs an empty Config instance
    Config();

    /// Deserialization constructor
    Config(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);

    /// Adds a key \p k with value \p v to the config
    /// \param k Const reference to the config key
    /// \param v Config value
    /// \return Reference to itself
    /// \throws Exception if key is invalid or is already set with different value
    Config &add(const std::string &k, const std::string &v);

    /// Merges \p config into itself
    /// Adds every config pair from \p config to itself, throws on conflict
    /// \param config Constant reference to the source Config
    /// \throws Exception on merge conflict
    void merge(const Config &config);

    /// Returns the value of config key \p k converted to an int
    /// \param k Constant reference to the key string
    /// \return Config int
    int getInt(const std::string &k) const;

    /// Returns a string from config key \p k
    /// \param k Constant reference to the key string
    /// \return Stored value for the key, or the key's default value
    /// \throws Exception if the key has neither a stored nor a default value
    std::string getStr(const std::string &k) const;

    /// Returns the value of config key \p k split on commas
    /// \param k Constant reference to the key string
    /// \return Vector of the non-empty items
    std::vector<std::string> getList(const std::string &k) const;

    /// Checks if key \p k exists in the config
    /// \param k Constant reference to the key string
    /// \return True if a value is stored for the key or its default value exists
    bool exists(const std::string &k) const;

    /// Serialization function
    void serialize(std::vector<char> &out) const;

    using serializable = std::true_type;

    /// Value kinds a config key can have
    enum class KeyType {
        STRING,
        INT,
        LIST
    };

    /// Struct to record key options
    struct keyopts {
        std::optional<std::string> defaultval;///< Key's default value
        KeyType type;                         ///< Key's type
        bool remember;                        ///< Whether the key should be serialized
        std::string info;                     ///< Printed in help
    };

    /// Used for printing help
    const static inline std::unordered_map<KeyType, std::string> KeyTypeToStr{{KeyType::STRING, "string"}, {KeyType::INT, "number"}, {KeyType::LIST, "comma-separated list"}};

    /// Default values and their metadata
    const static inline std::unordered_map<std::string, keyopts> keys{
            {"compression", {"none", KeyType::STRING, true, "Compression algorithm to use (zlib or none)"}},
            {"encryption", {"none", KeyType::STRING, true, "Encryption algorithm to use (aes or none)"}},
            {"compression-level", {"-1", KeyType::INT, true, "Compression level to use (0 to 9)"}},
            {"repo", {std::nullopt, KeyType::STRING, false, "Repository root"}},
            {"to", {std::nullopt, KeyType::STRING, false, "Destination of restore"}},
            {"from", {std::nullopt, KeyType::STRING, true, "Backed up folder"}},
            {"type", {"normal", KeyType::STRING, false, "Type of archive"}},
            {"aid", {std::nullopt, KeyType::INT, false, "ID of archive to restore/compare to"}},
            {"aid2", {std::nullopt, KeyType::INT, false, "ID of archive to compare with"}},
            {"threads", {std::nullopt, KeyType::INT, false, "Number of threads to use"}},
            {"prefix", {"", KeyType::STRING, false, "Prefix of files to compare"}},
            {"password", {std::nullopt, KeyType::STRING, false, "Encryption password"}},
            {"salt", {std::nullopt, KeyType::STRING, true, "Encryption salt"}},
            {"chunker", {"buzhash", KeyType::STRING, true, "Chunker to use (const, buzhash)"}},
            {"chunker-min", {"256", KeyType::INT, true, "Min chunk size in KB"}},
            {"chunker-max", {"4096", KeyType::INT, true, "Max chunk size in KB"}},
            {"chunker-mask", {"20", KeyType::INT, true, "Chunker hash bit mask (mask of n bits results in average chunk size of 2^n bytes)"}},
            {"repo-target", {"128", KeyType::INT, true, "Target size of files for FileRepository"}},
            {"full-period", {"2", KeyType::INT, true, "Interval between forced full backups"}},
            {"progress", {"pretty", KeyType::STRING, false, "How to print progress (simple, pretty, none)"}},
            {"verbose", {"1", KeyType::INT, false, "Message verbosity (0 - error, 1 - info, -1 - quiet)"}},
            {"dedup", {"on", KeyType::STRING, true, "Turns deduplication on/off"}},
            {"change-detectors", {"type,size,etime", KeyType::LIST, true, "Change detectors to use (in order)"}},
            {"diff-mode", {"normal", KeyType::STRING, false, "Diff mode (file or normal)"}},
    };

private:
    std::unordered_map<std::string, std::string> data;///< Values that were explicitly set or deserialized
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CONFIG_H
|
||||
18
src/Context.h
Normal file
18
src/Context.h
Normal file
@@ -0,0 +1,18 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CONTEXT_H
|
||||
#define SEMBACKUP_CONTEXT_H
|
||||
|
||||
#include "Config.h"
|
||||
#include "Logger.h"
|
||||
#include "repo/Repository.h"
|
||||
|
||||
struct Context {
|
||||
Logger *logger;
|
||||
Repository *repo;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CONTEXT_H
|
||||
103
src/Diff.cpp
Normal file
103
src/Diff.cpp
Normal file
@@ -0,0 +1,103 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 06.05.2023.
|
||||
//
|
||||
|
||||
#include "Diff.h"
|
||||
|
||||
#include "BytesFormatter.h"
|
||||
#include "Exception.h"
|
||||
#include "Signals.h"
|
||||
#include "chunkers/BuzhashChunker.h"
|
||||
|
||||
bool Diff::isBinary(const ComparableFile &c) {
|
||||
auto b = c.contents();
|
||||
for (unsigned int i = 0; i < std::min(c.bytes, 2048ULL); i++) {
|
||||
auto e = b->sbumpc();
|
||||
if (std::streambuf::traits_type::to_char_type(e) == '\0') return true;
|
||||
if (e == std::streambuf::traits_type::eof()) return false;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string Diff::diff(const ComparableFile &c1, const ComparableFile &c2) {
|
||||
if (isBinary(c1) || isBinary(c2)) {
|
||||
if (!(isBinary(c1) && isBinary(c2))) return "One of the files is binary, the other is not";
|
||||
return diffPercent(c1, c2);
|
||||
}
|
||||
|
||||
std::stringstream out;
|
||||
auto b1 = c1.contents();
|
||||
auto b2 = c2.contents();
|
||||
std::multimap<std::string, unsigned long> f1lines;
|
||||
std::multimap<std::string, unsigned long> f2diff;
|
||||
std::string line;
|
||||
std::istream is1(b1.get());
|
||||
std::istream is2(b2.get());
|
||||
|
||||
int i = 0;
|
||||
while (std::getline(is1, line)) {
|
||||
/// Exit when asked to
|
||||
if (Signals::shouldQuit) throw Exception("Quitting");
|
||||
f1lines.emplace(line, ++i);
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (std::getline(is2, line)) {
|
||||
/// Exit when asked to
|
||||
if (Signals::shouldQuit) throw Exception("Quitting");
|
||||
if (f1lines.count(line) > 0) f1lines.erase(f1lines.find(line));
|
||||
else
|
||||
f2diff.emplace(line, ++i);
|
||||
}
|
||||
|
||||
out << "\nLines only in first file: " << std::endl;
|
||||
for (const auto &s: f1lines) {
|
||||
out << s.second << "<" << s.first << std::endl;
|
||||
}
|
||||
out << "Lines only in second file: " << std::endl;
|
||||
for (const auto &s: f2diff) {
|
||||
out << s.second << ">" << s.first << std::endl;
|
||||
}
|
||||
out << "^^^\n";
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::string Diff::diffPercent(const ComparableFile &c1, const ComparableFile &c2) {
|
||||
auto b1 = c1.contents();
|
||||
auto b2 = c2.contents();
|
||||
BuzhashChunker ch1(b1.get(), 512 * 1024, 1024 * 1024, 19, 31);
|
||||
BuzhashChunker ch2(b2.get(), 512 * 1024, 1024 * 1024, 19, 31);
|
||||
std::multiset<std::string> ch1hashes;
|
||||
std::multiset<std::string> ch2diff;
|
||||
std::unordered_map<std::string, unsigned long long> hashsize;
|
||||
for (auto chunkp: ch1) {
|
||||
/// Exit when asked to
|
||||
if (Signals::shouldQuit) throw Exception("Quitting");
|
||||
if (chunkp.second.empty()) continue;
|
||||
std::string md5(chunkp.first.begin(), chunkp.first.end());
|
||||
ch1hashes.emplace(md5);
|
||||
hashsize[md5] = chunkp.second.size();
|
||||
}
|
||||
|
||||
for (auto chunkp: ch2) {
|
||||
/// Exit when asked to
|
||||
if (Signals::shouldQuit) throw Exception("Quitting");
|
||||
if (chunkp.second.empty()) continue;
|
||||
std::string md5(chunkp.first.begin(), chunkp.first.end());
|
||||
hashsize[md5] = chunkp.second.size();
|
||||
if (ch1hashes.count(md5) > 0) ch1hashes.erase(md5);
|
||||
else if (ch1hashes.count(md5) == 0)
|
||||
ch2diff.emplace(md5);
|
||||
}
|
||||
|
||||
unsigned long long diff = 0;
|
||||
|
||||
for (const auto &c: ch1hashes) {
|
||||
diff += hashsize[c];
|
||||
}
|
||||
for (const auto &c: ch2diff) {
|
||||
diff += hashsize[c];
|
||||
}
|
||||
|
||||
return "at most " + BytesFormatter::formatStr(diff);
|
||||
}
|
||||
38
src/Diff.h
Normal file
38
src/Diff.h
Normal file
@@ -0,0 +1,38 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 06.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_DIFF_H
|
||||
#define SEMBACKUP_DIFF_H
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
#include "change_detectors/ComparableFile.h"
|
||||
|
||||
/// Utility class to compute difference between two ComparableFile%s
|
||||
class Diff {
|
||||
public:
|
||||
/// Compute the difference between two ComparableFile%s
|
||||
/// If the file is binary, calls diffPercent, which outputs the difference between files in bytes
|
||||
/// Otherwise prints linewise difference
|
||||
/// \param c1 Constant reference to the first ComparableFile
|
||||
/// \param c2 Constant reference to the second ComparableFile
|
||||
/// \returns Difference message
|
||||
static std::string diff(const ComparableFile &c1, const ComparableFile &c2);
|
||||
|
||||
/// Calculates the difference between \p c1 amd \p c2 in bytes
|
||||
/// \param c1 Constant reference to the first ComparableFile
|
||||
/// \param c2 Constant reference to the second ComparableFile
|
||||
/// \returns Difference message
|
||||
static std::string diffPercent(const ComparableFile &c1, const ComparableFile &c2);
|
||||
|
||||
/// Checks if a file is binary
|
||||
/// A file is considered binary if its first 2048 bytes contain a null byte
|
||||
/// \param c1 Constant reference to the checked ComparableFile
|
||||
/// \return True if the file is considered binary, false otherwise
|
||||
static bool isBinary(const ComparableFile &c1);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_DIFF_H
|
||||
32
src/Exception.cpp
Normal file
32
src/Exception.cpp
Normal file
@@ -0,0 +1,32 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 01.05.2023.
|
||||
//
|
||||
|
||||
#include "Exception.h"
|
||||
#include <execinfo.h>
|
||||
#include <sstream>
|
||||
|
||||
/// Builds the message from \p text, a newline and the current stack trace
Exception::Exception(const std::string &text)
    : std::runtime_error(text + "\n" + getStacktrace()) {}

/// Same as above for a C string
Exception::Exception(const char *text)
    : std::runtime_error(std::string(text) + "\n" + getStacktrace()) {}
|
||||
|
||||
// Based on: https://www.gnu.org/software/libc/manual/html_node/Backtraces.html
|
||||
std::string Exception::getStacktrace() {
|
||||
std::vector<void *> functions(50);
|
||||
char **strings;
|
||||
int n;
|
||||
|
||||
n = backtrace(functions.data(), 50);
|
||||
strings = backtrace_symbols(functions.data(), n);
|
||||
|
||||
std::stringstream out;
|
||||
|
||||
if (strings != nullptr) {
|
||||
out << "Stacktrace:" << std::endl;
|
||||
for (int i = 0; i < n; i++)
|
||||
out << strings[i] << std::endl;
|
||||
}
|
||||
|
||||
free(strings);
|
||||
return out.str();
|
||||
}
|
||||
24
src/Exception.h
Normal file
24
src/Exception.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 01.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_EXCEPTION_H
|
||||
#define SEMBACKUP_EXCEPTION_H
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/// Project exception type: a std::runtime_error whose message carries a stacktrace obtained via execinfo
class Exception : public std::runtime_error {
public:
    /// Creates the exception from a std::string message
    Exception(const std::string &text);

    /// Creates the exception from a C string message
    Exception(const char *text);

private:
    /// Returns the current stacktrace as text
    static std::string getStacktrace();
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_EXCEPTION_H
|
||||
19
src/Logger.cpp
Normal file
19
src/Logger.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "Logger.h"
|
||||
|
||||
/// Stores the maximum level to print and a reference to the output stream
Logger::Logger(int level, std::ostream &out)
    : loglevel(level), out(std::ref(out)) {}
|
||||
|
||||
void Logger::write(const std::string &what, int whatlevel) {
|
||||
if (whatlevel <= loglevel) {
|
||||
std::lock_guard outLock(outM);
|
||||
out.get() << what << std::flush;
|
||||
}
|
||||
}
|
||||
|
||||
void Logger::setLevel(int level) {
|
||||
loglevel = level;
|
||||
}
|
||||
25
src/Logger.h
Normal file
25
src/Logger.h
Normal file
@@ -0,0 +1,25 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_LOGGER_H
|
||||
#define SEMBACKUP_LOGGER_H
|
||||
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
|
||||
/// Level-filtered writer of messages to an output stream
class Logger {
public:
    /// \param level Highest message level that is still written (default 3)
    /// \param out   Stream to write to (default std::cout)
    Logger(int level = 3, std::ostream &out = std::cout);

    /// Writes \p what if \p whatlevel is within the configured level
    void write(const std::string &what, int whatlevel);

    /// Changes the configured level
    void setLevel(int level);

private:
    int loglevel;                            ///< Highest level that is written
    std::mutex outM;                         ///< Guards writes to the stream
    std::reference_wrapper<std::ostream> out;///< Destination stream
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_LOGGER_H
|
||||
57
src/Progress.cpp
Normal file
57
src/Progress.cpp
Normal file
@@ -0,0 +1,57 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
#include "Progress.h"
|
||||
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
/// Stores the output callback and format, reads the progress type from
/// \p conf ("progress" key) and, unless that type is "none", prints two
/// newlines and starts the worker thread.
Progress::Progress(std::function<void(std::string, int)> out,
                   std::vector<std::variant<std::function<std::string()>, std::string>> format,
                   const Config &conf, int level)
    : progresslevel(level),
      format(std::move(format)),
      out(std::move(out)),
      type(conf.getStr("progress")) {
    if (type == "none") return;

    this->out("\n\n", level);
    thread = std::thread(&Progress::showProgress, this);
}
|
||||
|
||||
/// Raises the stop flag and waits for the worker thread, if one was started
Progress::~Progress() {
    stop = true;
    if (!thread.joinable()) return;
    thread.join();
}
|
||||
|
||||
void Progress::showProgress() {
|
||||
while (!stop) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
{
|
||||
update(std::unique_lock(refreshM));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Progress::print(const std::string &s, int level) {
|
||||
std::unique_lock refreshL(refreshM);
|
||||
out((type == "pretty" ? "\r\33[2K " : "") + s + "\n", level);
|
||||
update(std::move(refreshL));
|
||||
}
|
||||
|
||||
void Progress::update(std::unique_lock<std::mutex> &&lock) {
|
||||
std::stringstream outs;
|
||||
|
||||
if (type == "pretty")
|
||||
outs << "\r\33[2K ";
|
||||
|
||||
for (auto const &l: format) {
|
||||
if (std::holds_alternative<std::string>(l)) outs << std::get<std::string>(l);
|
||||
else
|
||||
outs << std::get<std::function<std::string()>>(l)();
|
||||
}
|
||||
|
||||
if (type == "pretty")
|
||||
outs << "\r";
|
||||
else
|
||||
outs << "\n";
|
||||
|
||||
out(outs.str(), progresslevel);
|
||||
lock.unlock();
|
||||
}
|
||||
55
src/Progress.h
Normal file
55
src/Progress.h
Normal file
@@ -0,0 +1,55 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_PROGRESS_H
|
||||
#define SEMBACKUP_PROGRESS_H
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <variant>
|
||||
|
||||
#include "Config.h"
|
||||
|
||||
/// Class to handle writing progress to the screen
|
||||
class Progress {
|
||||
public:
|
||||
/// Constructs the Progress instance
|
||||
/// \param out Function to call for output
|
||||
/// \param format Format of the progress string, vector of strings or functions that return strings
|
||||
/// \param conf Config, used to specify format (`pretty` for line rewriting, `simple` for normal line printing, or `none`)
|
||||
Progress(std::function<void(std::string, int)> out, std::vector<std::variant<std::function<std::string()>, std::string>> format, const Config &conf, int level = 1);
|
||||
|
||||
Progress &operator=(Progress rhs) = delete;
|
||||
Progress(const Progress &orig) = delete;
|
||||
|
||||
/// Write a string to the terminal without disturbing the progress bar
|
||||
void print(const std::string &s, int level);
|
||||
|
||||
/// Destructor, instructs the worker thread to stop
|
||||
~Progress();
|
||||
|
||||
private:
|
||||
int progresslevel;
|
||||
std::vector<std::variant<std::function<std::string()>, std::string>> format;///< Format of the progressbar
|
||||
std::function<void(std::string, int)> out; ///< Output function
|
||||
|
||||
/// Thread loop function
|
||||
void showProgress();
|
||||
std::atomic<bool> stop = false;///< Stop flag
|
||||
|
||||
std::mutex refreshM;///< Used to prevent mangling the output between print and progressbar update
|
||||
|
||||
/// Prints the progressbar on screen, then unlocks the mutex
|
||||
void update(std::unique_lock<std::mutex> &&lock);
|
||||
const std::string type;///< Progressbar type (Taken from Config)
|
||||
|
||||
std::thread thread;///< Worker thread
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_PROGRESS_H
|
||||
31
src/RunningAverage.cpp
Normal file
31
src/RunningAverage.cpp
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
#include "RunningAverage.h"
|
||||
|
||||
/// Stores the sampling function and limits, then starts the sampling thread
RunningAverage::RunningAverage(std::function<unsigned long long int()> getFunc, int max, int ms)
    : getFunc(std::move(getFunc)),
      max(max),
      ms(ms),
      thread(&RunningAverage::loop, this) {}
|
||||
|
||||
void RunningAverage::loop() {
|
||||
while (!stop) {
|
||||
{
|
||||
std::lock_guard lock(dataLock);
|
||||
data.emplace_front(getFunc());
|
||||
if (data.size() > max) data.pop_back();
|
||||
}
|
||||
std::this_thread::sleep_for(std::chrono::duration(std::chrono::milliseconds(ms)));
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets the stop flag and waits for the sampling thread to finish
RunningAverage::~RunningAverage() {
    stop.store(true);
    thread.join();
}
|
||||
|
||||
unsigned long long RunningAverage::get() {
|
||||
std::lock_guard lock(dataLock);
|
||||
if (data.empty()) return 0;
|
||||
return std::accumulate(data.begin(), data.end(), 0UL) / data.size();
|
||||
}
|
||||
44
src/RunningAverage.h
Normal file
44
src/RunningAverage.h
Normal file
@@ -0,0 +1,44 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_RUNNINGAVERAGE_H
|
||||
#define SEMBACKUP_RUNNINGAVERAGE_H
|
||||
|
||||
#include <atomic>
|
||||
#include <deque>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
#include <thread>
|
||||
|
||||
/// Samples a value on a background thread and averages the most recent samples
class RunningAverage {
public:
    /// Starts sampling
    /// \param getFunc Function that samples the value
    /// \param max     Max number of samples to average
    /// \param ms      Sampling period in milliseconds
    RunningAverage(std::function<unsigned long long()> getFunc, int max, int ms);

    /// Destructor, tells the thread to exit and joins it
    ~RunningAverage();

    /// Returns the average of the stored samples
    unsigned long long get();

private:
    std::atomic<bool> stop = false;             ///< Stop signal
    std::function<unsigned long long()> getFunc;///< Sampling function
    std::deque<unsigned long long> data;        ///< Collected samples, newest first
    int max;                                    ///< Max number of samples
    int ms;                                     ///< Sampling period
    std::mutex dataLock;                        ///< Deque lock
    std::thread thread;                         ///< Worker thread

    /// Worker thread loop
    void loop();
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_RUNNINGAVERAGE_H
|
||||
20
src/RunningDiffAverage.cpp
Normal file
20
src/RunningDiffAverage.cpp
Normal file
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#include "RunningDiffAverage.h"
|
||||
|
||||
RunningDiffAverage::RunningDiffAverage(std::function<unsigned long long int()> getFunc, int max, int ms)
|
||||
: runningAverage(
|
||||
[this, get = std::move(getFunc)] {
|
||||
auto cur = get();
|
||||
auto calc = cur - prev;
|
||||
prev = cur;
|
||||
return calc;
|
||||
},
|
||||
max, ms) {
|
||||
}
|
||||
|
||||
unsigned long long RunningDiffAverage::get() {
|
||||
return runningAverage.get();
|
||||
}
|
||||
30
src/RunningDiffAverage.h
Normal file
30
src/RunningDiffAverage.h
Normal file
@@ -0,0 +1,30 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_RUNNINGDIFFAVERAGE_H
|
||||
#define SEMBACKUP_RUNNINGDIFFAVERAGE_H
|
||||
|
||||
#include <functional>
|
||||
|
||||
#include "RunningAverage.h"
|
||||
|
||||
/// Computes the rolling average of differences between last sampled and currently sampled numbers
|
||||
class RunningDiffAverage {
|
||||
public:
|
||||
///
|
||||
/// \param getFunc Function that samples the value
|
||||
/// \param max Max number of samples to average
|
||||
/// \param ms Sampling period
|
||||
RunningDiffAverage(std::function<unsigned long long()> getFunc, int max, int ms);
|
||||
|
||||
/// Returns the average
|
||||
unsigned long long get();
|
||||
|
||||
private:
|
||||
unsigned long long prev = 0; ///< Previously sampled value
|
||||
RunningAverage runningAverage;///< Backing RunningAverage
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_RUNNINGDIFFAVERAGE_H
|
||||
12
src/Signals.cpp
Normal file
12
src/Signals.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
#include "Signals.h"
|
||||
|
||||
/// Installs handle() as the SIGINT handler
void Signals::setup() {
    std::signal(SIGINT, &Signals::handle);
}
|
||||
|
||||
/// Signal handler: only raises the shouldQuit flag; the signal number is not used
void Signals::handle(int /*signum*/) {
    shouldQuit = 1;
}
|
||||
24
src/Signals.h
Normal file
24
src/Signals.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_SIGNALS_H
|
||||
#define SEMBACKUP_SIGNALS_H
|
||||
|
||||
#include <csignal>
|
||||
|
||||
/// Process-wide signal handling
class Signals {
public:
    /// Installs the signal handlers
    static void setup();

    static inline volatile std::sig_atomic_t shouldQuit = 0;///< Set by the handler once the program was requested to exit

private:
    /// Handler invoked for the registered signals
    static void handle(int signum);
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_SIGNALS_H
|
||||
67
src/ThreadPool.cpp
Normal file
67
src/ThreadPool.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
#include <string>
|
||||
|
||||
#include "Signals.h"
|
||||
#include "ThreadPool.h"
|
||||
|
||||
/// Stores the error callback and starts the worker threads.
/// \param onError    Callback that receives the message of an exception thrown by a task
/// \param workersNum Number of worker threads; 0 is treated as 1
ThreadPool::ThreadPool(std::function<void(std::string)> onError, std::size_t workersNum) : onError(std::move(onError)) {
    // std::thread::hardware_concurrency() (the default argument) may return 0
    // when the value is not computable; a pool without workers would never
    // run a task, so always start at least one.
    if (workersNum == 0) workersNum = 1;

    threads.reserve(workersNum);
    for (std::size_t i = 0; i < workersNum; ++i) threads.emplace_back(&ThreadPool::loop, this);
}
|
||||
|
||||
/// Raises the stop flag, wakes every waiting worker and joins all worker threads
ThreadPool::~ThreadPool() {
    stop = true;
    somethingNew.notify_all();
    for (std::thread &worker: threads) worker.join();
}
|
||||
|
||||
void ThreadPool::push(std::function<void()> &&func) {
|
||||
{
|
||||
std::lock_guard lock(queueLock);
|
||||
queue.push(std::move(func));
|
||||
}
|
||||
somethingNew.notify_one();
|
||||
}
|
||||
|
||||
void ThreadPool::loop() {
|
||||
while (true) {
|
||||
std::unique_lock qLock(queueLock);
|
||||
|
||||
while (queue.empty() && !stop && !Signals::shouldQuit) {
|
||||
// Check for any of the stop signals every second
|
||||
somethingNew.wait_for(qLock, std::chrono::seconds(1));
|
||||
}
|
||||
|
||||
if (stop || Signals::shouldQuit) {
|
||||
// Drop all tasks if requested to exit
|
||||
queue = {};
|
||||
if (queue.empty() && running == 0) { finished.notify_all(); }
|
||||
return;
|
||||
}
|
||||
|
||||
auto task = std::move(queue.front());
|
||||
|
||||
running++;
|
||||
queue.pop();
|
||||
|
||||
qLock.unlock();
|
||||
|
||||
try {
|
||||
task();
|
||||
} catch (std::exception &e) {
|
||||
onError(std::string(e.what()));
|
||||
}
|
||||
|
||||
{
|
||||
std::lock_guard qLock(queueLock);
|
||||
running--;
|
||||
if (queue.empty() && running == 0) { finished.notify_all(); }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool ThreadPool::empty() {
|
||||
std::lock_guard qLock(queueLock);
|
||||
if (queue.empty() && running == 0) return true;
|
||||
return false;
|
||||
}
|
||||
54
src/ThreadPool.h
Normal file
54
src/ThreadPool.h
Normal file
@@ -0,0 +1,54 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 17.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_THREADPOOL_H
|
||||
#define SEMBACKUP_THREADPOOL_H
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <mutex>
|
||||
#include <queue>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
/// Thread pool
/**
 * Reacts to ctrl-c via Signals, but tasks are expected to check for it as well
 * Exception messages from tasks are forwarded to the provided handler
 */
class ThreadPool {
public:
    /// Constructs a thread pool
    /// \param onError    Callback invoked when executing a task throws an exception
    /// \param workersNum Number of worker threads (default = std::thread::hardware_concurrency())
    ThreadPool(std::function<void(std::string)> onError, std::size_t workersNum = std::thread::hardware_concurrency());

    /// Destructor, tells the threads to stop and joins them
    ~ThreadPool();

    /// Pushes a new task to the queue
    /// \param func Rvalue reference to the task function
    void push(std::function<void()> &&func);

    /// Returns true if the queue is empty and there are no tasks running
    bool empty();

    std::mutex finishedLock;         ///< Lock to use when waiting on the finished variable
    std::condition_variable finished;///< Condition variable to wait for all tasks to finish

private:
    /// Body of every worker thread
    void loop();

    std::queue<std::function<void()>> queue; ///< Task queue
    std::mutex queueLock;                    ///< Task queue lock
    std::condition_variable somethingNew;    ///< Condition variable to wait for new tasks
    std::vector<std::thread> threads;        ///< Vector of worker threads
    std::atomic<bool> stop = false;          ///< Stop signal for threads
    std::atomic<int> running = 0;            ///< Number of currently running tasks
    std::function<void(std::string)> onError;///< Function to call on exception in task
};
|
||||
|
||||
#endif//SEMBACKUP_THREADPOOL_H
|
||||
7
src/change_detectors/ChangeDetector.cpp
Normal file
7
src/change_detectors/ChangeDetector.cpp
Normal file
@@ -0,0 +1,7 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#include "ChangeDetector.h"
|
||||
|
||||
/// Out-of-line definition of the virtual destructor; the compiler-generated behaviour is used
ChangeDetector::~ChangeDetector() = default;
|
||||
24
src/change_detectors/ChangeDetector.h
Normal file
24
src/change_detectors/ChangeDetector.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CHANGEDETECTOR_H
|
||||
#define SEMBACKUP_CHANGEDETECTOR_H
|
||||
|
||||
#include "ComparableFile.h"
|
||||
|
||||
/// An interface for a class comparing any two given ComparableFile%s
|
||||
class ChangeDetector {
|
||||
public:
|
||||
/// Abstract method for comparing two ComparableFile%s
|
||||
/// \param f1 Constant reference to the first ComparableFile
|
||||
/// \param f2 Constant reference to the second ComparableFile
|
||||
/// \return True if these objects are considered *different*, False otherwise
|
||||
virtual bool check(const ComparableFile &f1, const ComparableFile &f2) const = 0;
|
||||
|
||||
/// Default virtual destructor
|
||||
virtual ~ChangeDetector();
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CHANGEDETECTOR_H
|
||||
16
src/change_detectors/ChangeDetectorContainer.cpp
Normal file
16
src/change_detectors/ChangeDetectorContainer.cpp
Normal file
@@ -0,0 +1,16 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 04.05.2023.
|
||||
//
|
||||
|
||||
#include "ChangeDetectorContainer.h"
|
||||
|
||||
#include <functional>
|
||||
|
||||
bool ChangeDetectorContainer::check(const ComparableFile &f1, const ComparableFile &f2) const {
|
||||
return std::any_of(changeDetectors.begin(), changeDetectors.end(),
|
||||
[&](const auto &changeDetector) {
|
||||
return changeDetector->check(f1, f2);
|
||||
});
|
||||
}
|
||||
|
||||
/// Takes ownership of the given ChangeDetector%s by moving the vector into the container
ChangeDetectorContainer::ChangeDetectorContainer(std::vector<std::unique_ptr<ChangeDetector>> &&changeDetectors)
    : changeDetectors(std::move(changeDetectors)) {}
|
||||
33
src/change_detectors/ChangeDetectorContainer.h
Normal file
33
src/change_detectors/ChangeDetectorContainer.h
Normal file
@@ -0,0 +1,33 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 04.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CHANGEDETECTORCONTAINER_H
|
||||
#define SEMBACKUP_CHANGEDETECTORCONTAINER_H
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "ChangeDetector.h"
|
||||
#include "ComparableFile.h"
|
||||
|
||||
/// Wrapper for multiple ChangeDetector%s
|
||||
/** A ChangeDetector implementation that serves as a convenience wrapper for
|
||||
* multiple ChangeDetector%s, its check returns true if any of the wrapped ChangeDetector%s return true
|
||||
*/
|
||||
class ChangeDetectorContainer : public ChangeDetector {
|
||||
public:
|
||||
/// Constructs a ChangeDetectorContainer using a vector of existing ChangeDetector%s
|
||||
/// \param changeDetectors An rvalue reference to a vector of unique pointers of ChangeDetector
|
||||
ChangeDetectorContainer(std::vector<std::unique_ptr<ChangeDetector>> &&changeDetectors);
|
||||
|
||||
/// \copydoc ChangeDetector::check
|
||||
/// \return ComparableFile%s are considered different if any of the wrapped ChangeDetector%s return true
|
||||
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<ChangeDetector>> changeDetectors;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CHANGEDETECTORCONTAINER_H
|
||||
35
src/change_detectors/ChangeDetectorFactory.cpp
Normal file
35
src/change_detectors/ChangeDetectorFactory.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#include "ChangeDetectorFactory.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../Exception.h"
|
||||
#include "ContentsChangeDetector.h"
|
||||
#include "EditTimeChangeDetector.h"
|
||||
#include "SizeChangeDetector.h"
|
||||
#include "TypeChangeDetector.h"
|
||||
|
||||
std::unique_ptr<ChangeDetector> ChangeDetectorFactory::getChangeDetector(const std::string &type) {
|
||||
if (type == "etime") {
|
||||
return std::make_unique<EditTimeChangeDetector>();
|
||||
} else if (type == "size") {
|
||||
return std::make_unique<SizeChangeDetector>();
|
||||
} else if (type == "type") {
|
||||
return std::make_unique<TypeChangeDetector>();
|
||||
} else if (type == "contents") {
|
||||
return std::make_unique<ContentsChangeDetector>();
|
||||
} else
|
||||
throw Exception("Unknown ChangeDetector type " + type);
|
||||
}
|
||||
|
||||
/// Creates one ChangeDetector per entry of the "change-detectors" list in \p config
/// \return ChangeDetectorContainer owning the created detectors, in list order
ChangeDetectorContainer ChangeDetectorFactory::getChangeDetectors(const Config &config) {
    std::vector<std::unique_ptr<ChangeDetector>> detectors;

    for (const auto &typeName: config.getList("change-detectors"))
        detectors.push_back(getChangeDetector(typeName));

    return ChangeDetectorContainer(std::move(detectors));
}
|
||||
33
src/change_detectors/ChangeDetectorFactory.h
Normal file
33
src/change_detectors/ChangeDetectorFactory.h
Normal file
@@ -0,0 +1,33 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CHANGEDETECTORFACTORY_H
|
||||
#define SEMBACKUP_CHANGEDETECTORFACTORY_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "../Config.h"
|
||||
#include "ChangeDetector.h"
|
||||
#include "ChangeDetectorContainer.h"
|
||||
|
||||
/// Factory class for ChangeDetector
|
||||
/** Can create either a vector of ChangeDetector%s according to Config,
|
||||
* or an individual ChangeDetector from a type string
|
||||
*/
|
||||
class ChangeDetectorFactory {
|
||||
public:
|
||||
/// Creates a ChangeDetector of given type and returns an unique pointer to it
|
||||
/// \param type Constant reference to a string containing type of the ChangeDetector to create
|
||||
/// \return Unique pointer to constructed ChangeDetector
|
||||
static std::unique_ptr<ChangeDetector> getChangeDetector(const std::string &type);
|
||||
|
||||
/// Constructs a vector of unique pointers to ChangeDetector%s according to the given \p config
|
||||
/// \param config Config with comma-separated "change-detectors" option set, for each entry a ChangeDetector will be created
|
||||
/// \return A vector of unique pointers to ChangeDetector%s constructed according to \p config
|
||||
static ChangeDetectorContainer getChangeDetectors(const Config &config);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CHANGEDETECTORFACTORY_H
|
||||
42
src/change_detectors/ComparableFile.cpp
Normal file
42
src/change_detectors/ComparableFile.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
|
||||
#include "ComparableFile.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#include "../Exception.h"
|
||||
#include "../repo/objects/FileBuffer.h"
|
||||
|
||||
/// Builds a ComparableFile describing a File stored in a Repository
/// The #contents function creates a FileBuffer for the file's id on every call
/// \param file File whose name, type, size, mtime and id are copied
/// \param repo Repository handed to the FileBuffer, must stay valid as long as #contents may be called
ComparableFile::ComparableFile(const File &file, const Repository *repo)
    : path(file.name),
      type(file.fileType),
      bytes(file.bytes),
      mtime(file.mtime),
      contents(
              // Capture only the id: the rest of the File is not needed to open the buffer,
              // so there is no reason to store a full copy of it inside the closure
              [id = file.id, repo]() {
                  return std::make_unique<FileBuffer>(repo, id);
              }) {}
|
||||
|
||||
/// Builds a ComparableFile describing a file in the filesystem
/// For File::Type::Normal files #contents opens a std::filebuf on \p p; for every other type it
/// returns a std::stringbuf over the bytes produced by File::getFileContents
/// \param p    Path to the file
/// \param base Base path, #path is set to \p p made lexically relative to it
ComparableFile::ComparableFile(const std::filesystem::path &p, const std::filesystem::path &base)
    : path(p.lexically_relative(base).u8string()),
      type(File::getFileType(p)),
      bytes(File::getFileSize(p)),
      mtime(File::getFileMtime(p)),
      contents(
              // Only the absolute path and the type are needed to open the file later.
              // `type` is declared before `contents`, so it is already initialized here.
              [p, type = this->type]() -> std::unique_ptr<std::streambuf> {
                  if (type == File::Type::Normal) {
                      auto fb = std::make_unique<std::filebuf>();
                      fb->open(p, std::ios::in | std::ios::binary);
                      if (!fb->is_open()) throw Exception("Can't open " + p.u8string() + " for reading!");
                      return fb;
                  }

                  auto contentsVector = File::getFileContents(p);
                  std::string contents = {contentsVector.begin(), contentsVector.end()};

                  return std::make_unique<std::stringbuf>(contents, std::ios::in | std::ios::binary);
              }) {}
|
||||
43
src/change_detectors/ComparableFile.h
Normal file
43
src/change_detectors/ComparableFile.h
Normal file
@@ -0,0 +1,43 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMPARABLEFILE_H
|
||||
#define SEMBACKUP_COMPARABLEFILE_H
|
||||
|
||||
#include <filesystem>
|
||||
#include <functional>
|
||||
#include <streambuf>
|
||||
|
||||
#include "../repo/Repository.h"
|
||||
#include "../repo/objects/File.h"
|
||||
|
||||
/// Helper class to allow comparing files from different sources
|
||||
/**
|
||||
* As we are required to allow comparisons between a File in a repository and a file in filesystem,
|
||||
* comparisons between two files that are already in a Repository,
|
||||
* and between File%s that are in a repository cache and between files in the filesystem (when making backups),
|
||||
* this helper class exists to provide a uniform interface to be used when calling ChangeDetector%s.
|
||||
*/
|
||||
struct ComparableFile {
|
||||
/// Constructs a ComparableFile based on a File in a Repository
|
||||
/// The resulting ComparableFile will have a #contents function that returns an instance of FileBuffer for given \p file
|
||||
/// \param file Constant reference to a File object
|
||||
/// \param repo Constant pointer to Repository from which the File object was taken, must be valid during the lifetime of created ComparableFile
|
||||
ComparableFile(const File &file, const Repository *repo);
|
||||
|
||||
/// Constructs a ComparableFile based on a file in the filesystem
|
||||
/// The resulting ComparableFile will have a #contents function that returns an instance of std::filebuf for file at given path
|
||||
/// \param p Constant reference to an absolute path to the file
|
||||
/// \param base Constant reference to a base path against which #path will be set
|
||||
ComparableFile(const std::filesystem::path &p, const std::filesystem::path &base);
|
||||
|
||||
const std::string path; ///< Relative path to the file
|
||||
const File::Type type; ///< File type
|
||||
const unsigned long long bytes; ///< Number of bytes in the file
|
||||
const unsigned long long mtime; ///< Timestamp of last file modification
|
||||
const std::function<std::unique_ptr<std::streambuf>()> contents;///< Function that returns a unique pointer to a std::streambuf instance linked to the contents of the file
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMPARABLEFILE_H
|
||||
19
src/change_detectors/ContentsChangeDetector.cpp
Normal file
19
src/change_detectors/ContentsChangeDetector.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
#include "ContentsChangeDetector.h"
|
||||
|
||||
#include <iterator>
|
||||
|
||||
bool ContentsChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
|
||||
if (f1.type != f2.type) return true;
|
||||
|
||||
auto b1 = f1.contents();
|
||||
auto b2 = f2.contents();
|
||||
|
||||
return !std::equal(std::istreambuf_iterator<char>(b1.get()),
|
||||
std::istreambuf_iterator<char>(),
|
||||
std::istreambuf_iterator<char>(b2.get()),
|
||||
std::istreambuf_iterator<char>());
|
||||
}
|
||||
19
src/change_detectors/ContentsChangeDetector.h
Normal file
19
src/change_detectors/ContentsChangeDetector.h
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 05.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CONTENTSCHANGEDETECTOR_H
|
||||
#define SEMBACKUP_CONTENTSCHANGEDETECTOR_H
|
||||
|
||||
#include "ChangeDetector.h"
|
||||
|
||||
/// A ChangeDetector implementation that compares two files by their contents
|
||||
class ContentsChangeDetector : public ChangeDetector {
|
||||
public:
|
||||
/// \copydoc ChangeDetector::check
|
||||
/// \return ComparableFile%s are considered different if their contents are different
|
||||
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CONTENTSCHANGEDETECTOR_H
|
||||
9
src/change_detectors/EditTimeChangeDetector.cpp
Normal file
9
src/change_detectors/EditTimeChangeDetector.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#include "EditTimeChangeDetector.h"
|
||||
|
||||
bool EditTimeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
|
||||
return f1.mtime != f2.mtime;
|
||||
}
|
||||
20
src/change_detectors/EditTimeChangeDetector.h
Normal file
20
src/change_detectors/EditTimeChangeDetector.h
Normal file
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_EDITTIMECHANGEDETECTOR_H
|
||||
#define SEMBACKUP_EDITTIMECHANGEDETECTOR_H
|
||||
|
||||
|
||||
#include "ChangeDetector.h"
|
||||
|
||||
/// A ChangeDetector implementation that compares two files by their modification time
|
||||
class EditTimeChangeDetector : public ChangeDetector {
|
||||
public:
|
||||
/// \copydoc ChangeDetector::check
|
||||
/// \return ComparableFile%s are considered different if their modification times are different
|
||||
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_EDITTIMECHANGEDETECTOR_H
|
||||
9
src/change_detectors/SizeChangeDetector.cpp
Normal file
9
src/change_detectors/SizeChangeDetector.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#include "SizeChangeDetector.h"
|
||||
|
||||
bool SizeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
|
||||
return f1.bytes != f2.bytes;
|
||||
}
|
||||
19
src/change_detectors/SizeChangeDetector.h
Normal file
19
src/change_detectors/SizeChangeDetector.h
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 16.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_SIZECHANGEDETECTOR_H
|
||||
#define SEMBACKUP_SIZECHANGEDETECTOR_H
|
||||
|
||||
#include "ChangeDetector.h"
|
||||
|
||||
/// A ChangeDetector implementation that compares two files by their size
|
||||
class SizeChangeDetector : public ChangeDetector {
|
||||
public:
|
||||
/// \copydoc ChangeDetector::check
|
||||
/// \return ComparableFile%s are considered different if their sizes are different
|
||||
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_SIZECHANGEDETECTOR_H
|
||||
9
src/change_detectors/TypeChangeDetector.cpp
Normal file
9
src/change_detectors/TypeChangeDetector.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#include "TypeChangeDetector.h"
|
||||
|
||||
bool TypeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
|
||||
return f1.type != f2.type;
|
||||
}
|
||||
19
src/change_detectors/TypeChangeDetector.h
Normal file
19
src/change_detectors/TypeChangeDetector.h
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_TYPECHANGEDETECTOR_H
|
||||
#define SEMBACKUP_TYPECHANGEDETECTOR_H
|
||||
|
||||
#include "ChangeDetector.h"
|
||||
|
||||
/// A ChangeDetector implementation that compares two files by their type
|
||||
class TypeChangeDetector : public ChangeDetector {
|
||||
public:
|
||||
/// \copydoc ChangeDetector::check
|
||||
/// \return ComparableFile%s are considered different if their types are different
|
||||
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_TYPECHANGEDETECTOR_H
|
||||
34
src/chunkers/Buzhash.cpp
Normal file
34
src/chunkers/Buzhash.cpp
Normal file
@@ -0,0 +1,34 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 26.04.2023.
|
||||
//
|
||||
|
||||
#include "Buzhash.h"
|
||||
|
||||
/// Stores the window size; the byte history starts out empty
Buzhash::Buzhash(uint32_t blockSize)
    : blockSize(blockSize) {}
|
||||
|
||||
uint32_t Buzhash::get() const {
|
||||
return cur;
|
||||
}
|
||||
|
||||
uint32_t Buzhash::feed(uint8_t in) {
|
||||
cur = rotr32(cur, 1);
|
||||
|
||||
if (history.size() >= blockSize) {
|
||||
auto oldest = history.back();
|
||||
history.pop_back();
|
||||
cur ^= rotr32(randomNumbers[oldest], blockSize);
|
||||
}
|
||||
|
||||
history.emplace_front(in);
|
||||
|
||||
cur ^= randomNumbers[in];
|
||||
|
||||
return cur;
|
||||
}
|
||||
|
||||
// Circular shift taken from: https://en.wikipedia.org/wiki/Circular_shift
|
||||
uint32_t Buzhash::rotr32(uint32_t value, unsigned int count) {
|
||||
const unsigned int mask = CHAR_BIT * sizeof(value) - 1;
|
||||
count &= mask;
|
||||
return (value >> count) | (value << (-count & mask));
|
||||
}
|
||||
85
src/chunkers/Buzhash.h
Normal file
85
src/chunkers/Buzhash.h
Normal file
@@ -0,0 +1,85 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 26.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_BUZHASH_H
|
||||
#define SEMBACKUP_BUZHASH_H
|
||||
|
||||
#include <array>
|
||||
#include <climits>
|
||||
#include <cstdint>
|
||||
#include <deque>
|
||||
|
||||
/// Rolling hash using cyclic polynomials (Buzhash)
/** Based on: http://www.serve.net/buz/hash.adt/java.002.html
 *            https://github.com/silvasur/buzhash/blob/master/hash.go
 *            https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
 */
class Buzhash {
public:
    /// Constructs a new Buzhash instance
    /// \param blockSize Size of the rolling hash window, in bytes
    Buzhash(uint32_t blockSize);

    /// \return Current hash value
    uint32_t get() const;

    /// Adds \p in to the hash
    /// \param in Byte to add
    /// \return New hash value
    uint32_t feed(uint8_t in);

private:
    uint32_t cur = 0;            ///< Current hash value
    const uint32_t blockSize;    ///< Hashing window size
    std::deque<uint32_t> history;///< Bytes that make up the current hash, newest at the front; kept so the oldest byte can be removed from the hash once blockSize is reached

    // Circular shift taken from: https://en.wikipedia.org/wiki/Circular_shift
    /// Rotates \p value to the right by \p count bits
    /// \param value Value to rotate
    /// \param count Number of bits to rotate by
    /// \return Rotated value
    static uint32_t rotr32(uint32_t value, unsigned int count);

    /// 256 32-bit random numbers used for hashing, indexed by byte value
    /// Ideally, should have an equal distribution of 0s and 1s, but I didn't bother checking it
    // clang-format off
    static constexpr std::array<uint32_t, 256> randomNumbers{
            0x827f934c, 0xebcd9924, 0x667fdea2, 0x8a8b0997, 0x42af49e8, 0x556cb313, 0x505da41b, 0xb23be60f,
            0xc3901be4, 0xee1d8d4d, 0x4d59795c, 0x8d542ba4, 0x043f073c, 0x2af19a39, 0xb2c4aa36, 0x6e30ff43,
            0x77ad3ef7, 0xd4c077e5, 0x3a1155aa, 0x866b07d3, 0xc16022b2, 0x6d4dad6e, 0x7a69c6dd, 0xd436dc23,
            0x32b64948, 0x1f72475f, 0x129be871, 0x05d46f6e, 0x7e405cd5, 0x31fdd272, 0x84a56b1a, 0xeaf43633,
            0x5f8148d4, 0x6d4bf6d9, 0xc2b4dbd7, 0xaa804cc7, 0xcb3de5ca, 0x6503cdb3, 0xa3c6d727, 0x20e2f098,
            0xd525bb67, 0x37b1b81e, 0xc1f1fd79, 0x4fe91240, 0x6a4ea716, 0x71245e33, 0xdbaab854, 0xfc24600e,
            0xd72dc72f, 0x2d7139ae, 0x075fb38d, 0xb18028a5, 0x9970d103, 0x235ec64b, 0x68645255, 0x352945f0,
            0x7a4b19a1, 0xe17df5f5, 0x676a6644, 0x75aad7aa, 0x63bdfc9a, 0x607586c7, 0x1546400e, 0xfe582141,
            0xb50a199f, 0xb0769910, 0x5d74ab3b, 0x2404799b, 0xa66a3a78, 0x1b6e24aa, 0x630674cc, 0x3272fea4,
            0xd4e9e078, 0xe586d12a, 0x579f8b98, 0xfd16bcb5, 0xd1e4faee, 0xe30953c7, 0x3ac73f87, 0xab66983f,
            0x5fe12f90, 0x10952ef1, 0x5c7ac32a, 0x89ccd941, 0xb82c3fa9, 0xacd374e5, 0x50984746, 0x09f082e8,
            0x11ee3b91, 0x31764e3a, 0xb59df38a, 0x67e94f2d, 0xcceaca68, 0xc68a89d8, 0x5f2e80ac, 0xd5556741,
            0x8c815df6, 0xde71c2b5, 0x7b1f5c49, 0xd64682a4, 0x4fb59748, 0x4968707f, 0x909c0c1a, 0x5f1dd608,
            0x1c601e37, 0x96e01ada, 0xc5582ef8, 0xae6834c1, 0xbe63b0ce, 0xab2aea9f, 0xf13e77c2, 0xe433350b,
            0x17a24a33, 0xc1f31bb6, 0xa23e9de4, 0x7e28ef69, 0x23e0ef42, 0x0796e53f, 0xf9e3045d, 0x7bbacd31,
            0xa48bee27, 0x15f3c3b3, 0x4c320cb4, 0x916429d9, 0xa15ccb3c, 0x82a4a23c, 0xb0cc6a4a, 0xcf8d93fa,
            0x3b18b937, 0xad0488e4, 0xaa568114, 0x80b9b8c7, 0x8f3a9071, 0x818b790d, 0x99c8dbf2, 0x0d23b2a4,
            0x74c81a28, 0x1aa65d76, 0x7168ee7d, 0xc0d40b6c, 0x77c70a0c, 0xd3752839, 0xc2f7981c, 0x83767124,
            0xb881618f, 0xb263d8cf, 0xbbb40400, 0xdb9702eb, 0xaccad841, 0x806af5a7, 0x16f096e3, 0x64bf45d9,
            0x5f7c0a58, 0xdac0c665, 0x1dbebaac, 0xb97027a6, 0xfc934433, 0xfc7b2d06, 0x8871fe4e, 0x0df24135,
            0x6ddf7cc8, 0x32e0d1cd, 0xe88abedd, 0x214af930, 0x90990f97, 0xc7691171, 0xbf7b6ca3, 0x8af6589c,
            0x452c8ee0, 0xbc2c5891, 0xcf8d13b4, 0x698d1f1f, 0x802a011a, 0x19820708, 0x25c79d2f, 0xedf91253,
            0xc93fe5dd, 0xa03a117b, 0x10912ae7, 0xc90d59d0, 0xc3522549, 0x3e4f3e81, 0x494ae40f, 0x2d157b6e,
            0xd7bf06b2, 0x19c5bb2a, 0xa869261c, 0xa80cfd2c, 0x1ea7c6ec, 0x1b36a51f, 0x8bd227cc, 0xad2d2260,
            0x181258c3, 0xbd253a58, 0x3273f94b, 0x9c315309, 0xb2d8d3e3, 0x11ec35a8, 0x384e6475, 0x855a9009,
            0x854cc06a, 0xe7408809, 0xe583ce2a, 0x895fb756, 0x6a8a2072, 0x6598a92b, 0x530f41bb, 0xb1bd57f1,
            0x62d57fa0, 0xe6505776, 0x42fcfe4d, 0x0fbdf1ee, 0x8e3104c4, 0xf11c8a65, 0x5bc51ad9, 0x5f1f8ce9,
            0xab179a87, 0xd5448444, 0x7bd4a26b, 0x658f1963, 0x86db95b8, 0xaba6734e, 0x486fddea, 0x859c3e0b,
            0xebce0106, 0x99c3014e, 0xc151b942, 0x9604aad8, 0xf6ce654b, 0xa1e7982e, 0xf6d8ed14, 0xd4bdf7e2,
            0x13696254, 0x05ec638c, 0x306dbc29, 0x1676eb60, 0xadbf3ce3, 0x966dde56, 0x6d5bea46, 0x719aa10d,
            0x0e65093d, 0x0b1a3c43, 0x0321ea8c, 0xe0ef2cbd, 0x43432ee3, 0x3e62046d, 0x425e7b44, 0x892e119c,
            0xfdec4de5, 0x48c5dd6c, 0x79e6bfcd, 0x8d53372e, 0xe96f6d32, 0x52cddacd, 0x3e99e0eb, 0xa9e5d28f,
    };
    // clang-format on
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_BUZHASH_H
|
||||
42
src/chunkers/BuzhashChunker.cpp
Normal file
42
src/chunkers/BuzhashChunker.cpp
Normal file
@@ -0,0 +1,42 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 26.04.2023.
|
||||
//
|
||||
|
||||
#include "BuzhashChunker.h"
|
||||
|
||||
#include "../Exception.h"
|
||||
#include "../crypto/MD5.h"
|
||||
|
||||
/// Stores the chunking parameters and creates the rolling hash with a window of \p window bytes
BuzhashChunker::BuzhashChunker(std::streambuf *buf, unsigned long long minBytes, unsigned long long maxBytes, unsigned long long mask, uint32_t window)
    : Chunker(buf, maxBytes),
      window(window),
      minBytes(minBytes),
      mask(mask),
      buzhash(window) {}
|
||||
|
||||
std::pair<std::string, std::vector<char>> BuzhashChunker::getNext() {
|
||||
if (eof) throw Exception("Trying to read from a file that is finished!");
|
||||
std::vector<char> rbuf(minBytes);
|
||||
|
||||
auto read = static_cast<unsigned long>(buf->sgetn(rbuf.data(), (long) minBytes));
|
||||
|
||||
if (read != minBytes) {
|
||||
eof = true;
|
||||
rbuf.resize(read);
|
||||
return {MD5::calculate(rbuf), rbuf};
|
||||
}
|
||||
|
||||
for (auto c: rbuf) {
|
||||
buzhash.feed(static_cast<uint8_t>(c));
|
||||
}
|
||||
|
||||
// Continue reading the file until either the last mask bits are zero of we exceed the maxSize
|
||||
while (((buzhash.get() & (~0UL >> (sizeof(unsigned long long) * 8 - mask))) != 0) && rbuf.size() < maxBytes) {
|
||||
auto r = buf->sbumpc();
|
||||
if (r == std::streambuf::traits_type::eof()) {
|
||||
eof = true;
|
||||
break;
|
||||
} else {
|
||||
char c = std::streambuf::traits_type::to_char_type(r);
|
||||
rbuf.emplace_back(c);
|
||||
buzhash.feed(static_cast<uint8_t>(c));
|
||||
}
|
||||
}
|
||||
|
||||
return {MD5::calculate(rbuf), rbuf};
|
||||
}
|
||||
34
src/chunkers/BuzhashChunker.h
Normal file
34
src/chunkers/BuzhashChunker.h
Normal file
@@ -0,0 +1,34 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 26.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_BUZHASHCHUNKER_H
|
||||
#define SEMBACKUP_BUZHASHCHUNKER_H
|
||||
|
||||
#include <streambuf>
|
||||
|
||||
#include "Buzhash.h"
|
||||
#include "Chunker.h"
|
||||
|
||||
/// Chunker implementation using rolling hash
|
||||
class BuzhashChunker : public Chunker {
|
||||
public:
|
||||
/// Constructs a BuzhashChunker
|
||||
/// \copydoc Chunker::Chunker
|
||||
/// \param minBytes Minimum amount of bytes in returned chunks
|
||||
/// \param mask Amount of trailing zeroes in the rolling hash at which the file is cut (results in average chunk size of 2^mask bytes)
|
||||
/// \param window Rolling hash window (how many of chunks last bytes are included in the hash, the default is recommended)
|
||||
BuzhashChunker(std::streambuf *buf, unsigned long long minBytes, unsigned long long maxBytes, unsigned long long mask, uint32_t window = 4095);
|
||||
|
||||
/// \copydoc Chunker::getNext
|
||||
std::pair<std::string, std::vector<char>> getNext() override;
|
||||
|
||||
private:
|
||||
const unsigned long long window; ///< Rolling hash window
|
||||
const unsigned long long minBytes;///< Minimum amount of bytes in returned chunks
|
||||
const unsigned long long mask; ///< Amount of trailing zeroes in the rolling hash at which the file is cut
|
||||
Buzhash buzhash; ///< Hasher instance
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_BUZHASHCHUNKER_H
|
||||
51
src/chunkers/Chunker.cpp
Normal file
51
src/chunkers/Chunker.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 15.04.2023.
|
||||
//
|
||||
|
||||
#include "Chunker.h"
|
||||
|
||||
#include "../Exception.h"
|
||||
|
||||
/// Stores the source stream buffer and the maximum chunk size
Chunker::Chunker(std::streambuf *buf, unsigned long long maxBytes)
    : buf(buf), maxBytes(maxBytes) {}
|
||||
|
||||
bool Chunker::getEof() const {
|
||||
return eof;
|
||||
}
|
||||
|
||||
/// Out-of-line definition of the virtual destructor; the compiler-generated behaviour is used
Chunker::~Chunker() = default;
|
||||
|
||||
/// \return Iterator bound to this Chunker; constructing it already reads the first chunk
Chunker::ChunkerIterator Chunker::begin() {
    return ChunkerIterator(this);
}
|
||||
|
||||
/// \return Past-EOF iterator, which is not bound to any Chunker
Chunker::ChunkerIterator Chunker::end() {
    return ChunkerIterator(nullptr);
}
|
||||
|
||||
/// Moves to the next chunk, or marks the iterator past-EOF when the source has reached EOF
/// \throws Exception if the iterator is already past-EOF
Chunker::ChunkerIterator &Chunker::ChunkerIterator::operator++() {
    if (pastEOF) throw Exception("Trying to increment pastEOF ChunkerIterator!");

    if (source->getEof()) {
        // The chunk currently held was the last one
        pastEOF = true;
        return *this;
    }

    buf = source->getNext();
    return *this;
}
|
||||
|
||||
/// \return True if exactly one of the two iterators is past-EOF
bool Chunker::ChunkerIterator::operator!=(const Chunker::ChunkerIterator &rhs) const {
    const bool sameState = pastEOF == rhs.pastEOF;
    return !sameState;
}
|
||||
|
||||
/// \return Copy of the chunk the iterator currently holds
/// \throws Exception if the iterator is past-EOF
Chunker::ChunkerIterator::value_type Chunker::ChunkerIterator::operator*() const {
    if (!pastEOF) return buf.value();

    throw Exception("Trying to dereference pastEOF ChunkerIterator!");
}
|
||||
|
||||
/// \return True if both iterators have the same past-EOF state; nothing else is compared
bool Chunker::ChunkerIterator::operator==(const Chunker::ChunkerIterator &rhs) const {
    const bool differentState = pastEOF != rhs.pastEOF;
    return !differentState;
}
|
||||
|
||||
/// Creates an iterator over \p source, or a past-EOF iterator when \p source is nullptr
/// With a non-null source the first chunk is read right away
Chunker::ChunkerIterator::ChunkerIterator(Chunker *source)
    : source(source), pastEOF(source == nullptr) {
    if (!pastEOF) ++(*this);
}
|
||||
74
src/chunkers/Chunker.h
Normal file
74
src/chunkers/Chunker.h
Normal file
@@ -0,0 +1,74 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 15.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CHUNKER_H
|
||||
#define SEMBACKUP_CHUNKER_H
|
||||
|
||||
#include <array>
|
||||
#include <optional>
|
||||
#include <streambuf>
|
||||
#include <vector>
|
||||
|
||||
/// Abstract base class for Chunker%s, which take a file and split it into chunks to be backed up
class Chunker {
private:
    /// Convenience iterator that allows using Chunker%s in range for loops
    struct ChunkerIterator {
        using value_type = std::pair<std::string, std::vector<char>>;

        /// Creates a ChunkerIterator pointing to the first chunk, or a past-EOF iterator
        /// \param source Pointer to a Chunker that must stay available during the entire iterator lifetime, or nullptr for a past-EOF iterator
        ChunkerIterator(Chunker *source);

        /// Advances the iterator to the next chunk, or to past-EOF
        /// \throws Exception if the iterator already points past-EOF
        ChunkerIterator &operator++();

        /// Returns a copy of the chunk currently pointed to
        /// \throws Exception if the iterator points past-EOF
        value_type operator*() const;

        /// Returns true if both iterators have the same past-EOF state
        bool operator==(const ChunkerIterator &rhs) const;

        /// Returns true if exactly one of the iterators is past-EOF
        bool operator!=(const ChunkerIterator &rhs) const;

    private:
        Chunker *const source;        ///< Pointer to the underlying Chunker
        std::optional<value_type> buf;///< Chunk currently pointed to
        bool pastEOF = false;         ///< Whether past-EOF has been reached
    };

public:
    /// Returns the next chunk of the file
    /// Returns a single empty chunk if a file is empty
    /// \return Pair whose first element is the chunk's MD5 hash and whose second element is the chunk's bytes
    /// \throws Exception if EOF was already reached
    virtual std::pair<std::string, std::vector<char>> getNext() = 0;

    /// Returns True if EOF was reached, False otherwise
    bool getEof() const;

    /// Virtual destructor, so implementations can be destroyed through a Chunker pointer
    virtual ~Chunker();

    /// Returns a ChunkerIterator pointing to the first chunk in a file
    ChunkerIterator begin();

    /// Returns a past-EOF ChunkerIterator
    static ChunkerIterator end();

protected:
    /// \param buf      Pointer to a std::streambuf that must stay available during the entire lifetime of the Chunker
    /// \param maxBytes Maximal amount of bytes in returned chunks
    Chunker(std::streambuf *buf, unsigned long long maxBytes);

    std::streambuf *const buf;        ///< Constant pointer to the source std::streambuf
    bool eof = false;                 ///< Indicates whether EOF has been reached
    const unsigned long long maxBytes;///< Maximal number of bytes in returned chunks
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CHUNKER_H
|
||||
19
src/chunkers/ChunkerFactory.cpp
Normal file
19
src/chunkers/ChunkerFactory.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 30.04.2023.
|
||||
//
|
||||
|
||||
#include "ChunkerFactory.h"
|
||||
|
||||
#include "../Exception.h"
|
||||
#include "BuzhashChunker.h"
|
||||
#include "ConstChunker.h"
|
||||
|
||||
std::unique_ptr<Chunker> ChunkerFactory::getChunker(const Config &config, std::streambuf *buf) {
|
||||
if (config.getStr("chunker") == "const") {
|
||||
return std::make_unique<ConstChunker>(buf, config.getInt("chunker-max") * 1024);
|
||||
} else if (config.getStr("chunker") == "buzhash") {
|
||||
return std::make_unique<BuzhashChunker>(buf, config.getInt("chunker-min") * 1024, config.getInt("chunker-max") * 1024, config.getInt("chunker-mask"));
|
||||
} else {
|
||||
throw Exception("Unknown chunker type!");
|
||||
}
|
||||
}
|
||||
25
src/chunkers/ChunkerFactory.h
Normal file
25
src/chunkers/ChunkerFactory.h
Normal file
@@ -0,0 +1,25 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 30.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CHUNKERFACTORY_H
|
||||
#define SEMBACKUP_CHUNKERFACTORY_H
|
||||
|
||||
#include <memory>
|
||||
#include <streambuf>
|
||||
|
||||
#include "../Config.h"
|
||||
#include "Chunker.h"
|
||||
|
||||
/// Factory for Chunker%s, selects the implementation from the "chunker" config option
|
||||
class ChunkerFactory {
|
||||
public:
|
||||
/// Creates a new Chunker based on provided \p config backed with \p buf
|
||||
/// \param config Constant reference to Config (the "chunker", "chunker-min", "chunker-max" and "chunker-mask" options are read)
|
||||
/// \param buf Pointer to a std::streambuf instance, should be available during the Chunker lifetime
|
||||
/// \return Unique pointer to the created Chunker
/// \throws Exception if the configured chunker type is unknown
|
||||
static std::unique_ptr<Chunker> getChunker(const Config &config, std::streambuf *buf);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CHUNKERFACTORY_H
|
||||
27
src/chunkers/ConstChunker.cpp
Normal file
27
src/chunkers/ConstChunker.cpp
Normal file
@@ -0,0 +1,27 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 15.04.2023.
|
||||
//
|
||||
|
||||
#include "ConstChunker.h"
|
||||
|
||||
#include "../Exception.h"
|
||||
#include "../crypto/MD5.h"
|
||||
|
||||
/// Constructs a ConstChunker by forwarding \p buf and \p maxBytes to the Chunker base class
ConstChunker::ConstChunker(std::streambuf *buf, unsigned long long maxBytes) : Chunker(buf, maxBytes) {}
|
||||
|
||||
std::pair<std::string, std::vector<char>> ConstChunker::getNext() {
|
||||
if (eof) throw Exception("Trying to read from a file that is finished!");
|
||||
|
||||
std::vector<char> rbuf(maxBytes);
|
||||
|
||||
auto read = static_cast<unsigned long>(buf->sgetn(rbuf.data(), (long) maxBytes));
|
||||
|
||||
if (read != maxBytes) {
|
||||
eof = true;
|
||||
rbuf.resize(read);
|
||||
}
|
||||
|
||||
auto md5 = MD5::calculate(rbuf);
|
||||
|
||||
return {md5, rbuf};
|
||||
}
|
||||
24
src/chunkers/ConstChunker.h
Normal file
24
src/chunkers/ConstChunker.h
Normal file
@@ -0,0 +1,24 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 15.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CONSTCHUNKER_H
|
||||
#define SEMBACKUP_CONSTCHUNKER_H
|
||||
|
||||
#include <streambuf>
|
||||
|
||||
#include "Chunker.h"
|
||||
|
||||
/// Chunker implementation that splits the file into equally-sized chunks of maxBytes bytes
/// (the chunk produced by the final, short read may be smaller or empty)
|
||||
class ConstChunker : public Chunker {
|
||||
public:
|
||||
/// Constructs a ConstChunker
|
||||
/// \copydoc Chunker::Chunker
|
||||
ConstChunker(std::streambuf *buf, unsigned long long maxBytes);
|
||||
|
||||
/// \copydoc Chunker::getNext
|
||||
std::pair<std::string, std::vector<char>> getNext() override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CONSTCHUNKER_H
|
||||
9
src/commands/Command.cpp
Normal file
9
src/commands/Command.cpp
Normal file
@@ -0,0 +1,9 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "Command.h"
|
||||
|
||||
/// Stores \p name (taken by value and moved) as the constant name of the command
Command::Command(std::string name) : name(std::move(name)) {}
|
||||
|
||||
/// Out-of-line definition of the destructor that is declared pure virtual in Command.h
Command::~Command() = default;
|
||||
28
src/commands/Command.h
Normal file
28
src/commands/Command.h
Normal file
@@ -0,0 +1,28 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMMAND_H
|
||||
#define SEMBACKUP_COMMAND_H
|
||||
|
||||
#include "../Context.h"
|
||||
|
||||
/// Abstract base class for some process running with some Context
|
||||
class Command {
|
||||
public:
|
||||
/// Runs the command with Context \p ctx
|
||||
virtual void run(Context ctx) = 0;
|
||||
|
||||
/// Default virtual destructor
|
||||
virtual ~Command() = 0;
|
||||
|
||||
/// The name of the command
|
||||
const std::string name;
|
||||
|
||||
protected:
|
||||
/// Constructs a command with name \p name
|
||||
Command(std::string name);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMMAND_H
|
||||
152
src/commands/CommandDiff.cpp
Normal file
152
src/commands/CommandDiff.cpp
Normal file
@@ -0,0 +1,152 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "CommandDiff.h"
|
||||
|
||||
#include "../BytesFormatter.h"
|
||||
#include "../Diff.h"
|
||||
#include "../Exception.h"
|
||||
#include "../Progress.h"
|
||||
#include "../RunningDiffAverage.h"
|
||||
#include "../Signals.h"
|
||||
#include "../ThreadPool.h"
|
||||
#include "../change_detectors/ChangeDetectorFactory.h"
|
||||
#include "../chunkers/ChunkerFactory.h"
|
||||
#include "../repo/Serialize.h"
|
||||
#include "../repo/objects/Archive.h"
|
||||
#include "../repo/objects/Chunk.h"
|
||||
|
||||
using namespace CommandsCommon;
|
||||
|
||||
/// Constructs the command, registering it under the name "diff"
CommandDiff::CommandDiff() : Command("diff") {}
|
||||
|
||||
/// Compares an archive against another archive or against the filesystem
///
/// The first archive is the one given by the "aid" config option, or the archive with the
/// highest id if "aid" is not set. It is compared with the archive "aid2" if that option is
/// set, otherwise with the directory given by "from". Only entries matching "prefix" are considered.
/// In "diff-mode" "normal" everything under the prefix is compared on the thread pool,
/// in "diff-mode" "file" only the single entry named by "prefix" is compared.
/// \param ctx Context providing the repository (and its config) and the logger
/// \throws Exception if there is no archive to diff against or if "diff-mode" is unknown
void CommandDiff::run(Context ctx) {
    std::string diffMode = ctx.repo->getConfig().getStr("diff-mode");

    Object::idType archive1;
    if (!ctx.repo->getConfig().exists("aid")) {
        auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);
        auto latest = std::max_element(archives.begin(), archives.end(), [](const auto &a1, const auto &a2) { return a1.second < a2.second; });
        /// max_element returns end() for an empty range, which must not be dereferenced
        if (latest == archives.end()) throw Exception("No archives found to diff against");
        archive1 = latest->second;
    } else {
        archive1 = ctx.repo->getConfig().getInt("aid");
    }

    /// Thread pool for the comparison tasks, errors are written to the logger
    ThreadPool threadPool([&](const std::string &error) {
        ctx.logger->write("Error: " + error, 0);
    },
                          ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());

    auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1));
    std::mutex filesLock;                       ///< Protects files
    std::map<std::filesystem::path, File> files;///< Files in the first archive, entries are erased once they were compared
    for (auto id: archiveO1.files) {
        auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
        auto path = std::filesystem::u8path(file.name);
        if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
            files.emplace(file.getKey(), std::move(file));
    }

    /// Container of ChangeDetectors built using the config of the repository
    ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());

    /// Task to compare the given file with the first archive
    auto processFile = [&, this](ComparableFile p) {
        auto relPath = p.path;
        std::unique_lock lock(filesLock);
        if (files.count(relPath) == 0) {
            ctx.logger->write(relPath + " is new\n", 0);
            lock.unlock();
        } else {
            /// Copy the entry so that the comparison itself can run without holding the lock
            File repoFile = files.at(relPath);
            lock.unlock();
            if (changeDetector.check({repoFile, ctx.repo}, p)) {
                ctx.logger->write(relPath + " is different " + Diff::diff({repoFile, ctx.repo}, p) + "\n", 1);
            } else {
                if (diffMode == "file")
                    ctx.logger->write(relPath + " are same ", 0);
            }
        }

        /// Whatever is left in files after all tasks have finished was not seen on the other side
        lock.lock();
        files.erase(relPath);
    };

    std::optional<Archive> archiveO2;
    if (diffMode == "normal") {
        /// If a second archive is given, run the task for each of its files, otherwise use the "from" config option
        if (ctx.repo->getConfig().exists("aid2")) {
            archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));

            threadPool.push([&]() {
                for (auto id: archiveO2.value().files) {
                    /// Exit when asked to
                    if (Signals::shouldQuit) throw Exception("Quitting");
                    auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
                    if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::u8path(file.name)))
                        threadPool.push([&, file]() {
                            processFile(ComparableFile{file, ctx.repo});
                        });
                    if (Signals::shouldQuit) break;
                }

                return true;
            });
        } else {
            std::filesystem::path from = ctx.repo->getConfig().getStr("from");
            /// Start the diff with the root directory and empty ignore list
            threadPool.push([&, from]() {
                processDirWithIgnore(
                        from,
                        {},
                        [&](std::function<void()> f) { threadPool.push(std::move(f)); },
                        [processFile, from, prefix = ctx.repo->getConfig().getStr("prefix")](const std::filesystem::directory_entry &dirEntry) {
                            if (isSubpath(prefix, dirEntry.path().lexically_relative(from)))
                                processFile(ComparableFile{dirEntry, from});
                        });
            });
        }
    } else if (diffMode == "file") {
        if (files.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
            ctx.logger->write("Doesn't exist in the first archive", 0);
            return;
        }

        if (ctx.repo->getConfig().exists("aid2")) {
            archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
            std::map<std::filesystem::path, File> files2;///< Files in the second archive
            for (auto id: archiveO2->files) {
                auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
                auto path = std::filesystem::u8path(file.name);
                if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
                    files2.emplace(file.getKey(), std::move(file));
            }

            if (files2.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
                ctx.logger->write("Doesn't exist in the second archive", 0);
                return;
            } else {
                processFile(ComparableFile{files2.at(ctx.repo->getConfig().getStr("prefix")), ctx.repo});
            }
        } else {
            std::filesystem::path from = ctx.repo->getConfig().getStr("from");
            if (!std::filesystem::exists(from / ctx.repo->getConfig().getStr("prefix"))) {
                ctx.logger->write("Doesn't exist in the filesystem archive", 0);
                return;
            }
            /// Compare just the one file named by the prefix, directly on this thread
            processFile(ComparableFile{from / ctx.repo->getConfig().getStr("prefix"), from});
        }

    } else {
        throw Exception("Unknown diff-mode: " + diffMode);
    }

    /// Wait for diff to end
    std::unique_lock finishedLock(threadPool.finishedLock);
    threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
    /// Everything that is still in files was only present in the first archive
    if (diffMode == "normal")
        for (auto const &s: files) {
            ctx.logger->write(s.first.u8string() + " is removed\n", 0);
        }
}
|
||||
23
src/commands/CommandDiff.h
Normal file
23
src/commands/CommandDiff.h
Normal file
@@ -0,0 +1,23 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMMANDDIFF_H
|
||||
#define SEMBACKUP_COMMANDDIFF_H
|
||||
|
||||
#include "Command.h"
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
/// Run the diff (restricted to the `prefix` config option) between:
|
||||
/// 1. The latest archive and the `from` directory
|
||||
/// 2. if `aid` is set the aid archive and the `from` directory
|
||||
/// 3. if `aid2` is set, between the first archive (`aid`, or the latest one if `aid` is not set) and `aid2`
/// The `diff-mode` config option selects between comparing everything ("normal") and a single file ("file")
|
||||
class CommandDiff : public Command {
|
||||
public:
|
||||
CommandDiff();
|
||||
void run(Context ctx) override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMMANDDIFF_H
|
||||
16
src/commands/CommandList.cpp
Normal file
16
src/commands/CommandList.cpp
Normal file
@@ -0,0 +1,16 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "CommandList.h"
|
||||
|
||||
/// Constructs the command, registering it under the name "list"
CommandList::CommandList() : Command("list") {}
|
||||
|
||||
/// Prints the name and id of every archive in the repository to stdout, ordered by ascending id
/// \param ctx Context providing the repository
void CommandList::run(Context ctx) {
    auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);

    /// Entries are (name, id) pairs, order them by the id
    const auto byId = [](const auto &lhs, const auto &rhs) { return lhs.second < rhs.second; };
    std::sort(archives.begin(), archives.end(), byId);

    for (const auto &entry: archives)
        std::cout << "Name: " << entry.first << " Id: " << entry.second << std::endl;
}
|
||||
20
src/commands/CommandList.h
Normal file
20
src/commands/CommandList.h
Normal file
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMMANDLIST_H
|
||||
#define SEMBACKUP_COMMANDLIST_H
|
||||
|
||||
#include "Command.h"
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
/// Lists available archives in a repository (name and id of each, printed to stdout sorted by id)
|
||||
class CommandList : public Command {
|
||||
public:
|
||||
CommandList();
|
||||
void run(Context ctx) override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMMANDLIST_H
|
||||
22
src/commands/CommandListFiles.cpp
Normal file
22
src/commands/CommandListFiles.cpp
Normal file
@@ -0,0 +1,22 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "CommandListFiles.h"
|
||||
|
||||
#include "../BytesFormatter.h"
|
||||
#include "../repo/Serialize.h"
|
||||
#include "../repo/objects/Archive.h"
|
||||
#include "../repo/objects/Chunk.h"
|
||||
#include "../repo/objects/File.h"
|
||||
|
||||
/// Constructs the command, registering it under the name "list-files"
CommandListFiles::CommandListFiles() : Command("list-files") {}
|
||||
|
||||
/// Prints name, type and formatted size of every file in the archive selected by the "aid" config option
/// \param ctx Context providing the repository and its config
void CommandListFiles::run(Context ctx) {
    const auto archiveId = ctx.repo->getConfig().getInt("aid");
    const auto archive = Serialize::deserialize<Archive>(ctx.repo->getObject(archiveId));

    for (const auto &fileId: archive.files) {
        const auto file = Serialize::deserialize<File>(ctx.repo->getObject(fileId));
        std::cout << "Name: " << file.name
                  << " type: " << File::TypeToStr.at(file.fileType)
                  << " size: " << BytesFormatter::formatStr(file.bytes)
                  << std::endl;
    }
}
|
||||
20
src/commands/CommandListFiles.h
Normal file
20
src/commands/CommandListFiles.h
Normal file
@@ -0,0 +1,20 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMMANDLISTFILES_H
|
||||
#define SEMBACKUP_COMMANDLISTFILES_H
|
||||
|
||||
#include "Command.h"
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
/// Lists files in the selected Archive (the one given by the `aid` config option), printing name, type and size of each
|
||||
class CommandListFiles : public Command {
|
||||
public:
|
||||
CommandListFiles();
|
||||
void run(Context ctx) override;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMMANDLISTFILES_H
|
||||
125
src/commands/CommandRestore.cpp
Normal file
125
src/commands/CommandRestore.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "CommandRestore.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#include "../BytesFormatter.h"
|
||||
#include "../Exception.h"
|
||||
#include "../Progress.h"
|
||||
#include "../RunningDiffAverage.h"
|
||||
#include "../Signals.h"
|
||||
#include "../ThreadPool.h"
|
||||
#include "../chunkers/ChunkerFactory.h"
|
||||
#include "../repo/Serialize.h"
|
||||
#include "../repo/objects/Archive.h"
|
||||
#include "../repo/objects/Chunk.h"
|
||||
|
||||
using namespace CommandsCommon;
|
||||
|
||||
/// Constructs the command, registering it under the name "restore"
CommandRestore::CommandRestore() : Command("restore") {}
|
||||
|
||||
/// Restores all files of the archive given by the "aid" config option into the "to" directory
///
/// A single task walks the archive's file list and spawns one restore task per file on the
/// thread pool, while a Progress line shows the counters the worker tasks report.
/// \param ctx Context providing the repository (and its config) and the logger
void CommandRestore::run(Context ctx) {
    Object::idType archiveId = ctx.repo->getConfig().getInt("aid");
    std::filesystem::path destination = std::filesystem::u8path(ctx.repo->getConfig().getStr("to"));

    std::atomic<unsigned long long> totalFiles{0};///< Number of files queued for restore so far
    std::atomic<unsigned long long> totalBytes{0};///< Sum of the sizes of the queued files

    WorkerStats workerStats;///< Restore statistics of the worker threads

    /// Worker callback, bound to the local workerStats variable
    workerStatsFunction workerCallback = [&workerStats](unsigned long long written, unsigned long long skipped, unsigned long long filesDone) {
        CommandsCommon::workerCallback(written, skipped, filesDone, workerStats);
    };

    {
        /// Calculate the average speed of the restore
        RunningDiffAverage speed(
                [&workerStats]() { return workerStats.bytesWritten.load(); },
                100, 100);

        /// Show restore progress
        Progress progress(
                [ctx](const std::string &message, int level) { ctx.logger->write(message, level); },
                {
                        [&workerStats]() { return std::to_string(workerStats.filesWritten.load()); },
                        "/",
                        [&totalFiles]() { return std::to_string(totalFiles.load()); },
                        " files saved, ",
                        [&workerStats]() { return BytesFormatter::formatStr(workerStats.bytesWritten.load() + workerStats.bytesSkipped.load()); },
                        " / ",
                        [&totalBytes]() { return BytesFormatter::formatStr(totalBytes.load()); },
                        " saved @ ",
                        [&speed]() { return BytesFormatter::formatStr(speed.get() * 10); },
                        "/s",
                },
                ctx.repo->getConfig());

        /// Use the configured number of threads, or one per hardware thread if not configured
        const auto threadCount = ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency();

        /// Thread pool for restore tasks, errors are printed through the progress
        ThreadPool threadPool(
                [&progress](const std::string &error) { progress.print("Error: " + error, 0); },
                threadCount);

        /// Add the main restore task
        threadPool.push([&, this]() {
            /// Get the archive, then go through its file IDs
            auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archiveId));
            for (const auto fid: archiveO.files) {
                /// Stop when asked to
                if (Signals::shouldQuit) break;

                auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
                ++totalFiles;
                totalBytes += file.bytes;

                /// Spawn a restore task, it owns copies of the file and of the destination
                threadPool.push([&, this, destination, file]() {
                    backupRestoreFile(file, destination, workerCallback, ctx);
                    progress.print("Restored " + file.name, 1);
                });
            }
        });

        /// Wait for all tasks to finish
        std::unique_lock finishedLock(threadPool.finishedLock);
        threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
    }

    ctx.logger->write("\n", 1);
}
|
||||
|
||||
std::string CommandRestore::backupRestoreFile(const File &file, const std::filesystem::path &baseDir, workerStatsFunction &callback, Context ctx) {
|
||||
auto fullpath = baseDir / std::filesystem::u8path(file.name);
|
||||
|
||||
std::filesystem::create_directories(fullpath.parent_path());
|
||||
|
||||
if (file.fileType == File::Type::Directory) {
|
||||
std::filesystem::create_directory(fullpath);
|
||||
callback(0, 0, 1);
|
||||
return fullpath.u8string();
|
||||
}
|
||||
if (file.fileType == File::Type::Symlink) {
|
||||
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks[0]));
|
||||
std::filesystem::create_symlink(std::filesystem::u8path(std::string{dest.data.begin(), dest.data.end()}), fullpath);
|
||||
callback(0, 0, 1);
|
||||
return fullpath.u8string();
|
||||
}
|
||||
|
||||
std::ofstream ostream(fullpath, std::ios::binary | std::ios::out | std::ios::trunc);
|
||||
for (const auto cid: file.chunks) {
|
||||
if (Signals::shouldQuit) throw Exception("Quitting!");
|
||||
|
||||
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid));
|
||||
if (!c.data.empty()) {
|
||||
ostream.rdbuf()->sputn(c.data.data(), c.data.size());
|
||||
callback(c.data.size(), 0, 0);
|
||||
}
|
||||
}
|
||||
callback(0, 0, 1);
|
||||
|
||||
return fullpath.u8string();
|
||||
}
|
||||
30
src/commands/CommandRestore.h
Normal file
30
src/commands/CommandRestore.h
Normal file
@@ -0,0 +1,30 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMMANDRESTORE_H
|
||||
#define SEMBACKUP_COMMANDRESTORE_H
|
||||
|
||||
#include "Command.h"
|
||||
|
||||
#include "../repo/objects/File.h"
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
/// Restores the archive with id \p aid to path \p to (both taken from the config)
|
||||
class CommandRestore : public Command {
|
||||
public:
|
||||
CommandRestore();
|
||||
void run(Context ctx) override;
|
||||
|
||||
private:
|
||||
/// Internal function to restore a file
|
||||
/// \param file Constant reference to the File object
|
||||
/// \param base Base directory to restore to
|
||||
/// \param callback Stats callback
/// \param ctx Context providing the repository the file's chunks are read from
|
||||
/// \return UTF-8 encoded full path of the restored file
|
||||
std::string backupRestoreFile(const File &file, const std::filesystem::path &base, CommandsCommon::workerStatsFunction &callback, Context ctx);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMMANDRESTORE_H
|
||||
239
src/commands/CommandRun.cpp
Normal file
239
src/commands/CommandRun.cpp
Normal file
@@ -0,0 +1,239 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "CommandRun.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <sstream>
|
||||
|
||||
#include "../BytesFormatter.h"
|
||||
#include "../Exception.h"
|
||||
#include "../Progress.h"
|
||||
#include "../RunningDiffAverage.h"
|
||||
#include "../Signals.h"
|
||||
#include "../ThreadPool.h"
|
||||
#include "../change_detectors/ChangeDetectorFactory.h"
|
||||
#include "../chunkers/ChunkerFactory.h"
|
||||
#include "../crypto/MD5.h"
|
||||
#include "../repo/Serialize.h"
|
||||
#include "../repo/objects/Archive.h"
|
||||
#include "../repo/objects/Chunk.h"
|
||||
#include "../repo/objects/File.h"
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
using namespace CommandsCommon;
|
||||
|
||||
/// Constructs the command, registering it under the name "run"
CommandRun::CommandRun() : Command("run") {}
|
||||
|
||||
/// Backs up the "from" directory into a new archive
///
/// The backup is full if the "type" config option says so, if there are no archives yet, or if
/// at least "full-period" incremental archives were made since the last full one. Otherwise
/// files for which the change detectors report no change reuse their stored File object.
/// Files are chunked on a thread pool and the archive object is written at the end.
/// \param ctx Context providing the repository (and its config) and the logger
void CommandRun::run(Context ctx) {
    WorkerStats workerStats;///< Backup statistics of the worker threads
    RunnerStats runnerStats;///< Backup target metrics

    std::filesystem::path from = ctx.repo->getConfig().getStr("from");///< Directory to back up from
    bool fullBackup = ctx.repo->getConfig().getStr("type") == "full";

    if (fullBackup) {
        ctx.logger->write("Backup is full because of the config\n", 1);
    } else {
        /// For progtest task compliance:
        /// if it's time for full backup as per config, force it
        auto period = ctx.repo->getConfig().getInt("full-period");
        auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);
        /// Newest archive (highest id) first
        std::sort(archives.begin(), archives.end(), [](const auto &lhs, const auto &rhs) { return lhs.second > rhs.second; });

        /// Count the incremental archives made after the most recent full one
        int incrementalCount = 0;
        for (auto const &entry: archives) {
            auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(entry.second));
            if (archiveO.isFull) break;
            ++incrementalCount;
        }

        if (incrementalCount >= period) {
            fullBackup = true;
            ctx.logger->write("Backup is full because of the interval\n", 1);
        }
        if (archives.empty()) {
            fullBackup = true;
            ctx.logger->write("Backup is full because there are no backups\n", 1);
        }
    }

    /// Worker callback, bound to the local workerStats variable
    workerStatsFunction workerCallback = [&](unsigned long long written, unsigned long long skipped, unsigned long long filesDone) {
        CommandsCommon::workerCallback(written, skipped, filesDone, workerStats);
    };

    std::vector<Object::idType> files;///< File ids so far added to the archive
    std::mutex filesLock;             ///< Files vector lock
    /// Function to safely add new file ids to `files`
    std::function<void(Object::idType)> addFile = [&](Object::idType id) {
        std::lock_guard lock(filesLock);
        files.emplace_back(id);
    };

    /// Technically the progtest task says that only the files from the last backup should be compared against...
    std::map<std::string, Object::idType> prevArchiveFiles;
    {
        auto knownFiles = ctx.repo->getObjects(Object::ObjectType::File);
        prevArchiveFiles.insert(knownFiles.begin(), knownFiles.end());
    }
    ctx.repo->clearCache(Object::ObjectType::File);

    {
        /// Calculate the average speed of backup
        RunningDiffAverage speed(
                [&]() { return workerStats.bytesWritten.load(); },
                100, 100);

        /// Show the progress of backup
        Progress progress(
                [ctx](const std::string &message, int level) { ctx.logger->write(message, level); },
                {[&]() { return std::to_string(workerStats.filesWritten.load()); },
                 "/",
                 [&]() { return std::to_string(runnerStats.filesToSaveCount); },
                 " files saved, ",
                 [&]() { return std::to_string(runnerStats.filesSkipped); },
                 " files skipped, ",
                 [&]() { return BytesFormatter::formatStr((workerStats.bytesWritten.load() + workerStats.bytesSkipped.load())); },
                 " / ",
                 [&]() { return BytesFormatter::formatStr(runnerStats.bytesToSave); },
                 " read @ ",
                 [&]() { return BytesFormatter::formatStr(speed.get() * 10); },
                 "/s"},
                ctx.repo->getConfig());

        /// Use the configured number of threads, or one per hardware thread if not configured
        const auto threadCount = ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency();

        /// Thread pool for backup tasks, prints to progress on any errors
        ThreadPool threadPool(
                [&](const std::string &error) { progress.print("Error: " + error, 0); },
                threadCount);

        /// Container of ChangeDetectors built using the config of the repository
        ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());

        /// Function to spawn a rechunking task
        auto saveFile = [&, this](const std::filesystem::path &absPath, const std::filesystem::path &relPath) {
            /// Only normal files contribute to the amount of bytes to save
            if (File::getFileType(absPath) == File::Type::Normal)
                runnerStats.bytesToSave += std::filesystem::file_size(absPath);
            else
                runnerStats.bytesToSave += 0;
            runnerStats.filesToSaveCount++;

            threadPool.push([&, relPath, absPath]() {
                addFile(backupChunkFile(absPath, relPath.u8string(), workerCallback, ctx));
                progress.print("Copied: " + relPath.u8string(), 1);
            });
        };

        /// Task to process an individual file in the backup
        std::function<void(std::filesystem::path)> processFile;
        if (fullBackup) {
            /// A full backup just saves every file
            processFile = [&, this](const std::filesystem::path &p) {
                saveFile(p, p.lexically_relative(from).u8string());
            };
        } else {
            /// An incremental backup re-chunks a file only if it's changed
            processFile = [&, this](const std::filesystem::path &p) {
                auto relPath = p.lexically_relative(from).u8string();

                const auto known = prevArchiveFiles.find(relPath);
                if (known != prevArchiveFiles.end()) {
                    File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(known->second));
                    const bool changed = changeDetector.check({repoFile, ctx.repo}, {p, from});
                    if (!changed) {
                        /// Reuse the stored File object as is
                        addFile(repoFile.id);
                        ctx.repo->addToCache(repoFile);
                        progress.print("Skipped: " + relPath, 1);
                        runnerStats.filesSkipped++;
                        return;
                    }
                }

                saveFile(p, relPath);
            };
        }

        /// Start the backup with the root directory and empty ignore list
        threadPool.push([&]() {
            processDirWithIgnore(
                    from,
                    {},
                    [&](std::function<void()> task) { threadPool.push(std::move(task)); },
                    processFile);
        });

        /// Wait for all the tasks to finish
        std::unique_lock finishedLock(threadPool.finishedLock);
        threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
    }

    ctx.logger->write("\n", 1);

    auto written = BytesFormatter::format(workerStats.bytesWritten);
    auto skipped = BytesFormatter::format(workerStats.bytesSkipped);

    ctx.logger->write(written.prefix + " written: " + written.number + '\n', 1);
    ctx.logger->write(skipped.prefix + " skipped: " + skipped.number + '\n', 1);

    /// The archive is named after the local time of its creation
    auto time = std::time(nullptr);
    auto ltime = std::localtime(&time);
    std::stringstream archiveName;
    archiveName << std::put_time(ltime, "%d-%m-%Y %H-%M-%S");
    /// Avoid archive name collisions
    while (ctx.repo->exists(Object::ObjectType::Archive, archiveName.str())) archiveName << "N";

    Archive a(ctx.repo->getId(), archiveName.str(), time, files, fullBackup);
    ctx.repo->putObject(a);
}
|
||||
|
||||
/// Chunks the file at \p orig and stores it in the repository under the name \p saveAs
///
/// Symlinks and directories are stored as a single chunk made of File::getFileContents().
/// Regular files are split by the configured Chunker; with the "dedup" config option set to
/// "on", chunks that already exist in the repository are reused instead of written again.
/// \param orig     Absolute path to the file
/// \param saveAs   UTF-8 encoded file name to save as
/// \param callback Stats callback, receives written/skipped bytes and one finished file
/// \param ctx      Context providing the repository and its config
/// \return ID of the saved file
/// \throws Exception if the file is a special file, can't be opened, changed its size
///                   during the backup, or if the backup was interrupted
Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, workerStatsFunction &callback, Context ctx) {
    /// If it's a symlink or directory, treat it specially
    /// The order of checks is important, because is_directory follows the symlink
    if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) {
        auto contents = File::getFileContents(orig);
        Chunk c(ctx.repo->getId(), MD5::calculate(contents), contents);
        File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.md5, {c.id}, File::getFileType(orig));
        ctx.repo->putObject(c);
        ctx.repo->putObject(f);
        /// Report the finished file here too, like for regular files below,
        /// otherwise the saved files counter never catches up with the files to save
        callback(0, 0, 1);
        return f.id;
    }
    if (!std::filesystem::is_regular_file(orig))
        throw Exception(orig.u8string() + " is a special file, not saving");

    std::ifstream ifstream(orig, std::ios::in | std::ios::binary);
    if (!ifstream) throw Exception("Couldn't open " + orig.u8string() + " for reading");
    std::unique_ptr<Chunker> chunker = ChunkerFactory::getChunker(ctx.repo->getConfig(), ifstream.rdbuf());

    MD5 fileHash;

    std::vector<Object::idType> fileChunks;
    unsigned long long size = 0;

    /// The option is read once up front instead of once per chunk
    const bool dedup = ctx.repo->getConfig().getStr("dedup") == "on";

    for (auto chunkp: *chunker) {
        /// Exit when asked to
        if (Signals::shouldQuit) break;

        Object::idType chunkId;
        size += chunkp.second.size();
        if (dedup && ctx.repo->exists(Object::ObjectType::Chunk, chunkp.first)) {
            /// If the chunk already exists, reuse it
            chunkId = ctx.repo->getObjectId(Object::ObjectType::Chunk, chunkp.first);
            callback(0, chunkp.second.size(), 0);
        } else {
            /// Otherwise, write it
            Chunk c(ctx.repo->getId(), chunkp.first, chunkp.second);
            chunkId = c.id;
            callback(c.data.size(), 0, 0);
            ctx.repo->putObject(c);
        }
        fileHash.feedData(chunkp.second);
        fileChunks.emplace_back(chunkId);
    }

    /// We might have exited in the loop before, so we don't save an incomplete file
    if (Signals::shouldQuit) throw Exception("Quitting!");
    if (size != File::getFileSize(orig)) {
        throw Exception("Something really bad happened or file " + orig.u8string() + " changed during backup");
    }
    File f(ctx.repo->getId(), saveAs, size, File::getFileMtime(orig), fileHash.getHash(), fileChunks, File::getFileType(orig));
    ctx.repo->putObject(f);
    callback(0, 0, 1);

    return f.id;
}
|
||||
28
src/commands/CommandRun.h
Normal file
28
src/commands/CommandRun.h
Normal file
@@ -0,0 +1,28 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMMANDRUN_H
|
||||
#define SEMBACKUP_COMMANDRUN_H
|
||||
|
||||
#include "Command.h"
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
/// Runs the backup according to the config in the Repository
|
||||
class CommandRun : public Command {
|
||||
public:
|
||||
CommandRun();
|
||||
void run(Context ctx) override;
|
||||
|
||||
private:
|
||||
/// Internal function to chunk the file and save it
|
||||
/// \param orig Absolute path to the file
|
||||
/// \param saveAs UTF-8 encoded file name to save as
|
||||
/// \param callback Stats callback
/// \param ctx Context providing the repository the file and its chunks are saved to
|
||||
/// \return ID of the saved file
|
||||
Object::idType backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, CommandsCommon::workerStatsFunction &callback, Context ctx);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMMANDRUN_H
|
||||
67
src/commands/CommandsCommon.cpp
Normal file
67
src/commands/CommandsCommon.cpp
Normal file
@@ -0,0 +1,67 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#include "CommandsCommon.h"
|
||||
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
|
||||
#include "../Exception.h"
|
||||
#include "../Signals.h"
|
||||
|
||||
/// Adds the reported amounts to the matching counters of \p to
void CommandsCommon::workerCallback(unsigned long long int bytesWritten, unsigned long long int bytesSkipped, unsigned long long int filesWritten, WorkerStats &to) {
    to.bytesWritten.fetch_add(bytesWritten);
    to.bytesSkipped.fetch_add(bytesSkipped);
    to.filesWritten.fetch_add(filesWritten);
}
|
||||
|
||||
/// Checks whether the UTF-8 text of \p p starts with the UTF-8 text of \p prefix
/// Note: this is a plain character-wise comparison, path components are not taken into account
/// \param prefix Constant reference to the prefix path
/// \param p      Constant reference to the checked path
/// \return       True if \p p begins with \p prefix, false otherwise
bool CommandsCommon::isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p) {
    /// u8string() builds a new string on every call, so convert each path only once
    const auto prefixStr = prefix.u8string();
    const auto pathStr = p.u8string();

    if (prefixStr.size() > pathStr.size()) return false;

    return pathStr.compare(0, prefixStr.size(), prefixStr) == 0;
}
|
||||
|
||||
void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile) {
|
||||
if (!std::filesystem::is_directory(dir)) throw Exception(dir.u8string() + " is not a directory!");
|
||||
|
||||
/// Don't process the directory if it has a ".nobackup" file
|
||||
if (std::filesystem::exists(dir / ".nobackup")) return;
|
||||
|
||||
/// If it has an .ignore file, add every line of it into our ignore vector
|
||||
if (std::filesystem::exists(dir / ".ignore")) {
|
||||
std::ifstream ignorefile(dir / ".ignore", std::ios::in);
|
||||
std::string line;
|
||||
while (std::getline(ignorefile, line)) {
|
||||
ignore.emplace_back(line);
|
||||
}
|
||||
}
|
||||
|
||||
/// For each directory entry...
|
||||
for (const auto &dirEntry: std::filesystem::directory_iterator(dir)) {
|
||||
/// Break in case exit was requested by the user
|
||||
if (Signals::shouldQuit) break;
|
||||
|
||||
/// Don't process the entry if it matches any of the ignore rules
|
||||
if (std::any_of(ignore.begin(), ignore.end(), [dirEntry](auto pred) {
|
||||
std::smatch m;
|
||||
auto s = dirEntry.path().filename().u8string();
|
||||
return std::regex_match(s, m, std::regex(pred));
|
||||
})) continue;
|
||||
|
||||
/// If it's a directory, spawn a task to process the entries in it
|
||||
if (!dirEntry.is_symlink() && dirEntry.is_directory()) {
|
||||
spawner([dirEntry, ignore, spawner, processFile]() {
|
||||
processDirWithIgnore(dirEntry.path(), ignore, spawner, processFile);
|
||||
});
|
||||
/// Don't save the dir if it has a .nobackup file
|
||||
if (std::filesystem::exists(dirEntry.path() / ".nobackup")) continue;
|
||||
}
|
||||
|
||||
/// Spawn a task to process each individual file
|
||||
spawner([processFile, dirEntry]() {
|
||||
processFile(dirEntry);
|
||||
});
|
||||
}
|
||||
}
|
||||
48
src/commands/CommandsCommon.h
Normal file
48
src/commands/CommandsCommon.h
Normal file
@@ -0,0 +1,48 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_COMMANDSCOMMON_H
|
||||
#define SEMBACKUP_COMMANDSCOMMON_H
|
||||
|
||||
#include <atomic>
|
||||
#include <filesystem>
|
||||
#include <functional>
|
||||
|
||||
namespace CommandsCommon {
    /// Progress callback; arguments are: bytes written, bytes skipped, files written
    using workerStatsFunction = std::function<void(unsigned long long, unsigned long long, unsigned long long)>;

    /// Internal function for recursive directory processing, taking into account ".ignore" and ".nobackup" files
    /// \param dir         Const reference to the path of directory to iterate through
    /// \param ignore      List of files to ignore
    /// \param spawner     Function to spawn other tasks
    /// \param processFile Task to spawn on found files
    void processDirWithIgnore(const std::filesystem::path &dir,
                              std::vector<std::string> ignore,
                              std::function<void(std::function<void()>)> spawner,
                              std::function<void(std::filesystem::directory_entry)> processFile);

    /// Counters updated by workers
    struct WorkerStats {
        std::atomic<unsigned long long> bytesWritten{0};
        std::atomic<unsigned long long> bytesSkipped{0};
        std::atomic<unsigned long long> filesWritten{0};
    };

    /// Counters describing the work found by the runner
    struct RunnerStats {
        std::atomic<unsigned long long> bytesToSave{0};
        std::atomic<unsigned long long> filesToSaveCount{0};
        std::atomic<unsigned long long> filesSkipped{0};
    };

    /// Checks if \p p has \p prefix as prefix
    /// \param prefix Constant reference to the prefix path
    /// \param p      Constant reference to the checked path
    /// \return       True if \p p contains \p prefix at its prefix, False otherwise
    bool isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p);

    /// Adds the given amounts to the counters of \p to
    void workerCallback(unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten, WorkerStats &to);

}// namespace CommandsCommon
|
||||
|
||||
|
||||
#endif//SEMBACKUP_COMMANDSCOMMON_H
|
||||
82
src/crypto/AES.cpp
Normal file
82
src/crypto/AES.cpp
Normal file
@@ -0,0 +1,82 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 30.04.2023.
|
||||
//
|
||||
|
||||
#include "AES.h"
|
||||
|
||||
#include <openssl/aes.h>
|
||||
#include <openssl/evp.h>
|
||||
#include <openssl/rand.h>
|
||||
|
||||
#include "../Exception.h"
|
||||
|
||||
/// Derives the key from \p password and \p salt, then encrypts \p in with it
std::vector<char> AES::encrypt(const std::vector<char> &in, const std::string &password, const std::string &salt) {
    const auto derivedKey = AES::deriveKey(password, salt);
    return AES::encrypt(in, derivedKey);
}
|
||||
|
||||
/// Derives the key from \p password and \p salt, then decrypts \p in with it
std::vector<char> AES::decrypt(const std::vector<char> &in, const std::string &password, const std::string &salt) {
    const auto derivedKey = AES::deriveKey(password, salt);
    return AES::decrypt(in, derivedKey);
}
|
||||
|
||||
std::vector<char> AES::encrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key) {
|
||||
std::unique_ptr<EVP_CIPHER_CTX, decltype(&EVP_CIPHER_CTX_free)> ctx(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
|
||||
if (!ctx) throw Exception("Error initializing encryption context!");
|
||||
|
||||
std::vector<char> out(in.size() + AES_BLOCK_SIZE + 32);
|
||||
if (!RAND_bytes(reinterpret_cast<unsigned char *>(out.data()), 32))
|
||||
throw Exception("Error generating IV!");
|
||||
|
||||
if (!EVP_EncryptInit_ex(ctx.get(), EVP_aes_256_cbc(), nullptr, key.data(), reinterpret_cast<const unsigned char *>(out.data())))
|
||||
throw Exception("Error encrypting!");
|
||||
|
||||
int outlen = static_cast<int>(out.size()) - 32;
|
||||
|
||||
if (!EVP_EncryptUpdate(ctx.get(), reinterpret_cast<unsigned char *>(out.data() + 32), &outlen, reinterpret_cast<const unsigned char *>(in.data()), static_cast<int>(in.size())))
|
||||
throw Exception("Error encrypting!");
|
||||
|
||||
int finlen = 0;
|
||||
if (!EVP_EncryptFinal_ex(ctx.get(), reinterpret_cast<unsigned char *>(out.data() + outlen + 32), &finlen))
|
||||
throw Exception("Error encrypting!");
|
||||
|
||||
out.resize(outlen + finlen + 32);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<char> AES::decrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key) {
|
||||
if (in.size() < 32) throw Exception("Array to decrypt is too small!");
|
||||
|
||||
std::unique_ptr<EVP_CIPHER_CTX, decltype(&EVP_CIPHER_CTX_free)> ctx(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
|
||||
if (!ctx) throw Exception("Error initializing encryption context!");
|
||||
|
||||
std::vector<char> out(in.size() - 32);
|
||||
int outlen = static_cast<int>(out.size());
|
||||
|
||||
if (!EVP_DecryptInit_ex(ctx.get(), EVP_aes_256_cbc(), nullptr, key.data(), reinterpret_cast<const unsigned char *>(in.data())))
|
||||
throw Exception("Error decrypting!");
|
||||
|
||||
|
||||
if (!EVP_DecryptUpdate(ctx.get(), reinterpret_cast<unsigned char *>(out.data()), &outlen, reinterpret_cast<const unsigned char *>(in.data() + 32), static_cast<int>(in.size() - 32)))
|
||||
throw Exception("Error decrypting!");
|
||||
|
||||
int finlen = 0;
|
||||
if (!EVP_DecryptFinal_ex(ctx.get(), (unsigned char *) (out.data() + outlen), &finlen))
|
||||
throw Exception("Error decrypting!");
|
||||
|
||||
out.resize(outlen + finlen);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::array<uint8_t, 32> AES::deriveKey(const std::string &password, const std::string &salt) {
|
||||
std::array<uint8_t, 32> key;//NOLINT
|
||||
if (!PKCS5_PBKDF2_HMAC_SHA1(password.data(),
|
||||
static_cast<int>(password.length()),
|
||||
reinterpret_cast<const unsigned char *>(salt.data()),
|
||||
static_cast<int>(salt.length()),
|
||||
10000,
|
||||
32,
|
||||
key.data()))
|
||||
throw Exception("Error deriving key!");
|
||||
return key;
|
||||
}
|
||||
59
src/crypto/AES.h
Normal file
59
src/crypto/AES.h
Normal file
@@ -0,0 +1,59 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 30.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_AES_H
|
||||
#define SEMBACKUP_AES_H
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/// Utility class to handle encryption/decryption of byte vectors
/**
 * Based on: https://wiki.openssl.org/index.php/EVP_Symmetric_Encryption_and_Decryption
 */
class AES {
public:
    /// Encrypts \p in with a key derived from \p password and \p salt
    /// \param in       Constant reference to to-be-encrypted vector
    /// \param password Constant reference to the password
    /// \param salt     Constant reference to the salt
    /// \return         Encrypted vector of size at most original + 48 (16 for possible padding, 32 for the IV)
    /// \throws Exception on any error
    static std::vector<char> encrypt(const std::vector<char> &in, const std::string &password, const std::string &salt);

    /// Decrypts \p in with a key derived from \p password and \p salt
    /// \param in       Constant reference to to-be-decrypted vector
    /// \param password Constant reference to the password
    /// \param salt     Constant reference to the salt
    /// \return         Decrypted vector
    /// \throws Exception on any error
    static std::vector<char> decrypt(const std::vector<char> &in, const std::string &password, const std::string &salt);

    /// Encrypts \p in using an already derived \p key
    /// \param in  Constant reference to to-be-encrypted vector
    /// \param key Constant reference to the key
    /// \return    Encrypted vector of size at most original + 48 (16 for possible padding, 32 for the IV)
    /// \throws Exception on any error
    static std::vector<char> encrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key);

    /// Decrypts \p in using an already derived \p key
    /// \param in  Constant reference to to-be-decrypted vector
    /// \param key Constant reference to the key
    /// \return    Decrypted vector
    /// \throws Exception on any error
    static std::vector<char> decrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key);

    /// Generates the encryption key from \p password and \p salt using PKCS5_PBKDF2_HMAC_SHA1
    /// \param password Constant reference to the password
    /// \param salt     Constant reference to the salt
    /// \return         Derived key
    /// \throws Exception on any error
    static std::array<uint8_t, 32> deriveKey(const std::string &password, const std::string &salt);
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_AES_H
|
||||
11
src/crypto/CRC32.cpp
Normal file
11
src/crypto/CRC32.cpp
Normal file
@@ -0,0 +1,11 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#include "CRC32.h"
|
||||
|
||||
/// Calculates the CRC32 of \p in using zlib
/// \param in Constant reference to a vector of chars
/// \return   CRC32 of the whole vector
CRC32::crcType CRC32::calculate(const std::vector<char> &in) {
    using sizeType = std::vector<char>::size_type;

    /// crc32() takes the length as a uInt, so longer inputs are fed in several steps instead of being truncated
    const sizeType maxStep = static_cast<uInt>(-1);

    crcType res = crc32(0L, nullptr, 0);

    const auto *cur = reinterpret_cast<const Bytef *>(in.data());
    sizeType left = in.size();
    while (left > 0) {
        const sizeType step = left < maxStep ? left : maxStep;
        res = crc32(res, cur, static_cast<uInt>(step));
        cur += step;
        left -= step;
    }

    return res;
}
|
||||
25
src/crypto/CRC32.h
Normal file
25
src/crypto/CRC32.h
Normal file
@@ -0,0 +1,25 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CRC32_H
|
||||
#define SEMBACKUP_CRC32_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
/// Utility class to compute CRC32 values of vectors of chars
|
||||
class CRC32 {
|
||||
public:
|
||||
using crcType = uLong;
|
||||
|
||||
/// Calculates the CRC32 of given vector
|
||||
/// \param in Constant reference to a vector of chars
|
||||
/// \return CRC32 result
|
||||
static crcType calculate(const std::vector<char> &in);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CRC32_H
|
||||
48
src/crypto/MD5.cpp
Normal file
48
src/crypto/MD5.cpp
Normal file
@@ -0,0 +1,48 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 15.04.2023.
|
||||
//
|
||||
|
||||
#include "MD5.h"
|
||||
|
||||
#include "../Exception.h"
|
||||
|
||||
std::string MD5::calculate(const std::vector<char> &in) {
|
||||
MD5 hasher;
|
||||
hasher.feedData(in);
|
||||
return hasher.getHash();
|
||||
}
|
||||
|
||||
/// Checks that the context was allocated and initializes it for MD5
/// \throws Exception if either step fails
MD5::MD5() {
    /// The second condition is only evaluated when the context exists
    if (!mdctx || !EVP_DigestInit_ex(mdctx.get(), EVP_md5(), nullptr))
        throw Exception("Can't create hashing context!");
}
|
||||
|
||||
void MD5::feedData(const std::vector<char> &in) {
|
||||
if (in.empty()) return;
|
||||
if (!EVP_DigestUpdate(mdctx.get(), in.data(), in.size()))
|
||||
throw Exception("Error hashing!");
|
||||
}
|
||||
|
||||
std::string MD5::getHash() {
|
||||
std::array<char, 16> out;
|
||||
unsigned int s = 0;
|
||||
|
||||
if (!EVP_DigestFinal_ex(mdctx.get(), reinterpret_cast<unsigned char *>(out.data()), &s))
|
||||
throw Exception("Error hashing!");
|
||||
|
||||
if (s != out.size())
|
||||
throw Exception("Error hashing!");
|
||||
|
||||
if (!EVP_MD_CTX_reset(mdctx.get()))
|
||||
throw Exception("Error hashing!");
|
||||
|
||||
return {out.begin(), out.end()};
|
||||
}
|
||||
|
||||
/// String overload: copies \p in into a char vector and hashes that
std::string MD5::calculate(const std::string &in) {
    const std::vector<char> bytes(in.begin(), in.end());
    return MD5::calculate(bytes);
}
|
||||
48
src/crypto/MD5.h
Normal file
48
src/crypto/MD5.h
Normal file
@@ -0,0 +1,48 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 15.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_MD5_H
|
||||
#define SEMBACKUP_MD5_H
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <openssl/evp.h>
|
||||
|
||||
/// Class to handle MD5 hashing
|
||||
/**
|
||||
* Based on: https://wiki.openssl.org/index.php/EVP_Message_Digests
|
||||
*/
|
||||
class MD5 {
|
||||
public:
|
||||
/// Constructs an empty MD5 hasher instance
|
||||
/// \throws Exception on initialization error
|
||||
MD5();
|
||||
|
||||
/// Calculates the hash for a given \p in char vector
|
||||
/// \param in Constant reference to an input vector
|
||||
/// \return MD5 hash of \p in
|
||||
static std::string calculate(const std::vector<char> &in);
|
||||
|
||||
/// Calculates the hash for a given \p in string
|
||||
/// \param in Constant reference to an input string
|
||||
/// \return MD5 hash of \p in
|
||||
static std::string calculate(const std::string &in);
|
||||
|
||||
/// Append a vector of chars to the current hash
|
||||
/// \param in Constant reference to an input vector
|
||||
/// \throws Exception on any error
|
||||
void feedData(const std::vector<char> &in);
|
||||
|
||||
/// Returns the hash, resets the hashing context
|
||||
/// \throws Exception on any error
|
||||
std::string getHash();
|
||||
|
||||
private:
|
||||
const std::unique_ptr<EVP_MD_CTX, decltype(&EVP_MD_CTX_free)> mdctx{EVP_MD_CTX_new(), &EVP_MD_CTX_free};///< Current hashing context
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_MD5_H
|
||||
47
src/filters/CheckFilter.cpp
Normal file
47
src/filters/CheckFilter.cpp
Normal file
@@ -0,0 +1,47 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#include "CheckFilter.h"
|
||||
#include "../crypto/CRC32.h"
|
||||
#include "../repo/Serialize.h"
|
||||
|
||||
std::vector<char> CheckFilter::filterWrite(std::vector<char> from) const {
|
||||
return filterWriteStatic(std::move(from));
|
||||
}
|
||||
|
||||
std::vector<char> CheckFilter::filterRead(std::vector<char> from) const {
|
||||
return filterReadStatic(std::move(from));
|
||||
}
|
||||
|
||||
std::vector<char> CheckFilter::filterWriteStatic(std::vector<char> from) {
|
||||
auto out = magic;
|
||||
|
||||
Serialize::serialize(from, out);
|
||||
|
||||
auto crc = CRC32::calculate(from);
|
||||
|
||||
Serialize::serialize(crc, out);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<char> CheckFilter::filterReadStatic(std::vector<char> from) {
|
||||
if (from.size() < magic.size()) throw Exception("Input is corrupted (too small)!");
|
||||
|
||||
for (size_t i = 0; i < magic.size(); i++) {
|
||||
if (from[i] != magic[i]) throw Exception("Magic prefix is wrong!");
|
||||
}
|
||||
|
||||
auto fromIt = from.cbegin() + magic.size();
|
||||
|
||||
auto out = Serialize::deserialize<std::vector<char>>(fromIt, from.cend());
|
||||
|
||||
auto crc = CRC32::calculate(out);
|
||||
|
||||
auto crcRecorded = Serialize::deserialize<CRC32::crcType>(fromIt, from.cend());
|
||||
|
||||
if (crc != crcRecorded) throw Exception("CRC mismatch!");
|
||||
|
||||
return out;
|
||||
}
|
||||
36
src/filters/CheckFilter.h
Normal file
36
src/filters/CheckFilter.h
Normal file
@@ -0,0 +1,36 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 12.05.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CHECKFILTER_H
|
||||
#define SEMBACKUP_CHECKFILTER_H
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
/// Filter implementation that checks the input for corruption using CRC
|
||||
/**
|
||||
* Additionally, it has static methods for work outside FilterContainer%s
|
||||
*/
|
||||
class CheckFilter : public Filter {
|
||||
public:
|
||||
/// \copydoc Filter::filterWrite
|
||||
/// \copydoc CheckFilter::filterWriteS
|
||||
std::vector<char> filterWrite(std::vector<char> from) const override;
|
||||
|
||||
/// \copydoc Filter::filterRead
|
||||
/// \copydoc CheckFilter::filterReadS
|
||||
std::vector<char> filterRead(std::vector<char> from) const override;
|
||||
|
||||
/// Adds CRC hash and magic string to the the \p from vector
|
||||
static std::vector<char> filterWriteStatic(std::vector<char> from);
|
||||
|
||||
/// Checks the \p from vector and removes the metadata
|
||||
/// \throws Exception on any error
|
||||
static std::vector<char> filterReadStatic(std::vector<char> from);
|
||||
|
||||
private:
|
||||
static const inline std::vector<char> magic{'s', 'e', 'm', 'b', 'a'};
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CHECKFILTER_H
|
||||
6
src/filters/Filter.cpp
Normal file
6
src/filters/Filter.cpp
Normal file
@@ -0,0 +1,6 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
|
||||
#include "Filter.h"
|
||||
/// Out-of-line definition of the defaulted Filter destructor
Filter::~Filter() = default;
|
||||
30
src/filters/Filter.h
Normal file
30
src/filters/Filter.h
Normal file
@@ -0,0 +1,30 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILTER_H
|
||||
#define SEMBACKUP_FILTER_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
/// Interface class for I/O filters
class Filter {
public:
    /// Applies the filter to the \p from vector and returns the result
    /// Note: the vector is taken by value, so callers can std::move it in and a filter that works in-place avoids a copy
    /// \param from Source vector of chars
    /// \return     Filtered vector of chars
    virtual std::vector<char> filterWrite(std::vector<char> from) const = 0;

    /// Reverses the filter applied to the \p from vector and returns the result
    /// Note: the vector is taken by value, so callers can std::move it in and a filter that works in-place avoids a copy
    /// \param from Source vector of chars
    /// \return     Filtered vector of chars
    virtual std::vector<char> filterRead(std::vector<char> from) const = 0;

    /// Virtual destructor
    virtual ~Filter();
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILTER_H
|
||||
17
src/filters/FilterAES.cpp
Normal file
17
src/filters/FilterAES.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
|
||||
#include "FilterAES.h"
|
||||
|
||||
#include "../crypto/AES.h"
|
||||
|
||||
std::vector<char> FilterAES::filterWrite(std::vector<char> from) const {
|
||||
return AES::encrypt(from, key);
|
||||
}
|
||||
|
||||
std::vector<char> FilterAES::filterRead(std::vector<char> from) const {
|
||||
return AES::decrypt(from, key);
|
||||
}
|
||||
|
||||
/// Derives the key from \p password and \p salt once, at construction
FilterAES::FilterAES(const std::string &password, const std::string &salt) : key(AES::deriveKey(password, salt)) {}
|
||||
37
src/filters/FilterAES.h
Normal file
37
src/filters/FilterAES.h
Normal file
@@ -0,0 +1,37 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILTERAES_H
|
||||
#define SEMBACKUP_FILTERAES_H
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
/// Filter implementation that encrypts/decrypts data using provided password and salt
|
||||
class FilterAES : public Filter {
|
||||
public:
|
||||
/// Constructs the filter, using \p password and \p salt to generate the encryption key
|
||||
/// \param password Constant reference to password string
|
||||
/// \param salt Constant reference to salt string
|
||||
FilterAES(const std::string &password, const std::string &salt);
|
||||
|
||||
/// Encrypts the \p from vector
|
||||
/// \copydoc Filter::filterWrite
|
||||
/// \throws Exception on any error
|
||||
std::vector<char> filterWrite(std::vector<char> from) const override;
|
||||
|
||||
/// Decrypts the \p from vector
|
||||
/// \copydoc Filter::filterRead
|
||||
/// \throws Exception on any error
|
||||
std::vector<char> filterRead(std::vector<char> from) const override;
|
||||
|
||||
private:
|
||||
const std::array<uint8_t, 32> key;///< Key used for encryption, derived from \p password and \p salt
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILTERAES_H
|
||||
23
src/filters/FilterContainer.cpp
Normal file
23
src/filters/FilterContainer.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
|
||||
#include "FilterContainer.h"
|
||||
|
||||
/// Defaulted constructor, no filters are added here
FilterContainer::FilterContainer() = default;
|
||||
|
||||
/// Takes ownership of \p f and appends it to the filter list
/// \return Reference to this container, so calls can be chained
FilterContainer &FilterContainer::addFilter(std::unique_ptr<Filter> &&f) {
    filters.push_back(std::move(f));
    return *this;
}
|
||||
|
||||
std::vector<char> FilterContainer::filterWrite(std::vector<char> from) const {
|
||||
for (auto const &f: filters) from = f->filterWrite(std::move(from));
|
||||
return from;
|
||||
}
|
||||
|
||||
/// Passes \p from through every filter, last added filter first
std::vector<char> FilterContainer::filterRead(std::vector<char> from) const {
    for (auto remaining = filters.size(); remaining > 0; --remaining) {
        from = filters[remaining - 1]->filterRead(std::move(from));
    }
    return from;
}
|
||||
37
src/filters/FilterContainer.h
Normal file
37
src/filters/FilterContainer.h
Normal file
@@ -0,0 +1,37 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILTERCONTAINER_H
|
||||
#define SEMBACKUP_FILTERCONTAINER_H
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
/// Convenience Filter implementation, that applies multiple Filter%s in succession
|
||||
class FilterContainer : public Filter {
|
||||
public:
|
||||
/// Constructs an empty FilterContainer
|
||||
FilterContainer();
|
||||
|
||||
/// Adds a Filter into itself
|
||||
/// \param f Rvalue reference to a unique pointer to Filter
|
||||
/// \return Reference to itself
|
||||
FilterContainer &addFilter(std::unique_ptr<Filter> &&f);
|
||||
|
||||
/// Applies the filters in order of insertion
|
||||
/// \copydoc Filter::filterWrite
|
||||
std::vector<char> filterWrite(std::vector<char> from) const override;
|
||||
|
||||
/// Applies the filters in reverse order of insertion
|
||||
/// \copydoc Filter::filterRead
|
||||
std::vector<char> filterRead(std::vector<char> from) const override;
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<Filter>> filters;///< Vector of unique pointers to Filter%s
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILTERCONTAINER_H
|
||||
32
src/filters/FilterFactory.cpp
Normal file
32
src/filters/FilterFactory.cpp
Normal file
@@ -0,0 +1,32 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
|
||||
#include "FilterFactory.h"
|
||||
|
||||
#include "../Exception.h"
|
||||
#include "CheckFilter.h"
|
||||
#include "FilterAES.h"
|
||||
#include "FilterShift.h"
|
||||
#include "FilterShiftSecret.h"
|
||||
#include "FilterZlib.h"
|
||||
|
||||
std::unique_ptr<Filter> FilterFactory::makeFilter(const std::string &type, const Config &config) {
|
||||
if (type == "none") throw Exception("Trying to make a \"none\" filter!");
|
||||
|
||||
if (type == "aes") {
|
||||
return std::make_unique<FilterAES>(config.getStr("password"), config.getStr("salt"));
|
||||
} else if (type == "zlib") {
|
||||
return std::make_unique<FilterZlib>(config.getInt("compression-level"));
|
||||
} else if (type == "crc") {
|
||||
return std::make_unique<CheckFilter>();
|
||||
}
|
||||
#ifdef TEST
|
||||
else if (type == "shiftC") {
|
||||
return std::make_unique<FilterShift>(config.getInt("compression-level"));
|
||||
} else if (type == "shiftE")
|
||||
return std::make_unique<FilterShiftSecret>(config.getStr("password"), config.getStr("salt"));
|
||||
#endif
|
||||
|
||||
throw Exception("Unknown filter value");
|
||||
}
|
||||
25
src/filters/FilterFactory.h
Normal file
25
src/filters/FilterFactory.h
Normal file
@@ -0,0 +1,25 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILTERFACTORY_H
|
||||
#define SEMBACKUP_FILTERFACTORY_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "../Config.h"
|
||||
#include "Filter.h"
|
||||
|
||||
/// Utility factory class for Filter%s
|
||||
class FilterFactory {
|
||||
public:
|
||||
/// Constructs a Filter of type \p type according to \p config
|
||||
/// \param type Constant reference to a string containing the type of filter to construct
|
||||
/// \param config Constant reference to Config which will be used to determine constructed Filter%'s parameters
|
||||
/// \return Unique pointer to the constructed Filter
|
||||
static std::unique_ptr<Filter> makeFilter(const std::string &type, const Config &config);
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILTERFACTORY_H
|
||||
18
src/filters/FilterShift.cpp
Normal file
18
src/filters/FilterShift.cpp
Normal file
@@ -0,0 +1,18 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
#ifdef TEST
|
||||
#include "FilterShift.h"
|
||||
|
||||
std::vector<char> FilterShift::filterWrite(std::vector<char> from) const {
|
||||
for (auto &c: from) c += shiftVal;
|
||||
return from;
|
||||
}
|
||||
|
||||
std::vector<char> FilterShift::filterRead(std::vector<char> from) const {
|
||||
for (auto &c: from) c -= shiftVal;
|
||||
return from;
|
||||
}
|
||||
|
||||
/// Stores \p level as the shift value
FilterShift::FilterShift(int level) : shiftVal(level) {}
|
||||
#endif
|
||||
30
src/filters/FilterShift.h
Normal file
30
src/filters/FilterShift.h
Normal file
@@ -0,0 +1,30 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 22.04.2023.
|
||||
//
|
||||
#ifdef TEST
|
||||
#ifndef SEMBACKUP_FILTERSHIFT_H
|
||||
#define SEMBACKUP_FILTERSHIFT_H
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
/// Filter implementation that shifts every byte in input vector using provided value
|
||||
/// \warning For testing purposes only!
|
||||
class FilterShift : public Filter {
|
||||
public:
|
||||
/// Constructs the filter using \p level as shift value
|
||||
/// \param level Number that will be added to each input byte
|
||||
FilterShift(int level);
|
||||
|
||||
/// \copydoc Filter::filterWrite
|
||||
std::vector<char> filterWrite(std::vector<char> from) const override;
|
||||
|
||||
/// \copydoc Filter::filterRead
|
||||
std::vector<char> filterRead(std::vector<char> from) const override;
|
||||
|
||||
private:
|
||||
int shiftVal;///< Value to add to input bytes
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILTERSHIFT_H
|
||||
#endif//TEST
|
||||
23
src/filters/FilterShiftSecret.cpp
Normal file
23
src/filters/FilterShiftSecret.cpp
Normal file
@@ -0,0 +1,23 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
#ifdef TEST
|
||||
|
||||
#include "FilterShiftSecret.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
std::vector<char> FilterShiftSecret::filterWrite(std::vector<char> from) const {
|
||||
for (auto &c: from) c += shiftVal;
|
||||
return from;
|
||||
}
|
||||
|
||||
std::vector<char> FilterShiftSecret::filterRead(std::vector<char> from) const {
|
||||
for (auto &c: from) c -= shiftVal;
|
||||
return from;
|
||||
}
|
||||
|
||||
/// Uses the sum of the first characters of \p password and \p salt as the shift value
FilterShiftSecret::FilterShiftSecret(const std::string &password, const std::string &salt)
    : shiftVal(password[0] + salt[0]) {}
|
||||
#endif
|
||||
33
src/filters/FilterShiftSecret.h
Normal file
33
src/filters/FilterShiftSecret.h
Normal file
@@ -0,0 +1,33 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
#ifdef TEST
|
||||
#ifndef SEMBACKUP_FILTERSHIFTSECRET_H
|
||||
#define SEMBACKUP_FILTERSHIFTSECRET_H
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
/// Filter implementation that shifts every byte in input vector using two provided value
|
||||
/// \warning For testing purposes only!
|
||||
class FilterShiftSecret : public Filter {
|
||||
public:
|
||||
/// Constructs the filter using the sum of first bytes of \p password and \p salt to initialize shiftVal
|
||||
/// \param password Constant reference to "password" string
|
||||
/// \param salt Constant reference to "salt" string
|
||||
FilterShiftSecret(const std::string &password, const std::string &salt);
|
||||
|
||||
/// \copydoc Filter::filterWrite
|
||||
std::vector<char> filterWrite(std::vector<char> from) const override;
|
||||
|
||||
/// \copydoc Filter::filterRead
|
||||
std::vector<char> filterRead(std::vector<char> from) const override;
|
||||
|
||||
private:
|
||||
int shiftVal = 0;///< Value to add to input bytes
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILTERSHIFTSECRET_H
|
||||
#endif//TEST
|
||||
50
src/filters/FilterZlib.cpp
Normal file
50
src/filters/FilterZlib.cpp
Normal file
@@ -0,0 +1,50 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
|
||||
#include "FilterZlib.h"
|
||||
|
||||
#include <zlib.h>
|
||||
|
||||
#include "../repo/Serialize.h"
|
||||
|
||||
std::vector<char> FilterZlib::filterWrite(std::vector<char> from) const {
|
||||
uLongf outSize = compressBound(from.size());
|
||||
|
||||
std::vector<char> out;
|
||||
Serialize::serialize('C', out);
|
||||
Serialize::serialize(static_cast<unsigned long long>(from.size()), out);
|
||||
|
||||
uLongf sizeSize = out.size();
|
||||
|
||||
out.resize(sizeSize + outSize);
|
||||
|
||||
if (compress2(reinterpret_cast<Bytef *>(out.data() + sizeSize), &outSize, reinterpret_cast<const Bytef *>(from.data()), from.size(), level) !=
|
||||
Z_OK)
|
||||
throw Exception("Error compressing!");
|
||||
|
||||
out.resize(outSize + sizeSize);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
std::vector<char> FilterZlib::filterRead(std::vector<char> from) const {
|
||||
auto desI = from.cbegin();
|
||||
|
||||
char C = Serialize::deserialize<char>(desI, from.cend());
|
||||
if (C != 'C') throw Exception("Bad compression prefix!");
|
||||
|
||||
uLongf size = Serialize::deserialize<unsigned long long>(desI, from.cend());
|
||||
|
||||
std::vector<char> out(size);
|
||||
|
||||
if (desI >= from.cend()) throw Exception("Unexpected end of archive!");
|
||||
|
||||
if (uncompress(reinterpret_cast<Bytef *>(out.data()), &size, reinterpret_cast<const Bytef *>(&(*desI)), std::distance(desI, from.cend())) !=
|
||||
Z_OK)
|
||||
throw Exception("Error decompressing!");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
FilterZlib::FilterZlib(int level) : level(level) {}
|
||||
31
src/filters/FilterZlib.h
Normal file
31
src/filters/FilterZlib.h
Normal file
@@ -0,0 +1,31 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILTERZLIB_H
|
||||
#define SEMBACKUP_FILTERZLIB_H
|
||||
|
||||
#include "Filter.h"
|
||||
|
||||
/// Filter implementation that uses Zlib to compress data
|
||||
class FilterZlib : public Filter {
|
||||
public:
|
||||
/// Creates the filter using \p level as compression level
|
||||
/// \param level
|
||||
FilterZlib(int level);
|
||||
|
||||
/// Compresses the \p from vector
|
||||
/// \copydoc Filter::filterWrite
|
||||
/// \throws Exception on any error
|
||||
std::vector<char> filterWrite(std::vector<char> from) const override;
|
||||
|
||||
/// Decompresses the \p from vector
|
||||
/// \copydoc Filter::filterRead
|
||||
/// \throws Exception on any error
|
||||
std::vector<char> filterRead(std::vector<char> from) const override;
|
||||
|
||||
private:
|
||||
int level = -1;///< Compression level to use, -1 is the Zlib default
|
||||
};
|
||||
|
||||
#endif//SEMBACKUP_FILTERZLIB_H
|
||||
129
src/main.cpp
Normal file
129
src/main.cpp
Normal file
@@ -0,0 +1,129 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "BytesFormatter.h"
|
||||
#include "Config.h"
|
||||
#include "Context.h"
|
||||
#include "Exception.h"
|
||||
#include "Logger.h"
|
||||
#include "Signals.h"
|
||||
#include "commands/Command.h"
|
||||
#include "commands/CommandDiff.h"
|
||||
#include "commands/CommandList.h"
|
||||
#include "commands/CommandListFiles.h"
|
||||
#include "commands/CommandRestore.h"
|
||||
#include "commands/CommandRun.h"
|
||||
#include "repo/FileRepository.h"
|
||||
#include "repo/Repository.h"
|
||||
#include "repo/Serialize.h"
|
||||
#include "repo/objects/Archive.h"
|
||||
#include "repo/objects/File.h"
|
||||
|
||||
/// Builds a Config from command line arguments given as "--key value" pairs
/// \param argc Number of entries in \p argv
/// \param argv Arguments, expected to alternate between "--key" and its value
/// \return Config with every key/value pair added through Config::add
/// \throws Exception if a key doesn't start with "--" or if the last key has no value
Config getConf(int argc, char *argv[]) {
    Config parsed;
    int pos = 0;
    while (pos < argc) {
        const std::string rawKey = argv[pos];
        if (rawKey.compare(0, 2, "--") != 0) throw Exception("Options should start with --");
        std::string name = rawKey.substr(2);

        // The value must directly follow its key
        const int valuePos = pos + 1;
        if (valuePos == argc) throw Exception("Option not specified for " + name);
        std::string value = argv[valuePos];

        parsed.add(name, value);
        pos = valuePos + 1;
    }
    return parsed;
}
|
||||
|
||||
int help() {
|
||||
for (auto const &o: Config::keys) {
|
||||
std::cout << "--" << o.first << " <" << Config::KeyTypeToStr.at(o.second.type) << ">" << std::endl;
|
||||
if (o.second.defaultval.has_value())
|
||||
std::cout << " Default: " << o.second.defaultval.value() << std::endl;
|
||||
std::cout << " Is saved in repository: " << (o.second.remember ? "yes" : "no") << std::endl;
|
||||
std::cout << " Info: " << o.second.info << std::endl;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Creates a FileRepository from \p conf and opens it
/// \param conf Config to construct the repository with
/// \return The opened repository, or nullptr (after printing the error) if construction or open() threw
std::unique_ptr<Repository> openRepo(Config &conf) {
    std::unique_ptr<Repository> opened;
    try {
        auto candidate = std::make_unique<FileRepository>(conf);
        candidate->open();
        opened = std::move(candidate);
    } catch (std::exception &err) {
        std::cout << "Error opening repo: " << err.what() << std::endl;
        return nullptr;
    }
    return opened;
}
|
||||
|
||||
/// Creates a new FileRepository described by \p conf and initializes it
/// \param conf Config to construct the repository with
/// \return 0 on success, -1 (after printing the error) if construction or init() threw
int init(Config &conf) {
    try {
        // std::make_unique never returns null (it throws on failure), so no null check is needed.
        // Construction is inside the try block so that constructor failures are reported
        // the same way as init() failures.
        auto repo = std::make_unique<FileRepository>(conf);
        repo->init();
    } catch (const std::exception &e) {
        std::cout << "Error initializing repo: " << e.what() << std::endl;
        return -1;
    }

    return 0;
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
try {
|
||||
Signals::setup();
|
||||
|
||||
if (argc < 2) {
|
||||
std::cerr << "No argument specified" << std::endl;
|
||||
help();
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::string opt = argv[1];
|
||||
if (opt == "help") {
|
||||
return help();
|
||||
}
|
||||
|
||||
Config conf;
|
||||
|
||||
try {
|
||||
conf = getConf(argc - 2, argv + 2);
|
||||
} catch (std::exception &e) {
|
||||
std::cerr << "Error reading config!" << std::endl
|
||||
<< e.what() << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (opt == "init") {
|
||||
return init(conf);
|
||||
}
|
||||
|
||||
auto repo = openRepo(conf);
|
||||
|
||||
if (repo == nullptr) {
|
||||
std::cerr << "Can't open repo!" << std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
Logger logger(conf.getInt("verbose"));
|
||||
Context ctx{&logger, repo.get()};
|
||||
|
||||
std::unordered_map<std::string, std::unique_ptr<Command>> commands;
|
||||
commands.emplace(CommandDiff().name, std::make_unique<CommandDiff>());
|
||||
commands.emplace(CommandRestore().name, std::make_unique<CommandRestore>());
|
||||
commands.emplace(CommandRun().name, std::make_unique<CommandRun>());
|
||||
commands.emplace(CommandListFiles().name, std::make_unique<CommandListFiles>());
|
||||
commands.emplace(CommandList().name, std::make_unique<CommandList>());
|
||||
|
||||
if (commands.count(opt) == 0) {
|
||||
std::cerr << "Unknown argument" << std::endl;
|
||||
return -1;
|
||||
} else {
|
||||
commands.at(opt)->run(ctx);
|
||||
}
|
||||
} catch (std::exception &e) {
|
||||
std::cerr << "Error!" << std::endl
|
||||
<< e.what() << std::endl;
|
||||
} catch (...) {
|
||||
std::cerr << "Something very bad happened!" << std::endl;
|
||||
}
|
||||
}
|
||||
246
src/repo/FileRepository.cpp
Normal file
246
src/repo/FileRepository.cpp
Normal file
@@ -0,0 +1,246 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#include "FileRepository.h"
|
||||
|
||||
#include <exception>
|
||||
#include <iterator>
|
||||
#include <mutex>
|
||||
|
||||
#include "../filters/CheckFilter.h"
|
||||
#include "../filters/FilterFactory.h"
|
||||
#include "Object.h"
|
||||
#include "Serialize.h"
|
||||
|
||||
/// Constructs the repository object; nothing is read from or written to disk here
/// \param config Config to use; its "repo" value is the repository root and
///               "repo-target" is the write cache target size in MiB
FileRepository::FileRepository(Config config)
    : Repository(std::move(config)),
      // The `config` parameter was moved from above: read every setting from the base-class member instead
      root(std::filesystem::path(this->config.getStr("repo"))),
      // Widen before multiplying so that large targets don't overflow a narrower integer type
      writeCacheMax(static_cast<unsigned long long>(this->config.getInt("repo-target")) * 1024 * 1024) {}
|
||||
|
||||
bool FileRepository::exists() {
|
||||
return std::filesystem::is_directory(root) && std::filesystem::exists(root / "info");
|
||||
}
|
||||
|
||||
/// Takes the write cache lock and flushes the write cache
/// \return Always true
bool FileRepository::flush() {
    std::unique_lock cacheGuard(writeCacheLock);
    // flushWriteCache releases the lock itself
    flushWriteCache(std::move(cacheGuard));
    return true;
}
|
||||
|
||||
bool FileRepository::open() {
|
||||
if (!exists()) throw Exception("Repository doesn't exist!");
|
||||
|
||||
auto readConf = Serialize::deserialize<Config>(CheckFilter::filterReadStatic(readFile(root / "info")));
|
||||
std::swap(config, readConf);
|
||||
config.merge(readConf);
|
||||
|
||||
if (config.getStr("compression") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("compression"), config));
|
||||
if (config.getStr("encryption") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("encryption"), config));
|
||||
filters.addFilter(FilterFactory::makeFilter("crc", config));
|
||||
|
||||
ready = true;
|
||||
try {
|
||||
std::tie(maxFileId, offsetIndex) = Serialize::deserialize<std::pair<decltype(maxFileId), decltype(offsetIndex)>>(filters.filterRead(readFile(root / "offsets")));
|
||||
std::tie(keyIndex, largestUnusedId) = Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>(filters.filterRead(readFile(root / "index")));
|
||||
} catch (const std::exception &e) {
|
||||
ready = false;
|
||||
throw;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FileRepository::init() {
|
||||
if (ready) throw Exception("Trying to initialize already initialized repository!");
|
||||
if (exists()) throw Exception("Trying to initialize already existing repository!");
|
||||
|
||||
if (!std::filesystem::is_directory(root) && !std::filesystem::create_directories(root))
|
||||
throw Exception("Can't create directory " + root.u8string());
|
||||
|
||||
writeFile(root / "info", CheckFilter::filterWriteStatic(Serialize::serialize(config)));
|
||||
|
||||
if (config.getStr("compression") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("compression"), config));
|
||||
if (config.getStr("encryption") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("encryption"), config));
|
||||
filters.addFilter(FilterFactory::makeFilter("crc", config));
|
||||
|
||||
ready = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Flushes the write cache and stores both indexes, but only if the repository was opened or initialized
FileRepository::~FileRepository() {
    if (!ready) return;
    ready = false;

    flushWriteCache(std::unique_lock(writeCacheLock));

    // NOTE(review): writeFile throws Exception on failure, and this is a destructor - confirm that is intended
    writeFile(root / "offsets", filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex))));
    writeFile(root / "index", filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId))));
}
|
||||
|
||||
/// Looks up where the object with \p id is stored, reads that byte range and runs it through the read filters
/// \throws Exception if the repository isn't ready or there is no offset entry for \p id
std::vector<char> FileRepository::getObject(Object::idType id) const {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    std::unique_lock indexGuard(repoLock);
    const auto found = offsetIndex.find(id);
    if (found == offsetIndex.end())
        throw Exception("Object with id " + std::to_string(id) + " doesn't exist!");
    // Copy the entry so the file can be read without holding the lock
    const OffsetEntry location = found->second;
    indexGuard.unlock();

    auto raw = readFile(root / std::to_string(location.fileId), location.offset, location.length);
    return filters.filterRead(std::move(raw));
}
|
||||
|
||||
bool FileRepository::writeObject(const Object &obj) {
|
||||
if (!ready) throw Exception("Tried working with uninitialized repo!");
|
||||
auto tmp = filters.filterWrite(Serialize::serialize(obj));
|
||||
{
|
||||
std::unique_lock lockW(writeCacheLock);
|
||||
writeCacheSize += tmp.size();
|
||||
writeCache[obj.id] = std::move(tmp);
|
||||
|
||||
// If we have reached the target file size, flush the cache
|
||||
if (writeCacheSize >= writeCacheMax) {
|
||||
flushWriteCache(std::move(lockW));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) {
|
||||
if (writeCache.empty()) {
|
||||
lockW.unlock();
|
||||
return;
|
||||
}
|
||||
|
||||
// Swap the cache for a new one and unlock the mutex so other threads can continue working
|
||||
decltype(writeCache) objs;
|
||||
std::swap(writeCache, objs);
|
||||
writeCacheSize = 0;
|
||||
|
||||
decltype(maxFileId) currentFileId;
|
||||
{
|
||||
std::lock_guard lockI(repoLock);
|
||||
currentFileId = maxFileId;
|
||||
maxFileId++;
|
||||
}
|
||||
lockW.unlock();
|
||||
|
||||
unsigned long long offset = 0;
|
||||
std::ofstream ofstream(root / std::to_string(currentFileId), std::ios::binary | std::ios::trunc | std::ios::out);
|
||||
|
||||
for (auto &i: objs) {
|
||||
{
|
||||
std::lock_guard lockI(repoLock);
|
||||
offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size()));
|
||||
}
|
||||
offset += i.second.size();
|
||||
ofstream.rdbuf()->sputn(i.second.data(), i.second.size());
|
||||
}
|
||||
}
|
||||
|
||||
bool FileRepository::putObject(const Object &obj) {
|
||||
// Put the object into index, and then write it to the storage
|
||||
{
|
||||
std::lock_guard lock(repoLock);
|
||||
keyIndex[obj.type][obj.getKey()] = obj.id;
|
||||
}
|
||||
writeObject(obj);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool FileRepository::deleteObject(const Object &obj) {
|
||||
if (!ready) throw Exception("Tried working with uninitialized repo!");
|
||||
throw Exception("Deletion not implemented!");
|
||||
}
|
||||
|
||||
std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const {
|
||||
if (size > absoluteMaxFileLimit) throw Exception("Tried to read " + std::to_string(size) +
|
||||
" bytes from " + file.u8string() +
|
||||
" which is more than absoluteMaxFileLimit");
|
||||
|
||||
std::ifstream ifstream(file, std::ios::binary | std::ios::in);
|
||||
if (!ifstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for reading!");
|
||||
|
||||
std::vector<char> buf(size);
|
||||
|
||||
if (ifstream.rdbuf()->pubseekpos(offset) == std::streampos(std::streamoff(-1))) throw Exception("Unexpected end of file " + file.u8string());
|
||||
if (ifstream.rdbuf()->sgetn(buf.data(), size) != size) throw Exception("Unexpected end of file " + file.u8string());
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
std::vector<char> FileRepository::readFile(const std::filesystem::path &file) const {
|
||||
if (!std::filesystem::is_regular_file(file)) throw Exception("File " + file.u8string() + " is not a regular file!");
|
||||
auto fileSize = std::filesystem::file_size(file);
|
||||
if (fileSize == 0) return {};
|
||||
return readFile(file, 0, fileSize);
|
||||
}
|
||||
|
||||
bool FileRepository::writeFile(const std::filesystem::path &file, const std::vector<char> &data) {
|
||||
std::ofstream ofstream(file, std::ios::binary | std::ios::trunc | std::ios::out);
|
||||
if (!ofstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for writing!");
|
||||
|
||||
if (ofstream.rdbuf()->sputn(data.data(), data.size()) != data.size())
|
||||
throw Exception("Couldn't write all the data for " + file.u8string());
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Resolves the id stored for (\p type, \p key) and returns the object with that id
std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const {
    const Object::idType resolvedId = getObjectId(type, key);
    return getObject(resolvedId);
}
|
||||
|
||||
/// Returns the id stored in the key index for (\p type, \p key)
/// \throws Exception if there are no entries for \p type;
///         a missing \p key is reported by the inner map's at()
Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const {
    std::lock_guard indexGuard(repoLock);

    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) throw Exception("No objects of requested type!");

    return ofType->second.at(key);
}
|
||||
|
||||
/// Returns a copy of every (key, id) pair stored in the key index for \p type
/// \return The pairs in the iteration order of the index, empty if there are no entries for \p type
std::vector<std::pair<std::string, Object::idType>> FileRepository::getObjects(Object::ObjectType type) const {
    std::lock_guard indexGuard(repoLock);

    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) return {};

    std::vector<std::pair<std::string, Object::idType>> listing;
    for (const auto &entry: ofType->second) {
        listing.emplace_back(entry);
    }
    return listing;
}
|
||||
|
||||
/// Tells whether the key index has an entry for (\p type, \p key)
bool FileRepository::exists(Object::ObjectType type, const std::string &key) const {
    std::lock_guard indexGuard(repoLock);

    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) return false;

    return ofType->second.find(key) != ofType->second.end();
}
|
||||
|
||||
/// Hands out the current value of largestUnusedId and advances the counter, under repoLock
Object::idType FileRepository::getId() {
    std::lock_guard indexGuard(repoLock);
    const Object::idType issued = largestUnusedId;
    ++largestUnusedId;
    return issued;
}
|
||||
|
||||
/// Deserialization constructor: reads fileId, offset and length (in that order), advancing \p in
FileRepository::OffsetEntry::OffsetEntry(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : fileId(Serialize::deserialize<decltype(fileId)>(in, end)),
      offset(Serialize::deserialize<decltype(offset)>(in, end)),
      length(Serialize::deserialize<decltype(length)>(in, end)) {}
|
||||
|
||||
void FileRepository::OffsetEntry::serialize(std::vector<char> &out) const {
|
||||
Serialize::serialize(fileId, out);
|
||||
Serialize::serialize(offset, out);
|
||||
Serialize::serialize(length, out);
|
||||
}
|
||||
|
||||
FileRepository::OffsetEntry::OffsetEntry(unsigned long long int fileId, unsigned long long int offset, unsigned long long int length)
|
||||
: fileId(fileId), offset(offset), length(length) {}
|
||||
|
||||
/// Erases all key index entries of type \p type
/// \param type Type of the objects whose entries are dropped
/// \return Always true
bool FileRepository::clearCache(Object::ObjectType type) {
    // keyIndex is guarded by repoLock everywhere else; take it here too
    std::lock_guard lock(repoLock);
    keyIndex[type] = {};
    return true;
}
|
||||
|
||||
bool FileRepository::addToCache(const Object &obj) {
|
||||
{
|
||||
std::unique_lock lock(repoLock);
|
||||
if (offsetIndex.count(obj.id) == 0)
|
||||
throw Exception("Object with id " + std::to_string(obj.id) + " doesn't exist!");
|
||||
}
|
||||
{
|
||||
std::lock_guard lock(repoLock);
|
||||
keyIndex[obj.type][obj.getKey()] = obj.id;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
125
src/repo/FileRepository.h
Normal file
125
src/repo/FileRepository.h
Normal file
@@ -0,0 +1,125 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILEREPOSITORY_H
|
||||
#define SEMBACKUP_FILEREPOSITORY_H
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
|
||||
#include "Object.h"
|
||||
#include "Repository.h"
|
||||
|
||||
/// Repository implementation in the local filesystem
|
||||
/**
|
||||
* `root` Config value is used as a root
|
||||
* Objects are stored concatenated in files with approximate size of `repo-target` MB (from Config)
|
||||
* The object key/object id index is stored as a hash map, as a `index` file out of the object storage structure
|
||||
* Hints for the location of objects inside of files are also stored as a hash map in the `offsets` file
|
||||
* Config is stored in the `info` file, merged with the supplied Config on open()
|
||||
*
|
||||
* Thread safe, approx. max memory usage is `number of threads` * `repo-target`,
|
||||
* as every thread can be flushing its write cache at the same time
|
||||
*/
|
||||
class FileRepository final : public Repository {
|
||||
public:
|
||||
/// Constructs a new FileRepository
|
||||
/// \param config Config to use
|
||||
FileRepository(Config config);
|
||||
|
||||
bool exists() override;
|
||||
bool open() override;
|
||||
bool init() override;
|
||||
bool flush() override;
|
||||
|
||||
std::vector<char> getObject(Object::idType id) const override;
|
||||
bool putObject(const Object &obj) override;
|
||||
bool deleteObject(const Object &obj) override;
|
||||
|
||||
std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override;
|
||||
Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override;
|
||||
std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override;
|
||||
|
||||
bool clearCache(Object::ObjectType type) override;
|
||||
bool addToCache(const Object &obj) override;
|
||||
|
||||
bool exists(Object::ObjectType type, const std::string &key) const override;
|
||||
Object::idType getId() override;
|
||||
|
||||
/// FileRepository destructor
|
||||
/// Flushes write cache, and writes the metadata
|
||||
~FileRepository() override;
|
||||
FileRepository(const FileRepository &r) = delete;
|
||||
FileRepository &operator=(const FileRepository &r) = delete;
|
||||
|
||||
private:
|
||||
const std::filesystem::path root;///< Root of the repository in the filesystem
|
||||
|
||||
/// Puts the Object raw data into write cache
|
||||
bool writeObject(const Object &obj);
|
||||
|
||||
bool ready = false;/// < Indicates whether the FileRepository was open or initialized
|
||||
|
||||
/// Reads the file and returns its raw data
|
||||
/// \param file Constant reference to the absolute path of the file
|
||||
/// \return Vector of bytes of the file
|
||||
std::vector<char> readFile(const std::filesystem::path &file) const;
|
||||
|
||||
|
||||
/// Reads the \psize bytes of the file from \p offset and returns its raw data
|
||||
/// \param file Constant reference to the absolute path of the file
|
||||
/// \param offset First byte of the file to read
|
||||
/// \param size Amount of bytes to read (no more than absoluteMaxFileLimit)
|
||||
/// \return Vector of bytes of the file
|
||||
/// \throws Exception on any error, or when absoluteMaxFileLimit is reached
|
||||
std::vector<char> readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const;
|
||||
static constexpr unsigned long long absoluteMaxFileLimit{4ULL * 1024 * 1024 * 1024};///<Max file read size (4GB)
|
||||
|
||||
/// Writes \p data to \p file
|
||||
/// \param file Constant reference to the absolute path of the file
|
||||
/// \param data Constant reference to the vector of bytes to write
|
||||
/// \return True
|
||||
/// \throws Exception on any error
|
||||
bool writeFile(const std::filesystem::path &file, const std::vector<char> &data);
|
||||
|
||||
mutable std::mutex repoLock;///< Lock for any operations on the Repository
|
||||
|
||||
/// Helper struct to store the location of objects in the filesystem
|
||||
struct OffsetEntry {
|
||||
unsigned long long fileId;///< ID of file where the object is located
|
||||
unsigned long long offset;///< Offset in the file where the object starts
|
||||
unsigned long long length;///< Length of the object
|
||||
using serializable = std::true_type;
|
||||
|
||||
/// Default constructor
|
||||
OffsetEntry(unsigned long long fileId, unsigned long long offset, unsigned long long length);
|
||||
|
||||
/// Deserialization constrictor
|
||||
OffsetEntry(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
|
||||
|
||||
/// Serializes the entry to \p out
|
||||
void serialize(std::vector<char> &out) const;
|
||||
};
|
||||
|
||||
unsigned long long maxFileId = 1; ///< Largest ID of object storage file
|
||||
std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem
|
||||
|
||||
std::mutex writeCacheLock; ///< Write cache lock
|
||||
std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data
|
||||
unsigned long long writeCacheSize = 0; ///< Current byte size of the write cache
|
||||
const unsigned long long writeCacheMax; ///< Target size of the write cache, it is automatically flushed after this is reached
|
||||
|
||||
/// Flushes the write cache
|
||||
/// Takes the cache lock, swaps the cache with an empty one and unlocks it
|
||||
/// \param lockW Write cache lock
|
||||
void flushWriteCache(std::unique_lock<std::mutex> &&lockW);
|
||||
|
||||
Object::idType largestUnusedId = 1; ///< Largest available objectID
|
||||
std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>> keyIndex;///< Maps Object%'s keys to their ID's
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILEREPOSITORY_H
|
||||
21
src/repo/Object.cpp
Normal file
21
src/repo/Object.cpp
Normal file
@@ -0,0 +1,21 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#include "Object.h"
|
||||
|
||||
#include "Serialize.h"
|
||||
|
||||
Object::Object(idType id, ObjectType type) : id(id), type(type) {}
|
||||
|
||||
/// Deserialization constructor: reads the id and then the type, advancing \p in
Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : id(Serialize::deserialize<idType>(in, end)),
      type(Serialize::deserialize<ObjectType>(in, end)) {}
|
||||
|
||||
void Object::serialize(std::vector<char> &out) const {
|
||||
Serialize::serialize(id, out);
|
||||
Serialize::serialize(type, out);
|
||||
}
|
||||
|
||||
// Out-of-line definition of the destructor (it still needs a body for derived classes to call)
Object::~Object() = default;
|
||||
53
src/repo/Object.h
Normal file
53
src/repo/Object.h
Normal file
@@ -0,0 +1,53 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_OBJECT_H
|
||||
#define SEMBACKUP_OBJECT_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
/// Base class for objects in the Repository
/**
 * Every object has a unique id, and is also indexed by a type-key pair in the Repository cache
 */
class Object {
public:
    using idType = uint64_t;///< Type alias for Object%'s ID

    /// Kinds of objects that can be stored
    enum class ObjectType {
        Archive,
        File,
        Chunk,
        END
    };

    /// Serializes the object to \p out
    virtual void serialize(std::vector<char> &out) const;
    /// Signals the Serialization template to use Object's serialization/deserialization facilities
    using serializable = std::true_type;

    /// Pure virtual destructor, so that no instance of Object itself can be created
    virtual ~Object() = 0;

    /// Pure virtual function that returns the key by which the object will be indexed in the Repository cache
    /// All derived objects should implement this method
    virtual std::string getKey() const = 0;

    const idType id;      ///< Unique numerical id of the object
    const ObjectType type;///< Type of the object

protected:
    /// Constructs the object from its id and type
    /// \param id   Object ID
    /// \param type Object type
    Object(idType id, ObjectType type);

    /// Deserialization constructor
    Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_OBJECT_H
|
||||
12
src/repo/Repository.cpp
Normal file
12
src/repo/Repository.cpp
Normal file
@@ -0,0 +1,12 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#include "Repository.h"
|
||||
// Out-of-line definition of the virtual destructor
Repository::~Repository() = default;
|
||||
|
||||
/// Takes ownership of \p config by moving it into the member
Repository::Repository(Config config)
    : config(std::move(config)) {
}
|
||||
|
||||
/// Gives read-only access to the Config held by this Repository
const Config &Repository::getConfig() const {
    return this->config;
}
|
||||
122
src/repo/Repository.h
Normal file
122
src/repo/Repository.h
Normal file
@@ -0,0 +1,122 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_REPOSITORY_H
|
||||
#define SEMBACKUP_REPOSITORY_H
|
||||
|
||||
#include <filesystem>
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
|
||||
#include "../Config.h"
|
||||
#include "../filters/FilterContainer.h"
|
||||
#include "Object.h"
|
||||
|
||||
/// Abstract base class for an Object Repository
|
||||
/**
|
||||
* So far only FileRepository exists, and probably this interface is too inflexible
|
||||
* to be easily used for creating other (database, object storage...) repositories,
|
||||
* but it should be possible with some refactoring
|
||||
*/
|
||||
|
||||
class Repository {
|
||||
public:
|
||||
/// Checks if a repository already exists
|
||||
/// \return True if exists, False otherwise
|
||||
virtual bool exists() = 0;
|
||||
|
||||
/// Tries to open the Repository
|
||||
/// \return True
|
||||
/// \throws Exception on any error
|
||||
virtual bool open() = 0;
|
||||
|
||||
/// Tries to initialize the Repository
|
||||
/// \return True
|
||||
/// \throws Exception on any error, including if the Repository is already initialized
|
||||
virtual bool init() = 0;
|
||||
|
||||
/// Tries to flush the Repository write cache
|
||||
/// \returns True
|
||||
/// \throws Exception on any error
|
||||
virtual bool flush() = 0;
|
||||
|
||||
/// Returns the serialized Object with id \p id
|
||||
/// \param id ID of object to return
|
||||
/// \return Serialized object
|
||||
/// \throws Exception on any error or if object doesn't exist
|
||||
virtual std::vector<char> getObject(Object::idType id) const = 0;
|
||||
|
||||
/// Adds the Object \p obj to the Repository
|
||||
/// \param obj Constant reference to the object
|
||||
/// \return True
|
||||
/// \throws Exception on any error
|
||||
virtual bool putObject(const Object &obj) = 0;
|
||||
|
||||
/// Deletes Object \p obj from the Repository
|
||||
/// \param obj Constant reference to the object
|
||||
/// \return True if successful, False if it didn't exist
|
||||
/// \throws Exception on any error
|
||||
virtual bool deleteObject(const Object &obj) = 0;
|
||||
|
||||
/// Returns the Object of type \p type and with key \p key
|
||||
/// \param type Type of the object
|
||||
/// \param key Constant reference to the key of the object
|
||||
/// \return Serialized object
|
||||
/// \throws Exception on any error or if object doesn't exist
|
||||
virtual std::vector<char> getObject(Object::ObjectType type, const std::string &key) const = 0;
|
||||
|
||||
/// Returns the id of an Object of type \p type and with key \p key
|
||||
/// \param type Type of the object
|
||||
/// \param key Constant reference to the key of the object
|
||||
/// \return ID of the object
|
||||
/// \throws Exception on any error or if object doesn't exist
|
||||
virtual Object::idType getObjectId(Object::ObjectType type, const std::string &key) const = 0;
|
||||
|
||||
/// Returns the list of Objects of type \p type
|
||||
/// \param type Type of the object
|
||||
/// \return Vector of pairs <key of object, id of object>
|
||||
/// \throws Exception on any error
|
||||
virtual std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const = 0;
|
||||
|
||||
/// Returns whether Object of type \p type and with key \p key exists
|
||||
/// \param type Type of the object
|
||||
/// \param key Constant reference to the key of the object
|
||||
/// \return True if exists, False otherwise
|
||||
/// \throws Exception on any error
|
||||
virtual bool exists(Object::ObjectType type, const std::string &key) const = 0;
|
||||
|
||||
/// Erases all the cache entries of object type \p type
|
||||
/// \param type Type of the objects
|
||||
/// \return True
|
||||
virtual bool clearCache(Object::ObjectType type) = 0;
|
||||
|
||||
/// Adds the object to the cache, but doesn't change it on disk otherwise
|
||||
/// \param obj Constant reference to the object
|
||||
/// \return True
|
||||
/// \throws Exception on any error, or if the object doesn't exist
|
||||
virtual bool addToCache(const Object &obj) = 0;
|
||||
|
||||
/// Returns the next available object id
|
||||
virtual Object::idType getId() = 0;
|
||||
|
||||
/// Returns the const reference to Config object used for this Repository
|
||||
const Config &getConfig() const;
|
||||
|
||||
/// Default virtual destructor
|
||||
virtual ~Repository();
|
||||
|
||||
Repository(const Repository &r) = delete;
|
||||
Repository &operator=(const Repository &r) = delete;
|
||||
|
||||
protected:
|
||||
/// Base Repository class constructor
|
||||
/// \param config Config to use
|
||||
Repository(Config config);
|
||||
|
||||
Config config; ///< Config of this Repository
|
||||
FilterContainer filters;///< Container of IO filters used to transform Objects when writing/reading to/from storage
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_REPOSITORY_H
|
||||
208
src/repo/Serialize.h
Normal file
208
src/repo/Serialize.h
Normal file
@@ -0,0 +1,208 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 15.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_SERIALIZE_H
|
||||
#define SEMBACKUP_SERIALIZE_H
|
||||
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <stdexcept>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#ifdef __APPLE__
|
||||
#include <machine/endian.h>
|
||||
#define htobe64(x) htonll(x)
|
||||
#define be64toh(x) ntohll(x)
|
||||
#else
|
||||
#include <endian.h>
|
||||
#endif
|
||||
|
||||
#include "../Exception.h"
|
||||
|
||||
/// Serialization library
|
||||
/**
|
||||
* To serialize the objects in Repository, we have to handle a couple of cases:
|
||||
* 1. Serializing integers (object ids, etc...)
|
||||
* 2. Serializing enums (object types)
|
||||
* 3. Serializing char vectors and strings
|
||||
* 4. Serializing other STL containers (which also requires serializing pairs)
|
||||
* 5. Serializing custom structs (including the objects themselves)
|
||||
*
|
||||
* With this library it is possible to do all of that.
|
||||
* One problem is that it isn't really portable, but it can be fixed by changing the std::is_integral<T>::value case to use something like be64toh/htobe64
|
||||
*
|
||||
*/
|
||||
namespace Serialize {
|
||||
/// Detects pair-like types: anything exposing both a `first` and a `second` member
template<typename, typename = void, typename = void>
struct is_pair : std::false_type {};

/// Chosen only when both `.first` and `.second` are valid expressions on a \p Candidate value
template<typename Candidate>
struct is_pair<Candidate,
               std::void_t<decltype(std::declval<Candidate>().first)>,
               std::void_t<decltype(std::declval<Candidate>().second)>>
    : std::true_type {};
|
||||
|
||||
/// Trait telling whether container \p T offers `emplace_back` callable with a value of type \p V
/// The primary template is the "no" answer
template<typename, typename, typename = void>
struct has_emplace_back : std::false_type {};

/// Selected only when `T().emplace_back(V&&)` is a well-formed expression
template<typename T, typename V>
struct has_emplace_back<T, V, decltype(void(T().emplace_back(std::declval<V>())))> : std::true_type {};
|
||||
|
||||
/// Trait telling whether a type opted in to custom serialization
/// The primary template is the "no" answer
template<typename, typename = void, typename = void>
struct serializable : std::false_type {};

/// Selected when the type exposes a nested `serializable` type with a `value` member
/// For such types serialization is delegated to the object's .serialize() method,
/// and deserialization to its T(char vector iterator in, const char vector iterator end) constructor,
/// which follows the same conventions as Serialize::deserialize
template<typename T>
struct serializable<T, decltype(void(T::serializable::value))> : std::true_type {};
|
||||
|
||||
/// Deserializes object of type \p T starting from the first byte \p in, advances the iterator past the end of the object
|
||||
/// \tparam T Type to deserialize
|
||||
/// \param in Iterator to the first byte of the object
|
||||
/// \param end End iterator of source container
|
||||
/// \return Deserialized value
|
||||
template<typename T>
|
||||
static T deserialize(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
|
||||
|
||||
/// Serializes object of type \p T into vector \p out
|
||||
/// \tparam T Type to serialize
|
||||
/// \param what Constant reference to the serialized object
|
||||
/// \param out Reference to output vector
|
||||
template<typename T>
|
||||
static void serialize(const T &what, std::vector<char> &out);
|
||||
|
||||
/// Serializes the object of type \p T and returns the resulting vector
|
||||
/// \tparam T Type to serialize
|
||||
/// \param o Constant reference to the serialized object
|
||||
/// \return Serialized data
|
||||
template<typename T>
|
||||
static std::vector<char> serialize(const T &o);
|
||||
|
||||
/// Deserializes object of type \p T from input vector \p from
|
||||
/// \tparam T Type to deserialize
|
||||
/// \param from Constant reference to the serialized object
|
||||
/// \return Deserialized value
|
||||
template<typename T>
|
||||
static T deserialize(const std::vector<char> &from);
|
||||
|
||||
template<typename T>
|
||||
T deserialize(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end) {
|
||||
if (in >= end) throw Exception("Unexpected end of object!");
|
||||
|
||||
if constexpr (serializable<T>::value) {
|
||||
// If the object declares itself as serializable, call its constructor with in and end
|
||||
return T(in, end);
|
||||
} else if constexpr (is_pair<T>::value) {
|
||||
// If the object is pair, deserialize the first and second element and return the pair
|
||||
using KT = typename std::remove_const<decltype(T::first)>::type;
|
||||
using VT = typename std::remove_const<decltype(T::second)>::type;
|
||||
auto K = deserialize<KT>(in, end);
|
||||
auto V = deserialize<VT>(in, end);
|
||||
return T(std::move(K), std::move(V));
|
||||
} else if constexpr (std::is_enum<T>::value) {
|
||||
// If the object is an enum, deserialize an int and cast it to the enum
|
||||
auto tmp = deserialize<uint32_t>(in, end);
|
||||
if (tmp >= 0 && tmp < static_cast<uint32_t>(T::END))
|
||||
return static_cast<T>(tmp);
|
||||
else
|
||||
throw Exception("Enum out of range!");
|
||||
} else if constexpr (sizeof(T) == 1) {
|
||||
// If it's a single byte, just copy it
|
||||
if (std::distance(in, end) < sizeof(T))
|
||||
throw Exception("Unexpected end of object!");
|
||||
return *(in++);
|
||||
} else if constexpr (std::is_integral<T>::value) {
|
||||
uint64_t tmp;
|
||||
static_assert(sizeof(tmp) == 8);
|
||||
|
||||
// If the object is a number, copy it byte-by-byte
|
||||
if (std::distance(in, end) < sizeof(tmp))
|
||||
throw Exception("Unexpected end of object!");
|
||||
|
||||
std::copy(in, in + sizeof(tmp), reinterpret_cast<char *>(&tmp));
|
||||
in += sizeof(tmp);
|
||||
return static_cast<T>(be64toh(tmp));
|
||||
} else {
|
||||
// Otherwise we treat it as a container, in format of <number of elements>b<elements>e
|
||||
size_t size = deserialize<size_t>(in, end);
|
||||
|
||||
char b = deserialize<char>(in, end);
|
||||
if (b != 'b') throw Exception("Error deserializing!");
|
||||
|
||||
T out;
|
||||
if constexpr (sizeof(typename T::value_type) == 1) {
|
||||
// Optimization for char vectors
|
||||
if (std::distance(in, end) < size)
|
||||
throw Exception("Unexpected end of object!");
|
||||
out.insert(out.end(), in, in + size);
|
||||
in += size;
|
||||
} else
|
||||
for (size_t i = 0; i < size; i++) {
|
||||
using V = typename T::value_type;
|
||||
V v = deserialize<V>(in, end);
|
||||
// Try either emplace_back or emplace if it doesn't exist
|
||||
if constexpr (has_emplace_back<T, V>::value)
|
||||
out.emplace_back(std::move(v));
|
||||
else
|
||||
out.emplace(std::move(v));
|
||||
}
|
||||
|
||||
b = deserialize<char>(in, end);
|
||||
if (b != 'e') throw Exception("Error deserializing!");
|
||||
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void serialize(const T &what, std::vector<char> &out) {
|
||||
if constexpr (serializable<T>::value) {
|
||||
// If the object declares itself as serializable, call its serialize method
|
||||
what.serialize(out);
|
||||
} else if constexpr (is_pair<T>::value) {
|
||||
// If the object is pair, serialize the first and second element
|
||||
serialize(what.first, out);
|
||||
serialize(what.second, out);
|
||||
} else if constexpr (std::is_enum<T>::value) {
|
||||
// If the object is an enum, cast it to an int and serialize that
|
||||
serialize(static_cast<uint32_t>(what), out);
|
||||
} else if constexpr (sizeof(T) == 1) {
|
||||
// If it's a single byte, just copy it
|
||||
out.emplace_back(what);
|
||||
} else if constexpr (std::is_integral<T>::value) {
|
||||
// If the object is a number, copy it byte-by-byte
|
||||
uint64_t tmp = htobe64(static_cast<uint64_t>(what));
|
||||
static_assert(sizeof(tmp) == 8);
|
||||
out.insert(out.end(), (reinterpret_cast<const char *>(&tmp)), (reinterpret_cast<const char *>(&tmp) + sizeof(tmp)));
|
||||
} else {
|
||||
// Otherwise we treat it as a container, in format of <number of elements>b<elements>e
|
||||
serialize(what.size(), out);
|
||||
serialize('b', out);
|
||||
if constexpr (sizeof(typename T::value_type) == 1) {
|
||||
// Optimization for char vectors
|
||||
out.insert(out.end(), what.begin(), what.end());
|
||||
} else
|
||||
for (auto const &i: what) {
|
||||
serialize(i, out);
|
||||
}
|
||||
serialize('e', out);
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload: serializes `o` into a freshly created byte vector and returns it
template<typename T>
std::vector<char> serialize(const T &o) {
    std::vector<char> encoded;
    serialize(o, encoded);
    return encoded;
}
|
||||
|
||||
// Convenience overload: deserializes a T starting at the first byte of `from`
template<typename T>
T deserialize(const std::vector<char> &from) {
    // The iterator overload advances its first argument, so it needs a mutable cursor
    std::vector<char>::const_iterator cursor = from.cbegin();
    const std::vector<char>::const_iterator last = from.cend();
    return deserialize<T>(cursor, last);
}
|
||||
}// namespace Serialize
|
||||
|
||||
#endif//SEMBACKUP_SERIALIZE_H
|
||||
35
src/repo/objects/Archive.cpp
Normal file
35
src/repo/objects/Archive.cpp
Normal file
@@ -0,0 +1,35 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#include "Archive.h"
|
||||
|
||||
#include "../../Exception.h"
|
||||
#include "../Serialize.h"
|
||||
|
||||
// Builds an Archive from already known field values.
// `name` and `files` are taken by value and moved into the members to avoid a second copy.
Archive::Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files, bool full)
    : Object(id, ObjectType::Archive), name(std::move(name)), mtime(mtime), files(std::move(files)), isFull(full) {}
|
||||
|
||||
// Deserialization constructor: reads the Object header, then name, mtime, files and isFull
// in the same order Archive::serialize writes them, followed by a trailing file count
// that is used as a consistency check.
Archive::Archive(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      name(Serialize::deserialize<std::string>(in, end)),
      mtime(Serialize::deserialize<unsigned long long>(in, end)),
      files(Serialize::deserialize<std::remove_const_t<decltype(files)>>(in, end)),
      isFull(Serialize::deserialize<bool>(in, end)) {
    if (type != ObjectType::Archive)
        throw Exception("Type mismatch for Archive!");

    // The count written after the payload must agree with what was actually read
    const auto recordedCount = Serialize::deserialize<decltype(files.size())>(in, end);
    if (recordedCount != files.size())
        throw Exception("Number of files recorded doesn't match the number of files read!");
}
|
||||
|
||||
// Writes the Object header followed by name, mtime, files, isFull and a trailing file count
// (the count is re-checked by the deserialization constructor)
void Archive::serialize(std::vector<char> &out) const {
    Object::serialize(out);

    const auto put = [&out](const auto &field) { Serialize::serialize(field, out); };
    put(name);
    put(mtime);
    put(files);
    put(isFull);
    put(files.size());
}
|
||||
|
||||
std::string Archive::getKey() const {
|
||||
return name;
|
||||
}
|
||||
32
src/repo/objects/Archive.h
Normal file
32
src/repo/objects/Archive.h
Normal file
@@ -0,0 +1,32 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_ARCHIVE_H
|
||||
#define SEMBACKUP_ARCHIVE_H
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "../Object.h"
|
||||
|
||||
/// Object representing a backup
|
||||
class Archive : public Object {
|
||||
public:
|
||||
Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files, bool full = false);
|
||||
|
||||
/// \copydoc Object::serialize
|
||||
Archive(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
|
||||
|
||||
void serialize(std::vector<char> &out) const override;
|
||||
|
||||
/// Returns the name of the archive
|
||||
std::string getKey() const override;
|
||||
|
||||
const std::string name; ///< Archive name
|
||||
const unsigned long long mtime; ///< Time of creation
|
||||
const std::vector<idType> files;///< List of ids of File objects in the Archive
|
||||
const bool isFull = false; ///< Whether this was a full archive
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_ARCHIVE_H
|
||||
30
src/repo/objects/Chunk.cpp
Normal file
30
src/repo/objects/Chunk.cpp
Normal file
@@ -0,0 +1,30 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#include "Chunk.h"
|
||||
|
||||
#include "../../Exception.h"
|
||||
#include "../Serialize.h"
|
||||
|
||||
// Builds a Chunk from its hash and raw data.
// Initializers are listed in member declaration order (md5, data, length), which is the order
// they actually run in; `length` reads this->data, so it relies on `data` being initialized first.
Chunk::Chunk(idType id, std::string md5, std::vector<char> data)
    : Object(id, ObjectType::Chunk),
      md5(std::move(md5)),
      data(std::move(data)),
      length(this->data.size()) {}
|
||||
|
||||
// Deserialization constructor: reads the Object header, then md5, data and length
// in the same order Chunk::serialize writes them, and validates the result.
Chunk::Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      md5(Serialize::deserialize<std::remove_const_t<decltype(md5)>>(in, end)),
      data(Serialize::deserialize<std::remove_const_t<decltype(data)>>(in, end)),
      length(Serialize::deserialize<std::remove_const_t<decltype(length)>>(in, end)) {
    if (type != ObjectType::Chunk)
        throw Exception("Type mismatch for Chunk!");

    // The stored length is redundant with the data and serves as a consistency check
    if (length != data.size())
        throw Exception("Recorded length and actual length don't match for Chunk!");
}
|
||||
|
||||
// Writes the Object header followed by md5, data and length
void Chunk::serialize(std::vector<char> &out) const {
    Object::serialize(out);

    const auto put = [&out](const auto &field) { Serialize::serialize(field, out); };
    put(md5);
    put(data);
    put(length);
}
|
||||
|
||||
std::string Chunk::getKey() const {
|
||||
return md5;
|
||||
}
|
||||
33
src/repo/objects/Chunk.h
Normal file
33
src/repo/objects/Chunk.h
Normal file
@@ -0,0 +1,33 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_CHUNK_H
|
||||
#define SEMBACKUP_CHUNK_H
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "../Object.h"
|
||||
|
||||
/// Object representing a part of a File
|
||||
class Chunk : public Object {
|
||||
public:
|
||||
Chunk(idType id, std::string, std::vector<char> data);
|
||||
|
||||
/// Deserialization constructor
|
||||
Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
|
||||
|
||||
/// \copydoc Object::serialize
|
||||
void serialize(std::vector<char> &out) const override;
|
||||
|
||||
/// Returns the MD5 of the chunk
|
||||
std::string getKey() const override;
|
||||
|
||||
const std::string md5; ///< MD5 hash of the chunk
|
||||
const std::vector<char> data; ///< Raw chunk data
|
||||
const unsigned long long length;///< Size of chunk in bytes
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_CHUNK_H
|
||||
84
src/repo/objects/File.cpp
Normal file
84
src/repo/objects/File.cpp
Normal file
@@ -0,0 +1,84 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#include "File.h"
|
||||
|
||||
#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../../Exception.h"
|
||||
#include "../Serialize.h"
|
||||
|
||||
// Builds a File from already known field values.
// `name`, `md5` and `chunks` are taken by value and moved into the members to avoid a second copy.
File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType)
    : Object(id, ObjectType::File),
      name(std::move(name)),
      bytes(bytes),
      mtime(mtime),
      md5(std::move(md5)),
      fileType(fileType),
      chunks(std::move(chunks)) {}
|
||||
|
||||
// Deserialization constructor: reads the Object header, then name, bytes, mtime, md5,
// fileType and chunks in the same order File::serialize writes them.
File::File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      name(Serialize::deserialize<std::string>(in, end)),
      bytes(Serialize::deserialize<unsigned long long>(in, end)),
      mtime(Serialize::deserialize<unsigned long long>(in, end)),
      md5(Serialize::deserialize<std::remove_const_t<decltype(md5)>>(in, end)),
      fileType(Serialize::deserialize<std::remove_const_t<decltype(fileType)>>(in, end)),
      chunks(Serialize::deserialize<std::remove_const_t<decltype(chunks)>>(in, end)) {
    if (type != ObjectType::File)
        throw Exception("Type mismatch for File!");
}
|
||||
|
||||
// Writes the Object header followed by name, bytes, mtime, md5, fileType and chunks
void File::serialize(std::vector<char> &out) const {
    Object::serialize(out);

    const auto put = [&out](const auto &field) { Serialize::serialize(field, out); };
    put(name);
    put(bytes);
    put(mtime);
    put(md5);
    put(fileType);
    put(chunks);
}
|
||||
|
||||
std::string File::getKey() const {
|
||||
return name;
|
||||
}
|
||||
|
||||
// Classifies a path as Symlink, Directory or Normal; anything else is rejected with an Exception.
// The symlink test comes first, so a symlink is reported as Symlink whatever it points to.
File::Type File::getFileType(const std::filesystem::path &p) {
    namespace fs = std::filesystem;

    if (fs::is_symlink(p)) {
        return Type::Symlink;
    }
    if (fs::is_directory(p)) {
        return Type::Directory;
    }
    if (fs::is_regular_file(p)) {
        return Type::Normal;
    }

    throw Exception("Unsupported file type! " + p.u8string());
}
|
||||
|
||||
std::vector<char> File::getFileContents(const std::filesystem::path &p) {
|
||||
auto type = getFileType(p);
|
||||
if (type == Type::Normal) throw Exception(p.u8string() + " is a normal file!");
|
||||
if (type == Type::Directory) {
|
||||
return {};
|
||||
}
|
||||
if (type == Type::Symlink) {
|
||||
auto target = std::filesystem::read_symlink(p).u8string();
|
||||
return {target.begin(), target.end()};
|
||||
}
|
||||
throw Exception("Error with file " + p.u8string());
|
||||
}
|
||||
|
||||
unsigned long long File::getFileMtime(const std::filesystem::path &p) {
|
||||
auto type = getFileType(p);
|
||||
if (type == Type::Normal || type == Type::Directory)
|
||||
return static_cast<const unsigned long long int>(std::chrono::duration_cast<std::chrono::seconds>(std::filesystem::last_write_time(p).time_since_epoch()).count());
|
||||
else if (type == Type::Symlink) {
|
||||
auto path = p.u8string();
|
||||
struct stat sb;
|
||||
if (lstat(path.c_str(), &sb) != 0) throw Exception("Error reading mtime for " + p.u8string());
|
||||
#ifdef __APPLE__
|
||||
return sb.st_mtimespec.tv_sec;
|
||||
#else
|
||||
return sb.st_mtime;
|
||||
#endif
|
||||
}
|
||||
throw Exception("Error with file " + p.u8string());
|
||||
}
|
||||
|
||||
unsigned long long File::getFileSize(const std::filesystem::path &p) {
|
||||
auto type = getFileType(p);
|
||||
if (type == Type::Normal) return std::filesystem::file_size(p);
|
||||
else
|
||||
return getFileContents(p).size();
|
||||
}
|
||||
76
src/repo/objects/File.h
Normal file
76
src/repo/objects/File.h
Normal file
@@ -0,0 +1,76 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 14.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILE_H
|
||||
#define SEMBACKUP_FILE_H
|
||||
|
||||
#include <array>
|
||||
#include <filesystem>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "../Object.h"
|
||||
|
||||
/// Object representing a saved file
|
||||
class File : public Object {
|
||||
public:
|
||||
enum class Type {
|
||||
Normal,
|
||||
Symlink,
|
||||
Directory,
|
||||
END
|
||||
};
|
||||
|
||||
static inline const std::unordered_map<Type, std::string> TypeToStr{{Type::Normal, "normal"}, {Type::Symlink, "symlink"}, {Type::Directory, "directory"}};
|
||||
|
||||
File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType);
|
||||
|
||||
/// Deserialization constructor
|
||||
File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
|
||||
|
||||
/// \copydoc Object::serialize
|
||||
void serialize(std::vector<char> &out) const override;
|
||||
|
||||
/// Returns the file relative path as key
|
||||
std::string getKey() const override;
|
||||
|
||||
/// Helper static function to return Type for any file in the filesystem
|
||||
/// \param p Constant reference to the absolute path of the file
|
||||
/// \return Type of the file
|
||||
static Type getFileType(const std::filesystem::path &p);
|
||||
|
||||
/// Helper static function to return "contents" for non-regular files in the filesystem
|
||||
/// \param p Constant reference to the absolute path of the file
|
||||
/// \return File contents, (for symlinks - its destination, for directory - empty)
|
||||
/// \throws Exception on any error, or if file is regular
|
||||
static std::vector<char> getFileContents(const std::filesystem::path &p);
|
||||
|
||||
/// Helper static function to return modification time for files in the filesystem
|
||||
/// \param p Constant reference to the absolute path of the file
|
||||
/// \return File last modification time
|
||||
/// \throws Exception on any error
|
||||
static unsigned long long getFileMtime(const std::filesystem::path &p);
|
||||
|
||||
/// Helper static function to return file size for files in the filesystem
|
||||
/// \param p Constant reference to the absolute path of the file
|
||||
/// \return File size
|
||||
/// \throws Exception on any error
|
||||
static unsigned long long getFileSize(const std::filesystem::path &p);
|
||||
|
||||
|
||||
const std::string name; ///< Relative path to backup root, as UTF-8 string
|
||||
const unsigned long long bytes;///< Amount of bytes in the file
|
||||
const unsigned long long mtime;///< Last modification time as timestamp
|
||||
const std::string md5; ///< Hash of the file
|
||||
const Type fileType; ///< File type
|
||||
|
||||
/// List of the chunks in file
|
||||
/// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path
|
||||
const std::vector<idType> chunks;
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILE_H
|
||||
51
src/repo/objects/FileBuffer.cpp
Normal file
51
src/repo/objects/FileBuffer.cpp
Normal file
@@ -0,0 +1,51 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
|
||||
#include "FileBuffer.h"
|
||||
|
||||
#include "../Serialize.h"
|
||||
|
||||
// "Opens" the file with id `fileId`: loads its File object from the repository and queues
// all of its chunk ids, in order, for lazy loading by underflow().
FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId)
    : repo(repo), file(Serialize::deserialize<File>(repo->getObject(fileId))), chunksQueue() {
    for (auto const &id: file.chunks) chunksQueue.emplace(id);
}
|
||||
|
||||
// Nothing to synchronize here: always reports success
int FileBuffer::sync() {
    constexpr int success = 0;
    return success;
}
|
||||
|
||||
std::streamsize FileBuffer::xsgetn(char *s, std::streamsize countr) {
|
||||
if (underflow() == std::char_traits<char>::eof()) return 0;
|
||||
for (int i = 0; i < countr; i++) {
|
||||
auto c = uflow();
|
||||
if (c != traits_type::eof()) {
|
||||
s[i] = traits_type::to_char_type(c);
|
||||
} else
|
||||
return i;
|
||||
}
|
||||
return countr;
|
||||
}
|
||||
|
||||
int FileBuffer::uflow() {
|
||||
auto out = underflow();
|
||||
if (out != traits_type::eof())
|
||||
curGetBufPos++;
|
||||
return out;
|
||||
}
|
||||
|
||||
int FileBuffer::underflow() {
|
||||
if (getBuf.empty() || curGetBufPos == getBuf.size()) {
|
||||
if (chunksQueue.empty()) return traits_type::eof();
|
||||
else {
|
||||
auto chunk = Serialize::deserialize<Chunk>(repo->getObject(chunksQueue.front()));
|
||||
getBuf = chunk.data;
|
||||
chunksQueue.pop();
|
||||
curGetBufPos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (!getBuf.empty())
|
||||
return traits_type::to_int_type(getBuf[curGetBufPos]);
|
||||
else
|
||||
return traits_type::eof();
|
||||
}
|
||||
43
src/repo/objects/FileBuffer.h
Normal file
43
src/repo/objects/FileBuffer.h
Normal file
@@ -0,0 +1,43 @@
|
||||
//
|
||||
// Created by Stepan Usatiuk on 23.04.2023.
|
||||
//
|
||||
|
||||
#ifndef SEMBACKUP_FILEBUFFER_H
|
||||
#define SEMBACKUP_FILEBUFFER_H
|
||||
|
||||
#include <queue>
|
||||
#include <streambuf>
|
||||
|
||||
#include "../Repository.h"
|
||||
#include "Chunk.h"
|
||||
#include "File.h"
|
||||
|
||||
/// Streambuf implementation to read files from a File in a Repository
|
||||
class FileBuffer : public std::streambuf {
|
||||
public:
|
||||
/// Creates a FileBuffer instance
|
||||
/// \param repo Constant pointer to the backing Repository, should be available during the entire lifetime
|
||||
/// \param fileId ID of a file to "open"
|
||||
FileBuffer(const Repository *repo, Object::idType fileId);
|
||||
|
||||
protected:
|
||||
int underflow() override;
|
||||
|
||||
int uflow() override;
|
||||
|
||||
std::streamsize xsgetn(char *s, std::streamsize count) override;
|
||||
|
||||
int sync() override;
|
||||
|
||||
private:
|
||||
std::vector<char> getBuf;///< Currently loaded chunk
|
||||
|
||||
size_t curGetBufPos = 0;///< Currently pointed to byte in the loaded chunk
|
||||
|
||||
const Repository *repo; ///< Pointer to the backing repository
|
||||
File file; ///< Backing file
|
||||
std::queue<Object::idType> chunksQueue;///< Chunks of file that weren't read yet
|
||||
};
|
||||
|
||||
|
||||
#endif//SEMBACKUP_FILEBUFFER_H
|
||||
Reference in New Issue
Block a user