This commit is contained in:
2023-06-02 12:51:08 +02:00
commit 0e355fbe42
142 changed files with 10281 additions and 0 deletions

37
src/BytesFormatter.cpp Normal file
View File

@@ -0,0 +1,37 @@
//
// Created by Stepan Usatiuk on 13.05.2023.
//
#include "BytesFormatter.h"
#include <iomanip>
#include <sstream>
/// Scales \p bytes to the largest binary unit (KiB, MiB, GiB, TiB) that it reaches.
/// Scaled values are rendered with two decimal places; values below 1 KiB are
/// rendered as a plain integer with the unit "Bytes".
/// \param bytes Number of bytes
/// \return BytesFormat with the rendered number and the unit name
BytesFormatter::BytesFormat BytesFormatter::format(unsigned long long int bytes) {
    struct Unit {
        unsigned long long size;
        const char *name;
    };
    // The sizes are spelled as unsigned long long on purpose: `1024UL * 1024 * 1024 * 1024`
    // wraps around to 0 on platforms where unsigned long is 32 bits wide.
    constexpr Unit units[] = {
            {1024ULL * 1024 * 1024 * 1024, "TiB"},
            {1024ULL * 1024 * 1024, "GiB"},
            {1024ULL * 1024, "MiB"},
            {1024ULL, "KiB"},
    };

    std::stringstream outNum;
    outNum << std::fixed << std::setprecision(2);
    for (const auto &unit: units) {
        // `>=` so that an exact multiple is shown in its own unit ("1.00 MiB", not "1024.00 KiB")
        if (bytes >= unit.size) {
            outNum << static_cast<double>(bytes) / static_cast<double>(unit.size);
            return {outNum.str(), unit.name};
        }
    }
    outNum << bytes;
    return {outNum.str(), "Bytes"};
}
std::string BytesFormatter::formatStr(unsigned long long int bytes) {
auto fmt = format(bytes);
return fmt.number + " " + fmt.prefix;
}

31
src/BytesFormatter.h Normal file
View File

@@ -0,0 +1,31 @@
//
// Created by Stepan Usatiuk on 13.05.2023.
//
#ifndef SEMBACKUP_BYTESFORMATTER_H
#define SEMBACKUP_BYTESFORMATTER_H
#include <string>
/**
 * Helper that turns raw byte counts into human-readable values
 * scaled to a unit that matches their magnitude.
 */
class BytesFormatter {
public:
    /// Result of formatting: the scaled number and its unit, kept separately
    struct BytesFormat {
        std::string number; ///< Numeric part of the value, already rendered as text
        std::string prefix; ///< Unit of measure that belongs to #number
    };

    /// Splits \p bytes into a scaled number and a unit
    /// \param bytes Number of bytes
    /// \return BytesFormat holding the number text and the unit
    static BytesFormat format(unsigned long long bytes);

    /// Renders \p bytes as a single string
    /// \param bytes Number of bytes
    /// \return String made of the scaled number and the unit of measure, separated by a space
    static std::string formatStr(unsigned long long bytes);
};
#endif//SEMBACKUP_BYTESFORMATTER_H

81
src/Config.cpp Normal file
View File

@@ -0,0 +1,81 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#include "Config.h"
#include "Exception.h"
#include "repo/Serialize.h"
#include <sstream>
/// Stores value \p v under key \p k.
/// Setting a key again with the same value is a no-op, setting it with a different value is an error.
/// Values for INT keys must be accepted by std::stoi.
/// \throws Exception if the key is unknown, already set to a different value, or not convertible to int
Config &Config::add(const std::string &k, const std::string &v) {
    const auto known = keys.find(k);
    if (known == keys.end()) throw Exception("Unknown key " + k);

    const auto existing = data.find(k);
    if (existing != data.end()) {
        // An already present key may only be "re-added" with the very same value
        if (existing->second != v) throw Exception("Trying to rewrite config!");
        return *this;
    }

    // Only integer keys are validated, STRING and LIST values are stored as they are
    if (known->second.type == KeyType::INT) {
        try {
            std::stoi(v);
        } catch (...) {
            throw Exception("Can't convert " + k + " to integer!");
        }
    }

    data.emplace(k, v);
    return *this;
}
/// Looks up key \p k through getStr() and converts the result with std::stoi.
int Config::getInt(const std::string &k) const {
    const std::string raw = getStr(k);
    return std::stoi(raw);
}
/// Splits the value of key \p k at commas.
/// Empty items (for example produced by ",," or a trailing comma) are skipped.
std::vector<std::string> Config::getList(const std::string &k) const {
    std::stringstream source(getStr(k));
    std::vector<std::string> items;
    for (std::string item; std::getline(source, item, ',');) {
        if (item.empty()) continue;
        items.emplace_back(item);
    }
    return items;
}
/// Returns the value stored for key \p k, falling back to the key's default value.
/// \param k Constant reference to the key string
/// \return Explicitly set value if there is one, otherwise the default from #keys
/// \throws Exception if the key is neither set nor known, or if it is known but has no value and no default
std::string Config::getStr(const std::string &k) const {
    const auto stored = data.find(k);
    if (stored != data.end()) return stored->second;

    // Report unknown keys with the project's Exception (same message as add())
    // instead of letting std::out_of_range escape from keys.at()
    const auto known = keys.find(k);
    if (known == keys.end()) throw Exception("Unknown key " + k);

    if (known->second.defaultval.has_value()) return known->second.defaultval.value();
    throw Exception("Option " + k + " not specified and no default value exists!");
}
/// Tells whether getStr() would find a value for \p k: either one was set explicitly or the key has a default.
/// Uses keys.at(), so a key that is neither set nor listed in #keys raises std::out_of_range.
bool Config::exists(const std::string &k) const {
    if (data.count(k) > 0) return true;
    return keys.at(k).defaultval.has_value();
}
/// Default constructor, creates a Config in which no value is set explicitly
Config::Config() = default;
/// Deserialization constructor: reads the key/value map from the range starting at \p in.
/// \param in  Iterator to the start of the serialized data, passed on to Serialize::deserialize by reference
/// \param end Iterator marking the end of the serialized data
Config::Config(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : data(Serialize::deserialize<decltype(data)>(in, end)) {}
void Config::serialize(std::vector<char> &out) const {
std::vector<decltype(data)::value_type> temp;
for (const auto &d: data) {
if (keys.at(d.first).remember) {
temp.emplace_back(d);
}
}
Serialize::serialize(temp, out);
}
/// Adds every explicitly set pair of \p config through add(), so conflicts are reported the way add() reports them.
void Config::merge(const Config &config) {
    for (const auto &[key, value]: config.data) {
        add(key, value);
    }
}

117
src/Config.h Normal file
View File

@@ -0,0 +1,117 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#ifndef SEMBACKUP_CONFIG_H
#define SEMBACKUP_CONFIG_H
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
/// Utility class to manage configuration
/**
 * Also provides keys map for information about config keys
 * Serializable, remembers only the keys with remember option set in keys
 */
class Config {
public:
    /// Constructs an empty Config instance
    Config();

    /// Deserialization constructor
    /// \param in  Iterator to the start of the serialized data
    /// \param end Iterator to the end of the serialized data
    Config(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);

    /// Adds a key \p k with value \p v to the config
    /// \param k Const reference to the config key
    /// \param v Config value
    /// \return Reference to itself
    /// \throws Exception if key is invalid or is already set with different value
    Config &add(const std::string &k, const std::string &v);

    /// Merges \p config to itself
    /// Adds every config pair from \p config to itself, throws on conflict
    /// \param config Constant reference to the source Config
    /// \throws Exception on merge conflict
    void merge(const Config &config);

    /// Returns an int from config key \p k
    /// \param k Constant reference to the key string
    /// \return Config int
    /// \throws Exception if key is invalid or value isn't an int
    int getInt(const std::string &k) const;

    /// Returns a string from config key \p k
    /// \param k Constant reference to the key string
    /// \return Config value for key
    /// \throws Exception if key is invalid
    std::string getStr(const std::string &k) const;

    /// Returns a list of strings delimited by commas from config key \p k
    /// \param k Constant reference to the key string
    /// \return Vector of strings
    /// \throws Exception if key is invalid
    std::vector<std::string> getList(const std::string &k) const;

    /// Checks if key \p k exists in the config
    /// \param k Constant reference to the key string
    /// \return True if key exists or its default value exists
    bool exists(const std::string &k) const;

    /// Serialization function
    /// \param out Vector the serialized data is written to
    void serialize(std::vector<char> &out) const;

    /// Tag type marking the class as serializable
    using serializable = std::true_type;

    /// Kind of value a config key holds
    enum class KeyType {
        STRING,
        INT,
        LIST
    };

    /// Struct to record key options
    struct keyopts {
        std::optional<std::string> defaultval; ///< Key's default value
        KeyType type;                          ///< Key's type
        bool remember;                         ///< Whether the key should be serialized
        std::string info;                      ///< Printed in help
    };

    /// Used for printing help
    const static inline std::unordered_map<KeyType, std::string> KeyTypeToStr{
            {KeyType::STRING, "string"},
            {KeyType::INT, "number"},
            {KeyType::LIST, "comma-separated list"}};

    /// Default values and their metadata
    const static inline std::unordered_map<std::string, keyopts> keys{
            {"compression", {"none", KeyType::STRING, true, "Compression algorithm to use (zlib or none)"}},
            {"encryption", {"none", KeyType::STRING, true, "Encryption algorithm to use (aes or none)"}},
            {"compression-level", {"-1", KeyType::INT, true, "Compression level to use (0 to 9)"}},
            {"repo", {std::nullopt, KeyType::STRING, false, "Repository root"}},
            {"to", {std::nullopt, KeyType::STRING, false, "Destination of restore"}},
            {"from", {std::nullopt, KeyType::STRING, true, "Backed up folder"}},
            {"type", {"normal", KeyType::STRING, false, "Type of archive"}},
            {"aid", {std::nullopt, KeyType::INT, false, "ID of archive to restore/compare to"}},
            {"aid2", {std::nullopt, KeyType::INT, false, "ID of archive to compare with"}},
            {"threads", {std::nullopt, KeyType::INT, false, "Number of threads to use"}},
            {"prefix", {"", KeyType::STRING, false, "Prefix of files to compare"}},
            {"password", {std::nullopt, KeyType::STRING, false, "Encryption password"}},
            {"salt", {std::nullopt, KeyType::STRING, true, "Encryption salt"}},
            {"chunker", {"buzhash", KeyType::STRING, true, "Chunker to use (const, buzhash)"}},
            {"chunker-min", {"256", KeyType::INT, true, "Min chunk size in KB"}},
            {"chunker-max", {"4096", KeyType::INT, true, "Max chunk size in KB"}},
            {"chunker-mask", {"20", KeyType::INT, true, "Chunker hash bit mask (mask of n bits results in average chunk size of 2^n bytes)"}},
            {"repo-target", {"128", KeyType::INT, true, "Target size of files for FileRepository"}},
            {"full-period", {"2", KeyType::INT, true, "Interval between forced full backups"}},
            {"progress", {"pretty", KeyType::STRING, false, "How to print progress (simple, pretty, none)"}},
            {"verbose", {"1", KeyType::INT, false, "Message verbosity (0 - error, 1 - info, -1 - quiet)"}},
            {"dedup", {"on", KeyType::STRING, true, "Turns deduplication on/off"}},
            {"change-detectors", {"type,size,etime", KeyType::LIST, true, "Change detectors to use (in order)"}},
            {"diff-mode", {"normal", KeyType::STRING, false, "Diff mode (file or normal)"}},
    };

private:
    std::unordered_map<std::string, std::string> data; ///< Explicitly set values, by key
};
#endif//SEMBACKUP_CONFIG_H

18
src/Context.h Normal file
View File

@@ -0,0 +1,18 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_CONTEXT_H
#define SEMBACKUP_CONTEXT_H
#include "Config.h"
#include "Logger.h"
#include "repo/Repository.h"
/// Groups a Logger pointer and a Repository pointer so they can be passed around together
/// The pointers are stored as they are given; this struct does not manage what they point to
struct Context {
    Logger *logger = nullptr;   ///< Logger to use; null until set, so a default-constructed Context never holds garbage
    Repository *repo = nullptr; ///< Repository to use; null until set
};
#endif//SEMBACKUP_CONTEXT_H

103
src/Diff.cpp Normal file
View File

@@ -0,0 +1,103 @@
//
// Created by Stepan Usatiuk on 06.05.2023.
//
#include "Diff.h"
#include "BytesFormatter.h"
#include "Exception.h"
#include "Signals.h"
#include "chunkers/BuzhashChunker.h"
/// Scans at most the first 2048 bytes of \p c and reports whether a null byte was found.
/// Stops early (as "not binary") if the stream ends before that.
bool Diff::isBinary(const ComparableFile &c) {
    const auto stream = c.contents();
    const auto limit = std::min(c.bytes, 2048ULL);
    unsigned int pos = 0;
    while (pos < limit) {
        const auto next = stream->sbumpc();
        if (std::streambuf::traits_type::to_char_type(next) == '\0') return true;
        if (next == std::streambuf::traits_type::eof()) return false;
        ++pos;
    }
    return false;
}
/// Computes a textual difference message for two files.
/// - If exactly one file is binary (see isBinary), a fixed message is returned.
/// - If both are binary, the result of diffPercent() is returned.
/// - Otherwise the lines are compared as multisets: every line of the second file cancels one equal
///   line of the first file; what is left over on either side is listed together with its line number.
///   The listing is ordered by line text, not by line number, because it is kept in a multimap keyed by the text.
/// \throws Exception if Signals::shouldQuit gets set while reading
std::string Diff::diff(const ComparableFile &c1, const ComparableFile &c2) {
    // isBinary() re-opens and scans the file on every call, so evaluate it once per file
    const bool binary1 = isBinary(c1);
    const bool binary2 = isBinary(c2);
    if (binary1 || binary2) {
        if (!(binary1 && binary2)) return "One of the files is binary, the other is not";
        return diffPercent(c1, c2);
    }

    std::stringstream out;
    auto b1 = c1.contents();
    auto b2 = c2.contents();
    std::multimap<std::string, unsigned long> f1lines;
    std::multimap<std::string, unsigned long> f2diff;
    std::string line;
    std::istream is1(b1.get());
    std::istream is2(b2.get());

    unsigned long lineNo = 0;
    while (std::getline(is1, line)) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        f1lines.emplace(line, ++lineNo);
    }

    lineNo = 0;
    while (std::getline(is2, line)) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        // Count every line of the second file, not only the unmatched ones,
        // so that the reported number is the real line number
        ++lineNo;
        const auto match = f1lines.find(line);
        if (match != f1lines.end()) f1lines.erase(match);
        else
            f2diff.emplace(line, lineNo);
    }

    out << "\nLines only in first file: " << '\n';
    for (const auto &s: f1lines) {
        out << s.second << "<" << s.first << '\n';
    }
    out << "Lines only in second file: " << '\n';
    for (const auto &s: f2diff) {
        out << s.second << ">" << s.first << '\n';
    }
    out << "^^^\n";
    return out.str();
}
/// Estimates how many bytes differ between two files by comparing their chunks.
/// Both files are split with a BuzhashChunker using the same parameters. Every chunk of the second
/// file cancels one chunk of the first file with the same hash; the sizes of all chunks left over
/// on either side are summed up.
/// NOTE(review): each chunker item is used as a (hash, data) pair, going by how `.first`/`.second`
/// are used here - confirm against BuzhashChunker.
/// \return "at most " followed by the formatted byte count
/// \throws Exception if Signals::shouldQuit gets set while chunking
std::string Diff::diffPercent(const ComparableFile &c1, const ComparableFile &c2) {
    auto b1 = c1.contents();
    auto b2 = c2.contents();
    BuzhashChunker ch1(b1.get(), 512 * 1024, 1024 * 1024, 19, 31);
    BuzhashChunker ch2(b2.get(), 512 * 1024, 1024 * 1024, 19, 31);
    std::multiset<std::string> ch1hashes;
    std::multiset<std::string> ch2diff;
    std::unordered_map<std::string, unsigned long long> hashsize;

    for (const auto &chunkp: ch1) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        if (chunkp.second.empty()) continue;
        std::string hash(chunkp.first.begin(), chunkp.first.end());
        hashsize[hash] = chunkp.second.size();
        ch1hashes.emplace(std::move(hash));
    }

    for (const auto &chunkp: ch2) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        if (chunkp.second.empty()) continue;
        std::string hash(chunkp.first.begin(), chunkp.first.end());
        hashsize[hash] = chunkp.second.size();
        // Cancel exactly ONE matching chunk: erase(key) would drop every copy of a repeated
        // chunk at once and skew the estimate for files with duplicate chunks
        const auto match = ch1hashes.find(hash);
        if (match != ch1hashes.end()) ch1hashes.erase(match);
        else
            ch2diff.emplace(std::move(hash));
    }

    unsigned long long diff = 0;
    for (const auto &c: ch1hashes) {
        diff += hashsize[c];
    }
    for (const auto &c: ch2diff) {
        diff += hashsize[c];
    }
    return "at most " + BytesFormatter::formatStr(diff);
}

38
src/Diff.h Normal file
View File

@@ -0,0 +1,38 @@
//
// Created by Stepan Usatiuk on 06.05.2023.
//
#ifndef SEMBACKUP_DIFF_H
#define SEMBACKUP_DIFF_H
#include <sstream>
#include <string>
#include "change_detectors/ComparableFile.h"
/// Utility class to compute difference between two ComparableFile%s
/// All members are static, the class itself holds no state
class Diff {
public:
/// Compute the difference between two ComparableFile%s
/// If both files are binary, calls diffPercent, which outputs the difference between files in bytes
/// If only one of the files is binary, returns a message saying so
/// Otherwise prints linewise difference
/// \param c1 Constant reference to the first ComparableFile
/// \param c2 Constant reference to the second ComparableFile
/// \returns Difference message
static std::string diff(const ComparableFile &c1, const ComparableFile &c2);
/// Calculates the difference between \p c1 and \p c2 in bytes
/// The files are split into chunks, and the sizes of the chunks that have no counterpart in the other file are summed up
/// \param c1 Constant reference to the first ComparableFile
/// \param c2 Constant reference to the second ComparableFile
/// \returns Difference message of the form "at most <size>"
static std::string diffPercent(const ComparableFile &c1, const ComparableFile &c2);
/// Checks if a file is binary
/// A file is considered binary if its first 2048 bytes contain a null byte
/// \param c1 Constant reference to the checked ComparableFile
/// \return True if the file is considered binary, false otherwise
static bool isBinary(const ComparableFile &c1);
};
#endif//SEMBACKUP_DIFF_H

32
src/Exception.cpp Normal file
View File

@@ -0,0 +1,32 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#include "Exception.h"
#include <execinfo.h>
#include <sstream>
/// Builds the message from \p text, a newline and the stack trace captured at construction
Exception::Exception(const std::string &text)
    : std::runtime_error(text + "\n" + getStacktrace()) {}
/// Same as the std::string overload, for a C string \p text
Exception::Exception(const char *text)
    : std::runtime_error(std::string(text).append("\n").append(getStacktrace())) {}
// Based on: https://www.gnu.org/software/libc/manual/html_node/Backtraces.html
/// Captures up to 50 frames of the current call stack and renders them one per line,
/// preceded by a "Stacktrace:" header. Returns an empty string if the symbols can't be obtained.
std::string Exception::getStacktrace() {
    std::vector<void *> frames(50);
    const int depth = backtrace(frames.data(), static_cast<int>(frames.size()));
    // backtrace_symbols allocates the returned array, it is released with free() below
    char **symbols = backtrace_symbols(frames.data(), depth);

    std::stringstream trace;
    if (symbols != nullptr) {
        trace << "Stacktrace:" << '\n';
        for (int frame = 0; frame < depth; ++frame) {
            trace << symbols[frame] << '\n';
        }
    }
    free(symbols);
    return trace.str();
}

24
src/Exception.h Normal file
View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#ifndef SEMBACKUP_EXCEPTION_H
#define SEMBACKUP_EXCEPTION_H
#include <stdexcept>
#include <string>
#include <vector>
/// Custom exception class that uses execinfo to append a stacktrace to the exception message
/// The message returned by what() is the given text, a newline and the stacktrace taken in the constructor
class Exception : public std::runtime_error {
public:
/// Constructs the exception from a std::string
/// \param text Description of the error, the stacktrace is appended to it
Exception(const std::string &text);
/// Constructs the exception from a C string
/// \param text Description of the error, the stacktrace is appended to it
Exception(const char *text);
private:
/// Static function to get the current stacktrace
/// \return Stacktrace as text, one frame per line
static std::string getStacktrace();
};
#endif//SEMBACKUP_EXCEPTION_H

19
src/Logger.cpp Normal file
View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "Logger.h"
/// Creates a logger that writes messages of level \p level or lower to \p out
Logger::Logger(int level, std::ostream &out)
    : loglevel(level),
      out(std::ref(out)) {}
/// Writes \p what to the stream and flushes it, unless \p whatlevel is above the current log level.
/// The stream is only touched while the output mutex is held.
void Logger::write(const std::string &what, int whatlevel) {
    if (whatlevel > loglevel) return;

    std::lock_guard outLock(outM);
    std::ostream &stream = out.get();
    stream << what;
    stream.flush();
}
void Logger::setLevel(int level) {
loglevel = level;
}

25
src/Logger.h Normal file
View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_LOGGER_H
#define SEMBACKUP_LOGGER_H
#include <functional>
#include <iostream>
#include <mutex>
/// Writes messages to an output stream, filtered by a verbosity level
class Logger {
public:
/// Constructs the Logger
/// \param level Highest message level that is still written (default 3)
/// \param out Stream to write to (default std::cout), only a reference to it is kept
Logger(int level = 3, std::ostream &out = {std::cout});
/// Writes \p what to the stream and flushes it if \p whatlevel is not greater than the current level
/// \param what Text to write, written as it is (no newline is added)
/// \param whatlevel Level of the message
void write(const std::string &what, int whatlevel);
/// Sets the current level
/// \param level New highest message level that is still written
void setLevel(int level);
private:
int loglevel;///< Messages with a level above this one are dropped
std::mutex outM;///< Held while writing to the stream
std::reference_wrapper<std::ostream> out;///< Stream the messages are written to
};
#endif//SEMBACKUP_LOGGER_H

57
src/Progress.cpp Normal file
View File

@@ -0,0 +1,57 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "Progress.h"
#include <sstream>
#include <utility>
/// Stores the output function and format, reads the progress type from \p conf ("progress" key)
/// and, unless the type is "none", prints two newlines and starts the refresh thread.
Progress::Progress(std::function<void(std::string, int)> out,
                   std::vector<std::variant<std::function<std::string()>, std::string>> format,
                   const Config &conf,
                   int level)
    : progresslevel(level),
      format(std::move(format)),
      out(std::move(out)),
      type(conf.getStr("progress")) {
    if (type == "none") return;

    this->out("\n\n", level);
    thread = std::thread(&Progress::showProgress, this);
}
/// Raises the stop flag and waits for the refresh thread, if one was started
Progress::~Progress() {
    stop = true;
    if (!thread.joinable()) return;
    thread.join();
}
void Progress::showProgress() {
while (!stop) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
{
update(std::unique_lock(refreshM));
}
}
}
/// Prints \p s on its own line and redraws the progress line after it.
/// In "pretty" mode the current terminal line is cleared first.
void Progress::print(const std::string &s, int level) {
    std::unique_lock refreshL(refreshM);

    std::string message = (type == "pretty") ? "\r\33[2K " : "";
    message += s;
    message += "\n";
    out(message, level);

    update(std::move(refreshL));
}
/// Assembles the progress line from the format entries (strings are used as they are, functions are called),
/// hands it to the output function with the progress level and finally unlocks \p lock.
/// In "pretty" mode the line is cleared first and ends with a carriage return, otherwise it ends with a newline.
void Progress::update(std::unique_lock<std::mutex> &&lock) {
    const bool pretty = (type == "pretty");

    std::stringstream outs;
    if (pretty) outs << "\r\33[2K ";

    for (const auto &part: format) {
        if (const auto *text = std::get_if<std::string>(&part)) {
            outs << *text;
        } else {
            outs << std::get<std::function<std::string()>>(part)();
        }
    }

    outs << (pretty ? "\r" : "\n");
    out(outs.str(), progresslevel);
    lock.unlock();
}

55
src/Progress.h Normal file
View File

@@ -0,0 +1,55 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_PROGRESS_H
#define SEMBACKUP_PROGRESS_H
#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <string>
#include <thread>
#include <variant>
#include "Config.h"
/// Class to handle writing progress to the screen
/// Unless the progress type is `none`, a worker thread redraws the progress line periodically
class Progress {
public:
/// Constructs the Progress instance
/// \param out Function to call for output, receives the text and its level
/// \param format Format of the progress string, vector of strings or functions that return strings
/// \param conf Config, used to specify format (`pretty` for line rewriting, `simple` for normal line printing, or `none`)
/// \param level Level passed to \p out together with the progress line
Progress(std::function<void(std::string, int)> out, std::vector<std::variant<std::function<std::string()>, std::string>> format, const Config &conf, int level = 1);
Progress &operator=(Progress rhs) = delete;
Progress(const Progress &orig) = delete;
/// Write a string to the terminal without disturbing the progress bar
/// \param s Text to print, a newline is appended
/// \param level Level passed to the output function together with \p s
void print(const std::string &s, int level);
/// Destructor, instructs the worker thread to stop
~Progress();
private:
int progresslevel;///< Level passed to the output function for the progress line
std::vector<std::variant<std::function<std::string()>, std::string>> format;///< Format of the progressbar
std::function<void(std::string, int)> out; ///< Output function
/// Thread loop function
void showProgress();
std::atomic<bool> stop = false;///< Stop flag
std::mutex refreshM;///< Used to prevent mangling the output between print and progressbar update
/// Prints the progressbar on screen, then unlocks the mutex
void update(std::unique_lock<std::mutex> &&lock);
const std::string type;///< Progressbar type (Taken from Config)
std::thread thread;///< Worker thread
};
#endif//SEMBACKUP_PROGRESS_H

31
src/RunningAverage.cpp Normal file
View File

@@ -0,0 +1,31 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "RunningAverage.h"
RunningAverage::RunningAverage(std::function<unsigned long long int()> getFunc, int max, int ms)
: getFunc(std::move(getFunc)), max(max), ms(ms), thread(&RunningAverage::loop, this) {
}
void RunningAverage::loop() {
while (!stop) {
{
std::lock_guard lock(dataLock);
data.emplace_front(getFunc());
if (data.size() > max) data.pop_back();
}
std::this_thread::sleep_for(std::chrono::duration(std::chrono::milliseconds(ms)));
}
}
/// Raises the stop flag and waits for the sampling thread to leave its loop
RunningAverage::~RunningAverage() {
    stop.store(true);
    thread.join();
}
/// Returns the arithmetic mean (integer division) of the samples collected so far.
/// \return Average of the stored samples, 0 if no sample has been taken yet
unsigned long long RunningAverage::get() {
    std::lock_guard lock(dataLock);
    if (data.empty()) return 0;
    // The type of the initial value is the type std::accumulate sums in: it has to be
    // unsigned long long like the samples, `0UL` would truncate where unsigned long is 32 bits
    const unsigned long long sum = std::accumulate(data.begin(), data.end(), 0ULL);
    return sum / data.size();
}

44
src/RunningAverage.h Normal file
View File

@@ -0,0 +1,44 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_RUNNINGAVERAGE_H
#define SEMBACKUP_RUNNINGAVERAGE_H
#include <atomic>
#include <deque>
#include <functional>
#include <mutex>
#include <numeric>
#include <thread>
/// Class to compute running average of some value
/// A worker thread, started by the constructor, samples the value periodically
class RunningAverage {
public:
/// Constructs the instance and starts the sampling thread
/// \param getFunc Function that samples the value
/// \param max Max number of samples to average
/// \param ms Sampling period in milliseconds
RunningAverage(std::function<unsigned long long()> getFunc, int max, int ms);
/// Destructor, instructs the thread to exit
~RunningAverage();
/// Returns the average
/// \return Average of the stored samples, 0 if there are none yet
unsigned long long get();
private:
std::atomic<bool> stop = false; ///< Stop signal
std::function<unsigned long long()> getFunc;///< Sampling function
std::deque<unsigned long long> data; ///< Data collected
int max; ///< Max number of samples
int ms; ///< Sampling period
std::mutex dataLock; ///< Deque lock
// NOTE: keep the thread as the last member, it starts running loop() during construction
// and loop() uses the members declared above
std::thread thread; ///< Worker thread
/// Worker thread loop
void loop();
};
#endif//SEMBACKUP_RUNNINGAVERAGE_H

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "RunningDiffAverage.h"
/// Wraps \p getFunc into a sampler that returns the difference to the previously sampled value
/// and hands that sampler to the backing RunningAverage.
RunningDiffAverage::RunningDiffAverage(std::function<unsigned long long int()> getFunc, int max, int ms)
    : runningAverage(
              [this, sample = std::move(getFunc)] {
                  const auto current = sample();
                  const auto delta = current - prev;
                  prev = current;
                  return delta;
              },
              max, ms) {}
/// Returns the current average of the sampled differences, as computed by the backing RunningAverage
unsigned long long RunningDiffAverage::get() {
    const unsigned long long average = runningAverage.get();
    return average;
}

30
src/RunningDiffAverage.h Normal file
View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_RUNNINGDIFFAVERAGE_H
#define SEMBACKUP_RUNNINGDIFFAVERAGE_H
#include <functional>
#include "RunningAverage.h"
/// Computes the rolling average of differences between last sampled and currently sampled numbers
class RunningDiffAverage {
public:
/// Constructs the instance, sampling starts immediately in the backing RunningAverage
/// \param getFunc Function that samples the value
/// \param max Max number of samples to average
/// \param ms Sampling period
RunningDiffAverage(std::function<unsigned long long()> getFunc, int max, int ms);
/// Returns the average
/// \return Average of the differences between consecutive samples
unsigned long long get();
private:
// NOTE: prev has to stay declared before runningAverage, the sampling function given to
// runningAverage reads and writes prev
unsigned long long prev = 0; ///< Previously sampled value
RunningAverage runningAverage;///< Backing RunningAverage
};
#endif//SEMBACKUP_RUNNINGDIFFAVERAGE_H

12
src/Signals.cpp Normal file
View File

@@ -0,0 +1,12 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "Signals.h"
/// Installs handle() as the handler for SIGINT
void Signals::setup() {
    std::signal(SIGINT, &Signals::handle);
}
/// Signal handler: only raises the shouldQuit flag, the signal number is not looked at
void Signals::handle(int signum) {
    static_cast<void>(signum);
    shouldQuit = 1;
}

24
src/Signals.h Normal file
View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_SIGNALS_H
#define SEMBACKUP_SIGNALS_H
#include <csignal>
/// Class to handle signals sent to the process
/// After setup() was called, SIGINT sets #shouldQuit, which the rest of the program is expected to poll
class Signals {
public:
/// Setup the signal handlers
/// Registers the handler for SIGINT
static void setup();
volatile static inline std::sig_atomic_t shouldQuit = false;///< Indicates whether the program was requested to exit
private:
/// Handle the signals
/// \param signum Number of the received signal
static void handle(int signum);
};
#endif//SEMBACKUP_SIGNALS_H

67
src/ThreadPool.cpp Normal file
View File

@@ -0,0 +1,67 @@
#include <string>
#include "Signals.h"
#include "ThreadPool.h"
/// Constructs the pool and starts the worker threads.
/// \param onError    Callback that receives the message of an exception thrown by a task
/// \param workersNum Number of worker threads to start; 0 is treated as 1, because
///                   std::thread::hardware_concurrency() (the default) is allowed to return 0
///                   and a pool without workers would never run any task
ThreadPool::ThreadPool(std::function<void(std::string)> onError, std::size_t workersNum) : onError(std::move(onError)) {
    const std::size_t workers = (workersNum == 0) ? 1 : workersNum;
    threads.reserve(workers);
    for (std::size_t i = 0; i < workers; ++i) {
        threads.emplace_back(&ThreadPool::loop, this);
    }
}
/// Raises the stop flag, wakes up all workers and waits for each of them to return
ThreadPool::~ThreadPool() {
    stop.store(true);
    somethingNew.notify_all();
    for (std::thread &worker: threads) worker.join();
}
/// Moves \p func into the task queue (under the queue lock) and wakes up one worker
void ThreadPool::push(std::function<void()> &&func) {
    {
        std::lock_guard guard(queueLock);
        queue.emplace(std::move(func));
    }
    // Notify after the lock is released so the woken worker can take it right away
    somethingNew.notify_one();
}
/// Worker thread body.
/// Waits for a task (re-checking the stop conditions at least once per second), runs it outside of
/// the queue lock and forwards the message of a std::exception thrown by it to onError.
/// When the pool is stopped or Signals::shouldQuit is set, the remaining tasks are dropped and the worker returns.
/// `finished` is notified whenever the queue is empty and no task is running.
void ThreadPool::loop() {
    for (;;) {
        std::function<void()> job;
        {
            std::unique_lock guard(queueLock);
            while (queue.empty() && !stop && !Signals::shouldQuit) {
                // Check for any of the stop signals every second
                somethingNew.wait_for(guard, std::chrono::seconds(1));
            }

            if (stop || Signals::shouldQuit) {
                // Drop all tasks if requested to exit
                queue = {};
                if (queue.empty() && running == 0) { finished.notify_all(); }
                return;
            }

            job = std::move(queue.front());
            running++;
            queue.pop();
        }

        try {
            job();
        } catch (const std::exception &e) {
            onError(std::string(e.what()));
        }

        {
            std::lock_guard guard(queueLock);
            running--;
            if (queue.empty() && running == 0) { finished.notify_all(); }
        }
    }
}
/// Tells, under the queue lock, whether there is neither a queued nor a running task
bool ThreadPool::empty() {
    std::lock_guard guard(queueLock);
    return queue.empty() && running == 0;
}

54
src/ThreadPool.h Normal file
View File

@@ -0,0 +1,54 @@
//
// Created by Stepan Usatiuk on 17.04.2023.
//
#ifndef SEMBACKUP_THREADPOOL_H
#define SEMBACKUP_THREADPOOL_H
#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
/// Thread pool
/**
 * Handles ctrl-c via Signals, but it is expected of tasks to also do so
 * Forwards exception messages to the provided handler
 */
class ThreadPool {
public:
/// Constructs a thread pool
/// \param onError Callback function that is called when an exception happens when executing a task
/// \param workersNum Amount of worker threads (default = number of cpu threads)
ThreadPool(std::function<void(std::string)> onError, std::size_t workersNum = std::thread::hardware_concurrency());
/// Destructor, instructs the threads to stop and joins them
~ThreadPool();
/// Pushes a new task to the queue
/// \param func Rvalue to the task function
void push(std::function<void()> &&func);
/// Returns True if the queue is empty and there are no tasks running
bool empty();
std::mutex finishedLock; ///< Lock to use when waiting on the finished variable
std::condition_variable finished;///< Condition variable to wait for all tasks to finish, notified when the queue is empty and no task is running
private:
/// Thread loop
void loop();
std::queue<std::function<void()>> queue; ///< Task queue
std::mutex queueLock; ///< Task queue lock
std::condition_variable somethingNew; ///< Condition variable to wait for new tasks
std::vector<std::thread> threads; ///< Vector of worker threads
std::atomic<bool> stop = false; ///< Stop signal for threads
std::atomic<int> running = 0; ///< Number of currently running tasks
std::function<void(std::string)> onError;///< Function to call on exception in task
};
#endif//SEMBACKUP_THREADPOOL_H

View File

@@ -0,0 +1,7 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "ChangeDetector.h"
/// Out-of-line definition of the defaulted virtual destructor
ChangeDetector::~ChangeDetector() = default;

View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_CHANGEDETECTOR_H
#define SEMBACKUP_CHANGEDETECTOR_H
#include "ComparableFile.h"
/// An interface for a class comparing any two given ComparableFile%s
/// Implementations decide what makes two files "different"
class ChangeDetector {
public:
/// Abstract method for comparing two ComparableFile%s
/// \param f1 Constant reference to the first ComparableFile
/// \param f2 Constant reference to the second ComparableFile
/// \return True if these objects are considered *different*, False otherwise
virtual bool check(const ComparableFile &f1, const ComparableFile &f2) const = 0;
/// Default virtual destructor, lets implementations be destroyed through a ChangeDetector pointer
virtual ~ChangeDetector();
};
#endif//SEMBACKUP_CHANGEDETECTOR_H

View File

@@ -0,0 +1,16 @@
//
// Created by Stepan Usatiuk on 04.05.2023.
//
#include "ChangeDetectorContainer.h"
#include <functional>
/// Asks the wrapped detectors one after another and stops at the first one that reports a difference.
/// \return True if any wrapped detector considers the files different, false otherwise (also if there are no detectors)
bool ChangeDetectorContainer::check(const ComparableFile &f1, const ComparableFile &f2) const {
    for (const auto &detector: changeDetectors) {
        if (detector->check(f1, f2)) return true;
    }
    return false;
}
/// Takes over the given detectors
ChangeDetectorContainer::ChangeDetectorContainer(std::vector<std::unique_ptr<ChangeDetector>> &&changeDetectors)
    : changeDetectors(std::move(changeDetectors)) {
}

View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 04.05.2023.
//
#ifndef SEMBACKUP_CHANGEDETECTORCONTAINER_H
#define SEMBACKUP_CHANGEDETECTORCONTAINER_H
#include <memory>
#include <vector>
#include "ChangeDetector.h"
#include "ComparableFile.h"
/// Wrapper for multiple ChangeDetector%s
/** A ChangeDetector implementation that serves as a convenience wrapper for
 * multiple ChangeDetector%s, its check returns true if any of the wrapped ChangeDetector%s return true
 */
class ChangeDetectorContainer : public ChangeDetector {
public:
/// Constructs a ChangeDetectorContainer using a vector of existing ChangeDetector%s
/// \param changeDetectors An rvalue reference to a vector of unique pointers of ChangeDetector, the container takes them over
ChangeDetectorContainer(std::vector<std::unique_ptr<ChangeDetector>> &&changeDetectors);
/// \copydoc ChangeDetector::check
/// \return ComparableFile%s are considered different if any of the wrapped ChangeDetector%s return true
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
private:
std::vector<std::unique_ptr<ChangeDetector>> changeDetectors;///< Wrapped ChangeDetector%s, asked in this order
};
#endif//SEMBACKUP_CHANGEDETECTORCONTAINER_H

View File

@@ -0,0 +1,35 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "ChangeDetectorFactory.h"
#include <vector>
#include "../Exception.h"
#include "ContentsChangeDetector.h"
#include "EditTimeChangeDetector.h"
#include "SizeChangeDetector.h"
#include "TypeChangeDetector.h"
/// Maps a type name ("etime", "size", "type" or "contents") to a newly created detector.
/// \throws Exception for any other name
std::unique_ptr<ChangeDetector> ChangeDetectorFactory::getChangeDetector(const std::string &type) {
    if (type == "etime") return std::make_unique<EditTimeChangeDetector>();
    if (type == "size") return std::make_unique<SizeChangeDetector>();
    if (type == "type") return std::make_unique<TypeChangeDetector>();
    if (type == "contents") return std::make_unique<ContentsChangeDetector>();

    throw Exception("Unknown ChangeDetector type " + type);
}
/// Creates one detector per entry of the "change-detectors" list of \p config, in list order,
/// and wraps them into a ChangeDetectorContainer.
ChangeDetectorContainer ChangeDetectorFactory::getChangeDetectors(const Config &config) {
    const auto names = config.getList("change-detectors");

    std::vector<std::unique_ptr<ChangeDetector>> detectors;
    detectors.reserve(names.size());
    for (const auto &name: names) {
        detectors.push_back(ChangeDetectorFactory::getChangeDetector(name));
    }
    return ChangeDetectorContainer(std::move(detectors));
}

View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_CHANGEDETECTORFACTORY_H
#define SEMBACKUP_CHANGEDETECTORFACTORY_H
#include <memory>
#include <string>
#include "../Config.h"
#include "ChangeDetector.h"
#include "ChangeDetectorContainer.h"
/// Factory class for ChangeDetector
/** Can create either a ChangeDetectorContainer with ChangeDetector%s according to Config,
 * or an individual ChangeDetector from a type string
 */
class ChangeDetectorFactory {
public:
/// Creates a ChangeDetector of given type and returns an unique pointer to it
/// \param type Constant reference to a string containing type of the ChangeDetector to create
/// \return Unique pointer to constructed ChangeDetector
/// \throws Exception if \p type is not a known ChangeDetector type
static std::unique_ptr<ChangeDetector> getChangeDetector(const std::string &type);
/// Constructs a ChangeDetectorContainer with ChangeDetector%s according to the given \p config
/// \param config Config with comma-separated "change-detectors" option set, for each entry a ChangeDetector will be created
/// \return A ChangeDetectorContainer wrapping the ChangeDetector%s constructed according to \p config
static ChangeDetectorContainer getChangeDetectors(const Config &config);
};
#endif//SEMBACKUP_CHANGEDETECTORFACTORY_H

View File

@@ -0,0 +1,42 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "ComparableFile.h"
#include <fstream>
#include <sstream>
#include "../Exception.h"
#include "../repo/objects/FileBuffer.h"
/// Builds a ComparableFile from a File stored in a repository.
/// Name, type, size and mtime are copied from \p file; `contents` creates a FileBuffer for the file's id on \p repo
/// every time it is called (the lambda keeps its own copy of \p file and the \p repo pointer).
ComparableFile::ComparableFile(const File &file, const Repository *repo)
    : path(file.name), type(file.fileType), bytes(file.bytes), mtime(file.mtime),
      contents([file, repo]() {
          auto buffer = std::make_unique<FileBuffer>(repo, file.id);
          return buffer;
      }) {
}
/// Builds a ComparableFile from a file in the filesystem.
/// `path` is \p p relative to \p base; type, size and mtime are queried through the File helpers.
/// `contents` opens the file as a std::filebuf if it is a normal file; for any other type it wraps
/// the result of File::getFileContents into a std::stringbuf.
ComparableFile::ComparableFile(const std::filesystem::path &p, const std::filesystem::path &base)
    : path(p.lexically_relative(base).u8string()),
      type(File::getFileType(p)),
      bytes(File::getFileSize(p)),
      mtime(File::getFileMtime(p)),
      contents(
              [p, path = this->path, type = this->type]() -> std::unique_ptr<std::streambuf> {
                  if (type != File::Type::Normal) {
                      const auto raw = File::getFileContents(p);
                      return std::make_unique<std::stringbuf>(std::string(raw.begin(), raw.end()), std::ios::in | std::ios::binary);
                  }

                  auto fileBuf = std::make_unique<std::filebuf>();
                  fileBuf->open(p, std::ios::in | std::ios::binary);
                  if (!fileBuf->is_open()) throw Exception("Can't open " + p.u8string() + " for reading!");
                  return fileBuf;
              }) {
}

View File

@@ -0,0 +1,43 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_COMPARABLEFILE_H
#define SEMBACKUP_COMPARABLEFILE_H
#include <filesystem>
#include <functional>
#include <streambuf>
#include "../repo/Repository.h"
#include "../repo/objects/File.h"
/// Helper class to allow comparing files from different sources
/**
* As we are required to allow comparisons between a File in a repository and a file in filesystem,
* comparisons between two files that are already in a Repository,
* and between File%s that are in a repository cache and between files in the filesystem (when making backups),
* this helper class exists to provide a uniform interface to be used when calling ChangeDetector%s.
*/
struct ComparableFile {
/// Constructs a ComparableFile based on a File in a Repository
/// The resulting ComparableFile will have a #contents function that returns an instance of FileBuffer for given \p file
/// \param file Constant reference to a File object
/// \param repo Constant pointer to Repository from which the File object was taken, must be valid during the lifetime of created ComparableFile
ComparableFile(const File &file, const Repository *repo);
/// Constructs a ComparableFile based on a file in the filesystem
/// The resulting ComparableFile will have a #contents function that returns a std::filebuf opened on the file
/// if it is a normal file, or a std::stringbuf over the result of File::getFileContents for other file types
/// \param p Constant reference to an absolute path to the file
/// \param base Constant reference to a base path against which #path will be set
ComparableFile(const std::filesystem::path &p, const std::filesystem::path &base);
const std::string path; ///< Relative path to the file
const File::Type type; ///< File type
const unsigned long long bytes; ///< Number of bytes in the file
const unsigned long long mtime; ///< Timestamp of last file modification
const std::function<std::unique_ptr<std::streambuf>()> contents;///< Function that returns a unique pointer to a new std::streambuf instance linked to the contents of the file
};
#endif//SEMBACKUP_COMPARABLEFILE_H

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "ContentsChangeDetector.h"
#include <iterator>
/// Compares two files byte by byte
/// Files of different types are reported as changed without reading their contents
bool ContentsChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    if (f1.type != f2.type) return true;

    using ByteIterator = std::istreambuf_iterator<char>;

    // The stream buffers have to outlive the iterators that read from them
    const auto lhsBuf = f1.contents();
    const auto rhsBuf = f2.contents();

    // The four-iterator overload also reports a mismatch when one stream is a prefix of the other
    const bool identical = std::equal(ByteIterator(lhsBuf.get()), ByteIterator(),
                                      ByteIterator(rhsBuf.get()), ByteIterator());
    return !identical;
}

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_CONTENTSCHANGEDETECTOR_H
#define SEMBACKUP_CONTENTSCHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their contents
/// Reads the streams returned by ComparableFile::contents of both files
class ContentsChangeDetector : public ChangeDetector {
public:
/// \copydoc ChangeDetector::check
/// \return ComparableFile%s are considered different if their types or their contents are different
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_CONTENTSCHANGEDETECTOR_H

View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "EditTimeChangeDetector.h"
/// Reports a change when the modification timestamps of the two files differ
bool EditTimeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    const bool sameMtime = (f1.mtime == f2.mtime);
    return !sameMtime;
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_EDITTIMECHANGEDETECTOR_H
#define SEMBACKUP_EDITTIMECHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their modification time
/// Only ComparableFile::mtime is looked at, file contents are never read
class EditTimeChangeDetector : public ChangeDetector {
public:
/// \copydoc ChangeDetector::check
/// \return ComparableFile%s are considered different if their modification times are different
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_EDITTIMECHANGEDETECTOR_H

View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "SizeChangeDetector.h"
/// Reports a change when the sizes in bytes of the two files differ
bool SizeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    const bool sameSize = (f1.bytes == f2.bytes);
    return !sameSize;
}

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_SIZECHANGEDETECTOR_H
#define SEMBACKUP_SIZECHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their size
/// Only ComparableFile::bytes is looked at, file contents are never read
class SizeChangeDetector : public ChangeDetector {
public:
/// \copydoc ChangeDetector::check
/// \return ComparableFile%s are considered different if their sizes are different
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_SIZECHANGEDETECTOR_H

View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "TypeChangeDetector.h"
/// Reports a change when the file types of the two files differ
bool TypeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    const bool sameType = (f1.type == f2.type);
    return !sameType;
}

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_TYPECHANGEDETECTOR_H
#define SEMBACKUP_TYPECHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their type
/// Only ComparableFile::type is looked at, file contents are never read
class TypeChangeDetector : public ChangeDetector {
public:
/// \copydoc ChangeDetector::check
/// \return ComparableFile%s are considered different if their types are different
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_TYPECHANGEDETECTOR_H

34
src/chunkers/Buzhash.cpp Normal file
View File

@@ -0,0 +1,34 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#include "Buzhash.h"
/// Stores the window size and starts with an empty byte history (the hash itself starts at 0)
Buzhash::Buzhash(uint32_t blockSize) : blockSize(blockSize), history() {}
/// Returns the hash of the bytes fed so far, without modifying the state
uint32_t Buzhash::get() const {
return cur;
}
/// Pushes one byte into the rolling window and returns the updated hash
/// Once the window already holds blockSize bytes, the oldest byte is removed from the hash first
uint32_t Buzhash::feed(uint8_t in) {
    // Age every byte that is already part of the hash by one position
    uint32_t updated = rotr32(cur, 1);

    if (history.size() >= blockSize) {
        // The byte leaving the window has been rotated blockSize times by now,
        // XOR-ing the equally rotated table entry cancels its contribution
        const auto evicted = history.back();
        history.pop_back();
        updated ^= rotr32(randomNumbers[evicted], blockSize);
    }

    history.emplace_front(in);
    updated ^= randomNumbers[in];

    cur = updated;
    return cur;
}
// Circular shift taken from: https://en.wikipedia.org/wiki/Circular_shift
/// Rotates \p value to the right by \p count bits, \p count is taken modulo the bit width of \p value
uint32_t Buzhash::rotr32(uint32_t value, unsigned int count) {
    const unsigned int mask = CHAR_BIT * sizeof(value) - 1;
    // Both shift amounts are kept within [0, mask], so neither shift is ever as wide as the value
    const unsigned int rightShift = count & mask;
    const unsigned int leftShift = (-rightShift) & mask;
    return (value >> rightShift) | (value << leftShift);
}

85
src/chunkers/Buzhash.h Normal file
View File

@@ -0,0 +1,85 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#ifndef SEMBACKUP_BUZHASH_H
#define SEMBACKUP_BUZHASH_H
#include <array>
#include <climits>
#include <cstdint>
#include <deque>
/// Cyclic polynomial rolling hash
/** Based on: http://www.serve.net/buz/hash.adt/java.002.html
* https://github.com/silvasur/buzhash/blob/master/hash.go
* https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
*/
class Buzhash {
public:
/// Constructs a new Buzhash instance
/// \param blockSize Rolling hash window, in bytes
Buzhash(uint32_t blockSize);
/// Returns current hash value
uint32_t get() const;
/// Adds \p in to the hash, removing the oldest byte from it if the window is already full
/// \param in Byte to add
/// \return New hash value
uint32_t feed(uint8_t in);
private:
uint32_t cur = 0; ///< Current hash value
const uint32_t blockSize; ///< Hashing window size
std::deque<uint32_t> history;///< Bytes used to calculate current hash, used to compute the hash in a rolling fashion (to remove the oldest byte from the hash when blockSize is reached)
// Circular shift taken from: https://en.wikipedia.org/wiki/Circular_shift
/// Shift \p value \p count bits to the right circularly
/// \param value Value to shift
/// \param count By how many bits
/// \return Shifted value
static uint32_t rotr32(uint32_t value, unsigned int count);
/// 256 32-bit random numbers used for hashing, indexed by the byte value
/// Ideally, should have an equal distribution of 0s and 1s, but I didn't bother checking it
// clang-format off
static constexpr std::array<uint32_t, 256> randomNumbers{
0x827f934c, 0xebcd9924, 0x667fdea2, 0x8a8b0997, 0x42af49e8, 0x556cb313, 0x505da41b, 0xb23be60f,
0xc3901be4, 0xee1d8d4d, 0x4d59795c, 0x8d542ba4, 0x043f073c, 0x2af19a39, 0xb2c4aa36, 0x6e30ff43,
0x77ad3ef7, 0xd4c077e5, 0x3a1155aa, 0x866b07d3, 0xc16022b2, 0x6d4dad6e, 0x7a69c6dd, 0xd436dc23,
0x32b64948, 0x1f72475f, 0x129be871, 0x05d46f6e, 0x7e405cd5, 0x31fdd272, 0x84a56b1a, 0xeaf43633,
0x5f8148d4, 0x6d4bf6d9, 0xc2b4dbd7, 0xaa804cc7, 0xcb3de5ca, 0x6503cdb3, 0xa3c6d727, 0x20e2f098,
0xd525bb67, 0x37b1b81e, 0xc1f1fd79, 0x4fe91240, 0x6a4ea716, 0x71245e33, 0xdbaab854, 0xfc24600e,
0xd72dc72f, 0x2d7139ae, 0x075fb38d, 0xb18028a5, 0x9970d103, 0x235ec64b, 0x68645255, 0x352945f0,
0x7a4b19a1, 0xe17df5f5, 0x676a6644, 0x75aad7aa, 0x63bdfc9a, 0x607586c7, 0x1546400e, 0xfe582141,
0xb50a199f, 0xb0769910, 0x5d74ab3b, 0x2404799b, 0xa66a3a78, 0x1b6e24aa, 0x630674cc, 0x3272fea4,
0xd4e9e078, 0xe586d12a, 0x579f8b98, 0xfd16bcb5, 0xd1e4faee, 0xe30953c7, 0x3ac73f87, 0xab66983f,
0x5fe12f90, 0x10952ef1, 0x5c7ac32a, 0x89ccd941, 0xb82c3fa9, 0xacd374e5, 0x50984746, 0x09f082e8,
0x11ee3b91, 0x31764e3a, 0xb59df38a, 0x67e94f2d, 0xcceaca68, 0xc68a89d8, 0x5f2e80ac, 0xd5556741,
0x8c815df6, 0xde71c2b5, 0x7b1f5c49, 0xd64682a4, 0x4fb59748, 0x4968707f, 0x909c0c1a, 0x5f1dd608,
0x1c601e37, 0x96e01ada, 0xc5582ef8, 0xae6834c1, 0xbe63b0ce, 0xab2aea9f, 0xf13e77c2, 0xe433350b,
0x17a24a33, 0xc1f31bb6, 0xa23e9de4, 0x7e28ef69, 0x23e0ef42, 0x0796e53f, 0xf9e3045d, 0x7bbacd31,
0xa48bee27, 0x15f3c3b3, 0x4c320cb4, 0x916429d9, 0xa15ccb3c, 0x82a4a23c, 0xb0cc6a4a, 0xcf8d93fa,
0x3b18b937, 0xad0488e4, 0xaa568114, 0x80b9b8c7, 0x8f3a9071, 0x818b790d, 0x99c8dbf2, 0x0d23b2a4,
0x74c81a28, 0x1aa65d76, 0x7168ee7d, 0xc0d40b6c, 0x77c70a0c, 0xd3752839, 0xc2f7981c, 0x83767124,
0xb881618f, 0xb263d8cf, 0xbbb40400, 0xdb9702eb, 0xaccad841, 0x806af5a7, 0x16f096e3, 0x64bf45d9,
0x5f7c0a58, 0xdac0c665, 0x1dbebaac, 0xb97027a6, 0xfc934433, 0xfc7b2d06, 0x8871fe4e, 0x0df24135,
0x6ddf7cc8, 0x32e0d1cd, 0xe88abedd, 0x214af930, 0x90990f97, 0xc7691171, 0xbf7b6ca3, 0x8af6589c,
0x452c8ee0, 0xbc2c5891, 0xcf8d13b4, 0x698d1f1f, 0x802a011a, 0x19820708, 0x25c79d2f, 0xedf91253,
0xc93fe5dd, 0xa03a117b, 0x10912ae7, 0xc90d59d0, 0xc3522549, 0x3e4f3e81, 0x494ae40f, 0x2d157b6e,
0xd7bf06b2, 0x19c5bb2a, 0xa869261c, 0xa80cfd2c, 0x1ea7c6ec, 0x1b36a51f, 0x8bd227cc, 0xad2d2260,
0x181258c3, 0xbd253a58, 0x3273f94b, 0x9c315309, 0xb2d8d3e3, 0x11ec35a8, 0x384e6475, 0x855a9009,
0x854cc06a, 0xe7408809, 0xe583ce2a, 0x895fb756, 0x6a8a2072, 0x6598a92b, 0x530f41bb, 0xb1bd57f1,
0x62d57fa0, 0xe6505776, 0x42fcfe4d, 0x0fbdf1ee, 0x8e3104c4, 0xf11c8a65, 0x5bc51ad9, 0x5f1f8ce9,
0xab179a87, 0xd5448444, 0x7bd4a26b, 0x658f1963, 0x86db95b8, 0xaba6734e, 0x486fddea, 0x859c3e0b,
0xebce0106, 0x99c3014e, 0xc151b942, 0x9604aad8, 0xf6ce654b, 0xa1e7982e, 0xf6d8ed14, 0xd4bdf7e2,
0x13696254, 0x05ec638c, 0x306dbc29, 0x1676eb60, 0xadbf3ce3, 0x966dde56, 0x6d5bea46, 0x719aa10d,
0x0e65093d, 0x0b1a3c43, 0x0321ea8c, 0xe0ef2cbd, 0x43432ee3, 0x3e62046d, 0x425e7b44, 0x892e119c,
0xfdec4de5, 0x48c5dd6c, 0x79e6bfcd, 0x8d53372e, 0xe96f6d32, 0x52cddacd, 0x3e99e0eb, 0xa9e5d28f,
};
// clang-format on
};
#endif//SEMBACKUP_BUZHASH_H

View File

@@ -0,0 +1,42 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#include "BuzhashChunker.h"
#include "../Exception.h"
#include "../crypto/MD5.h"
/// Forwards \p buf and \p maxBytes to Chunker, stores the remaining parameters and creates the hasher with a window of \p window bytes
BuzhashChunker::BuzhashChunker(std::streambuf *buf, unsigned long long minBytes, unsigned long long maxBytes, unsigned long long mask, uint32_t window) : Chunker(buf, maxBytes), window(window), minBytes(minBytes), mask(mask), buzhash(window) {}
/// Reads the next chunk from the source buffer
/// At least minBytes bytes are read (fewer only when the source ends), then reading continues byte by byte
/// until the lowest `mask` bits of the rolling hash are all zero, maxBytes is reached or the source ends
/// \return Pair of the MD5 hash of the chunk and the chunk bytes
/// \throws Exception if EOF was already reached
std::pair<std::string, std::vector<char>> BuzhashChunker::getNext() {
    if (eof) throw Exception("Trying to read from a file that is finished!");

    // The chunk is assembled directly inside the returned pair, so the data is not copied on return
    std::pair<std::string, std::vector<char>> chunk;
    std::vector<char> &rbuf = chunk.second;
    rbuf.resize(minBytes);

    const auto read = static_cast<unsigned long long>(buf->sgetn(rbuf.data(), static_cast<std::streamsize>(minBytes)));

    if (read != minBytes) {
        // The source ended before even the minimal chunk size was reached
        eof = true;
        rbuf.resize(read);
        chunk.first = MD5::calculate(rbuf);
        return chunk;
    }

    for (auto c: rbuf) {
        buzhash.feed(static_cast<uint8_t>(c));
    }

    // Selects the lowest `mask` bits of the hash.
    // Computed without shifting by the full width of the type (which is undefined behaviour):
    // mask == 0 selects no bits (cut immediately), mask >= 64 selects all of them
    constexpr unsigned long long hashBits = sizeof(unsigned long long) * CHAR_BIT;
    const unsigned long long cutMask = mask >= hashBits ? ~0ULL : ((1ULL << mask) - 1);

    // Continue reading the file until either the last mask bits are zero or we exceed the maxSize
    while (((buzhash.get() & cutMask) != 0) && rbuf.size() < maxBytes) {
        auto r = buf->sbumpc();
        if (r == std::streambuf::traits_type::eof()) {
            eof = true;
            break;
        } else {
            char c = std::streambuf::traits_type::to_char_type(r);
            rbuf.emplace_back(c);
            buzhash.feed(static_cast<uint8_t>(c));
        }
    }

    chunk.first = MD5::calculate(rbuf);
    return chunk;
}

View File

@@ -0,0 +1,34 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#ifndef SEMBACKUP_BUZHASHCHUNKER_H
#define SEMBACKUP_BUZHASHCHUNKER_H
#include <streambuf>
#include "Buzhash.h"
#include "Chunker.h"
/// Chunker implementation using rolling hash
/// Chunk boundaries are placed where the lowest `mask` bits of a Buzhash of the data are zero
class BuzhashChunker : public Chunker {
public:
/// Constructs a BuzhashChunker
/// \copydoc Chunker::Chunker
/// \param minBytes Minimum amount of bytes in returned chunks (the last chunk of a file may be shorter)
/// \param mask Amount of trailing zeroes in the rolling hash at which the file is cut (results in average chunk size of 2^mask bytes)
/// \param window Rolling hash window (how many of chunks last bytes are included in the hash, the default is recommended)
BuzhashChunker(std::streambuf *buf, unsigned long long minBytes, unsigned long long maxBytes, unsigned long long mask, uint32_t window = 4095);
/// \copydoc Chunker::getNext
std::pair<std::string, std::vector<char>> getNext() override;
private:
const unsigned long long window; ///< Rolling hash window
const unsigned long long minBytes;///< Minimum amount of bytes in returned chunks
const unsigned long long mask; ///< Amount of trailing zeroes in the rolling hash at which the file is cut
Buzhash buzhash; ///< Hasher instance, its state is kept between calls to getNext
};
#endif//SEMBACKUP_BUZHASHCHUNKER_H

51
src/chunkers/Chunker.cpp Normal file
View File

@@ -0,0 +1,51 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#include "Chunker.h"
#include "../Exception.h"
/// Stores the (non-owned) source buffer and the maximal chunk size
Chunker::Chunker(std::streambuf *buf, unsigned long long maxBytes) : buf(buf), maxBytes(maxBytes) {}
/// Returns the eof flag, which is set by getNext implementations once the source is exhausted
bool Chunker::getEof() const {
return eof;
}
/// Defaulted out of line; the source buffer is not owned and therefore not released here
Chunker::~Chunker() = default;
/// Creates an iterator bound to this Chunker; constructing it already reads the first chunk
Chunker::ChunkerIterator Chunker::begin() {
    return ChunkerIterator(this);
}
/// Creates the past-EOF iterator, which is not bound to any Chunker
Chunker::ChunkerIterator Chunker::end() {
    return ChunkerIterator(nullptr);
}
/// Advances to the next chunk, or switches to the past-EOF state when the source has no more chunks
/// \throws Exception if the iterator is already past-EOF
Chunker::ChunkerIterator &Chunker::ChunkerIterator::operator++() {
    if (pastEOF) throw Exception("Trying to increment pastEOF ChunkerIterator!");

    if (!source->getEof()) {
        // There is still data to read: fetch the next chunk and keep it for operator*
        buf = source->getNext();
        return *this;
    }

    pastEOF = true;
    return *this;
}
/// Iterators differ when exactly one of them is past-EOF
bool Chunker::ChunkerIterator::operator!=(const Chunker::ChunkerIterator &rhs) const {
    return !(*this == rhs);
}
/// Returns a copy of the chunk the iterator currently points to
/// \throws Exception if the iterator is past-EOF
Chunker::ChunkerIterator::value_type Chunker::ChunkerIterator::operator*() const {
    if (!pastEOF) return buf.value();
    throw Exception("Trying to dereference pastEOF ChunkerIterator!");
}
/// Iterators compare equal when they agree on being past-EOF; neither the source nor the current chunk is compared
bool Chunker::ChunkerIterator::operator==(const Chunker::ChunkerIterator &rhs) const {
return pastEOF == rhs.pastEOF;
}
/// Binds the iterator to \p source and immediately loads the first chunk
/// A null \p source produces the past-EOF iterator
Chunker::ChunkerIterator::ChunkerIterator(Chunker *source)
    : source(source), pastEOF(source == nullptr) {
    if (source == nullptr) return;
    ++(*this);
}

74
src/chunkers/Chunker.h Normal file
View File

@@ -0,0 +1,74 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_CHUNKER_H
#define SEMBACKUP_CHUNKER_H
#include <array>
#include <optional>
#include <streambuf>
#include <vector>
/// Abstract base class for a Chunker that takes a file and splits it into chunks to be backed up
class Chunker {
private:
/// Convenience iterator to allow using Chunker%s in range for loops
struct ChunkerIterator {
using value_type = std::pair<std::string, std::vector<char>>;
/// Creates a ChunkerIterator pointing to the first chunk or past-EOF
/// If \p source is not null, the first chunk is read from it right away
/// \param source Pointer to a Chunker, should be available during the entire iterator lifetime, or nullptr if this is pastEOF iterator
ChunkerIterator(Chunker *source);
/// Increments the iterator to the next chunk, or past-EOF
/// \throws Exception if iterator points past-EOF
ChunkerIterator &operator++();
/// Returns the current pointed-to chunk
/// \throws Exception if iterator points past-EOF
value_type operator*() const;
/// Returns true if both iterators are past-EOF, or if neither of them is
bool operator==(const ChunkerIterator &rhs) const;
/// Returns true if exactly one of the iterators is past-EOF
bool operator!=(const ChunkerIterator &rhs) const;
private:
Chunker *const source; ///< Pointer to the underlying Chunker
std::optional<value_type> buf;///< Currently pointed to chunk
bool pastEOF = false; ///< Whether past EOF has been reached
};
public:
/// Returns the next chunk of the file
/// Returns a single empty chunk if a file is empty
/// \return Pair consisting of the chunk's MD5 hash and the chunk's bytes
/// \throws Exception if EOF was already reached
virtual std::pair<std::string, std::vector<char>> getNext() = 0;
/// Returns True if EOF was reached, False otherwise
bool getEof() const;
/// Default virtual destructor
virtual ~Chunker();
/// Returns a ChunkerIterator pointing to the first chunk in a file
/// Creating the iterator already consumes the first chunk from the source
ChunkerIterator begin();
/// Returns a past-EOF ChunkerIterator
static ChunkerIterator end();
protected:
/// \param buf Pointer to a std::streambuf, should be available during the entire lifetime of a Chunker
/// \param maxBytes Maximal amount of bytes in returned chunks
Chunker(std::streambuf *buf, unsigned long long maxBytes);
std::streambuf *const buf; ///< Constant pointer to the source std::streambuf
bool eof = false; ///< Indicates whether EOF has been reached
const unsigned long long maxBytes;///< Max number of bytes in returned chunks
};
#endif//SEMBACKUP_CHUNKER_H

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#include "ChunkerFactory.h"
#include "../Exception.h"
#include "BuzhashChunker.h"
#include "ConstChunker.h"
/// Creates the Chunker selected by the "chunker" config option
/// "chunker-min" and "chunker-max" are multiplied by 1024 before being passed to the Chunker
/// \throws Exception if the chunker type is unknown
std::unique_ptr<Chunker> ChunkerFactory::getChunker(const Config &config, std::streambuf *buf) {
    const auto &kind = config.getStr("chunker");

    if (kind == "const")
        return std::make_unique<ConstChunker>(buf, config.getInt("chunker-max") * 1024);

    if (kind == "buzhash")
        return std::make_unique<BuzhashChunker>(buf, config.getInt("chunker-min") * 1024, config.getInt("chunker-max") * 1024, config.getInt("chunker-mask"));

    throw Exception("Unknown chunker type!");
}

View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#ifndef SEMBACKUP_CHUNKERFACTORY_H
#define SEMBACKUP_CHUNKERFACTORY_H
#include <memory>
#include <streambuf>
#include "../Config.h"
#include "Chunker.h"
/// Factory for Chunker%s
class ChunkerFactory {
public:
/// Creates a new Chunker based on provided \p config backed with \p buf
/// \param config Constant reference to Config, its "chunker" option selects the Chunker type
/// \param buf Pointer to a std::streambuf instance, should be available during the Chunker lifetime
/// \return Unique pointer to the created Chunker
/// \throws Exception if the configured chunker type is unknown
static std::unique_ptr<Chunker> getChunker(const Config &config, std::streambuf *buf);
};
#endif//SEMBACKUP_CHUNKERFACTORY_H

View File

@@ -0,0 +1,27 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#include "ConstChunker.h"
#include "../Exception.h"
#include "../crypto/MD5.h"
/// Forwards both arguments to the Chunker base class, there is no additional state
ConstChunker::ConstChunker(std::streambuf *buf, unsigned long long maxBytes) : Chunker(buf, maxBytes) {}
/// Reads the next chunk of up to maxBytes bytes from the source buffer
/// A short read marks the end of the source, the (possibly empty) remainder is returned as the last chunk
/// \return Pair of the MD5 hash of the chunk and the chunk bytes
/// \throws Exception if EOF was already reached
std::pair<std::string, std::vector<char>> ConstChunker::getNext() {
    if (eof) throw Exception("Trying to read from a file that is finished!");

    // The chunk is assembled directly inside the returned pair, so the data is not copied on return
    std::pair<std::string, std::vector<char>> chunk;
    std::vector<char> &rbuf = chunk.second;
    rbuf.resize(maxBytes);

    const auto read = static_cast<unsigned long long>(buf->sgetn(rbuf.data(), static_cast<std::streamsize>(maxBytes)));

    if (read != maxBytes) {
        eof = true;
        rbuf.resize(read);
    }

    chunk.first = MD5::calculate(rbuf);
    return chunk;
}

View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_CONSTCHUNKER_H
#define SEMBACKUP_CONSTCHUNKER_H
#include <streambuf>
#include "Chunker.h"
/// Chunker implementation that splits the file into equally-sized chunks of maxBytes bytes
/// The last chunk holds whatever is left of the file and may be shorter
class ConstChunker : public Chunker {
public:
/// Constructs a ConstChunker
/// \copydoc Chunker::Chunker
ConstChunker(std::streambuf *buf, unsigned long long maxBytes);
/// \copydoc Chunker::getNext
std::pair<std::string, std::vector<char>> getNext() override;
};
#endif//SEMBACKUP_CONSTCHUNKER_H

9
src/commands/Command.cpp Normal file
View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "Command.h"
/// Takes \p name by value and moves it into the constant #name member
Command::Command(std::string name) : name(std::move(name)) {}
/// The destructor is declared pure virtual, so it still needs this out-of-line definition
Command::~Command() = default;

28
src/commands/Command.h Normal file
View File

@@ -0,0 +1,28 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMAND_H
#define SEMBACKUP_COMMAND_H
#include "../Context.h"
/// Abstract base class for some process running with some Context
class Command {
public:
/// Runs the command with Context \p ctx
/// \param ctx Context to run with, passed by value
virtual void run(Context ctx) = 0;
/// Pure virtual destructor
virtual ~Command() = 0;
/// The name of the command
const std::string name;
protected:
/// Constructs a command with name \p name
Command(std::string name);
};
#endif//SEMBACKUP_COMMAND_H

View File

@@ -0,0 +1,152 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandDiff.h"
#include "../BytesFormatter.h"
#include "../Diff.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../change_detectors/ChangeDetectorFactory.h"
#include "../chunkers/ChunkerFactory.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
using namespace CommandsCommon;
/// Registers the command under the name "diff"
CommandDiff::CommandDiff() : Command("diff") {}
/// Runs the diff described by the repository config
/** The first side is the archive given by the "aid" option, or the archive with the greatest id if it is not set.
 * The second side is the archive given by "aid2" if it is set, and the "from" directory otherwise.
 * In "normal" diff-mode every file under "prefix" is compared, in "file" diff-mode only the file named by "prefix".
 * \throws Exception if the repository has no archives to pick from, or if the diff-mode is unknown
 */
void CommandDiff::run(Context ctx) {
    std::string diffMode = ctx.repo->getConfig().getStr("diff-mode");
    Object::idType archive1;
    if (!ctx.repo->getConfig().exists("aid")) {
        auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);
        // std::max_element returns end() for an empty range, which must not be dereferenced
        if (archives.empty()) throw Exception("No archives found in the repository!");
        archive1 = std::max_element(archives.begin(), archives.end(), [](const auto &a1, const auto &a2) { return a1.second < a2.second; })->second;
    } else {
        archive1 = ctx.repo->getConfig().getInt("aid");
    }

    ThreadPool threadPool([&](const std::string &error) {
        ctx.logger->write("Error: " + error, 0);
    },
                          ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());

    auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1));
    std::mutex filesLock;
    std::map<std::filesystem::path, File> files;///< Files in the first archive
    for (auto id: archiveO1.files) {
        auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
        auto path = std::filesystem::u8path(file.name);
        if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
            files.emplace(file.getKey(), std::move(file));
    }

    /// Container of ChangeDetectors built using the config of the repository
    ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());

    /// Task to compare the given file with the first archive
    /// Every processed file is erased from `files`, so what is left afterwards exists only in the first archive
    auto processFile = [&](ComparableFile p) {
        auto relPath = p.path;
        std::unique_lock lock(filesLock);
        if (files.count(relPath) == 0) {
            ctx.logger->write(relPath + " is new\n", 0);
            lock.unlock();
        } else {
            File repoFile = files.at(relPath);
            // The comparison itself may be slow, so it runs without holding the lock
            lock.unlock();
            if (changeDetector.check({repoFile, ctx.repo}, p)) {
                ctx.logger->write(relPath + " is different " + Diff::diff({repoFile, ctx.repo}, p) + "\n", 1);
            } else {
                if (diffMode == "file")
                    ctx.logger->write(relPath + " are same ", 0);
            }
        }
        lock.lock();
        files.erase(relPath);
    };

    std::optional<Archive> archiveO2;
    if (diffMode == "normal") {
        /// If a second archive is given, run the task for each of its files, otherwise use the "from" config option
        if (ctx.repo->getConfig().exists("aid2")) {
            archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));

            threadPool.push([&]() {
                for (auto id: archiveO2.value().files) {
                    /// Exit when asked to
                    if (Signals::shouldQuit) throw Exception("Quitting");
                    auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
                    if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::u8path(file.name)))
                        threadPool.push([&, file]() {
                            processFile(ComparableFile{file, ctx.repo});
                        });
                    if (Signals::shouldQuit) break;
                }
                return true;
            });
        } else {
            std::filesystem::path from = ctx.repo->getConfig().getStr("from");

            /// Start the diff with the root directory and empty ignore list
            threadPool.push([&, from]() {
                processDirWithIgnore(
                        from,
                        {},
                        [&](std::function<void()> f) { threadPool.push(std::move(f)); },
                        [processFile, from, prefix = ctx.repo->getConfig().getStr("prefix")](const std::filesystem::directory_entry &dirEntry) {
                            if (isSubpath(prefix, dirEntry.path().lexically_relative(from)))
                                processFile(ComparableFile{dirEntry, from});
                        });
            });
        }
    } else if (diffMode == "file") {
        if (files.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
            ctx.logger->write("Doesn't exist in the first archive", 0);
            return;
        }

        if (ctx.repo->getConfig().exists("aid2")) {
            archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
            std::map<std::filesystem::path, File> files2;///< Files in the second archive
            for (auto id: archiveO2->files) {
                auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
                auto path = std::filesystem::u8path(file.name);
                if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
                    files2.emplace(file.getKey(), std::move(file));
            }

            if (files2.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
                ctx.logger->write("Doesn't exist in the second archive", 0);
                return;
            } else {
                processFile(ComparableFile{files2.at(ctx.repo->getConfig().getStr("prefix")), ctx.repo});
            }
        } else {
            std::filesystem::path from = ctx.repo->getConfig().getStr("from");
            if (!std::filesystem::exists(from / ctx.repo->getConfig().getStr("prefix"))) {
                ctx.logger->write("Doesn't exist in the filesystem archive", 0);
                return;
            }

            /// Compare the single file directly, without going through the thread pool
            processFile(ComparableFile{from / ctx.repo->getConfig().getStr("prefix"), from});
        }
    } else {
        throw Exception("Unknown diff-mode: " + diffMode);
    }

    /// Wait for diff to end
    std::unique_lock finishedLock(threadPool.finishedLock);
    threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });

    /// Files that were never seen on the second side were removed
    if (diffMode == "normal")
        for (auto const &s: files) {
            ctx.logger->write(s.first.u8string() + " is removed\n", 0);
        }
}

View File

@@ -0,0 +1,23 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDDIFF_H
#define SEMBACKUP_COMMANDDIFF_H
#include "Command.h"
#include "CommandsCommon.h"
/// Run the diff between:
/// 1. The latest archive and the `from` directory
/// 2. if `aid` is set the aid archive and the `from` directory
/// 3. if `aid` and `aid2` are set between `aid` and `aid2`
/// The `diff-mode` option selects between comparing everything under `prefix` ("normal") and a single file ("file")
class CommandDiff : public Command {
public:
/// Constructs the command with the name "diff"
CommandDiff();
/// Runs the diff using the config of the repository in \p ctx
void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDDIFF_H

View File

@@ -0,0 +1,16 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandList.h"
/// Registers the command under the name "list"
CommandList::CommandList() : Command("list") {
}
/// Prints the name and id of every archive in the repository to standard output, ordered by ascending id
void CommandList::run(Context ctx) {
    auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);

    const auto byId = [](const auto &lhs, const auto &rhs) { return lhs.second < rhs.second; };
    std::sort(archives.begin(), archives.end(), byId);

    for (const auto &entry: archives)
        std::cout << "Name: " << entry.first << " Id: " << entry.second << std::endl;
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDLIST_H
#define SEMBACKUP_COMMANDLIST_H
#include "Command.h"
#include "CommandsCommon.h"
/// Lists available archives in a repository
class CommandList : public Command {
public:
/// Constructs the command with the name "list"
CommandList();
/// Prints the name and id of each archive of the repository in \p ctx
void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDLIST_H

View File

@@ -0,0 +1,22 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandListFiles.h"
#include "../BytesFormatter.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
#include "../repo/objects/File.h"
/// Registers the command under the name "list-files"
CommandListFiles::CommandListFiles() : Command("list-files") {
}
/// Prints name, type and formatted size of every file in the archive selected by the "aid" config option
void CommandListFiles::run(Context ctx) {
    const auto archiveId = ctx.repo->getConfig().getInt("aid");
    const auto archive = Serialize::deserialize<Archive>(ctx.repo->getObject(archiveId));

    for (const auto &fid: archive.files) {
        const auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
        const auto sizeText = BytesFormatter::formatStr(file.bytes);
        std::cout << "Name: " << file.name
                  << " type: " << File::TypeToStr.at(file.fileType)
                  << " size: " << sizeText << std::endl;
    }
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDLISTFILES_H
#define SEMBACKUP_COMMANDLISTFILES_H
#include "Command.h"
#include "CommandsCommon.h"
/// Lists files in the selected Archive
/// The Archive is selected with the `aid` config option
class CommandListFiles : public Command {
public:
/// Constructs the command with the name "list-files"
CommandListFiles();
/// Prints name, type and size of each file of the selected Archive
void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDLISTFILES_H

View File

@@ -0,0 +1,125 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandRestore.h"
#include <fstream>
#include <sstream>
#include "../BytesFormatter.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../chunkers/ChunkerFactory.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
using namespace CommandsCommon;
/// Registers the command under the name "restore"
CommandRestore::CommandRestore() : Command("restore") {
}
/// Restores every file of the archive selected by the "aid" config option into the directory given by the "to" option
/// One task is pushed to a thread pool per file, progress is reported through the logger of \p ctx
void CommandRestore::run(Context ctx) {
Object::idType archive = ctx.repo->getConfig().getInt("aid");
std::filesystem::path to = std::filesystem::u8path(ctx.repo->getConfig().getStr("to"));
std::atomic<unsigned long long> filesToRestoreCount = 0;
std::atomic<unsigned long long> bytesToRestore = 0;
WorkerStats workerStats;///< Restore statistics of the worker threads
/// Worker callback, bound to the local workerStats variable
workerStatsFunction workerCallback = [&workerStats](unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten) {
CommandsCommon::workerCallback(bytesWritten, bytesSkipped, filesWritten, workerStats);
};
// The inner scope makes sure that the thread pool and the progress reporting are destroyed
// before the final newline is written below
{
/// Calculate the average speed of the restore
RunningDiffAverage avg(
[&]() { return workerStats.bytesWritten.load(); },
100, 100);
/// Show restore progress
Progress progress([this, ctx](const std::string &s, int l) { ctx.logger->write(s, l); },
{
[&workerStats]() { return std::to_string(workerStats.filesWritten.load()); },
"/",
[&filesToRestoreCount]() { return std::to_string(filesToRestoreCount); },
" files saved, ",
[&workerStats]() { return BytesFormatter::formatStr(workerStats.bytesWritten.load() + workerStats.bytesSkipped.load()); },
" / ",
[&bytesToRestore]() { return BytesFormatter::formatStr(bytesToRestore); },
" saved @ ",
// NOTE(review): the factor of 10 presumably converts a per-100ms average into bytes per second - confirm against RunningDiffAverage
[&avg]() { return BytesFormatter::formatStr(avg.get() * 10); },
"/s",
},
ctx.repo->getConfig());
/// Thread pool for restore tasks
ThreadPool threadPool([&](const std::string &error) {
progress.print("Error: " + error, 0);
},
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());
/// Add the main restore task
threadPool.push([&, this]() {
/// Get the archive and its file IDs
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archive));
std::vector<Object::idType> files = archiveO.files;
/// For each file...
for (const auto fid: files) {
/// Stop when asked to
if (Signals::shouldQuit) break;
auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
// The totals grow while files are being discovered, so the progress line shows a moving target
filesToRestoreCount++;
bytesToRestore += file.bytes;
/// Spawn a restore task
// `to` and `file` are copied into the task, everything else is captured by reference
threadPool.push([&, this, to, file]() {
backupRestoreFile(file, to, workerCallback, ctx);
progress.print("Restored " + file.name, 1);
});
}
});
/// Wait for all tasks to finish
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
}
ctx.logger->write("\n", 1);
}
/// Restores a single File below \p baseDir
/** Directories are created, symlinks are recreated from the target stored in the first chunk of the file,
 * and the contents of any other file are written chunk by chunk.
 * \param file     File object to restore
 * \param baseDir  Base directory to restore to, missing parent directories are created
 * \param callback Stats callback, called with (bytes written, bytes skipped, files written)
 * \param ctx      Context whose repository the chunks are read from
 * \return Full path of the restored file
 * \throws Exception if asked to quit, or if the output file can't be opened or written
 */
std::string CommandRestore::backupRestoreFile(const File &file, const std::filesystem::path &baseDir, workerStatsFunction &callback, Context ctx) {
    auto fullpath = baseDir / std::filesystem::u8path(file.name);

    std::filesystem::create_directories(fullpath.parent_path());

    if (file.fileType == File::Type::Directory) {
        std::filesystem::create_directory(fullpath);
        callback(0, 0, 1);
        return fullpath.u8string();
    }
    if (file.fileType == File::Type::Symlink) {
        // The symlink target is stored as the data of the first chunk
        auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks[0]));
        std::filesystem::create_symlink(std::filesystem::u8path(std::string{dest.data.begin(), dest.data.end()}), fullpath);
        callback(0, 0, 1);
        return fullpath.u8string();
    }

    std::ofstream ostream(fullpath, std::ios::binary | std::ios::out | std::ios::trunc);
    // Without this check a file that can't be created would silently be counted as restored
    if (!ostream.is_open()) throw Exception("Can't open " + fullpath.u8string() + " for writing!");

    for (const auto cid: file.chunks) {
        if (Signals::shouldQuit) throw Exception("Quitting!");

        Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid));
        if (!c.data.empty()) {
            // A short write means that the data did not reach the file (for example, the disk is full)
            const auto written = ostream.rdbuf()->sputn(c.data.data(), c.data.size());
            if (written != static_cast<std::streamsize>(c.data.size()))
                throw Exception("Error writing to " + fullpath.u8string() + "!");
            callback(c.data.size(), 0, 0);
        }
    }
    callback(0, 0, 1);

    return fullpath.u8string();
}

View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDRESTORE_H
#define SEMBACKUP_COMMANDRESTORE_H
#include "Command.h"
#include "../repo/objects/File.h"
#include "CommandsCommon.h"
/// Restores the archive with id \p aid to path \p to (from config)
class CommandRestore : public Command {
public:
/// Constructs the restore command
CommandRestore();
/// Runs the restore
/// \param ctx Context providing the repository and the logger
void run(Context ctx) override;
private:
/// Internal function to restore a file
/// \param file Constant reference to the File object
/// \param base Base directory to restore to
/// \param callback Stats callback
/// \param ctx Context providing the repository to read the file's objects from
/// \return UTF-8 encoded full path of the restored file
std::string backupRestoreFile(const File &file, const std::filesystem::path &base, CommandsCommon::workerStatsFunction &callback, Context ctx);
};
#endif//SEMBACKUP_COMMANDRESTORE_H

239
src/commands/CommandRun.cpp Normal file
View File

@@ -0,0 +1,239 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandRun.h"
#include <fstream>
#include <iomanip>
#include <sstream>
#include "../BytesFormatter.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../change_detectors/ChangeDetectorFactory.h"
#include "../chunkers/ChunkerFactory.h"
#include "../crypto/MD5.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
#include "../repo/objects/File.h"
#include "CommandsCommon.h"
using namespace CommandsCommon;
/// Constructs the command, passing the name "run" to the Command base class
CommandRun::CommandRun()
    : Command("run") {}
/// Runs a backup of the "from" directory (taken from the repository config) into the repository
/**
 * The backup is full if the config "type" is "full", if at least "full-period" non-full archives
 * were made since the last full one, or if there are no archives yet.
 * Otherwise files are re-chunked only when the change detectors report them as changed.
 * At the end an Archive object listing all the collected file ids is stored.
 */
/// \param ctx Context providing the repository and the logger
void CommandRun::run(Context ctx) {
WorkerStats workerStats;///< Backup statistics of the worker threads
RunnerStats runnerStats;///< Backup target metrics
std::filesystem::path from = ctx.repo->getConfig().getStr("from");///< Directory to back up from
bool fullBackup = ctx.repo->getConfig().getStr("type") == "full";
if (fullBackup) {
ctx.logger->write("Backup is full because of the config\n", 1);
}
/// For progtest task compliance
if (!fullBackup) {
/// If it's time for full backup as per config, force it
auto per = ctx.repo->getConfig().getInt("full-period");
auto list = ctx.repo->getObjects(Object::ObjectType::Archive);
/// Sort in descending order of the second pair member (which is passed to getObject as the id below)
std::sort(list.begin(), list.end(), [](const auto &l, const auto &r) { return l.second > r.second; });
/// Count the non-full archives at the start of the sorted list, up to the first full one
int lastInc = 0;
for (auto const &a: list) {
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(a.second));
if (!archiveO.isFull) {
lastInc++;
continue;
} else
break;
}
if (lastInc >= per) {
fullBackup = true;
ctx.logger->write("Backup is full because of the interval\n", 1);
}
if (list.size() == 0) {
fullBackup = true;
ctx.logger->write("Backup is full because there are no backups\n", 1);
}
}
/// Worker callback, bound to the local workerStats variable
workerStatsFunction workerCallback = [&](unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten) {
CommandsCommon::workerCallback(bytesWritten, bytesSkipped, filesWritten, workerStats);
};
std::vector<Object::idType> files;///< File ids so far added to the archive
std::mutex filesLock; ///< Files vector lock
/// Function to safely add new file ids to `files`
std::function addFile = [&](Object::idType id) {std::lock_guard lock(filesLock); files.emplace_back(id); };
/// Technically the progtest task says that only the files from the last backup should be compared against...
/// Map used to look up the id of an already stored File object by its relative path
std::map<std::string, Object::idType> prevArchiveFiles;
{
auto prevArchiveFilesList = ctx.repo->getObjects(Object::ObjectType::File);
prevArchiveFiles = {prevArchiveFilesList.begin(), prevArchiveFilesList.end()};
}
ctx.repo->clearCache(Object::ObjectType::File);
/// The scope below holds everything the worker tasks reference, it is left only after all the tasks are finished
{
/// Calculate the average speed of backup
RunningDiffAverage avg(
[&]() { return workerStats.bytesWritten.load(); },
100, 100);
/// Show the progress of backup
Progress progress([this, ctx](const std::string &s, int l) { ctx.logger->write(s, l); },
{[&]() { return std::to_string(workerStats.filesWritten.load()); },
"/",
[&]() { return std::to_string(runnerStats.filesToSaveCount); },
" files saved, ",
[&]() { return std::to_string(runnerStats.filesSkipped); },
" files skipped, ",
[&]() { return BytesFormatter::formatStr((workerStats.bytesWritten.load() + workerStats.bytesSkipped.load())); },
" / ",
[&]() { return BytesFormatter::formatStr(runnerStats.bytesToSave); },
" read @ ",
[&]() { return BytesFormatter::formatStr(avg.get() * 10); },
"/s"},
ctx.repo->getConfig());
/// Thread pool for backup tasks, prints to progress on any errors
/// Uses the "threads" config value if it exists, the hardware concurrency otherwise
ThreadPool threadPool([&](const std::string &error) {
progress.print("Error: " + error, 0);
},
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());
/// Container of ChangeDetectors built using the config of the repository
ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());
/// Function to spawn a rechunking task
/// Only files of type Normal add their size to bytesToSave, every entry is counted in filesToSaveCount
auto saveFile = [&, this](const std::filesystem::path &absPath, const std::filesystem::path &relPath) {
runnerStats.bytesToSave += File::getFileType(absPath) == File::Type::Normal ? std::filesystem::file_size(absPath) : 0;
runnerStats.filesToSaveCount++;
threadPool.push([&, relPath, absPath]() {
addFile(backupChunkFile(absPath, relPath.u8string(), workerCallback, ctx));
progress.print("Copied: " + relPath.u8string(), 1);
});
};
/// Task to process an individual file in the backup
std::function<void(std::filesystem::path)> processFile;
/// If it's a full backup, just save the file, otherwise re-chunk it only if it's changed
if (fullBackup)
processFile =
[&, this](const std::filesystem::path &p) {
saveFile(p, p.lexically_relative(from).u8string());
};
else
processFile =
[&, this](const std::filesystem::path &p) {
auto relPath = p.lexically_relative(from).u8string();
if (prevArchiveFiles.count(relPath) != 0) {
File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(prevArchiveFiles.at(relPath)));
/// Not changed according to the change detectors: reuse the stored File object
if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) {
addFile(repoFile.id);
ctx.repo->addToCache(repoFile);
progress.print("Skipped: " + relPath, 1);
runnerStats.filesSkipped++;
return;
}
}
saveFile(p, relPath);
return;
};
/// Start the backup with the root directory and empty ignore list
threadPool.push([&]() {
processDirWithIgnore(
from,
{},
[&](std::function<void()> f) { threadPool.push(std::move(f)); },
processFile);
});
/// Wait for all the tasks to finish
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
}
ctx.logger->write("\n", 1);
/// Print the totals of written and skipped bytes
auto written = BytesFormatter::format(workerStats.bytesWritten);
auto skipped = BytesFormatter::format(workerStats.bytesSkipped);
ctx.logger->write(written.prefix + " written: " + written.number + '\n', 1);
ctx.logger->write(skipped.prefix + " skipped: " + skipped.number + '\n', 1);
/// The archive is named after the current local time
auto time = std::time(0);
auto ltime = std::localtime(&time);
std::stringstream s;
s << std::put_time(ltime, "%d-%m-%Y %H-%M-%S");
/// Avoid archive name collisions
while (ctx.repo->exists(Object::ObjectType::Archive, s.str())) s << "N";
Archive a(ctx.repo->getId(), s.str(), time, files, fullBackup);
ctx.repo->putObject(a);
}
/// Chunks the file at \p orig and stores it (and its chunks) in the repository under the name \p saveAs
/// \param orig     Absolute path to the file
/// \param saveAs   UTF-8 encoded file name to save as
/// \param callback Stats callback, invoked as (bytes written, bytes skipped, files written)
/// \param ctx      Context giving access to the repository
/// \return ID of the saved File object
/// \throws Exception if \p orig is a special file, can't be opened, changed size during the backup,
///                   or if a quit was requested before the file was completely processed
Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, workerStatsFunction &callback, Context ctx) {
    /// If it's a symlink or directory, treat it specially
    /// The order of checks is important, because is_directory follows the symlink
    if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) {
        auto contents = File::getFileContents(orig);
        Chunk c(ctx.repo->getId(), MD5::calculate(contents), contents);
        File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.md5, {c.id}, File::getFileType(orig));
        ctx.repo->putObject(c);
        ctx.repo->putObject(f);
        /// Count the entry as saved, same as for regular files, so the "saved" counter can reach the total
        callback(0, 0, 1);
        return f.id;
    }

    if (!std::filesystem::is_regular_file(orig))
        throw Exception(orig.u8string() + " is a special file, not saving");

    std::ifstream ifstream(orig, std::ios::in | std::ios::binary);
    if (!ifstream) throw Exception("Couldn't open " + orig.u8string() + " for reading");

    std::unique_ptr<Chunker> chunker = ChunkerFactory::getChunker(ctx.repo->getConfig(), ifstream.rdbuf());

    /// The setting is the same for every chunk, so look it up only once
    const bool dedup = ctx.repo->getConfig().getStr("dedup") == "on";

    MD5 fileHash;
    std::vector<Object::idType> fileChunks;
    unsigned long long size = 0;

    /// chunkp.first is the hash of the chunk, chunkp.second is its data
    for (const auto &chunkp: *chunker) {
        /// Exit when asked to
        if (Signals::shouldQuit) break;

        Object::idType chunkId;
        size += chunkp.second.size();

        if (dedup && ctx.repo->exists(Object::ObjectType::Chunk, chunkp.first)) {
            /// If the chunk already exists, reuse it
            chunkId = ctx.repo->getObjectId(Object::ObjectType::Chunk, chunkp.first);
            callback(0, chunkp.second.size(), 0);
        } else {
            /// Otherwise, write it
            Chunk c(ctx.repo->getId(), chunkp.first, chunkp.second);
            chunkId = c.id;
            callback(c.data.size(), 0, 0);
            ctx.repo->putObject(c);
        }

        fileHash.feedData(chunkp.second);
        fileChunks.emplace_back(chunkId);
    }

    /// We might have exited in the loop before, so we don't save an incomplete file
    if (Signals::shouldQuit) throw Exception("Quitting!");

    if (size != File::getFileSize(orig)) {
        throw Exception("Something really bad happened or file " + orig.u8string() + " changed during backup");
    }

    File f(ctx.repo->getId(), saveAs, size, File::getFileMtime(orig), fileHash.getHash(), fileChunks, File::getFileType(orig));
    ctx.repo->putObject(f);
    callback(0, 0, 1);

    return f.id;
}

28
src/commands/CommandRun.h Normal file
View File

@@ -0,0 +1,28 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDRUN_H
#define SEMBACKUP_COMMANDRUN_H
#include "Command.h"
#include "CommandsCommon.h"
/// Runs the backup according to the config in the Repository
class CommandRun : public Command {
public:
/// Constructs the command with the name "run"
CommandRun();
/// Runs the backup
/// \param ctx Context providing the repository and the logger
void run(Context ctx) override;
private:
/// Internal function to chunk the file and save it
/// \param orig Absolute path to the file
/// \param saveAs UTF-8 encoded file name to save as
/// \param callback Stats callback
/// \param ctx Context providing the repository to save the file into
/// \return ID of the saved file
/// \throws Exception if the file is a special file, can't be read, changed during backup, or a quit was requested
Object::idType backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, CommandsCommon::workerStatsFunction &callback, Context ctx);
};
#endif//SEMBACKUP_COMMANDRUN_H

View File

@@ -0,0 +1,67 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandsCommon.h"
#include <fstream>
#include <regex>
#include "../Exception.h"
#include "../Signals.h"
/// Adds the given deltas to the matching atomic counters of \p to
void CommandsCommon::workerCallback(unsigned long long int bytesWritten, unsigned long long int bytesSkipped, unsigned long long int filesWritten, WorkerStats &to) {
    to.bytesWritten.fetch_add(bytesWritten);
    to.bytesSkipped.fetch_add(bytesSkipped);
    to.filesWritten.fetch_add(filesWritten);
}
/// Checks whether the UTF-8 representation of \p p starts with the UTF-8 representation of \p prefix
/// Note: this is a plain string prefix comparison, path components are not taken into account
bool CommandsCommon::isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p) {
    /// Convert each path only once instead of on every comparison step
    const auto prefixStr = prefix.u8string();
    const auto pathStr = p.u8string();

    if (prefixStr.size() > pathStr.size()) return false;
    return pathStr.compare(0, prefixStr.size(), prefixStr) == 0;
}
/// Processes the entries of \p dir, spawning tasks for found files and subdirectories
/// \param dir         Directory to iterate through
/// \param ignore      Regular expressions inherited from the parent directories; lines of a ".ignore" file in \p dir are appended
/// \param spawner     Function used to spawn other tasks
/// \param processFile Task to spawn for every entry that is not ignored
/// \throws Exception if \p dir is not a directory
/// \throws std::regex_error if any of the ignore rules is not a valid regular expression
void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile) {
    if (!std::filesystem::is_directory(dir)) throw Exception(dir.u8string() + " is not a directory!");

    /// Don't process the directory if it has a ".nobackup" file
    if (std::filesystem::exists(dir / ".nobackup")) return;

    /// If it has an .ignore file, add every line of it into our ignore vector
    if (std::filesystem::exists(dir / ".ignore")) {
        std::ifstream ignorefile(dir / ".ignore", std::ios::in);
        std::string line;
        while (std::getline(ignorefile, line)) {
            ignore.emplace_back(line);
        }
    }

    /// Compile the rules once per directory, constructing a std::regex for every entry is very expensive
    std::vector<std::regex> ignoreRules;
    ignoreRules.reserve(ignore.size());
    for (const auto &rule: ignore) ignoreRules.emplace_back(rule);

    /// For each directory entry...
    for (const auto &dirEntry: std::filesystem::directory_iterator(dir)) {
        /// Break in case exit was requested by the user
        if (Signals::shouldQuit) break;

        /// Don't process the entry if its file name fully matches any of the ignore rules
        const auto name = dirEntry.path().filename().u8string();
        const bool ignored = std::any_of(ignoreRules.begin(), ignoreRules.end(),
                                         [&name](const std::regex &rule) { return std::regex_match(name, rule); });
        if (ignored) continue;

        /// If it's a directory, spawn a task to process the entries in it
        if (!dirEntry.is_symlink() && dirEntry.is_directory()) {
            spawner([dirEntry, ignore, spawner, processFile]() {
                processDirWithIgnore(dirEntry.path(), ignore, spawner, processFile);
            });
            /// Don't save the dir if it has a .nobackup file
            if (std::filesystem::exists(dirEntry.path() / ".nobackup")) continue;
        }

        /// Spawn a task to process each individual file
        spawner([processFile, dirEntry]() {
            processFile(dirEntry);
        });
    }
}

View File

@@ -0,0 +1,48 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDSCOMMON_H
#define SEMBACKUP_COMMANDSCOMMON_H
#include <atomic>
#include <filesystem>
#include <functional>
/// Helpers shared by the command implementations
namespace CommandsCommon {
/// Stats callback type, the arguments are: bytes written, bytes skipped, files written
using workerStatsFunction = std::function<void(unsigned long long, unsigned long long, unsigned long long)>;
/// Internal function for recursive directory processing, taking into account ".ignore" and ".nobackup" files
/// \param dir Const reference to the path of directory to iterate through
/// \param ignore List of regular expressions, entries whose file name fully matches any of them are skipped
/// \param spawner Function to spawn other tasks
/// \param processFile Task to spawn on found files
/// \throws Exception if \p dir is not a directory
void processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile);
/// Counters updated through workerCallback
struct WorkerStats {
public:
std::atomic<unsigned long long> bytesWritten = 0;///< Sum of the "bytes written" values reported so far
std::atomic<unsigned long long> bytesSkipped = 0;///< Sum of the "bytes skipped" values reported so far
std::atomic<unsigned long long> filesWritten = 0;///< Sum of the "files written" values reported so far
};
/// Counters describing the work found by the backup run
struct RunnerStats {
public:
std::atomic<unsigned long long> bytesToSave = 0;///< Total size of the regular files queued for saving
std::atomic<unsigned long long> filesToSaveCount = 0;///< Number of entries queued for saving
std::atomic<unsigned long long> filesSkipped = 0;///< Number of entries skipped as unchanged
};
/// Checks if \p p has \p prefix as prefix
/// \param prefix Constant reference to the prefix path
/// \param p Constant reference to the checked path
/// \return True if \p p contains \p prefix at its prefix, False otherwise
bool isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p);
/// Adds the given values to the corresponding counters of \p to
/// \param bytesWritten Number of bytes to add to WorkerStats::bytesWritten
/// \param bytesSkipped Number of bytes to add to WorkerStats::bytesSkipped
/// \param filesWritten Number of files to add to WorkerStats::filesWritten
/// \param to Reference to the WorkerStats to update
void workerCallback(unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten, WorkerStats &to);
};// namespace CommandsCommon
#endif//SEMBACKUP_COMMANDSCOMMON_H

82
src/crypto/AES.cpp Normal file
View File

@@ -0,0 +1,82 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#include "AES.h"
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/rand.h>
#include "../Exception.h"
/// Password-based overload: derives the key from \p password and \p salt and encrypts \p in with it
std::vector<char> AES::encrypt(const std::vector<char> &in, const std::string &password, const std::string &salt) {
    const auto derivedKey = AES::deriveKey(password, salt);
    return AES::encrypt(in, derivedKey);
}
/// Password-based overload: derives the key from \p password and \p salt and decrypts \p in with it
std::vector<char> AES::decrypt(const std::vector<char> &in, const std::string &password, const std::string &salt) {
    const auto derivedKey = AES::deriveKey(password, salt);
    return AES::decrypt(in, derivedKey);
}
/// Encrypts \p in with AES-256-CBC using \p key
/// The output starts with 32 freshly generated random bytes (passed to OpenSSL as the IV), followed by the ciphertext
/// \throws Exception on any error
std::vector<char> AES::encrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key) {
    /// Number of random bytes stored in front of the ciphertext
    constexpr int ivLen = 32;

    std::unique_ptr<EVP_CIPHER_CTX, decltype(&EVP_CIPHER_CTX_free)> ctx(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
    if (!ctx) throw Exception("Error initializing encryption context!");

    /// Room for the IV, the data and one extra block of padding
    std::vector<char> out(in.size() + AES_BLOCK_SIZE + ivLen);

    /// RAND_bytes returns 1 on success, but both 0 and -1 are possible on failure, so `!RAND_bytes()` is not enough
    if (RAND_bytes(reinterpret_cast<unsigned char *>(out.data()), ivLen) != 1)
        throw Exception("Error generating IV!");

    if (!EVP_EncryptInit_ex(ctx.get(), EVP_aes_256_cbc(), nullptr, key.data(), reinterpret_cast<const unsigned char *>(out.data())))
        throw Exception("Error encrypting!");

    int outlen = static_cast<int>(out.size()) - ivLen;

    if (!EVP_EncryptUpdate(ctx.get(), reinterpret_cast<unsigned char *>(out.data() + ivLen), &outlen, reinterpret_cast<const unsigned char *>(in.data()), static_cast<int>(in.size())))
        throw Exception("Error encrypting!");

    int finlen = 0;
    if (!EVP_EncryptFinal_ex(ctx.get(), reinterpret_cast<unsigned char *>(out.data() + outlen + ivLen), &finlen))
        throw Exception("Error encrypting!");

    /// Cut off the unused part of the buffer
    out.resize(outlen + finlen + ivLen);
    return out;
}
std::vector<char> AES::decrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key) {
if (in.size() < 32) throw Exception("Array to decrypt is too small!");
std::unique_ptr<EVP_CIPHER_CTX, decltype(&EVP_CIPHER_CTX_free)> ctx(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
if (!ctx) throw Exception("Error initializing encryption context!");
std::vector<char> out(in.size() - 32);
int outlen = static_cast<int>(out.size());
if (!EVP_DecryptInit_ex(ctx.get(), EVP_aes_256_cbc(), nullptr, key.data(), reinterpret_cast<const unsigned char *>(in.data())))
throw Exception("Error decrypting!");
if (!EVP_DecryptUpdate(ctx.get(), reinterpret_cast<unsigned char *>(out.data()), &outlen, reinterpret_cast<const unsigned char *>(in.data() + 32), static_cast<int>(in.size() - 32)))
throw Exception("Error decrypting!");
int finlen = 0;
if (!EVP_DecryptFinal_ex(ctx.get(), (unsigned char *) (out.data() + outlen), &finlen))
throw Exception("Error decrypting!");
out.resize(outlen + finlen);
return out;
}
std::array<uint8_t, 32> AES::deriveKey(const std::string &password, const std::string &salt) {
std::array<uint8_t, 32> key;//NOLINT
if (!PKCS5_PBKDF2_HMAC_SHA1(password.data(),
static_cast<int>(password.length()),
reinterpret_cast<const unsigned char *>(salt.data()),
static_cast<int>(salt.length()),
10000,
32,
key.data()))
throw Exception("Error deriving key!");
return key;
}

59
src/crypto/AES.h Normal file
View File

@@ -0,0 +1,59 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#ifndef SEMBACKUP_AES_H
#define SEMBACKUP_AES_H
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
/// Utility class to handle encryption/decryption of byte vectors
/**
 * Based on: https://wiki.openssl.org/index.php/EVP_Symmetric_Encryption_and_Decryption
 *
 * The encrypted vectors start with 32 random bytes used as the IV, followed by the AES-256-CBC ciphertext
 */
class AES {
public:
/// Encrypts the provided \p in vector using \p password and \p salt
/// \param in Constant reference to to-be-encrypted vector
/// \param password Constant reference to the password
/// \param salt Constant reference to the salt
/// \return Encrypted vector of size at most original + 48 (16 for possible padding, 32 for the IV)
/// \throws Exception on any error
static std::vector<char> encrypt(const std::vector<char> &in, const std::string &password, const std::string &salt);
/// Decrypts the provided \p in vector using \p password and \p salt
/// \param in Constant reference to to-be-decrypted vector
/// \param password Constant reference to the password
/// \param salt Constant reference to the salt
/// \return Decrypted vector
/// \throws Exception on any error
static std::vector<char> decrypt(const std::vector<char> &in, const std::string &password, const std::string &salt);
/// Encrypts the provided \p in vector using \p key
/// \param in Constant reference to to-be-encrypted vector
/// \param key Constant reference to the key
/// \return Encrypted vector of size at most original + 48 (16 for possible padding, 32 for the IV)
/// \throws Exception on any error
static std::vector<char> encrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key);
/// Decrypts the provided \p in vector using \p key
/// \param in Constant reference to to-be-decrypted vector, must be at least 32 bytes long (the IV)
/// \param key Constant reference to the key
/// \return Decrypted vector
/// \throws Exception on any error
static std::vector<char> decrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key);
/// Generates a key for the encryption using \p password and \p salt using PKCS5_PBKDF2_HMAC_SHA1
/// \param password Constant reference to the password
/// \param salt Constant reference to the salt
/// \return Derived 32-byte key
/// \throws Exception on any error
static std::array<uint8_t, 32> deriveKey(const std::string &password, const std::string &salt);
};
#endif//SEMBACKUP_AES_H

11
src/crypto/CRC32.cpp Normal file
View File

@@ -0,0 +1,11 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "CRC32.h"
/// Computes the CRC32 of \p in using zlib's crc32
CRC32::crcType CRC32::calculate(const std::vector<char> &in) {
    /// Calling crc32 with a null buffer yields the initial value of the checksum
    const crcType initial = crc32(0L, nullptr, 0);
    const auto *bytes = reinterpret_cast<const Bytef *>(in.data());
    return crc32(initial, bytes, in.size());
}

25
src/crypto/CRC32.h Normal file
View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_CRC32_H
#define SEMBACKUP_CRC32_H
#include <cstdint>
#include <vector>
#include <zlib.h>
/// Utility class to compute CRC32 values of vectors of chars
class CRC32 {
public:
/// Type of the checksum, same as the type used by zlib
using crcType = uLong;
/// Calculates the CRC32 of given vector
/// \param in Constant reference to a vector of chars
/// \return CRC32 result
static crcType calculate(const std::vector<char> &in);
};
#endif//SEMBACKUP_CRC32_H

48
src/crypto/MD5.cpp Normal file
View File

@@ -0,0 +1,48 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#include "MD5.h"
#include "../Exception.h"
std::string MD5::calculate(const std::vector<char> &in) {
MD5 hasher;
hasher.feedData(in);
return hasher.getHash();
}
/// Checks that the context was allocated and initializes it for MD5
/// \throws Exception if either step failed
MD5::MD5() {
    const bool initialized = mdctx && EVP_DigestInit_ex(mdctx.get(), EVP_md5(), nullptr);
    if (!initialized)
        throw Exception("Can't create hashing context!");
}
void MD5::feedData(const std::vector<char> &in) {
if (in.empty()) return;
if (!EVP_DigestUpdate(mdctx.get(), in.data(), in.size()))
throw Exception("Error hashing!");
}
std::string MD5::getHash() {
std::array<char, 16> out;
unsigned int s = 0;
if (!EVP_DigestFinal_ex(mdctx.get(), reinterpret_cast<unsigned char *>(out.data()), &s))
throw Exception("Error hashing!");
if (s != out.size())
throw Exception("Error hashing!");
if (!EVP_MD_CTX_reset(mdctx.get()))
throw Exception("Error hashing!");
return {out.begin(), out.end()};
}
/// One-shot helper: copies the bytes of \p in into a vector and hashes it
std::string MD5::calculate(const std::string &in) {
    return MD5::calculate(std::vector<char>(in.begin(), in.end()));
}

48
src/crypto/MD5.h Normal file
View File

@@ -0,0 +1,48 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_MD5_H
#define SEMBACKUP_MD5_H
#include <array>
#include <memory>
#include <vector>
#include <openssl/evp.h>
/// Class to handle MD5 hashing
/**
 * Based on: https://wiki.openssl.org/index.php/EVP_Message_Digests
 */
class MD5 {
public:
/// Constructs an empty MD5 hasher instance
/// \throws Exception on initialization error
MD5();
/// Calculates the hash for a given \p in char vector
/// \param in Constant reference to an input vector
/// \return MD5 hash of \p in, as 16 raw bytes
static std::string calculate(const std::vector<char> &in);
/// Calculates the hash for a given \p in string
/// \param in Constant reference to an input string
/// \return MD5 hash of \p in, as 16 raw bytes
static std::string calculate(const std::string &in);
/// Append a vector of chars to the current hash
/// \param in Constant reference to an input vector, empty vectors are ignored
/// \throws Exception on any error
void feedData(const std::vector<char> &in);
/// Returns the hash, resets the hashing context
/// \return MD5 hash of all the data fed so far, as 16 raw bytes
/// \throws Exception on any error
std::string getHash();
private:
const std::unique_ptr<EVP_MD_CTX, decltype(&EVP_MD_CTX_free)> mdctx{EVP_MD_CTX_new(), &EVP_MD_CTX_free};///< Current hashing context
};
#endif//SEMBACKUP_MD5_H

View File

@@ -0,0 +1,47 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "CheckFilter.h"
#include "../crypto/CRC32.h"
#include "../repo/Serialize.h"
/// Filter interface entry point, forwards to the static implementation
std::vector<char> CheckFilter::filterWrite(std::vector<char> from) const {
    auto framed = filterWriteStatic(std::move(from));
    return framed;
}
/// Filter interface entry point, forwards to the static implementation
std::vector<char> CheckFilter::filterRead(std::vector<char> from) const {
    auto payload = filterReadStatic(std::move(from));
    return payload;
}
/// Produces the magic prefix, followed by the serialized \p from vector and the serialized CRC32 of \p from
std::vector<char> CheckFilter::filterWriteStatic(std::vector<char> from) {
    std::vector<char> framed(magic);

    Serialize::serialize(from, framed);

    auto checksum = CRC32::calculate(from);
    Serialize::serialize(checksum, framed);

    return framed;
}
/// Verifies the magic prefix and the CRC32 of \p from and returns the wrapped payload
/// \throws Exception if the input is too small, the prefix is wrong or the CRC doesn't match
std::vector<char> CheckFilter::filterReadStatic(std::vector<char> from) {
    if (from.size() < magic.size()) throw Exception("Input is corrupted (too small)!");

    /// Walk the input and the magic in lockstep, the cursor ends up right after the prefix
    auto cursor = from.cbegin();
    for (const char expected: magic) {
        if (*cursor != expected) throw Exception("Magic prefix is wrong!");
        ++cursor;
    }

    auto payload = Serialize::deserialize<std::vector<char>>(cursor, from.cend());
    const auto computedCrc = CRC32::calculate(payload);
    const auto storedCrc = Serialize::deserialize<CRC32::crcType>(cursor, from.cend());

    if (computedCrc != storedCrc) throw Exception("CRC mismatch!");

    return payload;
}

36
src/filters/CheckFilter.h Normal file
View File

@@ -0,0 +1,36 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_CHECKFILTER_H
#define SEMBACKUP_CHECKFILTER_H
#include "Filter.h"
/// Filter implementation that checks the input for corruption using CRC
/**
 * Additionally, it has static methods for work outside FilterContainer%s
 */
class CheckFilter : public Filter {
public:
/// \copydoc Filter::filterWrite
/// \copydoc CheckFilter::filterWriteStatic
std::vector<char> filterWrite(std::vector<char> from) const override;
/// \copydoc Filter::filterRead
/// \copydoc CheckFilter::filterReadStatic
std::vector<char> filterRead(std::vector<char> from) const override;
/// Adds CRC hash and magic string to the \p from vector
/// \param from Vector of chars to wrap
/// \return Magic string, followed by the serialized \p from and its serialized CRC32
static std::vector<char> filterWriteStatic(std::vector<char> from);
/// Checks the \p from vector and removes the metadata
/// \param from Vector of chars produced by filterWriteStatic
/// \return The original vector that was passed to filterWriteStatic
/// \throws Exception on any error
static std::vector<char> filterReadStatic(std::vector<char> from);
private:
static const inline std::vector<char> magic{'s', 'e', 'm', 'b', 'a'};///< Magic prefix put in front of the data
};
#endif//SEMBACKUP_CHECKFILTER_H

6
src/filters/Filter.cpp Normal file
View File

@@ -0,0 +1,6 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#include "Filter.h"
/// Out-of-line definition of the defaulted virtual destructor
Filter::~Filter() = default;

30
src/filters/Filter.h Normal file
View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifndef SEMBACKUP_FILTER_H
#define SEMBACKUP_FILTER_H
#include <vector>
/// Interface class for I/O filters
class Filter {
public:
/// Applies the filter to \p from vector and returns the result
/// Note: the vector is passed by value, as it allows to avoid copying with std::move in case the filter modifies the \p from vector in-place
/// \param from Source vector of chars
/// \return Filtered vector of chars
virtual std::vector<char> filterWrite(std::vector<char> from) const = 0;
/// Reverses the applied filter from \p from vector and returns the result
/// Note: the vector is passed by value, as it allows to avoid copying with std::move in case the filter modifies the \p from vector in-place
/// \param from Source vector of chars
/// \return Filtered vector of chars
virtual std::vector<char> filterRead(std::vector<char> from) const = 0;
/// Default virtual destructor
virtual ~Filter();
};
#endif//SEMBACKUP_FILTER_H

17
src/filters/FilterAES.cpp Normal file
View File

@@ -0,0 +1,17 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#include "FilterAES.h"
#include "../crypto/AES.h"
/// Encrypts \p from with the key derived at construction
std::vector<char> FilterAES::filterWrite(std::vector<char> from) const {
    auto encrypted = AES::encrypt(from, key);
    return encrypted;
}
/// Decrypts \p from with the key derived at construction
std::vector<char> FilterAES::filterRead(std::vector<char> from) const {
    auto decrypted = AES::decrypt(from, key);
    return decrypted;
}
/// Derives the encryption key from \p password and \p salt once, at construction
FilterAES::FilterAES(const std::string &password, const std::string &salt)
    : key(AES::deriveKey(password, salt)) {
}

37
src/filters/FilterAES.h Normal file
View File

@@ -0,0 +1,37 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifndef SEMBACKUP_FILTERAES_H
#define SEMBACKUP_FILTERAES_H
#include <array>
#include <cstdint>
#include <string>
#include "Filter.h"
/// Filter implementation that encrypts/decrypts data using provided password and salt
class FilterAES : public Filter {
public:
/// Constructs the filter, using \p password and \p salt to generate the encryption key
/// \param password Constant reference to password string
/// \param salt Constant reference to salt string
/// \throws Exception if the key derivation fails
FilterAES(const std::string &password, const std::string &salt);
/// Encrypts the \p from vector
/// \copydoc Filter::filterWrite
/// \throws Exception on any error
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Decrypts the \p from vector
/// \copydoc Filter::filterRead
/// \throws Exception on any error
std::vector<char> filterRead(std::vector<char> from) const override;
private:
const std::array<uint8_t, 32> key;///< Key used for encryption, derived from \p password and \p salt
};
#endif//SEMBACKUP_FILTERAES_H

View File

@@ -0,0 +1,23 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#include "FilterContainer.h"
/// Defaulted constructor, the container starts with no filters
FilterContainer::FilterContainer() = default;
/// Takes ownership of \p f and appends it to the end of the filter chain
/// \return Reference to this container, so the calls can be chained
FilterContainer &FilterContainer::addFilter(std::unique_ptr<Filter> &&f) {
    filters.push_back(std::move(f));
    return *this;
}
std::vector<char> FilterContainer::filterWrite(std::vector<char> from) const {
for (auto const &f: filters) from = f->filterWrite(std::move(from));
return from;
}
/// Passes \p from through every filter in reverse order of insertion, undoing filterWrite
std::vector<char> FilterContainer::filterRead(std::vector<char> from) const {
    auto it = filters.crbegin();
    while (it != filters.crend()) {
        from = (*it)->filterRead(std::move(from));
        ++it;
    }
    return from;
}

View File

@@ -0,0 +1,37 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifndef SEMBACKUP_FILTERCONTAINER_H
#define SEMBACKUP_FILTERCONTAINER_H
#include <memory>
#include <vector>
#include "Filter.h"
/// Convenience Filter implementation, that applies multiple Filter%s in succession
class FilterContainer : public Filter {
public:
/// Constructs an empty FilterContainer
FilterContainer();
/// Adds a Filter into itself, taking ownership of it
/// \param f Rvalue reference to a unique pointer to Filter
/// \return Reference to itself
FilterContainer &addFilter(std::unique_ptr<Filter> &&f);
/// Applies the filters in order of insertion
/// \copydoc Filter::filterWrite
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Applies the filters in reverse order of insertion
/// \copydoc Filter::filterRead
std::vector<char> filterRead(std::vector<char> from) const override;
private:
std::vector<std::unique_ptr<Filter>> filters;///< Vector of unique pointers to Filter%s, in order of insertion
};
#endif//SEMBACKUP_FILTERCONTAINER_H

View File

@@ -0,0 +1,32 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#include "FilterFactory.h"
#include "../Exception.h"
#include "CheckFilter.h"
#include "FilterAES.h"
#include "FilterShift.h"
#include "FilterShiftSecret.h"
#include "FilterZlib.h"
/// Builds the Filter named by \p type, reading its parameters from \p config
/// \throws Exception if \p type is "none" or not a known filter name
std::unique_ptr<Filter> FilterFactory::makeFilter(const std::string &type, const Config &config) {
    if (type == "none") throw Exception("Trying to make a \"none\" filter!");

    if (type == "aes")
        return std::make_unique<FilterAES>(config.getStr("password"), config.getStr("salt"));

    if (type == "zlib")
        return std::make_unique<FilterZlib>(config.getInt("compression-level"));

    if (type == "crc")
        return std::make_unique<CheckFilter>();

#ifdef TEST
    /// Filters available only in test builds
    if (type == "shiftC")
        return std::make_unique<FilterShift>(config.getInt("compression-level"));

    if (type == "shiftE")
        return std::make_unique<FilterShiftSecret>(config.getStr("password"), config.getStr("salt"));
#endif

    throw Exception("Unknown filter value");
}

View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifndef SEMBACKUP_FILTERFACTORY_H
#define SEMBACKUP_FILTERFACTORY_H
#include <memory>
#include <string>
#include "../Config.h"
#include "Filter.h"
/// Utility factory class for Filter%s
/// Has only a static member function, so it never needs to be instantiated
class FilterFactory {
public:
/// Constructs a Filter of type \p type according to \p config
/// \param type Constant reference to a string containing the type of filter to construct
/// \param config Constant reference to Config which will be used to determine constructed Filter%'s parameters
/// \return Unique pointer to the constructed Filter
/// \throws Exception if \p type is "none" or is not a known filter type
static std::unique_ptr<Filter> makeFilter(const std::string &type, const Config &config);
};
#endif//SEMBACKUP_FILTERFACTORY_H

View File

@@ -0,0 +1,18 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifdef TEST
#include "FilterShift.h"
/// Adds shiftVal to every byte of \p from
/// \param from Bytes to transform (taken by value and modified in place)
/// \return The shifted bytes
std::vector<char> FilterShift::filterWrite(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it)
        *it = static_cast<char>(*it + shiftVal);
    return from;
}
/// Subtracts shiftVal from every byte of \p from, undoing filterWrite
/// \param from Bytes to transform (taken by value and modified in place)
/// \return The unshifted bytes
std::vector<char> FilterShift::filterRead(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it)
        *it = static_cast<char>(*it - shiftVal);
    return from;
}
FilterShift::FilterShift(int level) : shiftVal(level) {}
#endif

30
src/filters/FilterShift.h Normal file
View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifdef TEST
#ifndef SEMBACKUP_FILTERSHIFT_H
#define SEMBACKUP_FILTERSHIFT_H
#include "Filter.h"
/// Filter implementation that shifts every byte in input vector using provided value
/// \warning For testing purposes only! The whole class is compiled only when TEST is defined
class FilterShift : public Filter {
public:
/// Constructs the filter using \p level as shift value
/// \param level Number that will be added to each input byte
FilterShift(int level);
/// Adds the shift value to every byte
/// \copydoc Filter::filterWrite
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Subtracts the shift value from every byte
/// \copydoc Filter::filterRead
std::vector<char> filterRead(std::vector<char> from) const override;
private:
int shiftVal;///< Value to add to input bytes
};
#endif//SEMBACKUP_FILTERSHIFT_H
#endif//TEST

View File

@@ -0,0 +1,23 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifdef TEST
#include "FilterShiftSecret.h"
#include <string>
/// Adds shiftVal to every byte of \p from
/// \param from Bytes to transform (taken by value and modified in place)
/// \return The shifted bytes
std::vector<char> FilterShiftSecret::filterWrite(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it)
        *it = static_cast<char>(*it + shiftVal);
    return from;
}
/// Subtracts shiftVal from every byte of \p from, undoing filterWrite
/// \param from Bytes to transform (taken by value and modified in place)
/// \return The unshifted bytes
std::vector<char> FilterShiftSecret::filterRead(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it)
        *it = static_cast<char>(*it - shiftVal);
    return from;
}
/// Derives the shift value from the first characters of \p password and \p salt
/// \param password "Password" string, only its first character is used
/// \param salt     "Salt" string, only its first character is used
FilterShiftSecret::FilterShiftSecret(const std::string &password, const std::string &salt)
    : shiftVal(password[0] + salt[0]) {
}
#endif

View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifdef TEST
#ifndef SEMBACKUP_FILTERSHIFTSECRET_H
#define SEMBACKUP_FILTERSHIFTSECRET_H
#include <string>
#include "Filter.h"
/// Filter implementation that shifts every byte in input vector using a value derived from two provided strings
/// \warning For testing purposes only! The whole class is compiled only when TEST is defined
class FilterShiftSecret : public Filter {
public:
/// Constructs the filter using the sum of first bytes of \p password and \p salt to initialize shiftVal
/// \param password Constant reference to "password" string
/// \param salt Constant reference to "salt" string
FilterShiftSecret(const std::string &password, const std::string &salt);
/// Adds the shift value to every byte
/// \copydoc Filter::filterWrite
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Subtracts the shift value from every byte
/// \copydoc Filter::filterRead
std::vector<char> filterRead(std::vector<char> from) const override;
private:
int shiftVal = 0;///< Value to add to input bytes
};
#endif//SEMBACKUP_FILTERSHIFTSECRET_H
#endif//TEST

View File

@@ -0,0 +1,50 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#include "FilterZlib.h"
#include <zlib.h>
#include "../repo/Serialize.h"
std::vector<char> FilterZlib::filterWrite(std::vector<char> from) const {
uLongf outSize = compressBound(from.size());
std::vector<char> out;
Serialize::serialize('C', out);
Serialize::serialize(static_cast<unsigned long long>(from.size()), out);
uLongf sizeSize = out.size();
out.resize(sizeSize + outSize);
if (compress2(reinterpret_cast<Bytef *>(out.data() + sizeSize), &outSize, reinterpret_cast<const Bytef *>(from.data()), from.size(), level) !=
Z_OK)
throw Exception("Error compressing!");
out.resize(outSize + sizeSize);
return out;
}
std::vector<char> FilterZlib::filterRead(std::vector<char> from) const {
auto desI = from.cbegin();
char C = Serialize::deserialize<char>(desI, from.cend());
if (C != 'C') throw Exception("Bad compression prefix!");
uLongf size = Serialize::deserialize<unsigned long long>(desI, from.cend());
std::vector<char> out(size);
if (desI >= from.cend()) throw Exception("Unexpected end of archive!");
if (uncompress(reinterpret_cast<Bytef *>(out.data()), &size, reinterpret_cast<const Bytef *>(&(*desI)), std::distance(desI, from.cend())) !=
Z_OK)
throw Exception("Error decompressing!");
return out;
}
FilterZlib::FilterZlib(int level) : level(level) {}

31
src/filters/FilterZlib.h Normal file
View File

@@ -0,0 +1,31 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifndef SEMBACKUP_FILTERZLIB_H
#define SEMBACKUP_FILTERZLIB_H
#include "Filter.h"
/// Filter implementation that uses Zlib to compress data
class FilterZlib : public Filter {
public:
/// Creates the filter using \p level as compression level
/// \param level Compression level to use
FilterZlib(int level);
/// Compresses the \p from vector
/// \copydoc Filter::filterWrite
/// \throws Exception on any error
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Decompresses the \p from vector
/// \copydoc Filter::filterRead
/// \throws Exception on any error
std::vector<char> filterRead(std::vector<char> from) const override;
private:
int level = -1;///< Compression level to use, -1 is the Zlib default
};
#endif//SEMBACKUP_FILTERZLIB_H

129
src/main.cpp Normal file
View File

@@ -0,0 +1,129 @@
#include <iostream>
#include "BytesFormatter.h"
#include "Config.h"
#include "Context.h"
#include "Exception.h"
#include "Logger.h"
#include "Signals.h"
#include "commands/Command.h"
#include "commands/CommandDiff.h"
#include "commands/CommandList.h"
#include "commands/CommandListFiles.h"
#include "commands/CommandRestore.h"
#include "commands/CommandRun.h"
#include "repo/FileRepository.h"
#include "repo/Repository.h"
#include "repo/Serialize.h"
#include "repo/objects/Archive.h"
#include "repo/objects/File.h"
/// Builds a Config out of `--key value` command line pairs
/// \param argc Number of entries in \p argv
/// \param argv Arguments, expected to alternate between `--key` and its value
/// \return Config with every key/value pair added
/// \throws Exception if a key doesn't start with `--` or its value is missing
Config getConf(int argc, char *argv[]) {
    Config parsed;
    // Arguments come in pairs, so walk them two at a time
    for (int pos = 0; pos < argc; pos += 2) {
        const std::string raw = argv[pos];
        if (raw.compare(0, 2, "--") != 0) throw Exception("Options should start with --");

        std::string name = raw.substr(2);
        if (pos + 1 == argc) throw Exception("Option not specified for " + name);

        std::string value = argv[pos + 1];
        parsed.add(name, value);
    }
    return parsed;
}
/// Prints every known Config key with its type, default value, persistence flag and description
/// \return Always 0
int help() {
    for (auto const &entry: Config::keys) {
        const auto &name = entry.first;
        const auto &spec = entry.second;

        std::cout << "--" << name << " <" << Config::KeyTypeToStr.at(spec.type) << ">" << std::endl;
        // The default line is printed only for keys that actually have a default
        if (spec.defaultval.has_value())
            std::cout << " Default: " << spec.defaultval.value() << std::endl;
        const char *saved = spec.remember ? "yes" : "no";
        std::cout << " Is saved in repository: " << saved << std::endl;
        std::cout << " Info: " << spec.info << std::endl;
    }
    return 0;
}
/// Creates a FileRepository from \p conf and opens it
/// \param conf Config to construct the repository with
/// \return The opened repository, or nullptr if construction or opening threw
///         (the error is printed to standard output)
std::unique_ptr<Repository> openRepo(Config &conf) {
    std::unique_ptr<Repository> opened;
    try {
        auto candidate = std::make_unique<FileRepository>(conf);
        candidate->open();
        opened = std::move(candidate);
    } catch (const std::exception &e) {
        std::cout << "Error opening repo: " << e.what() << std::endl;
        return nullptr;
    }
    return opened;
}
/// Creates a new repository described by \p conf
/// \param conf Config to construct the repository with
/// \return 0 on success, -1 if constructing or initializing the repository threw
///         (the error is printed to standard output)
int init(Config &conf) {
    try {
        // Construction is inside the try block as well: the constructor reads Config values,
        // and a failure there should be reported in the same way as a failed init()
        FileRepository repo(conf);
        repo.init();
    } catch (const std::exception &e) {
        std::cout << "Error initializing repo: " << e.what() << std::endl;
        return -1;
    }
    return 0;
}
/// Program entry point
/**
 * Usage: `<program> <command> [--key value]...`
 * `help` prints the known options, `init` creates a repository, every other command
 * is looked up among the registered Command%s and run on the opened repository
 */
/// \return 0 on success, -1 on any error (including uncaught exceptions)
int main(int argc, char *argv[]) {
    try {
        Signals::setup();

        if (argc < 2) {
            std::cerr << "No argument specified" << std::endl;
            help();
            return -1;
        }

        const std::string opt = argv[1];
        if (opt == "help") {
            return help();
        }

        // Everything after the command name is treated as `--key value` pairs
        Config conf;
        try {
            conf = getConf(argc - 2, argv + 2);
        } catch (const std::exception &e) {
            std::cerr << "Error reading config!" << std::endl
                      << e.what() << std::endl;
            return -1;
        }

        if (opt == "init") {
            return init(conf);
        }

        auto repo = openRepo(conf);
        if (repo == nullptr) {
            std::cerr << "Can't open repo!" << std::endl;
            return -1;
        }

        Logger logger(conf.getInt("verbose"));
        Context ctx{&logger, repo.get()};

        std::unordered_map<std::string, std::unique_ptr<Command>> commands;
        commands.emplace(CommandDiff().name, std::make_unique<CommandDiff>());
        commands.emplace(CommandRestore().name, std::make_unique<CommandRestore>());
        commands.emplace(CommandRun().name, std::make_unique<CommandRun>());
        commands.emplace(CommandListFiles().name, std::make_unique<CommandListFiles>());
        commands.emplace(CommandList().name, std::make_unique<CommandList>());

        const auto command = commands.find(opt);
        if (command == commands.end()) {
            std::cerr << "Unknown argument" << std::endl;
            return -1;
        }
        command->second->run(ctx);
        return 0;
    } catch (const std::exception &e) {
        std::cerr << "Error!" << std::endl
                  << e.what() << std::endl;
        // Report the failure through the exit code too, so scripts can detect it
        return -1;
    } catch (...) {
        std::cerr << "Something very bad happened!" << std::endl;
        return -1;
    }
}

246
src/repo/FileRepository.cpp Normal file
View File

@@ -0,0 +1,246 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "FileRepository.h"
#include <exception>
#include <iterator>
#include <mutex>
#include "../filters/CheckFilter.h"
#include "../filters/FilterFactory.h"
#include "Object.h"
#include "Serialize.h"
/// Constructs the repository from \p config
/**
 * Reads the `repo` value as the root directory and `repo-target` (in MiB) as the write cache limit.
 * Both are read from the base class copy (`this->config`): the `config` parameter has already
 * been moved into the base class by the time the members are initialized
 */
/// \param config Config to use
FileRepository::FileRepository(Config config)
    : Repository(std::move(config)),
      root(std::filesystem::path(this->config.getStr("repo"))),
      // Widen before multiplying so that large targets don't overflow int arithmetic
      writeCacheMax(static_cast<unsigned long long>(this->config.getInt("repo-target")) * 1024 * 1024) {}
/// Checks whether the root directory exists and contains an `info` entry
/// \return True if both conditions hold, False otherwise
bool FileRepository::exists() {
    if (!std::filesystem::is_directory(root)) return false;
    return std::filesystem::exists(root / "info");
}
/// Takes the write cache lock and flushes the write cache
/// \return True
bool FileRepository::flush() {
    std::unique_lock<std::mutex> cacheGuard(writeCacheLock);
    // flushWriteCache() takes over the lock and releases it itself
    flushWriteCache(std::move(cacheGuard));
    return true;
}
/// Opens an existing repository
/**
 * Reads the stored Config from `info` and merges the current Config into it, sets up the
 * compression/encryption/crc filters and loads the `offsets` and `index` files.
 * The repository is marked as ready only after both files have been loaded successfully,
 * whatever the type of a thrown exception is
 */
/// \return True
/// \throws Exception if the repository is already open or initialized, doesn't exist,
///         or on any read error
bool FileRepository::open() {
    // Opening twice would add every filter a second time and reset the ready flag on failure,
    // which makes the destructor skip writing the indexes
    if (ready) throw Exception("Trying to open already opened repository!");
    if (!exists()) throw Exception("Repository doesn't exist!");

    auto readConf = Serialize::deserialize<Config>(CheckFilter::filterReadStatic(readFile(root / "info")));
    std::swap(config, readConf);
    config.merge(readConf);

    if (config.getStr("compression") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("compression"), config));
    if (config.getStr("encryption") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("encryption"), config));
    filters.addFilter(FilterFactory::makeFilter("crc", config));

    // Load into temporaries first, the members are only touched when both files were read
    auto offsets = Serialize::deserialize<std::pair<decltype(maxFileId), decltype(offsetIndex)>>(filters.filterRead(readFile(root / "offsets")));
    auto index = Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>(filters.filterRead(readFile(root / "index")));

    maxFileId = offsets.first;
    offsetIndex = std::move(offsets.second);
    keyIndex = std::move(index.first);
    largestUnusedId = index.second;

    ready = true;
    return true;
}
/// Creates a new repository in the root directory
/**
 * Creates the directory if needed, writes the Config into `info`
 * and sets up the compression/encryption/crc filters
 */
/// \return True
/// \throws Exception if the repository is already initialized or already exists,
///         or if the directory can't be created
bool FileRepository::init() {
    if (ready) throw Exception("Trying to initialize already initialized repository!");
    if (exists()) throw Exception("Trying to initialize already existing repository!");

    const bool haveDirectory = std::filesystem::is_directory(root) || std::filesystem::create_directories(root);
    if (!haveDirectory)
        throw Exception("Can't create directory " + root.u8string());

    const auto infoData = CheckFilter::filterWriteStatic(Serialize::serialize(config));
    writeFile(root / "info", infoData);

    // Optional stages first, the crc check is always the last filter added
    if (config.getStr("compression") != "none") {
        filters.addFilter(FilterFactory::makeFilter(config.getStr("compression"), config));
    }
    if (config.getStr("encryption") != "none") {
        filters.addFilter(FilterFactory::makeFilter(config.getStr("encryption"), config));
    }
    filters.addFilter(FilterFactory::makeFilter("crc", config));

    ready = true;
    return true;
}
/// Flushes the write cache and stores the `offsets` and `index` files
/// Does nothing if the repository was never opened or initialized
FileRepository::~FileRepository() {
    if (!ready) return;
    ready = false;

    flushWriteCache(std::unique_lock(writeCacheLock));

    // NOTE(review): writeFile() throws on failure and this is a destructor - confirm that is acceptable
    const auto offsetsData = filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex)));
    writeFile(root / "offsets", offsetsData);

    const auto indexData = filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId)));
    writeFile(root / "index", indexData);
}
/// Reads the object with id \p id from its storage file and passes it through the read filters
/// \param id ID of the object
/// \return Filtered bytes of the object
/// \throws Exception if the repository isn't ready or the id isn't in the offset index
std::vector<char> FileRepository::getObject(Object::idType id) const {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    std::unique_lock lock(repoLock);
    const auto found = offsetIndex.find(id);
    if (found == offsetIndex.end())
        throw Exception("Object with id " + std::to_string(id) + " doesn't exist!");
    // Copy the entry so the file can be read without holding the lock
    const auto where = found->second;
    lock.unlock();

    const auto storageFile = root / std::to_string(where.fileId);
    return filters.filterRead(readFile(storageFile, where.offset, where.length));
}
/// Serializes and filters \p obj and puts the result into the write cache
/// The cache is flushed once its size reaches writeCacheMax
/// \param obj Object to write
/// \return True
/// \throws Exception if the repository isn't ready
bool FileRepository::writeObject(const Object &obj) {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    // Serialization and filtering don't touch the cache, so they are done before locking
    auto payload = filters.filterWrite(Serialize::serialize(obj));

    std::unique_lock cacheGuard(writeCacheLock);
    writeCacheSize += payload.size();
    writeCache[obj.id] = std::move(payload);

    // Below the target file size: nothing else to do, the guard unlocks on return
    if (writeCacheSize < writeCacheMax) return true;

    flushWriteCache(std::move(cacheGuard));
    return true;
}
void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) {
if (writeCache.empty()) {
lockW.unlock();
return;
}
// Swap the cache for a new one and unlock the mutex so other threads can continue working
decltype(writeCache) objs;
std::swap(writeCache, objs);
writeCacheSize = 0;
decltype(maxFileId) currentFileId;
{
std::lock_guard lockI(repoLock);
currentFileId = maxFileId;
maxFileId++;
}
lockW.unlock();
unsigned long long offset = 0;
std::ofstream ofstream(root / std::to_string(currentFileId), std::ios::binary | std::ios::trunc | std::ios::out);
for (auto &i: objs) {
{
std::lock_guard lockI(repoLock);
offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size()));
}
offset += i.second.size();
ofstream.rdbuf()->sputn(i.second.data(), i.second.size());
}
}
/// Registers \p obj in the key index and then hands it to writeObject()
/// \param obj Object to store
/// \return True
bool FileRepository::putObject(const Object &obj) {
    // Index first, storage second
    {
        std::lock_guard lock(repoLock);
        auto &byKey = keyIndex[obj.type];
        byKey[obj.getKey()] = obj.id;
    }
    writeObject(obj);
    return true;
}
/// Deletion is not implemented, this function always throws
/// \param obj Unused
/// \throws Exception always: either because the repository isn't ready,
///         or because deletion isn't implemented
bool FileRepository::deleteObject(const Object &obj) {
    if (ready) {
        throw Exception("Deletion not implemented!");
    }
    throw Exception("Tried working with uninitialized repo!");
}
/// Reads \p size bytes of \p file, starting at \p offset
/// \param file   Path of the file to read
/// \param offset Position of the first byte to read
/// \param size   Number of bytes to read, at most absoluteMaxFileLimit
/// \return The bytes read
/// \throws Exception if \p size is over the limit, the file can't be opened,
///         or fewer than \p size bytes are available at \p offset
std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const {
    if (size > absoluteMaxFileLimit) {
        throw Exception("Tried to read " + std::to_string(size) +
                        " bytes from " + file.u8string() +
                        " which is more than absoluteMaxFileLimit");
    }

    std::ifstream input(file, std::ios::binary | std::ios::in);
    if (!input.is_open()) throw Exception("Can't open file " + file.u8string() + " for reading!");

    std::vector<char> contents(size);
    auto *raw = input.rdbuf();

    // Work on the stream buffer directly: seek to the object, then read it in one go
    const auto seekFailed = std::streampos(std::streamoff(-1));
    if (raw->pubseekpos(offset) == seekFailed) throw Exception("Unexpected end of file " + file.u8string());

    const auto got = raw->sgetn(contents.data(), size);
    if (got != size) throw Exception("Unexpected end of file " + file.u8string());

    return contents;
}
/// Reads the whole \p file
/// \param file Path of the file to read
/// \return All bytes of the file, an empty vector for an empty file
/// \throws Exception if \p file is not a regular file, or on any read error
std::vector<char> FileRepository::readFile(const std::filesystem::path &file) const {
    if (!std::filesystem::is_regular_file(file)) throw Exception("File " + file.u8string() + " is not a regular file!");

    const auto totalBytes = std::filesystem::file_size(file);
    // Empty files are answered directly, without opening them
    return totalBytes == 0 ? std::vector<char>{} : readFile(file, 0, totalBytes);
}
/// Writes \p data to \p file, replacing its previous contents
/// \param file Path of the file to write
/// \param data Bytes to write
/// \return True
/// \throws Exception if the file can't be opened, or not all of the data could be written
bool FileRepository::writeFile(const std::filesystem::path &file, const std::vector<char> &data) {
    std::ofstream ofstream(file, std::ios::binary | std::ios::trunc | std::ios::out);
    if (!ofstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for writing!");

    const auto written = ofstream.rdbuf()->sputn(data.data(), data.size());
    if (written < 0 || static_cast<unsigned long long>(written) != data.size())
        throw Exception("Couldn't write all the data for " + file.u8string());

    // sputn() may only have filled the stream buffer; flush here, because an error during
    // the implicit flush in the stream destructor would go unnoticed
    ofstream.flush();
    if (!ofstream.good())
        throw Exception("Couldn't write all the data for " + file.u8string());
    return true;
}
/// Looks up the id of the object with \p type and \p key, and returns that object
/// \param type Type of the object
/// \param key  Key of the object
/// \return Filtered bytes of the object
std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const {
    const Object::idType id = getObjectId(type, key);
    return getObject(id);
}
/// Looks up the id of the object with \p type and \p key in the key index
/// \param type Type of the object
/// \param key  Key of the object
/// \return ID stored for the key
/// \throws Exception if no object of \p type is indexed; the lookup of \p key itself uses at()
Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const {
    std::lock_guard lock(repoLock);
    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) throw Exception("No objects of requested type!");
    return ofType->second.at(key);
}
/// Returns a copy of all key/id pairs indexed for \p type
/// \param type Type of the objects
/// \return Vector of <key, id> pairs, empty if nothing of that type is indexed
std::vector<std::pair<std::string, Object::idType>> FileRepository::getObjects(Object::ObjectType type) const {
    std::lock_guard lock(repoLock);
    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) return {};
    // Copy the entries in the iteration order of the inner map
    return std::vector<std::pair<std::string, Object::idType>>(ofType->second.begin(), ofType->second.end());
}
/// Checks whether the key index has an entry for \p type and \p key
/// \param type Type of the object
/// \param key  Key of the object
/// \return True if such an entry exists, False otherwise
bool FileRepository::exists(Object::ObjectType type, const std::string &key) const {
    std::lock_guard lock(repoLock);
    const auto ofType = keyIndex.find(type);
    return ofType != keyIndex.end() && ofType->second.find(key) != ofType->second.end();
}
/// Hands out the current value of largestUnusedId and advances the counter
/// \return The id that was unused before the call
Object::idType FileRepository::getId() {
    std::lock_guard lock(repoLock);
    const Object::idType fresh = largestUnusedId;
    ++largestUnusedId;
    return fresh;
}
FileRepository::OffsetEntry::OffsetEntry(std::vector<char, std::allocator<char>>::const_iterator &in, const std::vector<char, std::allocator<char>>::const_iterator &end)
: fileId(Serialize::deserialize<decltype(fileId)>(in, end)),
offset(Serialize::deserialize<decltype(offset)>(in, end)),
length(Serialize::deserialize<decltype(length)>(in, end)) {
}
void FileRepository::OffsetEntry::serialize(std::vector<char> &out) const {
Serialize::serialize(fileId, out);
Serialize::serialize(offset, out);
Serialize::serialize(length, out);
}
FileRepository::OffsetEntry::OffsetEntry(unsigned long long int fileId, unsigned long long int offset, unsigned long long int length)
: fileId(fileId), offset(offset), length(length) {}
/// Removes all key index entries of type \p type
/// \param type Type of the objects
/// \return True
bool FileRepository::clearCache(Object::ObjectType type) {
    // keyIndex is guarded by repoLock in every other member function, take it here as well
    std::lock_guard lock(repoLock);
    keyIndex[type].clear();
    return true;
}
/// Adds \p obj to the key index, without writing it to the storage
/// \param obj Object to index, its id has to be present in the offset index
/// \return True
/// \throws Exception if the id of \p obj isn't in the offset index
bool FileRepository::addToCache(const Object &obj) {
    // Step one: the object has to be known to the storage already
    {
        std::unique_lock lock(repoLock);
        if (offsetIndex.find(obj.id) == offsetIndex.end())
            throw Exception("Object with id " + std::to_string(obj.id) + " doesn't exist!");
    }
    // Step two: register its key
    {
        std::lock_guard lock(repoLock);
        auto &byKey = keyIndex[obj.type];
        byKey[obj.getKey()] = obj.id;
    }
    return true;
}

125
src/repo/FileRepository.h Normal file
View File

@@ -0,0 +1,125 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_FILEREPOSITORY_H
#define SEMBACKUP_FILEREPOSITORY_H
#include <filesystem>
#include <fstream>
#include <map>
#include <mutex>
#include "Object.h"
#include "Repository.h"
/// Repository implementation in the local filesystem
/**
 * `repo` Config value is used as a root
 * Objects are stored concatenated in files with approximate size of `repo-target` MB (from Config)
 * The object key/object id index is stored as a hash map, as a `index` file out of the object storage structure
 * Hints for the location of objects inside of files are also stored as a hash map in the `offsets` file
 * Config is stored in the `info` file, merged with the supplied Config on open()
 *
 * Thread safe, approx. max memory usage is `number of threads` * `repo-target`,
 * as every thread can be flushing its write cache at the same time
 */
class FileRepository final : public Repository {
public:
/// Constructs a new FileRepository
/// \param config Config to use
FileRepository(Config config);
bool exists() override;
bool open() override;
bool init() override;
bool flush() override;
std::vector<char> getObject(Object::idType id) const override;
bool putObject(const Object &obj) override;
bool deleteObject(const Object &obj) override;
std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override;
Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override;
std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override;
bool clearCache(Object::ObjectType type) override;
bool addToCache(const Object &obj) override;
bool exists(Object::ObjectType type, const std::string &key) const override;
Object::idType getId() override;
/// FileRepository destructor
/// Flushes write cache, and writes the metadata
~FileRepository() override;
FileRepository(const FileRepository &r) = delete;
FileRepository &operator=(const FileRepository &r) = delete;
private:
const std::filesystem::path root;///< Root of the repository in the filesystem
/// Puts the Object raw data into write cache
/// \param obj Constant reference to the object
/// \return True
bool writeObject(const Object &obj);
bool ready = false;///< Indicates whether the FileRepository was open or initialized
/// Reads the file and returns its raw data
/// \param file Constant reference to the absolute path of the file
/// \return Vector of bytes of the file
std::vector<char> readFile(const std::filesystem::path &file) const;
/// Reads the \p size bytes of the file from \p offset and returns its raw data
/// \param file Constant reference to the absolute path of the file
/// \param offset First byte of the file to read
/// \param size Amount of bytes to read (no more than absoluteMaxFileLimit)
/// \return Vector of bytes of the file
/// \throws Exception on any error, or when absoluteMaxFileLimit is reached
std::vector<char> readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const;
static constexpr unsigned long long absoluteMaxFileLimit{4ULL * 1024 * 1024 * 1024};///<Max file read size (4GB)
/// Writes \p data to \p file
/// \param file Constant reference to the absolute path of the file
/// \param data Constant reference to the vector of bytes to write
/// \return True
/// \throws Exception on any error
bool writeFile(const std::filesystem::path &file, const std::vector<char> &data);
mutable std::mutex repoLock;///< Lock for any operations on the Repository
/// Helper struct to store the location of objects in the filesystem
struct OffsetEntry {
unsigned long long fileId;///< ID of file where the object is located
unsigned long long offset;///< Offset in the file where the object starts
unsigned long long length;///< Length of the object
/// Signals the Serialization template to use the entry's own serialization/deserialization
using serializable = std::true_type;
/// Constructs the entry from its three fields
OffsetEntry(unsigned long long fileId, unsigned long long offset, unsigned long long length);
/// Deserialization constructor
OffsetEntry(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// Serializes the entry to \p out
void serialize(std::vector<char> &out) const;
};
unsigned long long maxFileId = 1; ///< ID that will be given to the next object storage file
std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem
std::mutex writeCacheLock; ///< Write cache lock
std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data
unsigned long long writeCacheSize = 0; ///< Current byte size of the write cache
const unsigned long long writeCacheMax; ///< Target size of the write cache, it is automatically flushed after this is reached
/// Flushes the write cache
/// Expects the cache lock to be taken, swaps the cache with an empty one and unlocks it
/// \param lockW Write cache lock
void flushWriteCache(std::unique_lock<std::mutex> &&lockW);
Object::idType largestUnusedId = 1; ///< Next object ID to be handed out by getId()
std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>> keyIndex;///< Maps Object%'s keys to their ID's
};
#endif//SEMBACKUP_FILEREPOSITORY_H

21
src/repo/Object.cpp Normal file
View File

@@ -0,0 +1,21 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Object.h"
#include "Serialize.h"
Object::Object(idType id, ObjectType type) : id(id), type(type) {}
Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
: id(Serialize::deserialize<idType>(in, end)),
type(Serialize::deserialize<ObjectType>(in, end)) {
}
void Object::serialize(std::vector<char> &out) const {
Serialize::serialize(id, out);
Serialize::serialize(type, out);
}
/// Definition of the pure virtual destructor: it is declared `= 0` in the class,
/// but still has to be defined, as destructors of derived classes call it
Object::~Object() = default;

53
src/repo/Object.h Normal file
View File

@@ -0,0 +1,53 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_OBJECT_H
#define SEMBACKUP_OBJECT_H
#include <cstdint>
#include <string>
#include <vector>
/// Base class for objects in the Repository
/**
 * Every object has a unique id, and is also indexed by a type-key pair in the Repository cache
 */
class Object {
public:
using idType = uint64_t;///< Type alias for Object%'s ID
/// Kinds of objects that can be stored
/// END is not an object type, it marks the end of the enum and is used for range checks on deserialization
enum class ObjectType {
Archive,
File,
Chunk,
END
};
/// Serializes the object to \p out
virtual void serialize(std::vector<char> &out) const;
/// Signals the Serialization template to use Object's serialization/deserialization facilities
using serializable = std::true_type;
/// Pure virtual destructor, don't allow to create an instance of Object
virtual ~Object() = 0;
/// Pure virtual function that returns the key by which the object will be indexed in the Repository cache
/// All derived objects should implement this method
virtual std::string getKey() const = 0;
const idType id; ///< Unique numerical ID of the object
const ObjectType type;///< Type of the object
protected:
/// Constructs the object from its id and type
/// \param id Object ID
/// \param type Object type
Object(idType id, ObjectType type);
/// Deserialization constructor
/// \param in Iterator to the first byte of the serialized object
/// \param end End iterator of the source container
Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
};
#endif//SEMBACKUP_OBJECT_H

12
src/repo/Repository.cpp Normal file
View File

@@ -0,0 +1,12 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Repository.h"
/// Out-of-line definition of the defaulted virtual destructor
Repository::~Repository() = default;
/// Stores \p config as the Config of this Repository
/// The parameter is moved into the member, so it must not be used after the base class is constructed
Repository::Repository(Config config) : config(std::move(config)) {}
/// Gives read-only access to the Config of this Repository
/// \return Constant reference to the stored Config
const Config &Repository::getConfig() const {
    return this->config;
}

122
src/repo/Repository.h Normal file
View File

@@ -0,0 +1,122 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_REPOSITORY_H
#define SEMBACKUP_REPOSITORY_H
#include <filesystem>
#include <mutex>
#include <set>
#include "../Config.h"
#include "../filters/FilterContainer.h"
#include "Object.h"
/// Abstract base class for an Object Repository
/**
 * So far only FileRepository exists, and probably this interface is too inflexible
 * to be easily used for creating other (database, object storage...) repositories,
 * but it should be possible with some refactoring
 *
 * The class is neither copyable nor copy-assignable
 */
class Repository {
public:
/// Checks if a repository already exists
/// \return True if exists, False otherwise
virtual bool exists() = 0;
/// Tries to open the Repository
/// \return True
/// \throws Exception on any error
virtual bool open() = 0;
/// Tries to initialize the Repository
/// \return True
/// \throws Exception on any error, including if the Repository is already initialized
virtual bool init() = 0;
/// Tries to flush the Repository write cache
/// \returns True
/// \throws Exception on any error
virtual bool flush() = 0;
/// Returns the serialized Object with id \p id
/// \param id ID of object to return
/// \return Serialized object
/// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObject(Object::idType id) const = 0;
/// Adds the Object \p obj to the Repository
/// \param obj Constant reference to the object
/// \return True
/// \throws Exception on any error
virtual bool putObject(const Object &obj) = 0;
/// Deletes Object \p obj from the Repository
/// \param obj Constant reference to the object
/// \return True if successful, False if it didn't exist
/// \throws Exception on any error
virtual bool deleteObject(const Object &obj) = 0;
/// Returns the Object of type \p type and with key \p key
/// \param type Type of the object
/// \param key Constant reference to the key of the object
/// \return Serialized object
/// \throws Exception on any error or if object doesn't exist
virtual std::vector<char> getObject(Object::ObjectType type, const std::string &key) const = 0;
/// Returns the id of an Object of type \p type and with key \p key
/// \param type Type of the object
/// \param key Constant reference to the key of the object
/// \return ID of the object
/// \throws Exception on any error or if object doesn't exist
virtual Object::idType getObjectId(Object::ObjectType type, const std::string &key) const = 0;
/// Returns the list of Objects of type \p type
/// \param type Type of the object
/// \return Vector of pairs <key of object, id of object>
/// \throws Exception on any error
virtual std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const = 0;
/// Returns whether Object of type \p type and with key \p key exists
/// \param type Type of the object
/// \param key Constant reference to the key of the object
/// \return True if exists, False otherwise
/// \throws Exception on any error
virtual bool exists(Object::ObjectType type, const std::string &key) const = 0;
/// Erases all the cache entries of object type \p type
/// \param type Type of the objects
/// \return True
virtual bool clearCache(Object::ObjectType type) = 0;
/// Adds the object to the cache, but doesn't change it on disk otherwise
/// \param obj Constant reference to the object
/// \return True
/// \throws Exception on any error, or if the object doesn't exist
virtual bool addToCache(const Object &obj) = 0;
/// Returns the next available object id
/// \return ID that can be used for a new Object
virtual Object::idType getId() = 0;
/// Returns the const reference to Config object used for this Repository
/// \return Constant reference to the Config
const Config &getConfig() const;
/// Default virtual destructor
virtual ~Repository();
Repository(const Repository &r) = delete;
Repository &operator=(const Repository &r) = delete;
protected:
/// Base Repository class constructor
/// \param config Config to use
Repository(Config config);
Config config; ///< Config of this Repository
FilterContainer filters;///< Container of IO filters used to transform Objects when writing/reading to/from storage
};
#endif//SEMBACKUP_REPOSITORY_H

208
src/repo/Serialize.h Normal file
View File

@@ -0,0 +1,208 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_SERIALIZE_H
#define SEMBACKUP_SERIALIZE_H
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <utility>
#include <vector>
#ifdef __APPLE__
#include <machine/endian.h>
#define htobe64(x) htonll(x)
#define be64toh(x) ntohll(x)
#else
#include <endian.h>
#endif
#include "../Exception.h"
/// Serialization library
/**
* To serialize the objects in Repository, we have to handle a couple of cases:
* 1. Serializing integers (object ids, etc...)
* 2. Serializing enums (object types)
* 3. Serializing char vectors and strings
* 4. Serializing other STL containers (which also requires serializing pairs)
* 5. Serializing custom structs (including the objects themselves)
*
* With this library it is possible to do all of that.
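* Integral values wider than one byte are converted with be64toh when they are read (see the std::is_integral<T>::value case of deserialize), so they are expected to be stored as 64-bit big-endian values. Portability is then limited to platforms that provide be64toh/htobe64 (or an equivalent, as in the __APPLE__ branch above)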
*
*/
namespace Serialize {
/// Type trait that detects pair-like types
/// The primary template is the fallback and yields false
template<typename, typename = void, typename = void>
struct is_pair : std::false_type {};
/// Selected when both `.first` and `.second` are valid member accesses on \p P, yields true
template<typename P>
struct is_pair<P, std::void_t<decltype(std::declval<P>().first)>, std::void_t<decltype(std::declval<P>().second)>> : std::true_type {};
/// Type trait that detects whether a container has a usable emplace_back
/// The primary template is the fallback and yields false
template<typename, typename, typename = void>
struct has_emplace_back : std::false_type {};
/// Selected when a default-constructed \p T accepts `emplace_back` with a value of type \p V, yields true
/// Note that the check uses `T()`, so it can only succeed for default-constructible \p T
template<typename T, typename V>
struct has_emplace_back<T, V, std::void_t<decltype(T().emplace_back(std::declval<V>()))>> : std::true_type {};
/// Type trait that detects types which provide their own serialization
/// The primary template is the fallback and yields false
template<typename, typename = void, typename = void>
struct serializable : std::false_type {};
/// Checks if the object has the `serializable` type
/// In that case, its serialization will be delegated to its .serialize() method,
/// and deserialization to its T(char vector iterator in, const char vector iterator end) constructor,
/// similar to Serialize::deserialize
template<typename T>
struct serializable<T, std::void_t<decltype(T::serializable::value)>> : std::true_type {};
/// Deserializes object of type \p T starting from first byte \p in, advances the iterator past the end of object
/// \tparam T Type to deserialize
/// \param in Iterator to the first byte of the object
/// \param end End iterator of source container
/// \return Deserialized value
/// \throws Exception if the data ends before the object is complete, or is otherwise malformed
template<typename T>
static T deserialize(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// Serializes object of type \p T into vector \p out
/// \tparam T Type to serialize
/// \param what Constant reference to the object to serialize
/// \param out Reference to output vector
template<typename T>
static void serialize(const T &what, std::vector<char> &out);
/// Serializes the object of type \p T and returns the resulting vector
/// \tparam T Type to serialize
/// \param o Constant reference to the object to serialize
/// \return Serialized data
template<typename T>
static std::vector<char> serialize(const T &o);
/// Deserializes object of type \p T from input vector \p from
/// \tparam T Type to deserialize
/// \param from Constant reference to the vector with the serialized object
/// \return Deserialized value
template<typename T>
static T deserialize(const std::vector<char> &from);
/// Deserializes an object of type \p T starting at \p in and advances \p in past the consumed bytes
///
/// The decoding strategy is picked at compile time, in this order:
///  - `serializable` types: delegated to the T(in, end) constructor
///  - pair-like types: `first` then `second`
///  - enums: one 64-bit big-endian word that must be smaller than T::END
///  - single-byte types: copied as is
///  - other integral types: one 64-bit big-endian word
///  - anything else: treated as a container encoded as <number of elements>b<elements>e
///
/// \tparam T Type to deserialize
/// \param in Iterator to the first byte of the object
/// \param end End iterator of source container
/// \return Deserialized value
/// \throws Exception if the input ends early, a container marker is wrong or an enum value is out of range
template<typename T>
T deserialize(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end) {
    // Every branch below consumes at least one byte
    if (in >= end) throw Exception("Unexpected end of object!");

    if constexpr (serializable<T>::value) {
        // If the object declares itself as serializable, call its constructor with in and end
        return T(in, end);
    } else if constexpr (is_pair<T>::value) {
        // If the object is a pair, deserialize the first and second element and return the pair
        using KT = typename std::remove_const<decltype(T::first)>::type;
        using VT = typename std::remove_const<decltype(T::second)>::type;
        auto K = deserialize<KT>(in, end);
        auto V = deserialize<VT>(in, end);
        return T(std::move(K), std::move(V));
    } else if constexpr (std::is_enum<T>::value) {
        // Enums are written as a uint32_t, which takes a whole 64-bit word on the wire.
        // Validate the full word: narrowing it to 32 bits first would accept a corrupted
        // value whose lower half happens to be in range.
        const uint64_t raw = deserialize<uint64_t>(in, end);
        const uint64_t limit = static_cast<uint32_t>(T::END);
        if (raw >= limit)
            throw Exception("Enum out of range!");
        return static_cast<T>(raw);
    } else if constexpr (sizeof(T) == 1) {
        // If it's a single byte, just copy it (the guard at the top ensures one byte is available)
        return *(in++);
    } else if constexpr (std::is_integral<T>::value) {
        uint64_t tmp;
        static_assert(sizeof(tmp) == 8);
        // If the object is a number, copy it byte-by-byte
        // in < end holds here, so the distance is positive and the cast is safe
        if (static_cast<size_t>(std::distance(in, end)) < sizeof(tmp))
            throw Exception("Unexpected end of object!");
        std::copy(in, in + sizeof(tmp), reinterpret_cast<char *>(&tmp));
        in += sizeof(tmp);
        return static_cast<T>(be64toh(tmp));
    } else {
        // Otherwise we treat it as a container, in format of <number of elements>b<elements>e
        const size_t size = deserialize<size_t>(in, end);
        char b = deserialize<char>(in, end);
        if (b != 'b') throw Exception("Error deserializing!");

        T out;
        if constexpr (sizeof(typename T::value_type) == 1) {
            // Optimization for char vectors: copy the whole payload at once
            // in <= end holds here, so the distance is non-negative and the cast is safe
            if (static_cast<size_t>(std::distance(in, end)) < size)
                throw Exception("Unexpected end of object!");
            const auto count = static_cast<std::vector<char>::difference_type>(size);
            out.insert(out.end(), in, in + count);
            in += count;
        } else {
            for (size_t i = 0; i < size; i++) {
                using V = typename T::value_type;
                V v = deserialize<V>(in, end);
                // Try either emplace_back or emplace if it doesn't exist
                if constexpr (has_emplace_back<T, V>::value)
                    out.emplace_back(std::move(v));
                else
                    out.emplace(std::move(v));
            }
        }

        b = deserialize<char>(in, end);
        if (b != 'e') throw Exception("Error deserializing!");
        return out;
    }
}
/// Appends the serialized form of \p what to \p out
///
/// The encoding is picked at compile time, in this order: `serializable` types, pair-like types,
/// enums, single-byte types, other integral types, and finally containers
/// \tparam T Type to serialize
/// \param what Constant reference to the serialized object
/// \param out Reference to output vector
template<typename T>
void serialize(const T &what, std::vector<char> &out) {
    if constexpr (serializable<T>::value) {
        // The type knows how to serialize itself
        what.serialize(out);
    } else if constexpr (is_pair<T>::value) {
        // Pairs are written as their first element followed by their second element
        serialize(what.first, out);
        serialize(what.second, out);
    } else if constexpr (std::is_enum_v<T>) {
        // Enums are converted to uint32_t and written as a number
        serialize(static_cast<uint32_t>(what), out);
    } else if constexpr (sizeof(T) == 1) {
        // Single bytes are stored verbatim
        out.emplace_back(what);
    } else if constexpr (std::is_integral_v<T>) {
        // Numbers are widened to 64 bits, converted with htobe64 and appended byte-by-byte
        static_assert(sizeof(uint64_t) == 8);
        const uint64_t bigEndian = htobe64(static_cast<uint64_t>(what));
        const char *const firstByte = reinterpret_cast<const char *>(&bigEndian);
        out.insert(out.end(), firstByte, firstByte + sizeof(bigEndian));
    } else {
        // Everything else is a container, written as <number of elements>b<elements>e
        serialize(what.size(), out);
        serialize('b', out);
        if constexpr (sizeof(typename T::value_type) == 1) {
            // Byte-sized elements are appended in one go
            out.insert(out.end(), what.begin(), what.end());
        } else {
            for (const auto &element: what) {
                serialize(element, out);
            }
        }
        serialize('e', out);
    }
}
/// Convenience overload: serializes \p o into a freshly created vector and returns it
/// \tparam T Type to serialize
/// \param o Constant reference to the serialized object
/// \return Serialized data
template<typename T>
std::vector<char> serialize(const T &o) {
    std::vector<char> buffer;
    serialize(o, buffer);
    return buffer;
}
/// Convenience overload: deserializes an object of type \p T from the beginning of \p from
/// \tparam T Type to deserialize
/// \param from Constant reference to the vector holding the serialized data
/// \return Deserialized value
template<typename T>
T deserialize(const std::vector<char> &from) {
    // The iterator overload needs an lvalue it can advance
    std::vector<char>::const_iterator cursor = from.cbegin();
    return deserialize<T>(cursor, from.cend());
}
}// namespace Serialize
#endif//SEMBACKUP_SERIALIZE_H

View File

@@ -0,0 +1,35 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Archive.h"
#include "../../Exception.h"
#include "../Serialize.h"
/// Creates a new Archive object
/// \param id Id of the object
/// \param name Archive name
/// \param mtime Time of creation
/// \param files List of ids of File objects in the Archive
/// \param full Whether this is a full archive
Archive::Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files, bool full)
    : Object(id, ObjectType::Archive),
      // name and files are taken by value, so move them into the members instead of copying again
      name(std::move(name)),
      mtime(mtime),
      files(std::move(files)),
      isFull(full) {}
/// Deserialization constructor
/// Reads the base Object, then name, mtime, files and isFull, followed by a trailing file count
/// \param in Iterator to the first byte of the serialized Archive, advanced past it
/// \param end End iterator of source container
/// \throws Exception if the object type is not Archive or the recorded file count doesn't match
Archive::Archive(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      name(Serialize::deserialize<std::string>(in, end)),
      mtime(Serialize::deserialize<unsigned long long>(in, end)),
      files(Serialize::deserialize<std::remove_const_t<decltype(files)>>(in, end)),
      isFull(Serialize::deserialize<bool>(in, end)) {
    if (type != ObjectType::Archive) {
        throw Exception("Type mismatch for Archive!");
    }

    // The file count is stored once more after the payload and must agree with the decoded list
    const auto recordedCount = Serialize::deserialize<decltype(files.size())>(in, end);
    if (recordedCount != files.size()) {
        throw Exception("Number of files recorded doesn't match the number of files read!");
    }
}
/// Appends the serialized form of this Archive to \p out
/// The fields are written in the same order the deserialization constructor reads them back
/// \param out Reference to output vector
void Archive::serialize(std::vector<char> &out) const {
// The base Object part goes first
Object::serialize(out);
Serialize::serialize(name, out);
Serialize::serialize(mtime, out);
Serialize::serialize(files, out);
Serialize::serialize(isFull, out);
// Trailing copy of the file count, compared against the decoded list when the Archive is read back
Serialize::serialize(files.size(), out);
}
/// Returns the name of the archive as its key
std::string Archive::getKey() const {
return name;
}

View File

@@ -0,0 +1,32 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_ARCHIVE_H
#define SEMBACKUP_ARCHIVE_H
#include <array>
#include "../Object.h"
/// Object representing a backup
class Archive : public Object {
public:
/// Creates a new Archive
/// \param id Id of the object
/// \param name Archive name
/// \param mtime Time of creation
/// \param files List of ids of File objects in the Archive
/// \param full Whether this is a full archive, false by default
Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files, bool full = false);
/// Deserialization constructor
Archive(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// \copydoc Object::serialize
void serialize(std::vector<char> &out) const override;
/// Returns the name of the archive
std::string getKey() const override;
const std::string name; ///< Archive name
const unsigned long long mtime; ///< Time of creation
const std::vector<idType> files;///< List of ids of File objects in the Archive
const bool isFull = false; ///< Whether this was a full archive
};
#endif//SEMBACKUP_ARCHIVE_H

View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Chunk.h"
#include "../../Exception.h"
#include "../Serialize.h"
/// Creates a new Chunk from its hash and raw data
/// The initializers are listed in member declaration order (md5, data, length),
/// so `length` reads the already initialized `data` member
/// \param id Id of the object
/// \param md5 MD5 hash of the chunk
/// \param data Raw chunk data
Chunk::Chunk(idType id, std::string md5, std::vector<char> data)
    : Object(id, ObjectType::Chunk),
      md5(std::move(md5)),
      data(std::move(data)),
      length(this->data.size()) {}
/// Deserialization constructor
/// Reads the base Object, then md5, data and the recorded length
/// \param in Iterator to the first byte of the serialized Chunk, advanced past it
/// \param end End iterator of source container
/// \throws Exception if the object type is not Chunk or the recorded length doesn't match the data size
Chunk::Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      md5(Serialize::deserialize<std::remove_const_t<decltype(md5)>>(in, end)),
      data(Serialize::deserialize<std::remove_const_t<decltype(data)>>(in, end)),
      length(Serialize::deserialize<std::remove_const_t<decltype(length)>>(in, end)) {
    if (type != ObjectType::Chunk) {
        throw Exception("Type mismatch for Chunk!");
    }
    // The stored length must agree with the amount of data actually decoded
    if (data.size() != length) {
        throw Exception("Recorded length and actual length don't match for Chunk!");
    }
}
/// Appends the serialized form of this Chunk to \p out
/// The fields are written in the same order the deserialization constructor reads them back
/// \param out Reference to output vector
void Chunk::serialize(std::vector<char> &out) const {
// The base Object part goes first
Object::serialize(out);
Serialize::serialize(md5, out);
Serialize::serialize(data, out);
// The length is stored after the data and compared with the decoded data size when the Chunk is read back
Serialize::serialize(length, out);
}
/// Returns the MD5 of the chunk as its key
std::string Chunk::getKey() const {
return md5;
}

33
src/repo/objects/Chunk.h Normal file
View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_CHUNK_H
#define SEMBACKUP_CHUNK_H
#include <array>
#include <vector>
#include "../Object.h"
/// Object representing a part of a File
class Chunk : public Object {
public:
/// Creates a new Chunk
/// \param id Id of the object
/// The unnamed second parameter is the MD5 hash of the chunk
/// \param data Raw chunk data
Chunk(idType id, std::string, std::vector<char> data);
/// Deserialization constructor
Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// \copydoc Object::serialize
void serialize(std::vector<char> &out) const override;
/// Returns the MD5 of the chunk
std::string getKey() const override;
const std::string md5; ///< MD5 hash of the chunk
const std::vector<char> data; ///< Raw chunk data
const unsigned long long length;///< Size of chunk in bytes
};
#endif//SEMBACKUP_CHUNK_H

84
src/repo/objects/File.cpp Normal file
View File

@@ -0,0 +1,84 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "File.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "../../Exception.h"
#include "../Serialize.h"
/// Creates a new File object
/// \param id Id of the object
/// \param name Relative path to backup root, as UTF-8 string
/// \param bytes Amount of bytes in the file
/// \param mtime Last modification time as timestamp
/// \param md5 Hash of the file
/// \param chunks List of the chunks in the file
/// \param fileType File type
File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType)
    : Object(id, ObjectType::File),
      // name, md5 and chunks are taken by value, so move them into the members instead of copying again
      name(std::move(name)),
      bytes(bytes),
      mtime(mtime),
      md5(std::move(md5)),
      fileType(fileType),
      chunks(std::move(chunks)) {}
/// Deserialization constructor
/// Reads the base Object, then name, bytes, mtime, md5, fileType and chunks
/// \param in Iterator to the first byte of the serialized File, advanced past it
/// \param end End iterator of source container
/// \throws Exception if the object type is not File
File::File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
    : Object(in, end),
      name(Serialize::deserialize<std::string>(in, end)),
      bytes(Serialize::deserialize<unsigned long long>(in, end)),
      mtime(Serialize::deserialize<unsigned long long>(in, end)),
      md5(Serialize::deserialize<std::remove_const_t<decltype(md5)>>(in, end)),
      fileType(Serialize::deserialize<std::remove_const_t<decltype(fileType)>>(in, end)),
      chunks(Serialize::deserialize<std::remove_const_t<decltype(chunks)>>(in, end)) {
    if (type != ObjectType::File) {
        throw Exception("Type mismatch for File!");
    }
}
/// Appends the serialized form of this File to \p out
/// The fields are written in the same order the deserialization constructor reads them back
/// \param out Reference to output vector
void File::serialize(std::vector<char> &out) const {
// The base Object part goes first
Object::serialize(out);
Serialize::serialize(name, out);
Serialize::serialize(bytes, out);
Serialize::serialize(mtime, out);
Serialize::serialize(md5, out);
Serialize::serialize(fileType, out);
Serialize::serialize(chunks, out);
}
/// Returns the file name (its relative path) as its key
std::string File::getKey() const {
return name;
}
/// Determines the Type of the file at path \p p
/// The symlink test comes first, then directory, then regular file
/// \param p Constant reference to the path of the file
/// \return Type of the file
/// \throws Exception if the file is neither a symlink, a directory nor a regular file
File::Type File::getFileType(const std::filesystem::path &p) {
    namespace fs = std::filesystem;

    if (fs::is_symlink(p)) {
        return Type::Symlink;
    } else if (fs::is_directory(p)) {
        return Type::Directory;
    } else if (fs::is_regular_file(p)) {
        return Type::Normal;
    }

    throw Exception("Unsupported file type! " + p.u8string());
}
/// Returns the "contents" of a non-regular file
/// \param p Constant reference to the path of the file
/// \return Empty vector for a directory, the bytes of the link target for a symlink
/// \throws Exception if the file is a regular file, or on any other error
std::vector<char> File::getFileContents(const std::filesystem::path &p) {
    switch (getFileType(p)) {
        case Type::Normal:
            // Regular files are not handled by this helper
            throw Exception(p.u8string() + " is a normal file!");
        case Type::Directory:
            return {};
        case Type::Symlink: {
            auto linkTarget = std::filesystem::read_symlink(p).u8string();
            return {linkTarget.begin(), linkTarget.end()};
        }
        default:
            break;
    }
    throw Exception("Error with file " + p.u8string());
}
unsigned long long File::getFileMtime(const std::filesystem::path &p) {
auto type = getFileType(p);
if (type == Type::Normal || type == Type::Directory)
return static_cast<const unsigned long long int>(std::chrono::duration_cast<std::chrono::seconds>(std::filesystem::last_write_time(p).time_since_epoch()).count());
else if (type == Type::Symlink) {
auto path = p.u8string();
struct stat sb;
if (lstat(path.c_str(), &sb) != 0) throw Exception("Error reading mtime for " + p.u8string());
#ifdef __APPLE__
return sb.st_mtimespec.tv_sec;
#else
return sb.st_mtime;
#endif
}
throw Exception("Error with file " + p.u8string());
}
/// Returns the size of the file at path \p p
/// For a regular file this is its size on disk, for anything else the size of its "contents"
/// as returned by getFileContents
/// \param p Constant reference to the path of the file
/// \return File size
/// \throws Exception on any error
unsigned long long File::getFileSize(const std::filesystem::path &p) {
    if (getFileType(p) != Type::Normal) {
        return getFileContents(p).size();
    }
    return std::filesystem::file_size(p);
}

76
src/repo/objects/File.h Normal file
View File

@@ -0,0 +1,76 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_FILE_H
#define SEMBACKUP_FILE_H
#include <array>
#include <filesystem>
#include <map>
#include <string>
#include <unordered_map>
#include <vector>
#include "../Object.h"
/// Object representing a saved file
class File : public Object {
public:
/// Kind of the saved filesystem entry
/// END is not a real file type: it is the last enumerator and has no entry in TypeToStr
enum class Type {
Normal,
Symlink,
Directory,
END
};
/// Human-readable names of the file types
static inline const std::unordered_map<Type, std::string> TypeToStr{{Type::Normal, "normal"}, {Type::Symlink, "symlink"}, {Type::Directory, "directory"}};
/// Creates a new File
/// \param id Id of the object
/// \param name Relative path to backup root, as UTF-8 string
/// \param bytes Amount of bytes in the file
/// \param mtime Last modification time as timestamp
/// \param md5 Hash of the file
/// \param chunks List of the chunks in the file
/// \param fileType File type
File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType);
/// Deserialization constructor
File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// \copydoc Object::serialize
void serialize(std::vector<char> &out) const override;
/// Returns the file relative path as key
std::string getKey() const override;
/// Helper static function to return Type for any file in the filesystem
/// \param p Constant reference to the absolute path of the file
/// \return Type of the file
/// \throws Exception if the file is not a symlink, a directory or a regular file
static Type getFileType(const std::filesystem::path &p);
/// Helper static function to return "contents" for non-regular files in the filesystem
/// \param p Constant reference to the absolute path of the file
/// \return File contents, (for symlinks - its destination, for directory - empty)
/// \throws Exception on any error, or if file is regular
static std::vector<char> getFileContents(const std::filesystem::path &p);
/// Helper static function to return modification time for files in the filesystem
/// \param p Constant reference to the absolute path of the file
/// \return File last modification time
/// \throws Exception on any error
static unsigned long long getFileMtime(const std::filesystem::path &p);
/// Helper static function to return file size for files in the filesystem
/// \param p Constant reference to the absolute path of the file
/// \return File size
/// \throws Exception on any error
static unsigned long long getFileSize(const std::filesystem::path &p);
const std::string name; ///< Relative path to backup root, as UTF-8 string
const unsigned long long bytes;///< Amount of bytes in the file
const unsigned long long mtime;///< Last modification time as timestamp
const std::string md5; ///< Hash of the file
const Type fileType; ///< File type
/// List of the chunks in file
/// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path
const std::vector<idType> chunks;
};
#endif//SEMBACKUP_FILE_H

View File

@@ -0,0 +1,51 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#include "FileBuffer.h"
#include "../Serialize.h"
/// Opens the file with id \p fileId from \p repo for reading
/// The File object is loaded immediately and the ids of its chunks are queued in order
/// \param repo Constant pointer to the backing Repository
/// \param fileId ID of the file to read
FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId)
    : repo(repo),
      file(Serialize::deserialize<File>(repo->getObject(fileId))),
      chunksQueue() {
    for (const auto &chunkId: file.chunks) {
        chunksQueue.emplace(chunkId);
    }
}
/// Does no work and always returns 0
int FileBuffer::sync() {
return 0;
}
std::streamsize FileBuffer::xsgetn(char *s, std::streamsize countr) {
if (underflow() == std::char_traits<char>::eof()) return 0;
for (int i = 0; i < countr; i++) {
auto c = uflow();
if (c != traits_type::eof()) {
s[i] = traits_type::to_char_type(c);
} else
return i;
}
return countr;
}
/// Returns the current byte and moves the read position past it
/// \return The byte as an int, or EOF when no data is left (the position is not changed then)
int FileBuffer::uflow() {
    const int current = underflow();
    if (current == traits_type::eof()) {
        return current;
    }
    ++curGetBufPos;
    return current;
}
/// Returns the current byte without moving the read position
/// When the loaded chunk is exhausted, the next queued chunk is fetched from the repository
/// \return The byte as an int, or EOF when all chunks have been consumed
int FileBuffer::underflow() {
    // Keep loading until there is an unread byte: a chunk without data must not be
    // reported as the end of file while further chunks are still queued
    while (curGetBufPos >= getBuf.size()) {
        if (chunksQueue.empty()) return traits_type::eof();

        auto chunk = Serialize::deserialize<Chunk>(repo->getObject(chunksQueue.front()));
        getBuf = chunk.data;
        // Pop only after the chunk was loaded successfully
        chunksQueue.pop();
        curGetBufPos = 0;
    }
    return traits_type::to_int_type(getBuf[curGetBufPos]);
}

View File

@@ -0,0 +1,43 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifndef SEMBACKUP_FILEBUFFER_H
#define SEMBACKUP_FILEBUFFER_H
#include <queue>
#include <streambuf>
#include "../Repository.h"
#include "Chunk.h"
#include "File.h"
/// Streambuf implementation to read files from a File in a Repository
class FileBuffer : public std::streambuf {
public:
/// Creates a FileBuffer instance
/// \param repo Constant pointer to the backing Repository, should be available during the entire lifetime
/// \param fileId ID of a file to "open"
FileBuffer(const Repository *repo, Object::idType fileId);
protected:
/// Returns the current byte without consuming it, loading the next chunk if needed
int underflow() override;
/// Returns the current byte and advances past it
int uflow() override;
/// Reads up to \p count bytes into \p s and returns the number of bytes read
std::streamsize xsgetn(char *s, std::streamsize count) override;
/// Always returns 0
int sync() override;
private:
std::vector<char> getBuf;///< Currently loaded chunk
size_t curGetBufPos = 0;///< Currently pointed to byte in the loaded chunk
const Repository *repo; ///< Pointer to the backing repository
File file; ///< Backing file
std::queue<Object::idType> chunksQueue;///< Chunks of file that weren't read yet
};
#endif//SEMBACKUP_FILEBUFFER_H