This commit is contained in:
2023-06-02 12:51:08 +02:00
commit 0e355fbe42
142 changed files with 10281 additions and 0 deletions

9
src/commands/Command.cpp Normal file
View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "Command.h"
/// Stores \p name as the name of the command.
/// \param name Name of the command, taken by value and moved into the member
Command::Command(std::string name)
    : name(std::move(name)) {
}

/// Out-of-line definition of the destructor that the class declares as pure virtual.
Command::~Command() = default;

28
src/commands/Command.h Normal file
View File

@@ -0,0 +1,28 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMAND_H
#define SEMBACKUP_COMMAND_H
#include "../Context.h"
/// Abstract base class for some process running with some Context
class Command {
public:
    /// Runs the command with Context \p ctx
    /// \param ctx Context to run with, passed by value
    virtual void run(Context ctx) = 0;

    /// Pure virtual destructor: keeps the class abstract even for subclasses
    /// that override everything else; it is still defined out of line because
    /// derived destructors call it
    virtual ~Command() = 0;

    /// The name of the command, fixed at construction
    const std::string name;

protected:
    /// Constructs a command with name \p name
    /// Marked explicit so that a string is never implicitly converted
    /// \param name Name of the command
    explicit Command(std::string name);
};
#endif//SEMBACKUP_COMMAND_H

View File

@@ -0,0 +1,152 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandDiff.h"
#include "../BytesFormatter.h"
#include "../Diff.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../change_detectors/ChangeDetectorFactory.h"
#include "../chunkers/ChunkerFactory.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
using namespace CommandsCommon;
/// Constructs the command under the name "diff".
CommandDiff::CommandDiff()
    : Command("diff") {
}
/// Compares the first archive with either a second archive or the `from` directory
/// and writes the differences to the logger of \p ctx.
/// The "diff-mode" config option selects the behaviour:
///  - "normal": every file under "prefix" is compared, then files left only in the first archive are reported as removed
///  - "file": only the single file named by "prefix" is compared
/// \param ctx Context providing the repository (and its config) and the logger
/// \throws Exception if "diff-mode" is neither "normal" nor "file"
void CommandDiff::run(Context ctx) {
std::string diffMode = ctx.repo->getConfig().getStr("diff-mode");
/// Id of the first archive: the "aid" option if it exists, otherwise the archive with the greatest id
Object::idType archive1;
if (!ctx.repo->getConfig().exists("aid")) {
auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);
/// NOTE(review): if there are no archives, max_element returns end() and the dereference is undefined - confirm at least one archive is guaranteed here
archive1 = std::max_element(archives.begin(), archives.end(), [](const auto &a1, const auto &a2) { return a1.second < a2.second; })->second;
} else {
archive1 = ctx.repo->getConfig().getInt("aid");
}
/// Thread pool for the comparison tasks; errors are written to the logger.
/// Uses the "threads" option if it exists, otherwise the hardware concurrency
ThreadPool threadPool([&](const std::string &error) {
ctx.logger->write("Error: " + error, 0);
},
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());
auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1));
/// Guards `files`, which is read and modified by the tasks running processFile
std::mutex filesLock;
std::map<std::filesystem::path, File> files;///< Files in the first archive
/// Keep only the files of the first archive whose name starts with the "prefix" option
for (auto id: archiveO1.files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
auto path = std::filesystem::u8path(file.name);
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
files.emplace(file.getKey(), std::move(file));
}
/// Container of ChangeDetectors built using the config of the repository
ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());
/// Task to compare the given file with the first archive
auto processFile = [&, this](ComparableFile p) {
auto relPath = p.path;
std::unique_lock lock(filesLock);
if (files.count(relPath) == 0) {
ctx.logger->write(relPath + " is new\n", 0);
lock.unlock();
} else {
/// Copy the entry so that the lock is not held while the comparison runs
File repoFile = files.at(relPath);
lock.unlock();
if (changeDetector.check({repoFile, ctx.repo}, p)) {
ctx.logger->write(relPath + " is different " + Diff::diff({repoFile, ctx.repo}, p) + "\n", 1);
} else {
if (diffMode == "file")
ctx.logger->write(relPath + " are same ", 0);
}
}
/// Remove the processed file, so that whatever stays in `files` was not seen on the other side
lock.lock();
files.erase(relPath);
};
/// Second archive; stays empty when the comparison is done against the filesystem
std::optional<Archive> archiveO2;
if (diffMode == "normal") {
/// If a second archive is given, run the task for each of its files, otherwise use the "from" config option
if (ctx.repo->getConfig().exists("aid2")) {
archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
threadPool.push([&]() {
for (auto id: archiveO2.value().files) {
/// Exit when asked to
if (Signals::shouldQuit) throw Exception("Quitting");
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::u8path(file.name)))
threadPool.push([&, file]() {
processFile(ComparableFile{file, ctx.repo});
});
if (Signals::shouldQuit) break;
}
return true;
});
} else {
std::filesystem::path from = ctx.repo->getConfig().getStr("from");
/// Start the diff with the root directory and empty ignore list
threadPool.push([&, from]() {
processDirWithIgnore(
from,
{},
[&](std::function<void()> f) { threadPool.push(std::move(f)); },
[processFile, from, prefix = ctx.repo->getConfig().getStr("prefix")](const std::filesystem::directory_entry &dirEntry) {
if (isSubpath(prefix, dirEntry.path().lexically_relative(from)))
processFile(ComparableFile{dirEntry, from});
});
});
}
} else if (diffMode == "file") {
/// In file mode "prefix" names the single file to compare; it has to exist on both sides
if (files.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
ctx.logger->write("Doesn't exist in the first archive", 0);
return;
}
if (ctx.repo->getConfig().exists("aid2")) {
archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
std::map<std::filesystem::path, File> files2;///< Files in the second archive
for (auto id: archiveO2->files) {
auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
auto path = std::filesystem::u8path(file.name);
if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
files2.emplace(file.getKey(), std::move(file));
}
if (files2.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
ctx.logger->write("Doesn't exist in the second archive", 0);
return;
} else {
/// The comparison is called directly here instead of being pushed to the thread pool
processFile(ComparableFile{files2.at(ctx.repo->getConfig().getStr("prefix")), ctx.repo});
}
} else {
std::filesystem::path from = ctx.repo->getConfig().getStr("from");
if (!std::filesystem::exists(from / ctx.repo->getConfig().getStr("prefix"))) {
ctx.logger->write("Doesn't exist in the filesystem archive", 0);
return;
}
/// Compare the single file found under the "from" directory, called directly on this thread
processFile(ComparableFile{from / ctx.repo->getConfig().getStr("prefix"), from});
}
} else {
throw Exception("Unknown diff-mode: " + diffMode);
}
/// Wait for diff to end
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
/// Every processed file was erased from `files`, so the remaining ones exist only in the first archive
if (diffMode == "normal")
for (auto const &s: files) {
ctx.logger->write(s.first.u8string() + " is removed\n", 0);
}
}

View File

@@ -0,0 +1,23 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDDIFF_H
#define SEMBACKUP_COMMANDDIFF_H
#include "Command.h"
#include "CommandsCommon.h"
/// Runs a diff between two sources, chosen from the config:
/// 1. by default, the latest archive and the `from` directory
/// 2. if `aid` is set, the `aid` archive and the `from` directory
/// 3. if `aid` and `aid2` are set, the `aid` archive and the `aid2` archive
class CommandDiff : public Command {
public:
    /// Constructs the command
    CommandDiff();

    /// Runs the diff with Context \p ctx
    void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDDIFF_H

View File

@@ -0,0 +1,16 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandList.h"
/// Constructs the command under the name "list".
CommandList::CommandList()
    : Command("list") {}
/// Prints the name and the id of every archive in the repository to the
/// standard output, one archive per line, ordered by ascending id.
/// \param ctx Context providing the repository to list
void CommandList::run(Context ctx) {
    auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);

    /// Orders the (name, id) entries by their id
    const auto byId = [](const auto &lhs, const auto &rhs) { return lhs.second < rhs.second; };
    std::sort(archives.begin(), archives.end(), byId);

    for (const auto &entry: archives)
        std::cout << "Name: " << entry.first << " Id: " << entry.second << std::endl;
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDLIST_H
#define SEMBACKUP_COMMANDLIST_H
#include "Command.h"
#include "CommandsCommon.h"
/// Lists the archives available in a repository
class CommandList : public Command {
public:
    /// Constructs the command
    CommandList();

    /// Prints the archives of the repository in \p ctx
    void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDLIST_H

View File

@@ -0,0 +1,22 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandListFiles.h"
#include "../BytesFormatter.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
#include "../repo/objects/File.h"
/// Constructs the command under the name "list-files".
CommandListFiles::CommandListFiles()
    : Command("list-files") {}
/// Prints the name, the type and the formatted size of every file in the
/// archive selected by the "aid" config option, one file per line.
/// \param ctx Context providing the repository and its config
void CommandListFiles::run(Context ctx) {
    const auto archive = Serialize::deserialize<Archive>(
            ctx.repo->getObject(ctx.repo->getConfig().getInt("aid")));

    for (const auto &fileId: archive.files) {
        const auto entry = Serialize::deserialize<File>(ctx.repo->getObject(fileId));
        std::cout << "Name: " << entry.name
                  << " type: " << File::TypeToStr.at(entry.fileType)
                  << " size: " << BytesFormatter::formatStr(entry.bytes)
                  << std::endl;
    }
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDLISTFILES_H
#define SEMBACKUP_COMMANDLISTFILES_H
#include "Command.h"
#include "CommandsCommon.h"
/// Lists the files stored in the selected Archive
class CommandListFiles : public Command {
public:
    /// Constructs the command
    CommandListFiles();

    /// Prints the files of the archive selected in the config of \p ctx
    void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDLISTFILES_H

View File

@@ -0,0 +1,125 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandRestore.h"
#include <fstream>
#include <sstream>
#include "../BytesFormatter.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../chunkers/ChunkerFactory.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
using namespace CommandsCommon;
/// Constructs the command under the name "restore".
CommandRestore::CommandRestore()
    : Command("restore") {}
/// Restores every file of the archive selected by the "aid" config option
/// into the directory given by the "to" config option.
/// One task enumerates the files of the archive and spawns a restore task per file.
/// \param ctx Context providing the repository (and its config) and the logger
void CommandRestore::run(Context ctx) {
Object::idType archive = ctx.repo->getConfig().getInt("aid");
std::filesystem::path to = std::filesystem::u8path(ctx.repo->getConfig().getStr("to"));
/// Totals discovered so far by the enumerating task, shown as the targets in the progress line
std::atomic<unsigned long long> filesToRestoreCount = 0;
std::atomic<unsigned long long> bytesToRestore = 0;
WorkerStats workerStats;///< Restore statistics of the worker threads
/// Worker callback, bound to the local workerStats variable
workerStatsFunction workerCallback = [&workerStats](unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten) {
CommandsCommon::workerCallback(bytesWritten, bytesSkipped, filesWritten, workerStats);
};
/// Scope for the helpers below: they are destroyed at its closing brace, before the final newline is logged
{
/// Calculate the average speed of restore
/// NOTE(review): the meaning of the two 100 arguments is not visible here - confirm against RunningDiffAverage
RunningDiffAverage avg(
[&]() { return workerStats.bytesWritten.load(); },
100, 100);
/// Show restore progress
Progress progress([this, ctx](const std::string &s, int l) { ctx.logger->write(s, l); },
{
[&workerStats]() { return std::to_string(workerStats.filesWritten.load()); },
"/",
[&filesToRestoreCount]() { return std::to_string(filesToRestoreCount); },
" files saved, ",
[&workerStats]() { return BytesFormatter::formatStr(workerStats.bytesWritten.load() + workerStats.bytesSkipped.load()); },
" / ",
[&bytesToRestore]() { return BytesFormatter::formatStr(bytesToRestore); },
" saved @ ",
/// NOTE(review): the factor 10 presumably converts the average to a per-second value - confirm
[&avg]() { return BytesFormatter::formatStr(avg.get() * 10); },
"/s",
},
ctx.repo->getConfig());
/// Thread pool for restore tasks
/// Uses the "threads" option if it exists, otherwise the hardware concurrency
ThreadPool threadPool([&](const std::string &error) {
progress.print("Error: " + error, 0);
},
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());
/// Add the main restore task
threadPool.push([&, this]() {
/// Get the archive and its file IDs
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archive));
std::vector<Object::idType> files = archiveO.files;
/// For each file...
for (const auto fid: files) {
/// Stop when asked to
if (Signals::shouldQuit) break;
auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
filesToRestoreCount++;
bytesToRestore += file.bytes;
/// Spawn a restore task
/// `to` and `file` are captured by copy, everything else by reference
threadPool.push([&, this, to, file]() {
backupRestoreFile(file, to, workerCallback, ctx);
progress.print("Restored " + file.name, 1);
});
}
});
/// Wait for all tasks to finish
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
}
ctx.logger->write("\n", 1);
}
std::string CommandRestore::backupRestoreFile(const File &file, const std::filesystem::path &baseDir, workerStatsFunction &callback, Context ctx) {
auto fullpath = baseDir / std::filesystem::u8path(file.name);
std::filesystem::create_directories(fullpath.parent_path());
if (file.fileType == File::Type::Directory) {
std::filesystem::create_directory(fullpath);
callback(0, 0, 1);
return fullpath.u8string();
}
if (file.fileType == File::Type::Symlink) {
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks[0]));
std::filesystem::create_symlink(std::filesystem::u8path(std::string{dest.data.begin(), dest.data.end()}), fullpath);
callback(0, 0, 1);
return fullpath.u8string();
}
std::ofstream ostream(fullpath, std::ios::binary | std::ios::out | std::ios::trunc);
for (const auto cid: file.chunks) {
if (Signals::shouldQuit) throw Exception("Quitting!");
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid));
if (!c.data.empty()) {
ostream.rdbuf()->sputn(c.data.data(), c.data.size());
callback(c.data.size(), 0, 0);
}
}
callback(0, 0, 1);
return fullpath.u8string();
}

View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDRESTORE_H
#define SEMBACKUP_COMMANDRESTORE_H
#include "Command.h"
#include "../repo/objects/File.h"
#include "CommandsCommon.h"
/// Restores the archive with id `aid` to the path `to` (both taken from the config)
class CommandRestore : public Command {
public:
    /// Constructs the command
    CommandRestore();

    /// Runs the restore with Context \p ctx
    void run(Context ctx) override;

private:
    /// Internal function to restore a single file
    /// \param file     Constant reference to the File object
    /// \param base     Base directory to restore to
    /// \param callback Stats callback
    /// \param ctx      Context providing the repository to restore from
    /// \return Name of the restored file
    std::string backupRestoreFile(const File &file, const std::filesystem::path &base, CommandsCommon::workerStatsFunction &callback, Context ctx);
};
#endif//SEMBACKUP_COMMANDRESTORE_H

239
src/commands/CommandRun.cpp Normal file
View File

@@ -0,0 +1,239 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandRun.h"
#include <fstream>
#include <iomanip>
#include <sstream>
#include "../BytesFormatter.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../change_detectors/ChangeDetectorFactory.h"
#include "../chunkers/ChunkerFactory.h"
#include "../crypto/MD5.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
#include "../repo/objects/File.h"
#include "CommandsCommon.h"
using namespace CommandsCommon;
/// Constructs the command under the name "run".
CommandRun::CommandRun()
    : Command("run") {}
/// Backs up the directory given by the "from" config option into a new Archive.
/// The backup is full if the "type" option is "full", if at least "full-period"
/// archives were made since the last full one, or if there are no archives yet;
/// otherwise files that the change detectors report as unchanged are reused.
/// \param ctx Context providing the repository (and its config) and the logger
void CommandRun::run(Context ctx) {
WorkerStats workerStats;///< Backup statistics of the worker threads
RunnerStats runnerStats;///< Backup target metrics
std::filesystem::path from = ctx.repo->getConfig().getStr("from");///< Directory to back up from
bool fullBackup = ctx.repo->getConfig().getStr("type") == "full";
if (fullBackup) {
ctx.logger->write("Backup is full because of the config\n", 1);
}
/// For progtest task compliance
if (!fullBackup) {
/// If it's time for full backup as per config, force it
auto per = ctx.repo->getConfig().getInt("full-period");
auto list = ctx.repo->getObjects(Object::ObjectType::Archive);
/// Descending ids, so the loop below starts with the archive that has the greatest id
std::sort(list.begin(), list.end(), [](const auto &l, const auto &r) { return l.second > r.second; });
/// Number of non-full archives found before the first full one
int lastInc = 0;
for (auto const &a: list) {
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(a.second));
if (!archiveO.isFull) {
lastInc++;
continue;
} else
break;
}
if (lastInc >= per) {
fullBackup = true;
ctx.logger->write("Backup is full because of the interval\n", 1);
}
if (list.size() == 0) {
fullBackup = true;
ctx.logger->write("Backup is full because there are no backups\n", 1);
}
}
/// Worker callback, bound to the local workerStats variable
workerStatsFunction workerCallback = [&](unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten) {
CommandsCommon::workerCallback(bytesWritten, bytesSkipped, filesWritten, workerStats);
};
std::vector<Object::idType> files;///< File ids so far added to the archive
std::mutex filesLock; ///< Files vector lock
/// Function to safely add new file ids to `files`
std::function addFile = [&](Object::idType id) {std::lock_guard lock(filesLock); files.emplace_back(id); };
/// Technically the progtest task says that only the files from the last backup should be compared against...
/// ...but the map is filled from every File object the repository returns
std::map<std::string, Object::idType> prevArchiveFiles;
{
auto prevArchiveFilesList = ctx.repo->getObjects(Object::ObjectType::File);
prevArchiveFiles = {prevArchiveFilesList.begin(), prevArchiveFilesList.end()};
}
/// NOTE(review): presumably drops the cached File entries so that only the files of this run are re-added - confirm against the repository
ctx.repo->clearCache(Object::ObjectType::File);
/// Scope for the helpers below: they are destroyed at its closing brace, before the summary is logged
{
/// Calculate the average speed of backup
/// NOTE(review): the meaning of the two 100 arguments is not visible here - confirm against RunningDiffAverage
RunningDiffAverage avg(
[&]() { return workerStats.bytesWritten.load(); },
100, 100);
/// Show the progress of backup
Progress progress([this, ctx](const std::string &s, int l) { ctx.logger->write(s, l); },
{[&]() { return std::to_string(workerStats.filesWritten.load()); },
"/",
[&]() { return std::to_string(runnerStats.filesToSaveCount); },
" files saved, ",
[&]() { return std::to_string(runnerStats.filesSkipped); },
" files skipped, ",
[&]() { return BytesFormatter::formatStr((workerStats.bytesWritten.load() + workerStats.bytesSkipped.load())); },
" / ",
[&]() { return BytesFormatter::formatStr(runnerStats.bytesToSave); },
" read @ ",
[&]() { return BytesFormatter::formatStr(avg.get() * 10); },
"/s"},
ctx.repo->getConfig());
/// Thread pool for backup tasks, prints to progress on any errors
/// Uses the "threads" option if it exists, otherwise the hardware concurrency
ThreadPool threadPool([&](const std::string &error) {
progress.print("Error: " + error, 0);
},
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());
/// Container of ChangeDetectors built using the config of the repository
ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());
/// Function to spawn a rechunking task
/// The targets in runnerStats are updated right away, the file itself is saved by the spawned task
auto saveFile = [&, this](const std::filesystem::path &absPath, const std::filesystem::path &relPath) {
runnerStats.bytesToSave += File::getFileType(absPath) == File::Type::Normal ? std::filesystem::file_size(absPath) : 0;
runnerStats.filesToSaveCount++;
threadPool.push([&, relPath, absPath]() {
addFile(backupChunkFile(absPath, relPath.u8string(), workerCallback, ctx));
progress.print("Copied: " + relPath.u8string(), 1);
});
};
/// Task to process an individual file in the backup
std::function<void(std::filesystem::path)> processFile;
/// If it's a full backup, just save the file, otherwise re-chunk it only if it's changed
/// NOTE(review): the relative path is turned into a UTF-8 string and then implicitly back into a path for saveFile - confirm this round trip is fine where the native narrow encoding is not UTF-8
if (fullBackup)
processFile =
[&, this](const std::filesystem::path &p) {
saveFile(p, p.lexically_relative(from).u8string());
};
else
processFile =
[&, this](const std::filesystem::path &p) {
auto relPath = p.lexically_relative(from).u8string();
if (prevArchiveFiles.count(relPath) != 0) {
File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(prevArchiveFiles.at(relPath)));
/// Unchanged file: reuse the stored File object instead of chunking the file again
if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) {
addFile(repoFile.id);
ctx.repo->addToCache(repoFile);
progress.print("Skipped: " + relPath, 1);
runnerStats.filesSkipped++;
return;
}
}
saveFile(p, relPath);
return;
};
/// Start the backup with the root directory and empty ignore list
threadPool.push([&]() {
processDirWithIgnore(
from,
{},
[&](std::function<void()> f) { threadPool.push(std::move(f)); },
processFile);
});
/// Wait for all the tasks to finish
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
}
ctx.logger->write("\n", 1);
/// Log the summary of the written and the skipped bytes
auto written = BytesFormatter::format(workerStats.bytesWritten);
auto skipped = BytesFormatter::format(workerStats.bytesSkipped);
ctx.logger->write(written.prefix + " written: " + written.number + '\n', 1);
ctx.logger->write(skipped.prefix + " skipped: " + skipped.number + '\n', 1);
/// The archive is named after the current local time
/// NOTE(review): std::localtime hands out a pointer to shared storage - confirm that no other thread uses it at this point
auto time = std::time(0);
auto ltime = std::localtime(&time);
std::stringstream s;
s << std::put_time(ltime, "%d-%m-%Y %H-%M-%S");
/// Avoid archive name collisions
while (ctx.repo->exists(Object::ObjectType::Archive, s.str())) s << "N";
Archive a(ctx.repo->getId(), s.str(), time, files, fullBackup);
ctx.repo->putObject(a);
}
/// Saves the file at \p orig into the repository under the name \p saveAs.
/// Symlinks and directories are stored as a single chunk holding what
/// File::getFileContents returns for them; regular files are split by the
/// configured chunker, and chunks that already exist are reused when the
/// "dedup" config option is "on".
/// \param orig     Absolute path to the file
/// \param saveAs   UTF-8 encoded file name to save as
/// \param callback Stats callback: receives written/skipped bytes per chunk and one file when finished
/// \param ctx      Context providing the repository to save into
/// \return ID of the saved File object
/// \throws Exception if the file is neither a symlink, a directory nor a regular file,
///         if it can't be opened, if asked to quit, or if its size changed while it was read
Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, workerStatsFunction &callback, Context ctx) {
    /// If it's a symlink or directory, treat it specially
    /// The order of checks is important, because is_directory follows the symlink
    if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) {
        auto contents = File::getFileContents(orig);
        Chunk c(ctx.repo->getId(), MD5::calculate(contents), contents);
        File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.md5, {c.id}, File::getFileType(orig));
        ctx.repo->putObject(c);
        ctx.repo->putObject(f);
        /// NOTE(review): unlike the regular file path below, no callback(0, 0, 1) is made here - confirm whether these entries should count as written files
        return f.id;
    }

    if (!std::filesystem::is_regular_file(orig))
        throw Exception(orig.u8string() + " is a special file, not saving");

    std::ifstream ifstream(orig, std::ios::in | std::ios::binary);
    if (!ifstream) throw Exception("Couldn't open " + orig.u8string() + " for reading");

    std::unique_ptr<Chunker> chunker = ChunkerFactory::getChunker(ctx.repo->getConfig(), ifstream.rdbuf());

    /// The option doesn't change while the file is read, so look it up once instead of per chunk
    const bool dedup = ctx.repo->getConfig().getStr("dedup") == "on";

    MD5 fileHash;
    std::vector<Object::idType> fileChunks;
    unsigned long long size = 0;

    for (auto chunkp: *chunker) {
        /// Exit when asked to
        if (Signals::shouldQuit) break;

        Object::idType chunkId;
        size += chunkp.second.size();
        if (dedup && ctx.repo->exists(Object::ObjectType::Chunk, chunkp.first)) {
            /// If the chunk already exists, reuse it
            chunkId = ctx.repo->getObjectId(Object::ObjectType::Chunk, chunkp.first);
            callback(0, chunkp.second.size(), 0);
        } else {
            /// Otherwise, write it
            Chunk c(ctx.repo->getId(), chunkp.first, chunkp.second);
            chunkId = c.id;
            callback(c.data.size(), 0, 0);
            ctx.repo->putObject(c);
        }
        fileHash.feedData(chunkp.second);
        fileChunks.emplace_back(chunkId);
    }

    /// We might have exited in the loop before, so we don't save an incomplete file
    if (Signals::shouldQuit) throw Exception("Quitting!");

    /// The chunked bytes have to add up to the current size of the file
    if (size != File::getFileSize(orig)) {
        throw Exception("Something really bad happened or file " + orig.u8string() + " changed during backup");
    }

    File f(ctx.repo->getId(), saveAs, size, File::getFileMtime(orig), fileHash.getHash(), fileChunks, File::getFileType(orig));
    ctx.repo->putObject(f);
    callback(0, 0, 1);
    return f.id;
}

28
src/commands/CommandRun.h Normal file
View File

@@ -0,0 +1,28 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDRUN_H
#define SEMBACKUP_COMMANDRUN_H
#include "Command.h"
#include "CommandsCommon.h"
/// Runs the backup according to the config in the Repository
class CommandRun : public Command {
public:
    /// Constructs the command
    CommandRun();

    /// Runs the backup with Context \p ctx
    void run(Context ctx) override;

private:
    /// Internal function to chunk the file and save it
    /// \param orig     Absolute path to the file
    /// \param saveAs   UTF-8 encoded file name to save as
    /// \param callback Stats callback
    /// \param ctx      Context providing the repository to save into
    /// \return ID of the saved file
    Object::idType backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, CommandsCommon::workerStatsFunction &callback, Context ctx);
};
#endif//SEMBACKUP_COMMANDRUN_H

View File

@@ -0,0 +1,67 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandsCommon.h"
#include <fstream>
#include <regex>
#include "../Exception.h"
#include "../Signals.h"
/// Adds the reported amounts to the matching counters of \p to.
/// \param bytesWritten Number of bytes to add to the written counter
/// \param bytesSkipped Number of bytes to add to the skipped counter
/// \param filesWritten Number of files to add to the written files counter
/// \param to           Statistics to accumulate into
void CommandsCommon::workerCallback(unsigned long long int bytesWritten, unsigned long long int bytesSkipped, unsigned long long int filesWritten, WorkerStats &to) {
    to.bytesWritten.fetch_add(bytesWritten);
    to.bytesSkipped.fetch_add(bytesSkipped);
    to.filesWritten.fetch_add(filesWritten);
}
/// Checks if the UTF-8 string of \p p starts with the UTF-8 string of \p prefix.
/// The comparison is done character by character, not per path component,
/// so "a/b" is also a prefix of "a/bc", and an empty prefix matches every path.
/// \param prefix Constant reference to the prefix path
/// \param p      Constant reference to the checked path
/// \return True if \p p has \p prefix as its prefix, False otherwise
bool CommandsCommon::isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p) {
    /// Convert each path only once: u8string() builds a new string on every call
    const auto prefixStr = prefix.u8string();
    const auto pathStr = p.u8string();

    if (prefixStr.size() > pathStr.size()) return false;
    return pathStr.compare(0, prefixStr.size(), prefixStr) == 0;
}
/// Iterates through the entries of \p dir, spawning a \p processFile task for
/// every entry that isn't ignored and a recursive task for every subdirectory.
/// A directory containing a ".nobackup" file is not processed at all; every
/// line of an ".ignore" file is added to the ignore rules, which are regular
/// expressions matched against the whole file name of each entry and which
/// are passed on to the subdirectories.
/// \param dir         Const reference to the path of directory to iterate through
/// \param ignore      Ignore rules inherited from the parent directories
/// \param spawner     Function to spawn other tasks
/// \param processFile Task to spawn on found entries
/// \throws Exception if \p dir is not a directory
void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile) {
    if (!std::filesystem::is_directory(dir)) throw Exception(dir.u8string() + " is not a directory!");

    /// Don't process the directory if it has a ".nobackup" file
    if (std::filesystem::exists(dir / ".nobackup")) return;

    /// If it has an .ignore file, add every line of it into our ignore vector
    if (std::filesystem::exists(dir / ".ignore")) {
        std::ifstream ignorefile(dir / ".ignore", std::ios::in);
        std::string line;
        while (std::getline(ignorefile, line)) {
            ignore.emplace_back(line);
        }
    }

    /// Compile the rules once per directory: building a std::regex is expensive
    /// and used to be repeated for every rule on every entry
    std::vector<std::regex> ignoreRules;
    ignoreRules.reserve(ignore.size());
    for (const auto &rule: ignore) ignoreRules.emplace_back(rule);

    /// For each directory entry...
    for (const auto &dirEntry: std::filesystem::directory_iterator(dir)) {
        /// Break in case exit was requested by the user
        if (Signals::shouldQuit) break;

        /// Don't process the entry if it matches any of the ignore rules
        const auto entryName = dirEntry.path().filename().u8string();
        const bool ignored = std::any_of(ignoreRules.begin(), ignoreRules.end(), [&entryName](const std::regex &rule) {
            return std::regex_match(entryName, rule);
        });
        if (ignored) continue;

        /// If it's a directory, spawn a task to process the entries in it
        if (!dirEntry.is_symlink() && dirEntry.is_directory()) {
            spawner([dirEntry, ignore, spawner, processFile]() {
                processDirWithIgnore(dirEntry.path(), ignore, spawner, processFile);
            });
            /// Don't save the dir if it has a .nobackup file
            if (std::filesystem::exists(dirEntry.path() / ".nobackup")) continue;
        }

        /// Spawn a task to process each individual file
        spawner([processFile, dirEntry]() {
            processFile(dirEntry);
        });
    }
}

View File

@@ -0,0 +1,48 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDSCOMMON_H
#define SEMBACKUP_COMMANDSCOMMON_H
#include <atomic>
#include <filesystem>
#include <functional>
namespace CommandsCommon {
    /// Callback for reporting progress, its arguments are:
    /// bytes written, bytes skipped, files written
    using workerStatsFunction = std::function<void(unsigned long long, unsigned long long, unsigned long long)>;

    /// Counters updated by the worker threads
    struct WorkerStats {
        std::atomic<unsigned long long> bytesWritten{0};
        std::atomic<unsigned long long> bytesSkipped{0};
        std::atomic<unsigned long long> filesWritten{0};
    };

    /// Counters updated by the task that looks for the work to do
    struct RunnerStats {
        std::atomic<unsigned long long> bytesToSave{0};
        std::atomic<unsigned long long> filesToSaveCount{0};
        std::atomic<unsigned long long> filesSkipped{0};
    };

    /// Internal function for recursive directory processing, taking into account ".ignore" and ".nobackup" files
    /// \param dir         Const reference to the path of directory to iterate through
    /// \param ignore      List of files to ignore
    /// \param spawner     Function to spawn other tasks
    /// \param processFile Task to spawn on found files
    void processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile);

    /// Checks if \p p has \p prefix as prefix
    /// \param prefix Constant reference to the prefix path
    /// \param p      Constant reference to the checked path
    /// \return True if \p p contains \p prefix at its prefix, False otherwise
    bool isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p);

    /// Adds the given amounts to the counters of \p to
    /// \param bytesWritten Bytes written to add
    /// \param bytesSkipped Bytes skipped to add
    /// \param filesWritten Files written to add
    /// \param to           Statistics to add to
    void workerCallback(unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten, WorkerStats &to);
}// namespace CommandsCommon
#endif//SEMBACKUP_COMMANDSCOMMON_H