This commit is contained in:
2023-06-02 12:51:08 +02:00
commit 0e355fbe42
142 changed files with 10281 additions and 0 deletions

67
.clang-format Normal file
View File

@@ -0,0 +1,67 @@
# Generated from CLion C/C++ Code Style settings
BasedOnStyle: LLVM
AccessModifierOffset: -4
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: None
AlignOperands: Align
AllowAllArgumentsOnNextLine: false
AllowAllConstructorInitializersOnNextLine: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: Always
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: Always
AllowShortLambdasOnASingleLine: All
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterReturnType: None
AlwaysBreakTemplateDeclarations: Yes
SeparateDefinitionBlocks: Always
BreakBeforeBraces: Custom
# Per-construct brace placement; consulted because BreakBeforeBraces is Custom.
# The sub-options have to be nested under BraceWrapping to belong to it.
BraceWrapping:
  AfterCaseLabel: false
  AfterClass: false
  AfterControlStatement: Never
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterUnion: false
  BeforeCatch: false
  BeforeElse: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
BreakBeforeBinaryOperators: None
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: BeforeColon
BreakInheritanceList: BeforeColon
ColumnLimit: 0
CompactNamespaces: true
ContinuationIndentWidth: 8
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth: 4
KeepEmptyLinesAtTheStartOfBlocks: false
MaxEmptyLinesToKeep: 2
NamespaceIndentation: All
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: true
PointerAlignment: Right
ReflowComments: false
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 0
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
TabWidth: 4
UseTab: Never

147
.clang-tidy Normal file
View File

@@ -0,0 +1,147 @@
# Generated from CLion Inspection settings
---
Checks: '-*,
bugprone-argument-comment,
bugprone-assert-side-effect,
bugprone-bad-signal-to-kill-thread,
bugprone-branch-clone,
bugprone-copy-constructor-init,
bugprone-dangling-handle,
bugprone-dynamic-static-initializers,
bugprone-fold-init-type,
bugprone-forward-declaration-namespace,
bugprone-forwarding-reference-overload,
bugprone-inaccurate-erase,
bugprone-incorrect-roundings,
bugprone-integer-division,
bugprone-lambda-function-name,
bugprone-macro-parentheses,
bugprone-macro-repeated-side-effects,
bugprone-misplaced-operator-in-strlen-in-alloc,
bugprone-misplaced-pointer-arithmetic-in-alloc,
bugprone-misplaced-widening-cast,
bugprone-move-forwarding-reference,
bugprone-multiple-statement-macro,
bugprone-no-escape,
bugprone-not-null-terminated-result,
bugprone-parent-virtual-call,
bugprone-posix-return,
bugprone-reserved-identifier,
bugprone-sizeof-container,
bugprone-sizeof-expression,
bugprone-spuriously-wake-up-functions,
bugprone-string-constructor,
bugprone-string-integer-assignment,
bugprone-string-literal-with-embedded-nul,
bugprone-suspicious-enum-usage,
bugprone-suspicious-include,
bugprone-suspicious-memory-comparison,
bugprone-suspicious-memset-usage,
bugprone-suspicious-missing-comma,
bugprone-suspicious-semicolon,
bugprone-suspicious-string-compare,
bugprone-swapped-arguments,
bugprone-terminating-continue,
bugprone-throw-keyword-missing,
bugprone-too-small-loop-variable,
bugprone-undefined-memory-manipulation,
bugprone-undelegated-constructor,
bugprone-unhandled-self-assignment,
bugprone-unused-raii,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
cert-dcl21-cpp,
cert-dcl58-cpp,
cert-err34-c,
cert-err52-cpp,
cert-err60-cpp,
cert-flp30-c,
cert-msc50-cpp,
cert-msc51-cpp,
cert-str34-c,
cppcoreguidelines-interfaces-global-init,
cppcoreguidelines-narrowing-conversions,
cppcoreguidelines-pro-type-member-init,
cppcoreguidelines-pro-type-static-cast-downcast,
cppcoreguidelines-slicing,
google-default-arguments,
google-explicit-constructor,
google-runtime-operator,
hicpp-exception-baseclass,
hicpp-multiway-paths-covered,
misc-misplaced-const,
misc-new-delete-overloads,
misc-no-recursion,
misc-non-copyable-objects,
misc-throw-by-value-catch-by-reference,
misc-unconventional-assign-operator,
misc-uniqueptr-reset-release,
modernize-avoid-bind,
modernize-concat-nested-namespaces,
modernize-deprecated-headers,
modernize-deprecated-ios-base-aliases,
modernize-loop-convert,
modernize-make-shared,
modernize-make-unique,
modernize-pass-by-value,
modernize-raw-string-literal,
modernize-redundant-void-arg,
modernize-replace-auto-ptr,
modernize-replace-disallow-copy-and-assign-macro,
modernize-replace-random-shuffle,
modernize-return-braced-init-list,
modernize-shrink-to-fit,
modernize-unary-static-assert,
modernize-use-auto,
modernize-use-bool-literals,
modernize-use-emplace,
modernize-use-equals-default,
modernize-use-equals-delete,
modernize-use-nodiscard,
modernize-use-noexcept,
modernize-use-nullptr,
modernize-use-override,
modernize-use-transparent-functors,
modernize-use-uncaught-exceptions,
mpi-buffer-deref,
mpi-type-mismatch,
openmp-use-default-none,
performance-faster-string-find,
performance-for-range-copy,
performance-implicit-conversion-in-loop,
performance-inefficient-algorithm,
performance-inefficient-string-concatenation,
performance-inefficient-vector-operation,
performance-move-const-arg,
performance-move-constructor-init,
performance-no-automatic-move,
performance-noexcept-move-constructor,
performance-trivially-destructible,
performance-type-promotion-in-math-fn,
performance-unnecessary-copy-initialization,
performance-unnecessary-value-param,
portability-simd-intrinsics,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-size-empty,
readability-convert-member-functions-to-static,
readability-delete-null-pointer,
readability-deleted-default,
readability-inconsistent-declaration-parameter-name,
readability-make-member-function-const,
readability-misleading-indentation,
readability-misplaced-array-index,
readability-non-const-parameter,
readability-redundant-control-flow,
readability-redundant-declaration,
readability-redundant-function-ptr-dereference,
readability-redundant-smartptr-get,
readability-redundant-string-cstr,
readability-redundant-string-init,
readability-simplify-subscript-expr,
readability-static-accessed-through-instance,
readability-static-definition-in-anonymous-namespace,
readability-string-compare,
readability-uniqueptr-delete-release,
readability-use-anyofallof'

586
.gitignore vendored Normal file
View File

@@ -0,0 +1,586 @@
doc
html
latex
build
testBuild
test*.txt
# Taken from:
# https://github.com/github/gitignore
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf
# AWS User-specific
.idea/**/aws.xml
# Generated files
.idea/**/contentModel.xml
# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
# Gradle
.idea/**/gradle.xml
.idea/**/libraries
# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr
# CMake
cmake-build-*/
# Mongo Explorer plugin
.idea/**/mongoSettings.xml
# File-based project format
*.iws
# IntelliJ
out/
# mpeltonen/sbt-idea plugin
.idea_modules/
# JIRA plugin
atlassian-ide-plugin.xml
# Cursive Clojure plugin
.idea/replstate.xml
# SonarLint plugin
.idea/sonarlint/
# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties
# Editor-based Rest Client
.idea/httpRequests
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.tlog
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
*.vbp
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
*.dsw
*.dsp
# Visual Studio 6 technical files
*.ncb
*.aps
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
*.pyc
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# Visual Studio History (VSHistory) files
.vshistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# VS Code files for those working on multiple tools
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
*.code-workspace
# Local History for Visual Studio Code
.history/
# Windows Installer files from build outputs
*.cab
*.msi
*.msix
*.msm
*.msp
# JetBrains Rider
*.sln.iml
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
# Local History for Visual Studio Code
.history/
# Built Visual Studio Code Extensions
*.vsix

8
.idea/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

1
.idea/.name generated Normal file
View File

@@ -0,0 +1 @@
sembackup

2
.idea/backup.iml generated Normal file
View File

@@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module classpath="CMake" type="CPP_MODULE" version="4" />

7
.idea/codeStyles/Project.xml generated Normal file
View File

@@ -0,0 +1,7 @@
<component name="ProjectCodeStyleConfiguration">
<code_scheme name="Project" version="173">
<clangFormatSettings>
<option name="ENABLED" value="true" />
</clangFormatSettings>
</code_scheme>
</component>

5
.idea/codeStyles/codeStyleConfig.xml generated Normal file
View File

@@ -0,0 +1,5 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="USE_PER_PROJECT_SETTINGS" value="true" />
</state>
</component>

4
.idea/misc.xml generated Normal file
View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
</project>

8
.idea/modules.xml generated Normal file
View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/backup.iml" filepath="$PROJECT_DIR$/.idea/backup.iml" />
</modules>
</component>
</project>

28
CMakeLists.txt Normal file
View File

@@ -0,0 +1,28 @@
# Oldest CMake version this project is configured for.
cmake_minimum_required(VERSION 3.22)
# Optimisation flags are added for the whole directory, independent of CMAKE_BUILD_TYPE,
# so both executables defined below are always compiled and linked with -Ofast.
add_compile_options(-Ofast)
add_link_options(-Ofast)
# Alternative: same flags with link-time optimisation.
# add_compile_options(-Ofast -flto)
# add_link_options(-Ofast -flto)
# Alternative: unoptimised debug build with extra warnings and libstdc++ debug mode.
#add_compile_options(-Wall -O0 -Wextra -pedantic -Wshadow -Wformat=2 -Wfloat-equal -D_GLIBCXX_DEBUG -Wconversion -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -g -rdynamic)
# Alternative: address/undefined-behaviour sanitizers (the flags are needed when compiling and when linking).
#add_compile_options(-fsanitize=address -fsanitize=undefined -fno-sanitize-recover)
#add_link_options(-fsanitize=address -fsanitize=undefined -fno-sanitize-recover)
#add_link_options(-rdynamic)
project(sembackup)
# The sources are built as C++17.
set(CMAKE_CXX_STANDARD 17)
# External libraries; they provide the OpenSSL::SSL and ZLIB::ZLIB targets linked further down.
find_package(OpenSSL REQUIRED)
find_package(ZLIB REQUIRED)
add_executable(sembackup src/Config.cpp src/main.cpp src/repo/Object.cpp src/repo/Object.h src/repo/Repository.cpp src/repo/Repository.h src/repo/FileRepository.cpp src/repo/FileRepository.h src/repo/objects/Archive.cpp src/repo/objects/Archive.h src/repo/objects/File.cpp src/repo/objects/File.h src/repo/objects/Chunk.cpp src/repo/objects/Chunk.h src/repo/Serialize.h src/chunkers/Chunker.cpp src/chunkers/Chunker.h src/chunkers/ConstChunker.cpp src/chunkers/ConstChunker.h src/crypto/MD5.cpp src/crypto/MD5.h src/change_detectors/ChangeDetector.cpp src/change_detectors/ChangeDetector.h src/change_detectors/SizeChangeDetector.cpp src/change_detectors/SizeChangeDetector.h src/change_detectors/EditTimeChangeDetector.cpp src/change_detectors/EditTimeChangeDetector.h src/change_detectors/ChangeDetectorFactory.cpp src/change_detectors/ChangeDetectorFactory.h src/Signals.h src/Signals.cpp src/filters/Filter.cpp src/filters/Filter.h src/filters/FilterShift.cpp src/filters/FilterShift.h src/filters/FilterShiftSecret.cpp src/filters/FilterShiftSecret.h src/repo/objects/FileBuffer.cpp src/repo/objects/FileBuffer.h src/filters/FilterZlib.cpp src/filters/FilterZlib.h src/filters/FilterAES.cpp src/filters/FilterAES.h src/chunkers/BuzhashChunker.cpp src/chunkers/BuzhashChunker.h src/chunkers/Buzhash.cpp src/chunkers/Buzhash.h src/crypto/AES.cpp src/crypto/AES.h src/chunkers/ChunkerFactory.cpp src/chunkers/ChunkerFactory.h src/Exception.cpp src/Exception.h src/filters/FilterFactory.h src/filters/FilterContainer.h src/filters/FilterFactory.cpp src/filters/FilterContainer.cpp src/change_detectors/ChangeDetectorContainer.cpp src/change_detectors/ChangeDetectorContainer.h src/Progress.cpp src/Progress.h src/change_detectors/ContentsChangeDetector.cpp src/change_detectors/ContentsChangeDetector.h src/change_detectors/ComparableFile.cpp src/change_detectors/ComparableFile.h src/RunningAverage.cpp src/RunningAverage.h src/Diff.cpp src/Diff.h src/ThreadPool.cpp src/filters/CheckFilter.cpp 
src/filters/CheckFilter.h src/crypto/CRC32.cpp src/crypto/CRC32.h src/change_detectors/TypeChangeDetector.cpp src/change_detectors/TypeChangeDetector.h src/RunningDiffAverage.cpp src/RunningDiffAverage.h src/BytesFormatter.cpp src/BytesFormatter.h src/Logger.cpp src/Logger.h src/Context.h src/commands/Command.cpp src/commands/Command.h src/commands/CommandRun.cpp src/commands/CommandRun.h src/commands/CommandsCommon.cpp src/commands/CommandsCommon.h src/commands/CommandRestore.cpp src/commands/CommandRestore.h src/commands/CommandDiff.cpp src/commands/CommandDiff.h src/commands/CommandList.cpp src/commands/CommandList.h src/commands/CommandListFiles.cpp src/commands/CommandListFiles.h)
add_executable(test src/Config.cpp src/crypto/MD5.cpp src/change_detectors/EditTimeChangeDetector.cpp tests/runTests.cpp tests/utils/Runnable.cpp tests/utils/HelpfulAssertTest.cpp tests/utils/HelpfulAssertTest.h tests/utils/TestGroup.cpp tests/utils/TestGroup.h tests/utils/Test.cpp tests/utils/Test.h tests/utils/TestGroupGenerator.h src/repo/Object.cpp src/repo/Object.h src/repo/Repository.cpp src/repo/Repository.h src/repo/FileRepository.cpp src/repo/FileRepository.h src/repo/objects/Archive.cpp src/repo/objects/Archive.h src/repo/objects/File.cpp src/repo/objects/File.h src/repo/objects/Chunk.cpp src/repo/objects/Chunk.h tests/repo/ChunkTest.cpp tests/repo/ChunkTest.h tests/repo/FileRepositoryTest.cpp tests/repo/FileRepositoryTest.h tests/utils/Cleaner.cpp tests/utils/Cleaner.h src/repo/Serialize.h src/chunkers/Chunker.cpp src/chunkers/Chunker.h src/chunkers/ConstChunker.cpp src/chunkers/ConstChunker.h tests/crypto/MD5Test.cpp tests/crypto/MD5Test.h tests/fulltests/FullTest.cpp tests/fulltests/FullTest.h src/change_detectors/ChangeDetector.cpp src/change_detectors/ChangeDetector.h src/change_detectors/SizeChangeDetector.cpp src/change_detectors/SizeChangeDetector.h src/change_detectors/ChangeDetectorFactory.cpp src/change_detectors/ChangeDetectorFactory.h src/filters/Filter.cpp src/filters/Filter.h src/filters/FilterShift.cpp src/filters/FilterShift.h src/filters/FilterShiftSecret.cpp src/filters/FilterShiftSecret.h src/repo/objects/FileBuffer.cpp src/repo/objects/FileBuffer.h src/filters/FilterZlib.cpp src/filters/FilterZlib.h src/filters/FilterAES.cpp src/filters/FilterAES.h src/chunkers/BuzhashChunker.cpp src/chunkers/BuzhashChunker.h src/chunkers/Buzhash.cpp src/chunkers/Buzhash.h tests/BuzhashTest.cpp tests/BuzhashTest.h src/crypto/AES.cpp src/crypto/AES.h tests/crypto/AESTest.cpp tests/crypto/AESTest.h src/chunkers/ChunkerFactory.cpp src/chunkers/ChunkerFactory.h src/Exception.cpp src/Exception.h src/filters/FilterFactory.h src/filters/FilterContainer.h 
src/filters/FilterFactory.cpp src/filters/FilterContainer.cpp src/change_detectors/ChangeDetectorContainer.cpp src/change_detectors/ChangeDetectorContainer.h src/Progress.cpp src/Progress.h src/change_detectors/ContentsChangeDetector.cpp src/change_detectors/ContentsChangeDetector.h src/change_detectors/ComparableFile.cpp src/change_detectors/ComparableFile.h src/RunningAverage.cpp src/RunningAverage.h src/Diff.cpp src/Diff.h src/ThreadPool.cpp tests/CLITestWrapper.cpp tests/CLITestWrapper.h src/filters/CheckFilter.cpp src/filters/CheckFilter.h src/crypto/CRC32.cpp src/crypto/CRC32.h src/change_detectors/TypeChangeDetector.cpp src/change_detectors/TypeChangeDetector.h src/RunningDiffAverage.cpp src/RunningDiffAverage.h src/BytesFormatter.cpp src/BytesFormatter.h src/Logger.cpp src/Logger.h src/Context.h src/commands/Command.cpp src/commands/Command.h src/commands/CommandRun.cpp src/commands/CommandRun.h src/commands/CommandsCommon.cpp src/commands/CommandsCommon.h src/commands/CommandRestore.cpp src/commands/CommandRestore.h src/commands/CommandList.cpp src/commands/CommandList.h src/commands/CommandListFiles.cpp src/commands/CommandListFiles.h)
# Always build the main executable before the test executable
# (presumably because the CLI tests run the built sembackup binary - confirm in tests/CLITestWrapper.cpp).
add_dependencies(test sembackup)
# src/ThreadPool.cpp is compiled into both targets; presumably it uses std::thread (confirm),
# which needs the platform thread library on toolchains where it is not part of libc.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
target_link_libraries(sembackup OpenSSL::SSL ZLIB::ZLIB Threads::Threads)
target_link_libraries(test OpenSSL::SSL ZLIB::ZLIB Threads::Threads)
# The test executable is compiled with the TEST macro defined.
target_compile_definitions(test PUBLIC TEST)

2736
Doxyfile Normal file

File diff suppressed because it is too large Load Diff

102
README.md Normal file
View File

@@ -0,0 +1,102 @@
# Backup app
## Key features
- Deduplicated backups with additional compression and encryption
- Files are split into chunks and stored in a repository as a list of
pointers to these chunks
- These chunks are reused for all files in the repository, based on
their hash matching.
- If a file hasn't changed when an archive is created, it is reused from
a previous archive
- Content defined chunking algorithm based on a rolling hash
- Unlike when splitting file in fixed size chunks, still works
when there is an insertion/deletion of bytes in middle/beginning
of the file
- These Files, Chunks and Archives are stored in object storage
Repository
- Default implementation - FileRepository, stores these objects in
the file system
- Possible other implementations - database, cloud object storage…
- Multithreading
- Directories are ignored if they have a `.nobackup` file
- Files are ignored in a directory recursively based on a `.ignore`
file - each line contains one regex rule, and any directory entry
matching a rule is ignored
# Quick start
Initialize a repository with compression and encryption (you can also
specify other options in the same way; they will be written to the
repository where possible. Changing the options of an already
existing repo is not implemented so far)
sembackup init --repo <target dir> --compression zlib --compression-level 4 --encryption aes --password <password> --salt <random salt>
Run a backup
sembackup run --from <source dir> --repo <target dir> --password <password>
List available archives
sembackup list --repo <repo dir> --password <password>
List files in an archive
sembackup list-files --repo <repo dir> --password <password> --aid <archive id>
Restore a backup
sembackup restore --repo <repo dir> --password <password> --aid <archive id> --to <destination>
Compare source dir with latest archive
sembackup diff --repo <repo dir> --password <password> --from <source dir>
Compare source dir with an archive
sembackup diff --repo <repo dir> --password <password> --from <source dir> --aid <archive id>
Compare two archives
sembackup diff --repo <repo dir> --password <password> --from <source dir> --aid <archive id> --aid2 <archive id>
Compare subdirectory in the source dir with latest archive
sembackup diff --repo <repo dir> --password <password> --from <source dir> --prefix <subdir>
# Data format
All data is represented as objects, stored in a repository.
An `Archive` object represents a snapshot of the file system in the
moment of its creation, and consists of a list of pointers (Object ids)
to `File` objects.
`File` object consists of its basic metadata, and a list of chunks,
identified by their ids, which can be shared between multiple files (and
within the same file) if their MD5 hashes match.
`Chunk` object is a binary blob, identified by its MD5 hash.
These objects are children of `Object`, providing a `getKey()` method,
(name for `Archive`, path for `File`, and MD5 hash for `Chunk`) which is
used by `Repository` to make them easily accessible.
In default (and so far the only) repository implementation
`FileRepository` these objects are grouped together into files of size
approximately `repo-target` MB (by default 128), and there exists a
key-value index of indexed objects written into `index`, and an
`offsets` file recording the location and offset of each object in the
file system.

37
src/BytesFormatter.cpp Normal file
View File

@@ -0,0 +1,37 @@
//
// Created by Stepan Usatiuk on 13.05.2023.
//
#include "BytesFormatter.h"
#include <iomanip>
#include <sstream>
/// Scales \p bytes to the largest binary unit it strictly exceeds
///
/// Values above 1 TiB / GiB / MiB / KiB are divided by that unit and rendered with
/// two decimal places; anything else is rendered as a whole number of "Bytes".
/// \param bytes Number of bytes
/// \return BytesFormat holding the rendered number and the unit name
BytesFormatter::BytesFormat BytesFormatter::format(unsigned long long int bytes) {
    struct Unit {
        unsigned long long size;///< Number of bytes in one unit
        const char *name;       ///< Text returned as the unit of measure
    };

    // The sizes are computed in unsigned long long: unsigned long is only guaranteed
    // to be 32 bits wide, and 1024UL * 1024 * 1024 * 1024 wraps to 0 in that case,
    // which would make every non-zero value look larger than a TiB.
    static constexpr Unit units[] = {
            {1024ULL * 1024 * 1024 * 1024, "TiB"},
            {1024ULL * 1024 * 1024, "GiB"},
            {1024ULL * 1024, "MiB"},
            {1024ULL, "KiB"},
    };

    std::stringstream outNum;
    outNum << std::fixed << std::setprecision(2);

    // Largest unit first, so the first match is the most compact representation
    for (const Unit &unit: units) {
        // Strict comparison kept as it was: exactly 1024 bytes is still shown as "1024 Bytes"
        if (bytes > unit.size) {
            outNum << static_cast<double>(bytes) / static_cast<double>(unit.size);
            return {outNum.str(), unit.name};
        }
    }

    // Integer output is not affected by the fixed/precision settings
    outNum << bytes;
    return {outNum.str(), "Bytes"};
}
std::string BytesFormatter::formatStr(unsigned long long int bytes) {
auto fmt = format(bytes);
return fmt.number + " " + fmt.prefix;
}

31
src/BytesFormatter.h Normal file
View File

@@ -0,0 +1,31 @@
//
// Created by Stepan Usatiuk on 13.05.2023.
//
#ifndef SEMBACKUP_BYTESFORMATTER_H
#define SEMBACKUP_BYTESFORMATTER_H
#include <string>
/// Utility class to format byte values according to their magnitude
/// Helper with only static functions that renders byte counts in a human-readable form
class BytesFormatter {
public:
    /// Result of format(): the rendered number and its unit, kept apart
    struct BytesFormat {
        std::string number;///< Numeric part of the value, already rendered as text
        std::string prefix;///< Unit of measure that belongs to the number
    };

    /// Splits a byte count into a scaled number and a matching unit
    /// \param bytes Number of bytes to format
    /// \return BytesFormat with the number and the unit
    static BytesFormat format(unsigned long long bytes);

    /// Renders a byte count as one string
    /// \param bytes Number of bytes to format
    /// \return The scaled number, a single space, and the unit of measure
    static std::string formatStr(unsigned long long bytes);
};
#endif//SEMBACKUP_BYTESFORMATTER_H

81
src/Config.cpp Normal file
View File

@@ -0,0 +1,81 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#include "Config.h"
#include "Exception.h"
#include "repo/Serialize.h"
#include <sstream>
/// Adds the key \p k with value \p v to the config
///
/// Setting a key again to the value it already holds is a no-op.
/// \param k Config key, has to be present in keys
/// \param v Value to store; for INT keys the whole string has to be an integer
/// \return Reference to itself
/// \throws Exception if the key is unknown, is already set to a different value,
///         or is of INT type and \p v is not an integer
Config &Config::add(const std::string &k, const std::string &v) {
    if (keys.count(k) == 0) throw Exception("Unknown key " + k);

    // Explicit braces: the previous brace-less form only worked because the
    // dangling else happened to bind to the inner if
    const auto existing = data.find(k);
    if (existing != data.end()) {
        if (existing->second != v) throw Exception("Trying to rewrite config!");
        return *this;
    }

    switch (keys.at(k).type) {
        case KeyType::STRING:
        case KeyType::LIST:
            // No validation for free-form values
            break;
        case KeyType::INT: {
            std::size_t consumed = 0;
            try {
                static_cast<void>(std::stoi(v, &consumed));
            } catch (...) {
                throw Exception("Can't convert " + k + " to integer!");
            }
            // std::stoi stops at the first non-numeric character, so "12abc" would
            // otherwise pass the check; require the whole value to be consumed
            if (consumed != v.size()) throw Exception("Can't convert " + k + " to integer!");
            break;
        }
    }

    data.emplace(k, v);
    return *this;
}
/// Looks up \p k with getStr() and converts the result with std::stoi
/// \param k Config key
/// \return Value of the key as an int
/// \note Whatever getStr() or std::stoi throw is propagated unchanged
int Config::getInt(const std::string &k) const {
    const std::string text = getStr(k);
    return std::stoi(text);
}
/// Splits the value of \p k on commas
/// \param k Config key, looked up with getStr()
/// \return The comma-separated items in order; empty items are left out
std::vector<std::string> Config::getList(const std::string &k) const {
    const std::string raw = getStr(k);
    std::vector<std::string> items;

    std::string::size_type begin = 0;
    while (begin <= raw.size()) {
        const std::string::size_type comma = raw.find(',', begin);
        const std::string::size_type stop = (comma == std::string::npos) ? raw.size() : comma;

        // Skip empty items ("a,,b", leading or trailing commas)
        if (stop > begin) items.emplace_back(raw, begin, stop - begin);

        if (comma == std::string::npos) break;
        begin = comma + 1;
    }

    return items;
}
/// Returns the value of \p k: the explicitly set one if present, otherwise the key's default
/// \param k Config key
/// \return Stored value, or the default value from keys
/// \throws Exception if the key is neither set nor known, or if it is not set and has no default
std::string Config::getStr(const std::string &k) const {
    const auto stored = data.find(k);
    if (stored != data.end()) return stored->second;

    // Report unknown keys the same way add() does instead of letting
    // keys.at() escape with std::out_of_range
    if (keys.count(k) == 0) throw Exception("Unknown key " + k);

    const auto &spec = keys.at(k);
    if (spec.defaultval.has_value()) return spec.defaultval.value();

    throw Exception("Option " + k + " not specified and no default value exists!");
}
/// Tells whether a value can be obtained for \p k
/// \param k Config key
/// \return True if the key is set explicitly or its entry in keys has a default value
/// \note keys is only consulted (via at()) when the key is not set explicitly
bool Config::exists(const std::string &k) const {
    if (data.count(k) > 0) {
        return true;
    }
    return keys.at(k).defaultval.has_value();
}
/// Creates a Config without any values set
Config::Config() = default;

/// Deserialization constructor, fills the stored values via Serialize::deserialize
/// \param in  Iterator to the serialized data, passed on by non-const reference
///            (presumably advanced past the consumed bytes - confirm in Serialize.h)
/// \param end End of the serialized data
Config::Config(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
        : data(Serialize::deserialize<decltype(data)>(in, end)) {}
void Config::serialize(std::vector<char> &out) const {
std::vector<decltype(data)::value_type> temp;
for (const auto &d: data) {
if (keys.at(d.first).remember) {
temp.emplace_back(d);
}
}
Serialize::serialize(temp, out);
}
/// Merges \p config into this instance by add()-ing each of its pairs
/// Since add() is used, a key set to different values in both configs raises an Exception
void Config::merge(const Config &config) {
    for (const auto &[key, value]: config.data) add(key, value);
}

117
src/Config.h Normal file
View File

@@ -0,0 +1,117 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#ifndef SEMBACKUP_CONFIG_H
#define SEMBACKUP_CONFIG_H
#include <optional>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>
/// Utility class to manage configuration
/**
 * Also provides keys map for information about config keys
 * Serializable, remembers only the keys with remember option set in keys
 */
class Config {
public:
    /// Constructs an empty Config instance
    Config();

    /// Deserialization constructor
    Config(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);

    /// Adds a key \p k with value \p v to the config
    /// \param k Const reference to the config key
    /// \param v Config value
    /// \return Reference to itself
    /// \throws Exception if key is invalid or is already set with different value
    Config &add(const std::string &k, const std::string &v);

    /// Merges \p config to itself
    /// Adds every config pair from \p config to itself, throws on conflict
    /// \param config Constant reference to the source Config
    /// \throws Exception on merge conflict
    void merge(const Config &config);

    /// Returns an int from config key \p k
    /// \param k Constant reference to the key string
    /// \return Config int
    /// \throws Exception if key is invalid or value isn't an int
    int getInt(const std::string &k) const;

    /// Returns a string from config key \p k
    /// \param k Constant reference to the key string
    /// \return Config value for key
    /// \throws Exception if key is invalid
    std::string getStr(const std::string &k) const;

    /// Returns a list of strings delimited by commas from config key \p k
    /// \param k Constant reference to the key string
    /// \return Vector of strings
    /// \throws Exception if key is invalid
    std::vector<std::string> getList(const std::string &k) const;

    /// Checks if key \p k exists in the config
    /// \param k Constant reference to the key string
    /// \return True if key exists or its default value exists
    bool exists(const std::string &k) const;

    /// Serialization function
    void serialize(std::vector<char> &out) const;

    using serializable = std::true_type;

    /// Kind of value a config key holds, used for validation and for printing help
    enum class KeyType {
        STRING,
        INT,
        LIST
    };

    /// Struct to record key options
    struct keyopts {
        std::optional<std::string> defaultval;///< Key's default value
        KeyType type;                         ///< Key's type
        bool remember;                        ///< Whether the key should be serialized
        std::string info;                     ///< Printed in help
    };

    /// Used for printing help
    const static inline std::unordered_map<KeyType, std::string> KeyTypeToStr{{KeyType::STRING, "string"}, {KeyType::INT, "number"}, {KeyType::LIST, "comma-separated list"}};

    /// Default values and their metadata
    const static inline std::unordered_map<std::string, keyopts> keys{
            {"compression", {"none", KeyType::STRING, true, "Compression algorithm to use (zlib or none)"}},
            {"encryption", {"none", KeyType::STRING, true, "Encryption algorithm to use (aes or none)"}},
            {"compression-level", {"-1", KeyType::INT, true, "Compression level to use (0 to 9)"}},
            {"repo", {std::nullopt, KeyType::STRING, false, "Repository root"}},
            {"to", {std::nullopt, KeyType::STRING, false, "Destination of restore"}},
            {"from", {std::nullopt, KeyType::STRING, true, "Backed up folder"}},
            {"type", {"normal", KeyType::STRING, false, "Type of archive"}},
            {"aid", {std::nullopt, KeyType::INT, false, "ID of archive to restore/compare to"}},
            {"aid2", {std::nullopt, KeyType::INT, false, "ID of archive to compare with"}},
            {"threads", {std::nullopt, KeyType::INT, false, "Number of threads to use"}},
            {"prefix", {"", KeyType::STRING, false, "Prefix of files to compare"}},
            {"password", {std::nullopt, KeyType::STRING, false, "Encryption password"}},
            {"salt", {std::nullopt, KeyType::STRING, true, "Encryption salt"}},
            {"chunker", {"buzhash", KeyType::STRING, true, "Chunker to use (const, buzhash)"}},
            {"chunker-min", {"256", KeyType::INT, true, "Min chunk size in KB"}},
            {"chunker-max", {"4096", KeyType::INT, true, "Max chunk size in KB"}},
            {"chunker-mask", {"20", KeyType::INT, true, "Chunker hash bit mask (mask of n bits results in average chunk size of 2^n bytes)"}},
            {"repo-target", {"128", KeyType::INT, true, "Target size of files for FileRepository"}},
            {"full-period", {"2", KeyType::INT, true, "Interval between forced full backups"}},
            {"progress", {"pretty", KeyType::STRING, false, "How to print progress (simple, pretty, none)"}},
            {"verbose", {"1", KeyType::INT, false, "Message verbosity (0 - error, 1 - info, -1 - quiet)"}},
            {"dedup", {"on", KeyType::STRING, true, "Turns deduplication on/off"}},
            {"change-detectors", {"type,size,etime", KeyType::LIST, true, "Change detectors to use (in order)"}},
            {"diff-mode", {"normal", KeyType::STRING, false, "Diff mode (file or normal)"}},
    };

private:
    std::unordered_map<std::string, std::string> data;///< Explicitly set key/value pairs
};
#endif//SEMBACKUP_CONFIG_H

18
src/Context.h Normal file
View File

@@ -0,0 +1,18 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_CONTEXT_H
#define SEMBACKUP_CONTEXT_H
#include "Config.h"
#include "Logger.h"
#include "repo/Repository.h"
/// Groups a Logger pointer and a Repository pointer so they can be passed around together
/// NOTE(review): both members are raw pointers, presumably non-owning and expected to outlive the Context - confirm against callers
struct Context {
    Logger *logger;  ///< Logger instance
    Repository *repo;///< Repository instance
};
#endif//SEMBACKUP_CONTEXT_H

103
src/Diff.cpp Normal file
View File

@@ -0,0 +1,103 @@
//
// Created by Stepan Usatiuk on 06.05.2023.
//
#include "Diff.h"
#include "BytesFormatter.h"
#include "Exception.h"
#include "Signals.h"
#include "chunkers/BuzhashChunker.h"
/// Checks if a file is binary
/// Scans at most the first 2048 bytes of the file contents and reports true as soon as
/// a null byte is found; reaching the end of the stream or the limit first yields false
bool Diff::isBinary(const ComparableFile &c) {
    using Traits = std::streambuf::traits_type;

    const auto buffer = c.contents();
    const auto limit = std::min(c.bytes, 2048ULL);
    for (unsigned int inspected = 0; inspected < limit; inspected++) {
        const auto ch = buffer->sbumpc();
        if (ch == Traits::eof()) return false;
        if (Traits::to_char_type(ch) == '\0') return true;
    }
    return false;
}
/// Computes a human-readable difference between two files
/// If both files are binary the result of diffPercent() is returned, if only one of them is,
/// a fixed message is returned. Otherwise the files are compared line by line (ignoring line order):
/// every line of the first file without a counterpart in the second one is listed as
/// `<line number><<text>`, every unmatched line of the second file as `<line number>><text>`
/// \param c1 Constant reference to the first ComparableFile
/// \param c2 Constant reference to the second ComparableFile
/// \returns Difference message
/// \throws Exception if Signals::shouldQuit is set while reading the files
std::string Diff::diff(const ComparableFile &c1, const ComparableFile &c2) {
    // Each isBinary() call re-opens the file contents, so evaluate it once per file
    const bool binary1 = isBinary(c1);
    const bool binary2 = isBinary(c2);
    if (binary1 || binary2) {
        if (!(binary1 && binary2)) return "One of the files is binary, the other is not";
        return diffPercent(c1, c2);
    }

    auto b1 = c1.contents();
    auto b2 = c2.contents();
    std::istream is1(b1.get());
    std::istream is2(b2.get());

    std::multimap<std::string, unsigned long> f1lines;///< Unmatched lines of the first file -> line number
    std::multimap<std::string, unsigned long> f2diff; ///< Unmatched lines of the second file -> line number
    std::string line;

    unsigned long lineNo = 0;
    while (std::getline(is1, line)) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        f1lines.emplace(line, ++lineNo);
    }

    lineNo = 0;
    while (std::getline(is2, line)) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        // Count every line, not only the unmatched ones, so the reported number is the real line number
        ++lineNo;
        const auto match = f1lines.find(line);
        if (match != f1lines.end()) f1lines.erase(match);
        else
            f2diff.emplace(line, lineNo);
    }

    std::stringstream out;
    out << "\nLines only in first file: " << '\n';
    for (const auto &s: f1lines) {
        out << s.second << "<" << s.first << '\n';
    }
    out << "Lines only in second file: " << '\n';
    for (const auto &s: f2diff) {
        out << s.second << ">" << s.first << '\n';
    }
    out << "^^^\n";
    return out.str();
}
/// Estimates the difference between \p c1 and \p c2 in bytes
/// Both files are split into chunks with BuzhashChunker, chunks are matched by their hash
/// one-to-one, and the sizes of all chunks left without a counterpart are summed up
/// \param c1 Constant reference to the first ComparableFile
/// \param c2 Constant reference to the second ComparableFile
/// \returns Message of the form "at most <formatted size>"
/// \throws Exception if Signals::shouldQuit is set while chunking
std::string Diff::diffPercent(const ComparableFile &c1, const ComparableFile &c2) {
    auto b1 = c1.contents();
    auto b2 = c2.contents();
    BuzhashChunker ch1(b1.get(), 512 * 1024, 1024 * 1024, 19, 31);
    BuzhashChunker ch2(b2.get(), 512 * 1024, 1024 * 1024, 19, 31);

    std::multiset<std::string> ch1hashes;///< Chunks of the first file not (yet) matched
    std::multiset<std::string> ch2diff;  ///< Chunks of the second file without a match
    std::unordered_map<std::string, unsigned long long> hashsize;

    for (auto chunkp: ch1) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        if (chunkp.second.empty()) continue;
        std::string md5(chunkp.first.begin(), chunkp.first.end());
        ch1hashes.emplace(md5);
        hashsize[md5] = chunkp.second.size();
    }

    for (auto chunkp: ch2) {
        /// Exit when asked to
        if (Signals::shouldQuit) throw Exception("Quitting");
        if (chunkp.second.empty()) continue;
        std::string md5(chunkp.first.begin(), chunkp.first.end());
        hashsize[md5] = chunkp.second.size();
        // Erase through the iterator: multiset::erase(key) would drop every duplicate at once,
        // so a chunk repeated in the first file would be cancelled by a single occurrence in the second
        const auto match = ch1hashes.find(md5);
        if (match != ch1hashes.end()) ch1hashes.erase(match);
        else
            ch2diff.emplace(md5);
    }

    unsigned long long diff = 0;
    for (const auto &c: ch1hashes) {
        diff += hashsize[c];
    }
    for (const auto &c: ch2diff) {
        diff += hashsize[c];
    }

    return "at most " + BytesFormatter::formatStr(diff);
}

38
src/Diff.h Normal file
View File

@@ -0,0 +1,38 @@
//
// Created by Stepan Usatiuk on 06.05.2023.
//
#ifndef SEMBACKUP_DIFF_H
#define SEMBACKUP_DIFF_H
#include <sstream>
#include <string>
#include "change_detectors/ComparableFile.h"
/// Utility class to compute difference between two ComparableFile%s
class Diff {
public:
    /// Compute the difference between two ComparableFile%s
    /// If both files are binary, calls diffPercent, which outputs the difference between files in bytes
    /// If only one of the files is binary, returns a message saying so
    /// Otherwise prints linewise difference
    /// \param c1 Constant reference to the first ComparableFile
    /// \param c2 Constant reference to the second ComparableFile
    /// \returns Difference message
    static std::string diff(const ComparableFile &c1, const ComparableFile &c2);

    /// Calculates the difference between \p c1 and \p c2 in bytes
    /// \param c1 Constant reference to the first ComparableFile
    /// \param c2 Constant reference to the second ComparableFile
    /// \returns Difference message
    static std::string diffPercent(const ComparableFile &c1, const ComparableFile &c2);

    /// Checks if a file is binary
    /// A file is considered binary if its first 2048 bytes contain a null byte
    /// \param c1 Constant reference to the checked ComparableFile
    /// \return True if the file is considered binary, false otherwise
    static bool isBinary(const ComparableFile &c1);
};
#endif//SEMBACKUP_DIFF_H

32
src/Exception.cpp Normal file
View File

@@ -0,0 +1,32 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#include "Exception.h"
#include <execinfo.h>
#include <sstream>
/// Builds the exception message from \p text, a newline and the current stacktrace
Exception::Exception(const std::string &text)
    : runtime_error(text + "\n" + getStacktrace()) {}
/// Same as the std::string overload, for C string messages
Exception::Exception(const char *text)
    : runtime_error(std::string(text) + "\n" + getStacktrace()) {}
// Based on: https://www.gnu.org/software/libc/manual/html_node/Backtraces.html
/// Captures up to 50 frames of the current call stack with backtrace() and
/// renders them one per line under a "Stacktrace:" header
/// Returns an empty string if backtrace_symbols() fails
std::string Exception::getStacktrace() {
    constexpr int maxFrames = 50;
    std::vector<void *> frames(maxFrames);

    const int captured = backtrace(frames.data(), maxFrames);
    char **symbols = backtrace_symbols(frames.data(), captured);

    std::stringstream trace;
    if (symbols != nullptr) {
        trace << "Stacktrace:" << std::endl;
        for (char **symbol = symbols; symbol != symbols + captured; ++symbol)
            trace << *symbol << std::endl;
    }

    // backtrace_symbols() returns a malloc()-ed array that the caller has to free
    free(symbols);
    return trace.str();
}

24
src/Exception.h Normal file
View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 01.05.2023.
//
#ifndef SEMBACKUP_EXCEPTION_H
#define SEMBACKUP_EXCEPTION_H
#include <stdexcept>
#include <string>
#include <vector>
/// Custom exception class that uses execinfo to append a stacktrace to the exception message
class Exception : public std::runtime_error {
public:
    /// Constructs the exception, what() will return \p text followed by a newline and the stacktrace
    /// \param text Error message
    Exception(const std::string &text);

    /// Constructs the exception, what() will return \p text followed by a newline and the stacktrace
    /// \param text Error message as a C string
    Exception(const char *text);

private:
    /// Static function to get the current stacktrace
    static std::string getStacktrace();
};
#endif//SEMBACKUP_EXCEPTION_H

19
src/Logger.cpp Normal file
View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "Logger.h"
/// Creates a logger that writes messages with level up to \p level to \p out
/// Only a reference to \p out is stored, so the stream has to outlive the Logger
Logger::Logger(int level, std::ostream &out)
    : loglevel(level), out(out) {}
/// Writes \p what to the output stream and flushes it, if \p whatlevel does not exceed the current log level
/// The write itself is done under the output mutex
void Logger::write(const std::string &what, int whatlevel) {
    if (whatlevel > loglevel) return;

    std::lock_guard<std::mutex> guard(outM);
    std::ostream &sink = out.get();
    sink << what << std::flush;
}
void Logger::setLevel(int level) {
loglevel = level;
}

25
src/Logger.h Normal file
View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_LOGGER_H
#define SEMBACKUP_LOGGER_H
#include <functional>
#include <iostream>
#include <mutex>
/// Simple leveled logger writing to a std::ostream
class Logger {
public:
    /// Constructs the Logger
    /// \param level Maximum level of messages that will be written
    /// \param out   Stream to write to (std::cout by default), stored by reference
    Logger(int level = 3, std::ostream &out = {std::cout});

    /// Writes \p what to the output if \p whatlevel is not greater than the logger's level
    /// \param what      Text to write, written as-is and followed by a flush
    /// \param whatlevel Level of the message
    void write(const std::string &what, int whatlevel);

    /// Sets the maximum level of messages that will be written
    void setLevel(int level);

private:
    int loglevel;                            ///< Maximum level of messages to write
    std::mutex outM;                         ///< Locked while writing to the output
    std::reference_wrapper<std::ostream> out;///< Output stream
};
#endif//SEMBACKUP_LOGGER_H

57
src/Progress.cpp Normal file
View File

@@ -0,0 +1,57 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "Progress.h"
#include <sstream>
#include <utility>
/// Constructs the Progress instance and, unless the "progress" option is "none",
/// prints two newlines through \p out and starts the worker thread
Progress::Progress(std::function<void(std::string, int)> out,
                   std::vector<std::variant<std::function<std::string()>, std::string>> format,
                   const Config &conf,
                   int level)
    : progresslevel(level),
      format(std::move(format)),
      out(std::move(out)),
      type(conf.getStr("progress")) {
    if (type == "none") return;

    this->out("\n\n", level);
    thread = std::thread(&Progress::showProgress, this);
}
/// Raises the stop flag and waits for the worker thread, if one was started
Progress::~Progress() {
    stop = true;
    if (!thread.joinable()) return;
    thread.join();
}
void Progress::showProgress() {
while (!stop) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
{
update(std::unique_lock(refreshM));
}
}
}
/// Writes \p s followed by a newline through the output function and redraws the progress line
/// In "pretty" mode the current terminal line is cleared first
void Progress::print(const std::string &s, int level) {
    std::unique_lock<std::mutex> refreshL(refreshM);

    std::string message;
    if (type == "pretty") message = "\r\33[2K ";
    message += s;
    message += "\n";

    out(message, level);
    update(std::move(refreshL));
}
void Progress::update(std::unique_lock<std::mutex> &&lock) {
std::stringstream outs;
if (type == "pretty")
outs << "\r\33[2K ";
for (auto const &l: format) {
if (std::holds_alternative<std::string>(l)) outs << std::get<std::string>(l);
else
outs << std::get<std::function<std::string()>>(l)();
}
if (type == "pretty")
outs << "\r";
else
outs << "\n";
out(outs.str(), progresslevel);
lock.unlock();
}

55
src/Progress.h Normal file
View File

@@ -0,0 +1,55 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_PROGRESS_H
#define SEMBACKUP_PROGRESS_H
#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <string>
#include <thread>
#include <variant>
#include "Config.h"
/// Class to handle writing progress to the screen
class Progress {
public:
    /// Constructs the Progress instance
    /// \param out Function to call for output
    /// \param format Format of the progress string, vector of strings or functions that return strings
    /// \param conf Config, used to specify format (`pretty` for line rewriting, `simple` for normal line printing, or `none`)
    /// \param level Level passed to \p out together with every progress line
    Progress(std::function<void(std::string, int)> out, std::vector<std::variant<std::function<std::string()>, std::string>> format, const Config &conf, int level = 1);

    Progress &operator=(Progress rhs) = delete;
    Progress(const Progress &orig) = delete;

    /// Write a string to the terminal without disturbing the progress bar
    /// \param s     Text to write, a newline is appended
    /// \param level Level passed to the output function
    void print(const std::string &s, int level);

    /// Destructor, instructs the worker thread to stop and joins it
    ~Progress();

private:
    int progresslevel;///< Level passed to the output function for progress lines
    std::vector<std::variant<std::function<std::string()>, std::string>> format;///< Format of the progressbar
    std::function<void(std::string, int)> out;                                  ///< Output function

    /// Thread loop function
    void showProgress();

    std::atomic<bool> stop = false;///< Stop flag
    std::mutex refreshM;///< Used to prevent mangling the output between print and progressbar update

    /// Prints the progressbar on screen, then unlocks the mutex
    void update(std::unique_lock<std::mutex> &&lock);

    const std::string type;///< Progressbar type (Taken from Config)
    std::thread thread;///< Worker thread
};
#endif//SEMBACKUP_PROGRESS_H

31
src/RunningAverage.cpp Normal file
View File

@@ -0,0 +1,31 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "RunningAverage.h"
/// Stores the sampling parameters and starts the worker thread running loop()
RunningAverage::RunningAverage(std::function<unsigned long long int()> getFunc, int max, int ms)
    : getFunc(std::move(getFunc)),
      max(max),
      ms(ms),
      thread(&RunningAverage::loop, this) {}
/// Worker thread body: until stopped, takes a sample, drops the oldest one
/// when more than max samples are stored, then sleeps for the sampling period
void RunningAverage::loop() {
    const auto period = std::chrono::milliseconds(ms);
    while (!stop) {
        {
            std::lock_guard<std::mutex> guard(dataLock);
            data.emplace_front(getFunc());
            if (data.size() > max) data.pop_back();
        }
        // Sleep outside of the lock so that get() is not blocked meanwhile
        std::this_thread::sleep_for(period);
    }
}
/// Raises the stop flag and waits for the worker thread to finish
RunningAverage::~RunningAverage() {
    stop.store(true);
    thread.join();
}
/// Returns the average of the currently stored samples
/// \return Integer average of the samples, 0 if no sample was taken yet
unsigned long long RunningAverage::get() {
    std::lock_guard lock(dataLock);
    if (data.empty()) return 0;
    // The seed fixes the accumulator type: use unsigned long long to match the samples,
    // as unsigned long is only 32 bits wide on some platforms and would truncate the sum
    return std::accumulate(data.begin(), data.end(), 0ULL) / data.size();
}

44
src/RunningAverage.h Normal file
View File

@@ -0,0 +1,44 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_RUNNINGAVERAGE_H
#define SEMBACKUP_RUNNINGAVERAGE_H
#include <atomic>
#include <deque>
#include <functional>
#include <mutex>
#include <numeric>
#include <thread>
/// Class to compute running average of some value
class RunningAverage {
public:
    /// Constructs the RunningAverage and starts the sampling thread
    /// \param getFunc Function that samples the value
    /// \param max Max number of samples to average
    /// \param ms Sampling period in milliseconds
    RunningAverage(std::function<unsigned long long()> getFunc, int max, int ms);

    /// Destructor, instructs the thread to exit and joins it
    ~RunningAverage();

    /// Returns the average of the stored samples, 0 if there are none
    unsigned long long get();

private:
    std::atomic<bool> stop = false;             ///< Stop signal
    std::function<unsigned long long()> getFunc;///< Sampling function
    std::deque<unsigned long long> data;        ///< Data collected
    int max;                                    ///< Max number of samples
    int ms;                                     ///< Sampling period
    std::mutex dataLock;                        ///< Deque lock
    std::thread thread;                         ///< Worker thread

    /// Worker thread loop
    void loop();
};
#endif//SEMBACKUP_RUNNINGAVERAGE_H

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "RunningDiffAverage.h"
/// Wraps \p getFunc into a sampler that reports the change since the previous sample
/// and hands it to the backing RunningAverage
RunningDiffAverage::RunningDiffAverage(std::function<unsigned long long int()> getFunc, int max, int ms)
    : runningAverage(
              [this, sample = std::move(getFunc)] {
                  const auto current = sample();
                  const auto delta = current - prev;
                  prev = current;
                  return delta;
              },
              max, ms) {}
/// Returns the average reported by the backing RunningAverage
unsigned long long RunningDiffAverage::get() {
    const unsigned long long average = runningAverage.get();
    return average;
}

30
src/RunningDiffAverage.h Normal file
View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_RUNNINGDIFFAVERAGE_H
#define SEMBACKUP_RUNNINGDIFFAVERAGE_H
#include <functional>
#include "RunningAverage.h"
/// Computes the rolling average of differences between last sampled and currently sampled numbers
class RunningDiffAverage {
public:
    /// Constructs the RunningDiffAverage, sampling is done by the backing RunningAverage
    /// \param getFunc Function that samples the value
    /// \param max Max number of samples to average
    /// \param ms Sampling period
    RunningDiffAverage(std::function<unsigned long long()> getFunc, int max, int ms);

    /// Returns the average
    unsigned long long get();

private:
    // prev is declared before runningAverage, so it is initialized before runningAverage is constructed
    unsigned long long prev = 0;  ///< Previously sampled value
    RunningAverage runningAverage;///< Backing RunningAverage
};
#endif//SEMBACKUP_RUNNINGDIFFAVERAGE_H

12
src/Signals.cpp Normal file
View File

@@ -0,0 +1,12 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "Signals.h"
/// Installs handle() as the handler for SIGINT
void Signals::setup() {
    std::signal(SIGINT, &Signals::handle);
}
/// Signal handler: only raises the shouldQuit flag, the signal number is not inspected
void Signals::handle(int /*signum*/) {
    shouldQuit = 1;
}

24
src/Signals.h Normal file
View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_SIGNALS_H
#define SEMBACKUP_SIGNALS_H
#include <csignal>
/// Class to handle signals sent to the process
class Signals {
public:
    /// Setup the signal handlers (registers a handler for SIGINT)
    static void setup();

    volatile static inline std::sig_atomic_t shouldQuit = false;///< Indicates whether the program was requested to exit

private:
    /// Handle the signals, sets shouldQuit
    /// \param signum Number of the received signal
    static void handle(int signum);
};
#endif//SEMBACKUP_SIGNALS_H

67
src/ThreadPool.cpp Normal file
View File

@@ -0,0 +1,67 @@
#include <string>
#include "Signals.h"
#include "ThreadPool.h"
/// Constructs a thread pool and starts its worker threads
/// \param onError    Callback function that is called when an exception happens when executing a task
/// \param workersNum Amount of worker threads, a value of 0 is treated as 1
ThreadPool::ThreadPool(std::function<void(std::string)> onError, std::size_t workersNum) : onError(std::move(onError)) {
    // std::thread::hardware_concurrency() (the default) may return 0 when the value is unknown;
    // a pool without workers would never run any of the pushed tasks
    if (workersNum == 0) workersNum = 1;

    threads.reserve(workersNum);
    for (std::size_t i = 0; i < workersNum; i++) threads.emplace_back(&ThreadPool::loop, this);
}
/// Raises the stop flag, wakes up every waiting worker and joins all worker threads
ThreadPool::~ThreadPool() {
    stop = true;
    somethingNew.notify_all();
    for (auto worker = threads.begin(); worker != threads.end(); ++worker) worker->join();
}
/// Appends \p func to the task queue and wakes up one worker
void ThreadPool::push(std::function<void()> &&func) {
    std::unique_lock<std::mutex> guard(queueLock);
    queue.push(std::move(func));
    guard.unlock();

    // Notify after releasing the lock
    somethingNew.notify_one();
}
/// Worker thread body
/// Repeatedly takes a task from the queue and runs it, until either the pool's stop flag
/// or Signals::shouldQuit is set. Messages of std::exception%s thrown by a task are
/// forwarded to onError; other exception types are not caught here.
/// `finished` is notified whenever the queue is empty and no task is running.
void ThreadPool::loop() {
    while (true) {
        std::unique_lock qLock(queueLock);
        while (queue.empty() && !stop && !Signals::shouldQuit) {
            // Check for any of the stop signals every second
            somethingNew.wait_for(qLock, std::chrono::seconds(1));
        }
        if (stop || Signals::shouldQuit) {
            // Drop all tasks if requested to exit
            queue = {};
            if (queue.empty() && running == 0) { finished.notify_all(); }
            return;
        }
        // Take the task and mark it as running while still holding the queue lock,
        // so empty() never observes an empty queue with the task not yet counted
        auto task = std::move(queue.front());
        running++;
        queue.pop();
        // Run the task without holding the lock
        qLock.unlock();
        try {
            task();
        } catch (std::exception &e) {
            onError(std::string(e.what()));
        }
        {
            // Re-take the lock to update the counter and check for completion
            std::lock_guard qLock(queueLock);
            running--;
            if (queue.empty() && running == 0) { finished.notify_all(); }
        }
    }
}
/// Returns true if there are neither queued nor running tasks, checked under the queue lock
bool ThreadPool::empty() {
    std::lock_guard<std::mutex> guard(queueLock);
    return queue.empty() && running == 0;
}

54
src/ThreadPool.h Normal file
View File

@@ -0,0 +1,54 @@
//
// Created by Stepan Usatiuk on 17.04.2023.
//
#ifndef SEMBACKUP_THREADPOOL_H
#define SEMBACKUP_THREADPOOL_H
#include <atomic>
#include <condition_variable>
#include <functional>
#include <mutex>
#include <queue>
#include <thread>
#include <utility>
/// Thread pool
/**
 * Handles ctrl-c via Signals, but it is expected of tasks to also do so
 * Forwards exception messages to the provided handler
 */
class ThreadPool {
public:
    /// Constructs a thread pool
    /// \param onError Callback function that is called when an exception happens when executing a task
    /// \param workersNum Amount of worker threads (default = number of cpu threads)
    ThreadPool(std::function<void(std::string)> onError, std::size_t workersNum = std::thread::hardware_concurrency());

    /// Destructor, instructs the threads to stop and joins them
    ~ThreadPool();

    /// Pushes a new task to the queue
    /// \param func Rvalue to the task function
    void push(std::function<void()> &&func);

    /// Returns True if the queue is empty and there are no tasks running
    bool empty();

    std::mutex finishedLock;         ///< Lock to use when waiting on the finished variable
    std::condition_variable finished;///< Condition variable to wait for all tasks to finish

private:
    /// Thread loop
    void loop();

    std::queue<std::function<void()>> queue; ///< Task queue
    std::mutex queueLock;                    ///< Task queue lock
    std::condition_variable somethingNew;    ///< Condition variable to wait for new tasks
    std::vector<std::thread> threads;        ///< Vector of worker threads
    std::atomic<bool> stop = false;          ///< Stop signal for threads
    std::atomic<int> running = 0;            ///< Number of currently running tasks
    std::function<void(std::string)> onError;///< Function to call on exception in task
};
#endif//SEMBACKUP_THREADPOOL_H

View File

@@ -0,0 +1,7 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "ChangeDetector.h"
/// Out-of-line definition of the defaulted virtual destructor
ChangeDetector::~ChangeDetector() = default;

View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_CHANGEDETECTOR_H
#define SEMBACKUP_CHANGEDETECTOR_H
#include "ComparableFile.h"
/// An interface for a class comparing any two given ComparableFile%s
class ChangeDetector {
public:
    /// Abstract method for comparing two ComparableFile%s
    /// \param f1 Constant reference to the first ComparableFile
    /// \param f2 Constant reference to the second ComparableFile
    /// \return True if these objects are considered *different*, False otherwise
    virtual bool check(const ComparableFile &f1, const ComparableFile &f2) const = 0;

    /// Default virtual destructor, allows destroying implementations through a ChangeDetector pointer
    virtual ~ChangeDetector();
};
#endif//SEMBACKUP_CHANGEDETECTOR_H

View File

@@ -0,0 +1,16 @@
//
// Created by Stepan Usatiuk on 04.05.2023.
//
#include "ChangeDetectorContainer.h"
#include <functional>
/// Runs the wrapped ChangeDetector%s in order and stops at the first one that reports a difference
/// \return True if any wrapped detector considers the files different, false otherwise (also when there are no detectors)
bool ChangeDetectorContainer::check(const ComparableFile &f1, const ComparableFile &f2) const {
    for (const auto &detector: changeDetectors) {
        if (detector->check(f1, f2)) return true;
    }
    return false;
}
/// Takes ownership of the given ChangeDetector%s
ChangeDetectorContainer::ChangeDetectorContainer(std::vector<std::unique_ptr<ChangeDetector>> &&changeDetectors)
    : changeDetectors(std::move(changeDetectors)) {}

View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 04.05.2023.
//
#ifndef SEMBACKUP_CHANGEDETECTORCONTAINER_H
#define SEMBACKUP_CHANGEDETECTORCONTAINER_H
#include <memory>
#include <vector>
#include "ChangeDetector.h"
#include "ComparableFile.h"
/// Wrapper for multiple ChangeDetector%s
/** A ChangeDetector implementation that serves as a convenience wrapper for
 * multiple ChangeDetector%s, its check returns true if any of the wrapped ChangeDetector%s return true
 */
class ChangeDetectorContainer : public ChangeDetector {
public:
    /// Constructs a ChangeDetectorContainer using a vector of existing ChangeDetector%s
    /// \param changeDetectors An rvalue reference to a vector of unique pointers of ChangeDetector, moved into the container
    ChangeDetectorContainer(std::vector<std::unique_ptr<ChangeDetector>> &&changeDetectors);

    /// \copydoc ChangeDetector::check
    /// \return ComparableFile%s are considered different if any of the wrapped ChangeDetector%s return true
    bool check(const ComparableFile &f1, const ComparableFile &f2) const override;

private:
    std::vector<std::unique_ptr<ChangeDetector>> changeDetectors;///< Wrapped ChangeDetector%s, checked in order
};
#endif//SEMBACKUP_CHANGEDETECTORCONTAINER_H

View File

@@ -0,0 +1,35 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "ChangeDetectorFactory.h"
#include <vector>
#include "../Exception.h"
#include "ContentsChangeDetector.h"
#include "EditTimeChangeDetector.h"
#include "SizeChangeDetector.h"
#include "TypeChangeDetector.h"
/// Creates the ChangeDetector registered under the name \p type
/// Known names are "etime", "size", "type" and "contents"
/// \throws Exception if \p type is not one of the known names
std::unique_ptr<ChangeDetector> ChangeDetectorFactory::getChangeDetector(const std::string &type) {
    if (type == "etime") return std::make_unique<EditTimeChangeDetector>();
    if (type == "size") return std::make_unique<SizeChangeDetector>();
    if (type == "type") return std::make_unique<TypeChangeDetector>();
    if (type == "contents") return std::make_unique<ContentsChangeDetector>();

    throw Exception("Unknown ChangeDetector type " + type);
}
/// Builds a ChangeDetectorContainer holding one ChangeDetector per entry
/// of the "change-detectors" list of \p config, in the listed order
ChangeDetectorContainer ChangeDetectorFactory::getChangeDetectors(const Config &config) {
    const auto names = config.getList("change-detectors");

    std::vector<std::unique_ptr<ChangeDetector>> detectors;
    for (const auto &name: names) detectors.emplace_back(getChangeDetector(name));

    return ChangeDetectorContainer(std::move(detectors));
}

View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_CHANGEDETECTORFACTORY_H
#define SEMBACKUP_CHANGEDETECTORFACTORY_H
#include <memory>
#include <string>
#include "../Config.h"
#include "ChangeDetector.h"
#include "ChangeDetectorContainer.h"
/// Factory class for ChangeDetector
/** Can create either a ChangeDetectorContainer of ChangeDetector%s according to Config,
 * or an individual ChangeDetector from a type string
 */
class ChangeDetectorFactory {
public:
    /// Creates a ChangeDetector of given type and returns an unique pointer to it
    /// \param type Constant reference to a string containing type of the ChangeDetector to create
    /// \return Unique pointer to constructed ChangeDetector
    /// \throws Exception if \p type is not a known ChangeDetector type
    static std::unique_ptr<ChangeDetector> getChangeDetector(const std::string &type);

    /// Constructs a ChangeDetectorContainer of ChangeDetector%s according to the given \p config
    /// \param config Config with comma-separated "change-detectors" option set, for each entry a ChangeDetector will be created
    /// \return A ChangeDetectorContainer wrapping the ChangeDetector%s constructed according to \p config
    static ChangeDetectorContainer getChangeDetectors(const Config &config);
};
#endif//SEMBACKUP_CHANGEDETECTORFACTORY_H

View File

@@ -0,0 +1,42 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "ComparableFile.h"
#include <fstream>
#include <sstream>
#include "../Exception.h"
#include "../repo/objects/FileBuffer.h"
/// Constructs a ComparableFile from a File stored in a Repository
/// The metadata is copied from \p file, #contents creates a FileBuffer for the file's id on every call
/// The callback keeps its own copy of \p file and the \p repo pointer
ComparableFile::ComparableFile(const File &file, const Repository *repo)
    : path(file.name), type(file.fileType), bytes(file.bytes), mtime(file.mtime),
      contents([repo, file]() {
          return std::make_unique<FileBuffer>(repo, file.id);
      }) {}
/// Constructs a ComparableFile from a file in the filesystem
/// #path is \p p relative to \p base, type/size/mtime are queried through the File helpers
/// #contents opens a std::filebuf for normal files, for any other type it wraps
/// the result of File::getFileContents into a std::stringbuf
ComparableFile::ComparableFile(const std::filesystem::path &p, const std::filesystem::path &base)
    : path(p.lexically_relative(base).u8string()),
      type(File::getFileType(p)),
      bytes(File::getFileSize(p)),
      mtime(File::getFileMtime(p)),
      contents([p, type = this->type]() -> std::unique_ptr<std::streambuf> {
          if (type != File::Type::Normal) {
              auto raw = File::getFileContents(p);
              std::string text(raw.begin(), raw.end());
              return std::make_unique<std::stringbuf>(text, std::ios::in | std::ios::binary);
          }

          auto fileBuf = std::make_unique<std::filebuf>();
          fileBuf->open(p, std::ios::in | std::ios::binary);
          if (!fileBuf->is_open()) throw Exception("Can't open " + p.u8string() + " for reading!");
          return fileBuf;
      }) {}

View File

@@ -0,0 +1,43 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_COMPARABLEFILE_H
#define SEMBACKUP_COMPARABLEFILE_H
#include <filesystem>
#include <functional>
#include <streambuf>
#include "../repo/Repository.h"
#include "../repo/objects/File.h"
/// Helper class to allow comparing files from different sources
/**
 * As we are required to allow comparisons between a File in a repository and a file in filesystem,
 * comparisons between two files that are already in a Repository,
 * and between File%s that are in a repository cache and between files in the filesystem (when making backups),
 * this helper class exists to provide a uniform interface to be used when calling ChangeDetector%s.
 */
struct ComparableFile {
    /// Constructs a ComparableFile based on a File in a Repository
    /// The resulting ComparableFile will have a #contents function that returns an instance of FileBuffer for given \p file
    /// \param file Constant reference to a File object
    /// \param repo Constant pointer to Repository from which the File object was taken, must be valid during the lifetime of created ComparableFile
    ComparableFile(const File &file, const Repository *repo);

    /// Constructs a ComparableFile based on a file in the filesystem
    /// The resulting ComparableFile will have a #contents function that returns an instance of std::filebuf for file at given path
    /// (for files that are not of the normal type, a std::stringbuf with the result of File::getFileContents is returned instead)
    /// \param p Constant reference to an absolute path to the file
    /// \param base Constant reference to a base path against which #path will be set
    ComparableFile(const std::filesystem::path &p, const std::filesystem::path &base);

    const std::string path;                                         ///< Relative path to the file
    const File::Type type;                                          ///< File type
    const unsigned long long bytes;                                 ///< Number of bytes in the file
    const unsigned long long mtime;                                 ///< Timestamp of last file modification
    const std::function<std::unique_ptr<std::streambuf>()> contents;///< Function that returns a unique pointer to a std::streambuf instance linked to the contents of the file
};
#endif//SEMBACKUP_COMPARABLEFILE_H

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#include "ContentsChangeDetector.h"
#include <iterator>
/// Files of different types are reported as different right away,
/// otherwise both contents streams are compared byte by byte
bool ContentsChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    if (f1.type != f2.type) return true;

    using ByteIterator = std::istreambuf_iterator<char>;

    const auto first = f1.contents();
    const auto second = f2.contents();
    const bool same = std::equal(ByteIterator(first.get()), ByteIterator(),
                                 ByteIterator(second.get()), ByteIterator());
    return !same;
}

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 05.05.2023.
//
#ifndef SEMBACKUP_CONTENTSCHANGEDETECTOR_H
#define SEMBACKUP_CONTENTSCHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their contents
/// Files of different types are always considered different
class ContentsChangeDetector : public ChangeDetector {
public:
    /// \copydoc ChangeDetector::check
    /// \return ComparableFile%s are considered different if their types or their contents are different
    bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_CONTENTSCHANGEDETECTOR_H

View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "EditTimeChangeDetector.h"
/// Reports a difference when the modification timestamps are not equal
bool EditTimeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    const bool sameMtime = f1.mtime == f2.mtime;
    return !sameMtime;
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_EDITTIMECHANGEDETECTOR_H
#define SEMBACKUP_EDITTIMECHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their modification time
/// Only the ComparableFile::mtime fields are compared, the file contents are not read
class EditTimeChangeDetector : public ChangeDetector {
public:
    /// \copydoc ChangeDetector::check
    /// \return ComparableFile%s are considered different if their modification times are different
    bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_EDITTIMECHANGEDETECTOR_H

View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#include "SizeChangeDetector.h"
/// Reports a change whenever the `bytes` fields of the two files are not equal
bool SizeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    const bool sizesDiffer = f1.bytes != f2.bytes;
    return sizesDiffer;
}

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 16.04.2023.
//
#ifndef SEMBACKUP_SIZECHANGEDETECTOR_H
#define SEMBACKUP_SIZECHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their size
/// Only the `bytes` fields are compared, the file contents are never read
class SizeChangeDetector : public ChangeDetector {
public:
/// \copydoc ChangeDetector::check
/// \return True if the sizes of \p f1 and \p f2 are not equal
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_SIZECHANGEDETECTOR_H

View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "TypeChangeDetector.h"
/// Reports a change whenever the `type` fields of the two files are not equal
bool TypeChangeDetector::check(const ComparableFile &f1, const ComparableFile &f2) const {
    const bool typesDiffer = f1.type != f2.type;
    return typesDiffer;
}

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_TYPECHANGEDETECTOR_H
#define SEMBACKUP_TYPECHANGEDETECTOR_H
#include "ChangeDetector.h"
/// A ChangeDetector implementation that compares two files by their type
/// Only the `type` fields are compared, the file contents are never read
class TypeChangeDetector : public ChangeDetector {
public:
/// \copydoc ChangeDetector::check
/// \return True if the types of \p f1 and \p f2 are not equal
bool check(const ComparableFile &f1, const ComparableFile &f2) const override;
};
#endif//SEMBACKUP_TYPECHANGEDETECTOR_H

34
src/chunkers/Buzhash.cpp Normal file
View File

@@ -0,0 +1,34 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#include "Buzhash.h"
/// Creates a hasher with an empty byte history and a rolling window of \p blockSize bytes
Buzhash::Buzhash(uint32_t blockSize)
    : blockSize(blockSize),
      history() {
}
/// Returns the hash of the bytes fed so far without modifying the hasher
uint32_t Buzhash::get() const { return cur; }
/// Pushes one byte into the rolling window and returns the updated hash
/// \param in Byte to add
/// \return Hash value after \p in has been mixed in
uint32_t Buzhash::feed(uint8_t in) {
    // Every byte already in the hash gets one position "older"
    cur = rotr32(cur, 1);

    // With a full window the oldest byte has been rotated blockSize times by now,
    // so XOR-ing the equally rotated table entry cancels its contribution
    const bool windowFull = !(history.size() < blockSize);
    if (windowFull) {
        const auto dropped = history.back();
        history.pop_back();
        cur ^= rotr32(randomNumbers[dropped], blockSize);
    }

    // Newest byte goes to the front of the history and enters the hash unrotated
    history.push_front(in);
    cur ^= randomNumbers[in];
    return cur;
}
// Circular shift taken from: https://en.wikipedia.org/wiki/Circular_shift
/// Rotates \p value to the right by \p count bits; \p count is reduced modulo the bit width of the value
uint32_t Buzhash::rotr32(uint32_t value, unsigned int count) {
    const unsigned int widthMask = CHAR_BIT * sizeof(value) - 1;
    // Masking both shift amounts keeps each of them below the bit width of value
    const unsigned int right = count & widthMask;
    const unsigned int left = -right & widthMask;
    return (value >> right) | (value << left);
}

85
src/chunkers/Buzhash.h Normal file
View File

@@ -0,0 +1,85 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#ifndef SEMBACKUP_BUZHASH_H
#define SEMBACKUP_BUZHASH_H
#include <array>
#include <climits>
#include <cstdint>
#include <deque>
/// Cyclic polynomial rolling hash
/** Based on: http://www.serve.net/buz/hash.adt/java.002.html
* https://github.com/silvasur/buzhash/blob/master/hash.go
* https://en.wikipedia.org/wiki/Rolling_hash#Cyclic_polynomial
*/
class Buzhash {
public:
/// Constructs a new Buzhash instance
/// \param blockSize Rolling hash window, in bytes
Buzhash(uint32_t blockSize);
/// Returns current hash value
uint32_t get() const;
/// Adds \p in to the hash, removing the oldest byte from it once the window is full
/// \param in Byte to add
/// \return New hash value
uint32_t feed(uint8_t in);
private:
uint32_t cur = 0; ///< Current hash value
const uint32_t blockSize; ///< Hashing window size
std::deque<uint32_t> history;///< Bytes used to calculate current hash (newest at the front), used to compute the hash in a rolling fashion (to remove the oldest byte from the hash when blockSize is reached)
// Circular shift taken from: https://en.wikipedia.org/wiki/Circular_shift
/// Shift \p value \p count bits to the right circularly
/// \param value Value to shift
/// \param count By how many bits (taken modulo the bit width of \p value)
/// \return Shifted value
static uint32_t rotr32(uint32_t value, unsigned int count);
/// 256 32-bit random numbers used for hashing, indexed by the byte value
/// Ideally, should have an equal distribution of 0s and 1s, but I didn't bother checking it
// clang-format off
static constexpr std::array<uint32_t, 256> randomNumbers{
0x827f934c, 0xebcd9924, 0x667fdea2, 0x8a8b0997, 0x42af49e8, 0x556cb313, 0x505da41b, 0xb23be60f,
0xc3901be4, 0xee1d8d4d, 0x4d59795c, 0x8d542ba4, 0x043f073c, 0x2af19a39, 0xb2c4aa36, 0x6e30ff43,
0x77ad3ef7, 0xd4c077e5, 0x3a1155aa, 0x866b07d3, 0xc16022b2, 0x6d4dad6e, 0x7a69c6dd, 0xd436dc23,
0x32b64948, 0x1f72475f, 0x129be871, 0x05d46f6e, 0x7e405cd5, 0x31fdd272, 0x84a56b1a, 0xeaf43633,
0x5f8148d4, 0x6d4bf6d9, 0xc2b4dbd7, 0xaa804cc7, 0xcb3de5ca, 0x6503cdb3, 0xa3c6d727, 0x20e2f098,
0xd525bb67, 0x37b1b81e, 0xc1f1fd79, 0x4fe91240, 0x6a4ea716, 0x71245e33, 0xdbaab854, 0xfc24600e,
0xd72dc72f, 0x2d7139ae, 0x075fb38d, 0xb18028a5, 0x9970d103, 0x235ec64b, 0x68645255, 0x352945f0,
0x7a4b19a1, 0xe17df5f5, 0x676a6644, 0x75aad7aa, 0x63bdfc9a, 0x607586c7, 0x1546400e, 0xfe582141,
0xb50a199f, 0xb0769910, 0x5d74ab3b, 0x2404799b, 0xa66a3a78, 0x1b6e24aa, 0x630674cc, 0x3272fea4,
0xd4e9e078, 0xe586d12a, 0x579f8b98, 0xfd16bcb5, 0xd1e4faee, 0xe30953c7, 0x3ac73f87, 0xab66983f,
0x5fe12f90, 0x10952ef1, 0x5c7ac32a, 0x89ccd941, 0xb82c3fa9, 0xacd374e5, 0x50984746, 0x09f082e8,
0x11ee3b91, 0x31764e3a, 0xb59df38a, 0x67e94f2d, 0xcceaca68, 0xc68a89d8, 0x5f2e80ac, 0xd5556741,
0x8c815df6, 0xde71c2b5, 0x7b1f5c49, 0xd64682a4, 0x4fb59748, 0x4968707f, 0x909c0c1a, 0x5f1dd608,
0x1c601e37, 0x96e01ada, 0xc5582ef8, 0xae6834c1, 0xbe63b0ce, 0xab2aea9f, 0xf13e77c2, 0xe433350b,
0x17a24a33, 0xc1f31bb6, 0xa23e9de4, 0x7e28ef69, 0x23e0ef42, 0x0796e53f, 0xf9e3045d, 0x7bbacd31,
0xa48bee27, 0x15f3c3b3, 0x4c320cb4, 0x916429d9, 0xa15ccb3c, 0x82a4a23c, 0xb0cc6a4a, 0xcf8d93fa,
0x3b18b937, 0xad0488e4, 0xaa568114, 0x80b9b8c7, 0x8f3a9071, 0x818b790d, 0x99c8dbf2, 0x0d23b2a4,
0x74c81a28, 0x1aa65d76, 0x7168ee7d, 0xc0d40b6c, 0x77c70a0c, 0xd3752839, 0xc2f7981c, 0x83767124,
0xb881618f, 0xb263d8cf, 0xbbb40400, 0xdb9702eb, 0xaccad841, 0x806af5a7, 0x16f096e3, 0x64bf45d9,
0x5f7c0a58, 0xdac0c665, 0x1dbebaac, 0xb97027a6, 0xfc934433, 0xfc7b2d06, 0x8871fe4e, 0x0df24135,
0x6ddf7cc8, 0x32e0d1cd, 0xe88abedd, 0x214af930, 0x90990f97, 0xc7691171, 0xbf7b6ca3, 0x8af6589c,
0x452c8ee0, 0xbc2c5891, 0xcf8d13b4, 0x698d1f1f, 0x802a011a, 0x19820708, 0x25c79d2f, 0xedf91253,
0xc93fe5dd, 0xa03a117b, 0x10912ae7, 0xc90d59d0, 0xc3522549, 0x3e4f3e81, 0x494ae40f, 0x2d157b6e,
0xd7bf06b2, 0x19c5bb2a, 0xa869261c, 0xa80cfd2c, 0x1ea7c6ec, 0x1b36a51f, 0x8bd227cc, 0xad2d2260,
0x181258c3, 0xbd253a58, 0x3273f94b, 0x9c315309, 0xb2d8d3e3, 0x11ec35a8, 0x384e6475, 0x855a9009,
0x854cc06a, 0xe7408809, 0xe583ce2a, 0x895fb756, 0x6a8a2072, 0x6598a92b, 0x530f41bb, 0xb1bd57f1,
0x62d57fa0, 0xe6505776, 0x42fcfe4d, 0x0fbdf1ee, 0x8e3104c4, 0xf11c8a65, 0x5bc51ad9, 0x5f1f8ce9,
0xab179a87, 0xd5448444, 0x7bd4a26b, 0x658f1963, 0x86db95b8, 0xaba6734e, 0x486fddea, 0x859c3e0b,
0xebce0106, 0x99c3014e, 0xc151b942, 0x9604aad8, 0xf6ce654b, 0xa1e7982e, 0xf6d8ed14, 0xd4bdf7e2,
0x13696254, 0x05ec638c, 0x306dbc29, 0x1676eb60, 0xadbf3ce3, 0x966dde56, 0x6d5bea46, 0x719aa10d,
0x0e65093d, 0x0b1a3c43, 0x0321ea8c, 0xe0ef2cbd, 0x43432ee3, 0x3e62046d, 0x425e7b44, 0x892e119c,
0xfdec4de5, 0x48c5dd6c, 0x79e6bfcd, 0x8d53372e, 0xe96f6d32, 0x52cddacd, 0x3e99e0eb, 0xa9e5d28f,
};
// clang-format on
};
#endif//SEMBACKUP_BUZHASH_H

View File

@@ -0,0 +1,42 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#include "BuzhashChunker.h"
#include "../Exception.h"
#include "../crypto/MD5.h"
/// Stores the chunking parameters and creates a rolling hasher with a window of \p window bytes
BuzhashChunker::BuzhashChunker(std::streambuf *buf,
                               unsigned long long minBytes,
                               unsigned long long maxBytes,
                               unsigned long long mask,
                               uint32_t window)
    : Chunker(buf, maxBytes),
      window(window),
      minBytes(minBytes),
      mask(mask),
      buzhash(window) {
}
/// Reads the next content-defined chunk from the source stream
/// At least minBytes are read first; the chunk is then extended byte by byte until the lowest
/// `mask` bits of the rolling hash are all zero, maxBytes is reached, or the stream ends
/// \return Pair of (MD5::calculate() result for the chunk, the chunk's bytes)
/// \throws Exception if EOF was already reached
std::pair<std::string, std::vector<char>> BuzhashChunker::getNext() {
    if (eof) throw Exception("Trying to read from a file that is finished!");

    std::vector<char> rbuf(minBytes);
    // Keep the full 64-bit count: going through unsigned long would truncate it where that type is 32 bits wide
    const auto read = static_cast<unsigned long long>(buf->sgetn(rbuf.data(), static_cast<std::streamsize>(minBytes)));

    // Short read: the stream ended before the minimal chunk size, whatever was read is the last chunk
    if (read != minBytes) {
        eof = true;
        rbuf.resize(read);
        return {MD5::calculate(rbuf), rbuf};
    }

    for (auto c: rbuf) {
        buzhash.feed(static_cast<uint8_t>(c));
    }

    // Bit mask with the lowest `mask` bits set. The boundary widths are handled explicitly because
    // shifting by the full width of the type (or more) is undefined behaviour: mask == 0 gives an
    // empty bit mask (cut right after minBytes), mask >= 64 selects every bit.
    const unsigned long long wordBits = sizeof(unsigned long long) * 8;
    const unsigned long long cutMask = mask >= wordBits ? ~0ULL : ((1ULL << mask) - 1);

    // Continue reading the file until either the last mask bits are zero or we reach maxBytes
    while ((buzhash.get() & cutMask) != 0 && rbuf.size() < maxBytes) {
        const auto r = buf->sbumpc();
        if (r == std::streambuf::traits_type::eof()) {
            eof = true;
            break;
        }
        const char c = std::streambuf::traits_type::to_char_type(r);
        rbuf.emplace_back(c);
        buzhash.feed(static_cast<uint8_t>(c));
    }

    return {MD5::calculate(rbuf), rbuf};
}

View File

@@ -0,0 +1,34 @@
//
// Created by Stepan Usatiuk on 26.04.2023.
//
#ifndef SEMBACKUP_BUZHASHCHUNKER_H
#define SEMBACKUP_BUZHASHCHUNKER_H
#include <streambuf>
#include "Buzhash.h"
#include "Chunker.h"
/// Chunker implementation using rolling hash
/// Chunk boundaries are placed where the lowest `mask` bits of a Buzhash over the most recent bytes are zero
class BuzhashChunker : public Chunker {
public:
/// Constructs a BuzhashChunker
/// \copydoc Chunker::Chunker
/// \param minBytes Minimum amount of bytes in returned chunks (the last chunk of a file may be shorter)
/// \param mask Amount of trailing zeroes in the rolling hash at which the file is cut (results in average chunk size of 2^mask bytes)
/// \param window Rolling hash window (how many of chunks last bytes are included in the hash, the default is recommended)
BuzhashChunker(std::streambuf *buf, unsigned long long minBytes, unsigned long long maxBytes, unsigned long long mask, uint32_t window = 4095);
/// \copydoc Chunker::getNext
std::pair<std::string, std::vector<char>> getNext() override;
private:
const unsigned long long window; ///< Rolling hash window
const unsigned long long minBytes;///< Minimum amount of bytes in returned chunks
const unsigned long long mask; ///< Amount of trailing zeroes in the rolling hash at which the file is cut
Buzhash buzhash; ///< Hasher instance, its state is kept between getNext() calls
};
#endif//SEMBACKUP_BUZHASHCHUNKER_H

51
src/chunkers/Chunker.cpp Normal file
View File

@@ -0,0 +1,51 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#include "Chunker.h"
#include "../Exception.h"
/// Remembers the source buffer and the upper bound for the size of returned chunks
Chunker::Chunker(std::streambuf *buf, unsigned long long maxBytes)
    : buf(buf),
      maxBytes(maxBytes) {
}
/// Tells whether the end of the source has already been reached
bool Chunker::getEof() const { return eof; }
/// Out-of-line definition of the virtual destructor declared in the class; nothing to release explicitly
Chunker::~Chunker() = default;
/// Creates an iterator bound to this Chunker; constructing it already reads the first chunk
Chunker::ChunkerIterator Chunker::begin() {
    return ChunkerIterator(this);
}
/// Creates the past-EOF sentinel iterator, which is not bound to any Chunker
Chunker::ChunkerIterator Chunker::end() {
    return ChunkerIterator(nullptr);
}
/// Moves to the next chunk, or switches to the past-EOF state when the source is exhausted
/// \throws Exception if the iterator is already past-EOF
Chunker::ChunkerIterator &Chunker::ChunkerIterator::operator++() {
    if (pastEOF) {
        throw Exception("Trying to increment pastEOF ChunkerIterator!");
    }

    // There is still data: fetch the next chunk and stay dereferenceable
    if (!source->getEof()) {
        buf = source->getNext();
        return *this;
    }

    // Source is exhausted, the previously read chunk was the last one
    pastEOF = true;
    return *this;
}
/// Iterators differ exactly when one of them is past-EOF and the other is not
bool Chunker::ChunkerIterator::operator!=(const Chunker::ChunkerIterator &rhs) const {
    const bool sameState = pastEOF == rhs.pastEOF;
    return !sameState;
}
/// Returns a copy of the chunk the iterator currently points to
/// \throws Exception if the iterator is past-EOF
Chunker::ChunkerIterator::value_type Chunker::ChunkerIterator::operator*() const {
    if (!pastEOF) {
        return buf.value();
    }
    throw Exception("Trying to dereference pastEOF ChunkerIterator!");
}
/// Iterators are equal when both are past-EOF or both are not; the pointed-to chunk is not compared
bool Chunker::ChunkerIterator::operator==(const Chunker::ChunkerIterator &rhs) const {
    const bool differentState = pastEOF != rhs.pastEOF;
    return !differentState;
}
/// Builds either the past-EOF sentinel (null \p source) or an iterator that immediately loads the first chunk
Chunker::ChunkerIterator::ChunkerIterator(Chunker *source)
    : source(source),
      pastEOF(source == nullptr) {
    // A real source is advanced once so that the iterator is dereferenceable right away
    if (source != nullptr) {
        ++(*this);
    }
}

74
src/chunkers/Chunker.h Normal file
View File

@@ -0,0 +1,74 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_CHUNKER_H
#define SEMBACKUP_CHUNKER_H
#include <array>
#include <optional>
#include <streambuf>
#include <vector>
/// Abstract base class for a Chunker that takes a file and splits it into chunks to be backed up
class Chunker {
private:
/// Convenience iterator to allow using Chunker%s in range for loops
/// Single-pass: advancing it consumes data from the underlying Chunker
struct ChunkerIterator {
using value_type = std::pair<std::string, std::vector<char>>;
/// Creates a ChunkerIterator pointing to the first chunk or past-EOF
/// If \p source is not nullptr, the first chunk is read from it right away
/// \param source Pointer to a Chunker, should be available during the entire iterator lifetime, or nullptr if this is pastEOF iterator
ChunkerIterator(Chunker *source);
/// Increments the iterator to the next chunk, or past-EOF
/// \throws Exception if iterator points past-EOF
ChunkerIterator &operator++();
/// Returns a copy of the current pointed-to chunk
/// \throws Exception if iterator points past-EOF
value_type operator*() const;
/// Returns true if both iterators are past-EOF or both are not (only the past-EOF state is compared)
bool operator==(const ChunkerIterator &rhs) const;
/// Returns true if exactly one of the iterators is past-EOF (only the past-EOF state is compared)
bool operator!=(const ChunkerIterator &rhs) const;
private:
Chunker *const source; ///< Pointer to the underlying Chunker
std::optional<value_type> buf;///< Currently pointed to chunk
bool pastEOF = false; ///< Whether past EOF has been reached
};
public:
/// Returns the next chunk of the file
/// Returns a single empty chunk if a file is empty
/// \return Pair consisting of the chunk's MD5 hash (first) and the chunk's bytes (second)
/// \throws Exception if EOF was already reached
virtual std::pair<std::string, std::vector<char>> getNext() = 0;
/// Returns True if EOF was reached, False otherwise
bool getEof() const;
/// Default virtual destructor
virtual ~Chunker();
/// Returns a ChunkerIterator pointing to the first chunk in a file
ChunkerIterator begin();
/// Returns a past-EOF ChunkerIterator
static ChunkerIterator end();
protected:
/// \param buf Pointer to a std::streambuf, should be available during the entire lifetime of a Chunker
/// \param maxBytes Maximal amount of bytes in returned chunks
Chunker(std::streambuf *buf, unsigned long long maxBytes);
std::streambuf *const buf; ///< Constant pointer to the source std::streambuf
bool eof = false; ///< Indicates whether EOF has been reached
const unsigned long long maxBytes;///< Max number of bytes in returned chunks
};
#endif//SEMBACKUP_CHUNKER_H

View File

@@ -0,0 +1,19 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#include "ChunkerFactory.h"
#include "../Exception.h"
#include "BuzhashChunker.h"
#include "ConstChunker.h"
/// Picks the Chunker implementation named by the "chunker" config option
/// The "chunker-min" and "chunker-max" options are multiplied by 1024 before being passed on
/// \throws Exception if the "chunker" option is neither "const" nor "buzhash"
std::unique_ptr<Chunker> ChunkerFactory::getChunker(const Config &config, std::streambuf *buf) {
    const auto kind = config.getStr("chunker");

    if (kind == "const")
        return std::make_unique<ConstChunker>(buf, config.getInt("chunker-max") * 1024);

    if (kind == "buzhash")
        return std::make_unique<BuzhashChunker>(buf, config.getInt("chunker-min") * 1024, config.getInt("chunker-max") * 1024, config.getInt("chunker-mask"));

    throw Exception("Unknown chunker type!");
}

View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#ifndef SEMBACKUP_CHUNKERFACTORY_H
#define SEMBACKUP_CHUNKERFACTORY_H
#include <memory>
#include <streambuf>
#include "../Config.h"
#include "Chunker.h"
/// Factory for Chunker%s
class ChunkerFactory {
public:
/// Creates a new Chunker based on provided \p config backed with \p buf
/// The implementation is selected by the "chunker" option ("const" or "buzhash")
/// \param config Constant reference to Config
/// \param buf Pointer to a std::streambuf instance, should be available during the Chunker lifetime
/// \return Unique pointer to the created Chunker
/// \throws Exception if the configured chunker type is unknown
static std::unique_ptr<Chunker> getChunker(const Config &config, std::streambuf *buf);
};
#endif//SEMBACKUP_CHUNKERFACTORY_H

View File

@@ -0,0 +1,27 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#include "ConstChunker.h"
#include "../Exception.h"
#include "../crypto/MD5.h"
/// Forwards the source buffer and the fixed chunk size to the Chunker base
ConstChunker::ConstChunker(std::streambuf *buf, unsigned long long maxBytes)
    : Chunker(buf, maxBytes) {
}
std::pair<std::string, std::vector<char>> ConstChunker::getNext() {
if (eof) throw Exception("Trying to read from a file that is finished!");
std::vector<char> rbuf(maxBytes);
auto read = static_cast<unsigned long>(buf->sgetn(rbuf.data(), (long) maxBytes));
if (read != maxBytes) {
eof = true;
rbuf.resize(read);
}
auto md5 = MD5::calculate(rbuf);
return {md5, rbuf};
}

View File

@@ -0,0 +1,24 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_CONSTCHUNKER_H
#define SEMBACKUP_CONSTCHUNKER_H
#include <streambuf>
#include "Chunker.h"
/// Chunker implementation that splits the file into equally-sized chunks of maxBytes bytes
/// Only the last chunk of a file may be shorter than maxBytes
class ConstChunker : public Chunker {
public:
/// Constructs a ConstChunker
/// \copydoc Chunker::Chunker
ConstChunker(std::streambuf *buf, unsigned long long maxBytes);
/// \copydoc Chunker::getNext
std::pair<std::string, std::vector<char>> getNext() override;
};
#endif//SEMBACKUP_CONSTCHUNKER_H

9
src/commands/Command.cpp Normal file
View File

@@ -0,0 +1,9 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "Command.h"
/// Takes ownership of \p name by moving it into the constant member
Command::Command(std::string name)
    : name(std::move(name)) {
}
/// Definition of the destructor that is declared pure virtual in the class; it still needs a body because derived destructors call it
Command::~Command() = default;

28
src/commands/Command.h Normal file
View File

@@ -0,0 +1,28 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMAND_H
#define SEMBACKUP_COMMAND_H
#include "../Context.h"
/// Abstract base class for some process running with some Context
class Command {
public:
/// Runs the command with Context \p ctx
/// \param ctx Context to run with, passed by value
virtual void run(Context ctx) = 0;
/// Pure virtual destructor (defined in Command.cpp), keeps the class abstract
virtual ~Command() = 0;
/// The name of the command
const std::string name;
protected:
/// Constructs a command with name \p name
/// \param name Name of the command, moved into the `name` member
Command(std::string name);
};
#endif//SEMBACKUP_COMMAND_H

View File

@@ -0,0 +1,152 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandDiff.h"
#include "../BytesFormatter.h"
#include "../Diff.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../change_detectors/ChangeDetectorFactory.h"
#include "../chunkers/ChunkerFactory.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
using namespace CommandsCommon;
/// Registers the command under the name "diff"
CommandDiff::CommandDiff()
    : Command("diff") {
}
/// Compares the files of one archive against either a second archive or the `from` directory
/// The first archive is `aid` from the config, or the archive with the highest id if `aid` is not set.
/// With diff-mode "normal" every file under `prefix` is compared, with diff-mode "file" only the file named by `prefix`.
/// \param ctx Context providing the repository, its config and the logger
/// \throws Exception if there is no archive to diff against or the diff-mode is unknown
void CommandDiff::run(Context ctx) {
    std::string diffMode = ctx.repo->getConfig().getStr("diff-mode");

    Object::idType archive1;
    if (!ctx.repo->getConfig().exists("aid")) {
        auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);
        // std::max_element returns the end iterator for an empty range, which must not be dereferenced
        if (archives.begin() == archives.end()) throw Exception("No archives in the repository to diff against!");
        archive1 = std::max_element(archives.begin(), archives.end(), [](const auto &a1, const auto &a2) { return a1.second < a2.second; })->second;
    } else {
        archive1 = ctx.repo->getConfig().getInt("aid");
    }

    ThreadPool threadPool([&](const std::string &error) {
        ctx.logger->write("Error: " + error, 0);
    },
                          ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());

    auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1));
    std::mutex filesLock;
    std::map<std::filesystem::path, File> files;///< Files in the first archive
    for (auto id: archiveO1.files) {
        auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
        auto path = std::filesystem::u8path(file.name);
        if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
            files.emplace(file.getKey(), std::move(file));
    }

    /// Container of ChangeDetectors built using the config of the repository
    ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());

    /// Task to compare the given file with the first archive
    /// Every processed file is erased from `files`, so what is left at the end exists only in the first archive
    auto processFile = [&, this](ComparableFile p) {
        auto relPath = p.path;
        std::unique_lock lock(filesLock);
        if (files.count(relPath) == 0) {
            ctx.logger->write(relPath + " is new\n", 0);
            lock.unlock();
        } else {
            File repoFile = files.at(relPath);
            // The comparison itself runs without the lock, it only works on the copy made above
            lock.unlock();
            if (changeDetector.check({repoFile, ctx.repo}, p)) {
                ctx.logger->write(relPath + " is different " + Diff::diff({repoFile, ctx.repo}, p) + "\n", 1);
            } else {
                if (diffMode == "file")
                    ctx.logger->write(relPath + " are same ", 0);
            }
        }
        lock.lock();
        files.erase(relPath);
    };

    std::optional<Archive> archiveO2;
    if (diffMode == "normal") {
        /// If a second archive is given, run the task for each of its files, otherwise use the "from" config option
        if (ctx.repo->getConfig().exists("aid2")) {
            archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
            threadPool.push([&]() {
                for (auto id: archiveO2.value().files) {
                    /// Exit when asked to
                    if (Signals::shouldQuit) throw Exception("Quitting");
                    auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
                    if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::u8path(file.name)))
                        threadPool.push([&, file]() {
                            processFile(ComparableFile{file, ctx.repo});
                        });
                    if (Signals::shouldQuit) break;
                }
                return true;
            });
        } else {
            std::filesystem::path from = ctx.repo->getConfig().getStr("from");
            /// Start the diff with the root directory and empty ignore list
            threadPool.push([&, from]() {
                processDirWithIgnore(
                        from,
                        {},
                        [&](std::function<void()> f) { threadPool.push(std::move(f)); },
                        [processFile, from, prefix = ctx.repo->getConfig().getStr("prefix")](const std::filesystem::directory_entry &dirEntry) {
                            if (isSubpath(prefix, dirEntry.path().lexically_relative(from)))
                                processFile(ComparableFile{dirEntry, from});
                        });
            });
        }
    } else if (diffMode == "file") {
        if (files.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
            ctx.logger->write("Doesn't exist in the first archive", 0);
            return;
        }
        if (ctx.repo->getConfig().exists("aid2")) {
            archiveO2.emplace(Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2"))));
            std::map<std::filesystem::path, File> files2;///< Files in the second archive
            for (auto id: archiveO2->files) {
                auto file = Serialize::deserialize<File>(ctx.repo->getObject(id));
                auto path = std::filesystem::u8path(file.name);
                if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path))
                    files2.emplace(file.getKey(), std::move(file));
            }
            if (files2.count(ctx.repo->getConfig().getStr("prefix")) == 0) {
                ctx.logger->write("Doesn't exist in the second archive", 0);
                return;
            } else {
                processFile(ComparableFile{files2.at(ctx.repo->getConfig().getStr("prefix")), ctx.repo});
            }
        } else {
            std::filesystem::path from = ctx.repo->getConfig().getStr("from");
            if (!std::filesystem::exists(from / ctx.repo->getConfig().getStr("prefix"))) {
                ctx.logger->write("Doesn't exist in the filesystem archive", 0);
                return;
            }
            /// Compare the single file found in the filesystem
            processFile(ComparableFile{from / ctx.repo->getConfig().getStr("prefix"), from});
        }
    } else {
        throw Exception("Unknown diff-mode: " + diffMode);
    }

    /// Wait for diff to end
    std::unique_lock finishedLock(threadPool.finishedLock);
    threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });

    // Whatever was not erased by processFile has no counterpart on the other side
    if (diffMode == "normal")
        for (auto const &s: files) {
            ctx.logger->write(s.first.u8string() + " is removed\n", 0);
        }
}

View File

@@ -0,0 +1,23 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDDIFF_H
#define SEMBACKUP_COMMANDDIFF_H
#include "Command.h"
#include "CommandsCommon.h"
/// Run the diff between:
/// 1. The latest archive (the one with the highest id) and the `from` directory
/// 2. if `aid` is set the aid archive and the `from` directory
/// 3. if `aid` and `aid2` are set between `aid` and `aid2`
/// The `diff-mode` config option selects between diffing everything under `prefix` ("normal") and a single file ("file")
class CommandDiff : public Command {
public:
/// Constructs the command with the name "diff"
CommandDiff();
/// \copydoc Command::run
void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDDIFF_H

View File

@@ -0,0 +1,16 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandList.h"
/// Registers the command under the name "list"
CommandList::CommandList()
    : Command("list") {}
/// Prints the name and id of every archive in the repository to stdout, ordered by ascending id
void CommandList::run(Context ctx) {
    auto archives = ctx.repo->getObjects(Object::ObjectType::Archive);

    // Entries are (name, id) pairs, order them by id
    const auto byId = [](const auto &lhs, const auto &rhs) { return lhs.second < rhs.second; };
    std::sort(archives.begin(), archives.end(), byId);

    for (const auto &entry: archives)
        std::cout << "Name: " << entry.first << " Id: " << entry.second << std::endl;
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDLIST_H
#define SEMBACKUP_COMMANDLIST_H
#include "Command.h"
#include "CommandsCommon.h"
/// Lists available archives in a repository
/// Prints one line with the name and the id per archive, sorted by id
class CommandList : public Command {
public:
/// Constructs the command with the name "list"
CommandList();
/// \copydoc Command::run
void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDLIST_H

View File

@@ -0,0 +1,22 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandListFiles.h"
#include "../BytesFormatter.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
#include "../repo/objects/File.h"
/// Registers the command under the name "list-files"
CommandListFiles::CommandListFiles()
    : Command("list-files") {}
/// Prints name, type and formatted size of every file in the archive selected by the `aid` config option
void CommandListFiles::run(Context ctx) {
    const auto archiveId = ctx.repo->getConfig().getInt("aid");
    const auto archive = Serialize::deserialize<Archive>(ctx.repo->getObject(archiveId));

    // The archive only stores file ids, each File object is loaded separately
    for (const auto &fileId: archive.files) {
        const auto file = Serialize::deserialize<File>(ctx.repo->getObject(fileId));
        std::cout << "Name: " << file.name
                  << " type: " << File::TypeToStr.at(file.fileType)
                  << " size: " << BytesFormatter::formatStr(file.bytes)
                  << std::endl;
    }
}

View File

@@ -0,0 +1,20 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDLISTFILES_H
#define SEMBACKUP_COMMANDLISTFILES_H
#include "Command.h"
#include "CommandsCommon.h"
/// Lists files in the selected Archive
/// The archive is selected with the `aid` config option; name, type and size are printed for every file
class CommandListFiles : public Command {
public:
/// Constructs the command with the name "list-files"
CommandListFiles();
/// \copydoc Command::run
void run(Context ctx) override;
};
#endif//SEMBACKUP_COMMANDLISTFILES_H

View File

@@ -0,0 +1,125 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandRestore.h"
#include <fstream>
#include <sstream>
#include "../BytesFormatter.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../chunkers/ChunkerFactory.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
using namespace CommandsCommon;
/// Registers the command under the name "restore"
CommandRestore::CommandRestore()
    : Command("restore") {}
/// Restores every file of the archive `aid` into the directory `to` (both taken from the config)
/// One task walks the archive and spawns one restore task per file on a thread pool, progress is reported while they run
void CommandRestore::run(Context ctx) {
Object::idType archive = ctx.repo->getConfig().getInt("aid");
std::filesystem::path to = std::filesystem::u8path(ctx.repo->getConfig().getStr("to"));
/// Totals discovered so far by the main restore task, shown as the targets in the progress line
std::atomic<unsigned long long> filesToRestoreCount = 0;
std::atomic<unsigned long long> bytesToRestore = 0;
WorkerStats workerStats;///< Restore statistics of the worker threads
/// Worker callback, bound to the local workerStats variable
workerStatsFunction workerCallback = [&workerStats](unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten) {
CommandsCommon::workerCallback(bytesWritten, bytesSkipped, filesWritten, workerStats);
};
// avg, progress and threadPool live in their own scope so that all three are destroyed before the final newline is logged
{
/// Calculate the average speed of restore
RunningDiffAverage avg(
[&]() { return workerStats.bytesWritten.load(); },
100, 100);
/// Show restore progress
Progress progress([this, ctx](const std::string &s, int l) { ctx.logger->write(s, l); },
{
[&workerStats]() { return std::to_string(workerStats.filesWritten.load()); },
"/",
[&filesToRestoreCount]() { return std::to_string(filesToRestoreCount); },
" files saved, ",
[&workerStats]() { return BytesFormatter::formatStr(workerStats.bytesWritten.load() + workerStats.bytesSkipped.load()); },
" / ",
[&bytesToRestore]() { return BytesFormatter::formatStr(bytesToRestore); },
" saved @ ",
// NOTE(review): the factor 10 presumably converts the average per sampling interval to a per-second value — confirm against RunningDiffAverage
[&avg]() { return BytesFormatter::formatStr(avg.get() * 10); },
"/s",
},
ctx.repo->getConfig());
/// Thread pool for restore tasks, errors are printed through progress
ThreadPool threadPool([&](const std::string &error) {
progress.print("Error: " + error, 0);
},
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());
/// Add the main restore task
threadPool.push([&, this]() {
/// Get the archive and its file IDs
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archive));
std::vector<Object::idType> files = archiveO.files;
/// For each file...
for (const auto fid: files) {
/// Stop when asked to
if (Signals::shouldQuit) break;
auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid));
filesToRestoreCount++;
bytesToRestore += file.bytes;
/// Spawn a restore task (`to` and `file` are captured by copy, everything else by reference)
threadPool.push([&, this, to, file]() {
backupRestoreFile(file, to, workerCallback, ctx);
progress.print("Restored " + file.name, 1);
});
}
});
/// Wait for all tasks to finish
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
}
ctx.logger->write("\n", 1);
}
std::string CommandRestore::backupRestoreFile(const File &file, const std::filesystem::path &baseDir, workerStatsFunction &callback, Context ctx) {
auto fullpath = baseDir / std::filesystem::u8path(file.name);
std::filesystem::create_directories(fullpath.parent_path());
if (file.fileType == File::Type::Directory) {
std::filesystem::create_directory(fullpath);
callback(0, 0, 1);
return fullpath.u8string();
}
if (file.fileType == File::Type::Symlink) {
auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks[0]));
std::filesystem::create_symlink(std::filesystem::u8path(std::string{dest.data.begin(), dest.data.end()}), fullpath);
callback(0, 0, 1);
return fullpath.u8string();
}
std::ofstream ostream(fullpath, std::ios::binary | std::ios::out | std::ios::trunc);
for (const auto cid: file.chunks) {
if (Signals::shouldQuit) throw Exception("Quitting!");
Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid));
if (!c.data.empty()) {
ostream.rdbuf()->sputn(c.data.data(), c.data.size());
callback(c.data.size(), 0, 0);
}
}
callback(0, 0, 1);
return fullpath.u8string();
}

View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDRESTORE_H
#define SEMBACKUP_COMMANDRESTORE_H
#include "Command.h"
#include "../repo/objects/File.h"
#include "CommandsCommon.h"
/// Restores the archive with id `aid` to path `to` (both taken from the config)
class CommandRestore : public Command {
public:
/// Constructs the command with the name "restore"
CommandRestore();
/// \copydoc Command::run
void run(Context ctx) override;
private:
/// Internal function to restore a file
/// \param file Constant reference to the File object
/// \param base Base directory to restore to
/// \param callback Stats callback
/// \param ctx Context providing the repository to read the file's chunks from
/// \return Name of the restored file
std::string backupRestoreFile(const File &file, const std::filesystem::path &base, CommandsCommon::workerStatsFunction &callback, Context ctx);
};
#endif//SEMBACKUP_COMMANDRESTORE_H

239
src/commands/CommandRun.cpp Normal file
View File

@@ -0,0 +1,239 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandRun.h"
#include <fstream>
#include <iomanip>
#include <sstream>
#include "../BytesFormatter.h"
#include "../Exception.h"
#include "../Progress.h"
#include "../RunningDiffAverage.h"
#include "../Signals.h"
#include "../ThreadPool.h"
#include "../change_detectors/ChangeDetectorFactory.h"
#include "../chunkers/ChunkerFactory.h"
#include "../crypto/MD5.h"
#include "../repo/Serialize.h"
#include "../repo/objects/Archive.h"
#include "../repo/objects/Chunk.h"
#include "../repo/objects/File.h"
#include "CommandsCommon.h"
using namespace CommandsCommon;
/// Registers the command under the name "run"
CommandRun::CommandRun()
    : Command("run") {}
/// Backs up the directory named by the "from" config value into the repository.
/// Decides whether the backup is full or incremental, walks the source tree on a thread pool
/// (re-chunking files or reusing previously stored ones), and finally stores an Archive object
/// that lists the ids of all files that were saved or reused.
/// \param ctx Context providing the repository (ctx.repo) and the logger (ctx.logger)
void CommandRun::run(Context ctx) {
WorkerStats workerStats;///< Backup statistics of the worker threads
RunnerStats runnerStats;///< Backup target metrics
std::filesystem::path from = ctx.repo->getConfig().getStr("from");///< Directory to back up from
bool fullBackup = ctx.repo->getConfig().getStr("type") == "full";
if (fullBackup) {
ctx.logger->write("Backup is full because of the config\n", 1);
}
/// For progtest task compliance
if (!fullBackup) {
/// If it's time for full backup as per config, force it
auto per = ctx.repo->getConfig().getInt("full-period");
auto list = ctx.repo->getObjects(Object::ObjectType::Archive);
/// Order the archives by the second member of each entry, descending. That member is what gets passed
/// to getObject below, so it is presumably the object id; treating this as "newest first" assumes ids grow over time — TODO confirm
std::sort(list.begin(), list.end(), [](const auto &l, const auto &r) { return l.second > r.second; });
/// Count the non-full archives at the front of the sorted list, stopping at the first full one
int lastInc = 0;
for (auto const &a: list) {
auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(a.second));
if (!archiveO.isFull) {
lastInc++;
continue;
} else
break;
}
if (lastInc >= per) {
fullBackup = true;
ctx.logger->write("Backup is full because of the interval\n", 1);
}
if (list.size() == 0) {
fullBackup = true;
ctx.logger->write("Backup is full because there are no backups\n", 1);
}
}
/// Worker callback, bound to the local workerStats variable
workerStatsFunction workerCallback = [&](unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten) {
CommandsCommon::workerCallback(bytesWritten, bytesSkipped, filesWritten, workerStats);
};
std::vector<Object::idType> files;///< File ids so far added to the archive
std::mutex filesLock; ///< Files vector lock
/// Function to safely add new file ids to `files`
std::function addFile = [&](Object::idType id) {std::lock_guard lock(filesLock); files.emplace_back(id); };
/// Technically the progtest task says that only the files from the last backup should be compared against...
/// ...but here every File object currently listed by the repository is put into the map, which is later looked up by relative path
std::map<std::string, Object::idType> prevArchiveFiles;
{
auto prevArchiveFilesList = ctx.repo->getObjects(Object::ObjectType::File);
prevArchiveFiles = {prevArchiveFilesList.begin(), prevArchiveFilesList.end()};
}
/// The File cache is cleared after the snapshot above; files reused by an incremental backup are re-added with addToCache below
ctx.repo->clearCache(Object::ObjectType::File);
{
/// Calculate the average speed of backup
RunningDiffAverage avg(
[&]() { return workerStats.bytesWritten.load(); },
100, 100);
/// Show the progress of backup
Progress progress([this, ctx](const std::string &s, int l) { ctx.logger->write(s, l); },
{[&]() { return std::to_string(workerStats.filesWritten.load()); },
"/",
[&]() { return std::to_string(runnerStats.filesToSaveCount); },
" files saved, ",
[&]() { return std::to_string(runnerStats.filesSkipped); },
" files skipped, ",
[&]() { return BytesFormatter::formatStr((workerStats.bytesWritten.load() + workerStats.bytesSkipped.load())); },
" / ",
[&]() { return BytesFormatter::formatStr(runnerStats.bytesToSave); },
" read @ ",
/// NOTE(review): the factor of 10 presumably converts a per-100ms average into bytes per second — confirm against RunningDiffAverage
[&]() { return BytesFormatter::formatStr(avg.get() * 10); },
"/s"},
ctx.repo->getConfig());
/// Thread pool for backup tasks, prints to progress on any errors
/// The number of workers is the "threads" config value when it exists, otherwise std::thread::hardware_concurrency()
ThreadPool threadPool([&](const std::string &error) {
progress.print("Error: " + error, 0);
},
ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") : std::thread::hardware_concurrency());
/// Container of ChangeDetectors built using the config of the repository
ChangeDetectorContainer changeDetector = ChangeDetectorFactory::getChangeDetectors(ctx.repo->getConfig());
/// Function to spawn a rechunking task
/// It updates the "to save" counters immediately and pushes the actual chunking onto the thread pool
auto saveFile = [&, this](const std::filesystem::path &absPath, const std::filesystem::path &relPath) {
runnerStats.bytesToSave += File::getFileType(absPath) == File::Type::Normal ? std::filesystem::file_size(absPath) : 0;
runnerStats.filesToSaveCount++;
threadPool.push([&, relPath, absPath]() {
addFile(backupChunkFile(absPath, relPath.u8string(), workerCallback, ctx));
progress.print("Copied: " + relPath.u8string(), 1);
});
};
/// Task to process an individual file in the backup
std::function<void(std::filesystem::path)> processFile;
/// If it's a full backup, just save the file, otherwise re-chunk it only if it's changed
if (fullBackup)
processFile =
[&, this](const std::filesystem::path &p) {
saveFile(p, p.lexically_relative(from).u8string());
};
else
processFile =
[&, this](const std::filesystem::path &p) {
auto relPath = p.lexically_relative(from).u8string();
if (prevArchiveFiles.count(relPath) != 0) {
File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(prevArchiveFiles.at(relPath)));
/// When no change detector reports a difference, reuse the stored File object instead of re-chunking
if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) {
addFile(repoFile.id);
ctx.repo->addToCache(repoFile);
progress.print("Skipped: " + relPath, 1);
runnerStats.filesSkipped++;
return;
}
}
saveFile(p, relPath);
return;
};
/// Start the backup with the root directory and empty ignore list
threadPool.push([&]() {
processDirWithIgnore(
from,
{},
[&](std::function<void()> f) { threadPool.push(std::move(f)); },
processFile);
});
/// Wait for all the tasks to finish
std::unique_lock finishedLock(threadPool.finishedLock);
threadPool.finished.wait(finishedLock, [&threadPool] { return threadPool.empty(); });
}
ctx.logger->write("\n", 1);
auto written = BytesFormatter::format(workerStats.bytesWritten);
auto skipped = BytesFormatter::format(workerStats.bytesSkipped);
ctx.logger->write(written.prefix + " written: " + written.number + '\n', 1);
ctx.logger->write(skipped.prefix + " skipped: " + skipped.number + '\n', 1);
/// The archive is named after the current local time, formatted as "dd-mm-YYYY HH-MM-SS"
auto time = std::time(0);
auto ltime = std::localtime(&time);
std::stringstream s;
s << std::put_time(ltime, "%d-%m-%Y %H-%M-%S");
/// Avoid archive name collisions
while (ctx.repo->exists(Object::ObjectType::Archive, s.str())) s << "N";
Archive a(ctx.repo->getId(), s.str(), time, files, fullBackup);
ctx.repo->putObject(a);
}
/// Saves a single filesystem entry into the repository and returns the id of the created File object.
///
/// Symlinks and directories are stored as a File with a single Chunk holding whatever File::getFileContents
/// returns for them. Regular files are split by the Chunker built from the repository config; when the
/// "dedup" option is "on", chunks that already exist in the repository are reused instead of being written again.
/// \param orig     Absolute path to the file
/// \param saveAs   UTF-8 encoded file name to save as
/// \param callback Stats callback, invoked as (bytesWritten, bytesSkipped, filesWritten)
/// \param ctx      Context providing the repository
/// \return ID of the saved file
/// \throws Exception if \p orig is a special file, can't be opened, its size doesn't match what was read, or a quit was requested
Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, workerStatsFunction &callback, Context ctx) {
    /// If it's a symlink or directory, treat it specially
    /// The order of checks is important, because is_directory follows the symlink
    if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) {
        auto contents = File::getFileContents(orig);
        Chunk c(ctx.repo->getId(), MD5::calculate(contents), contents);
        File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.md5, {c.id}, File::getFileType(orig));
        ctx.repo->putObject(c);
        ctx.repo->putObject(f);
        /// NOTE(review): unlike the regular-file path below, the stats callback is never invoked here — confirm this is intended
        return f.id;
    }

    if (!std::filesystem::is_regular_file(orig))
        throw Exception(orig.u8string() + " is a special file, not saving");

    std::ifstream ifstream(orig, std::ios::in | std::ios::binary);
    if (!ifstream) throw Exception("Couldn't open " + orig.u8string() + " for reading");

    std::unique_ptr<Chunker> chunker = ChunkerFactory::getChunker(ctx.repo->getConfig(), ifstream.rdbuf());

    MD5 fileHash;
    std::vector<Object::idType> fileChunks;
    unsigned long long size = 0;

    /// Bind by forwarding reference so that a chunk is not copied on every iteration
    for (auto &&chunkp: *chunker) {
        /// Exit when asked to
        if (Signals::shouldQuit) break;

        Object::idType chunkId;
        size += chunkp.second.size();
        if (ctx.repo->getConfig().getStr("dedup") == "on" && ctx.repo->exists(Object::ObjectType::Chunk, chunkp.first)) {
            /// If the chunk already exists, reuse it
            chunkId = ctx.repo->getObjectId(Object::ObjectType::Chunk, chunkp.first);
            callback(0, chunkp.second.size(), 0);
        } else {
            /// Otherwise, write it
            Chunk c(ctx.repo->getId(), chunkp.first, chunkp.second);
            chunkId = c.id;
            callback(c.data.size(), 0, 0);
            ctx.repo->putObject(c);
        }
        fileHash.feedData(chunkp.second);
        fileChunks.emplace_back(chunkId);
    }

    /// We might have exited in the loop before, so we don't save an incomplete file
    if (Signals::shouldQuit) throw Exception("Quitting!");

    /// The number of bytes read must match the size reported for the file
    if (size != File::getFileSize(orig)) {
        throw Exception("Something really bad happened or file " + orig.u8string() + " changed during backup");
    }

    File f(ctx.repo->getId(), saveAs, size, File::getFileMtime(orig), fileHash.getHash(), fileChunks, File::getFileType(orig));
    ctx.repo->putObject(f);
    callback(0, 0, 1);
    return f.id;
}

28
src/commands/CommandRun.h Normal file
View File

@@ -0,0 +1,28 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDRUN_H
#define SEMBACKUP_COMMANDRUN_H
#include "Command.h"
#include "CommandsCommon.h"
/// Runs the backup according to the config in the Repository
class CommandRun : public Command {
public:
/// Constructs the command, passing the name "run" to the Command base class
CommandRun();
/// Performs the backup and stores the resulting Archive in the repository
/// \param ctx Context providing the repository and the logger
void run(Context ctx) override;
private:
/// Internal function to chunk the file and save it
/// \param orig Absolute path to the file
/// \param saveAs UTF-8 encoded file name to save as
/// \param callback Stats callback
/// \param ctx Context providing the repository to save into
/// \return ID of the saved file
/// \throws Exception if the file is a special file, can't be read, or a quit was requested
Object::idType backupChunkFile(const std::filesystem::path &orig, const std::string &saveAs, CommandsCommon::workerStatsFunction &callback, Context ctx);
};
#endif//SEMBACKUP_COMMANDRUN_H

View File

@@ -0,0 +1,67 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#include "CommandsCommon.h"
#include <fstream>
#include <regex>
#include "../Exception.h"
#include "../Signals.h"
/// Adds the reported amounts to the corresponding atomic counters of \p to
void CommandsCommon::workerCallback(unsigned long long int bytesWritten, unsigned long long int bytesSkipped, unsigned long long int filesWritten, WorkerStats &to) {
    to.bytesWritten.fetch_add(bytesWritten);
    to.bytesSkipped.fetch_add(bytesSkipped);
    to.filesWritten.fetch_add(filesWritten);
}
/// Checks whether the UTF-8 string form of \p p starts with the UTF-8 string form of \p prefix.
/// The comparison is character-wise, not component-wise, so "/a/b" is reported as a prefix of "/a/bc".
/// \param prefix Constant reference to the prefix path
/// \param p      Constant reference to the checked path
/// \return True if \p p starts with \p prefix, False otherwise
bool CommandsCommon::isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p) {
    /// Convert each path once instead of on every loop iteration
    const auto prefixStr = prefix.u8string();
    const auto pathStr = p.u8string();
    /// compare() limits the left side to at most prefixStr.size() characters,
    /// so a path shorter than the prefix can never compare equal
    return pathStr.compare(0, prefixStr.size(), prefixStr) == 0;
}
/// Walks the entries of \p dir, spawning a task for every entry that is not ignored.
///
/// A directory containing a ".nobackup" file is skipped entirely. Every line of an ".ignore" file is
/// appended to \p ignore as a regular expression that must match the whole file name of an entry;
/// the accumulated rules are handed down to subdirectories. Subdirectories (that are not symlinks) are processed
/// recursively through \p spawner, and are also passed to \p processFile themselves unless they contain ".nobackup".
/// \param dir         Const reference to the path of directory to iterate through
/// \param ignore      List of ignore rules (regular expressions) inherited from the parent directories
/// \param spawner     Function to spawn other tasks
/// \param processFile Task to spawn on found entries
/// \throws Exception if \p dir is not a directory
/// \throws std::regex_error if an ignore rule is not a valid regular expression
void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile) {
    if (!std::filesystem::is_directory(dir)) throw Exception(dir.u8string() + " is not a directory!");

    /// Don't process the directory if it has a ".nobackup" file
    if (std::filesystem::exists(dir / ".nobackup")) return;

    /// If it has an .ignore file, add every line of it into our ignore vector
    if (std::filesystem::exists(dir / ".ignore")) {
        std::ifstream ignorefile(dir / ".ignore", std::ios::in);
        std::string line;
        while (std::getline(ignorefile, line)) {
            ignore.emplace_back(line);
        }
    }

    /// Compile the rules once per directory instead of once per (entry, rule) pair
    std::vector<std::regex> ignoreRules;
    ignoreRules.reserve(ignore.size());
    for (const auto &rule: ignore) ignoreRules.emplace_back(rule);

    /// For each directory entry...
    for (const auto &dirEntry: std::filesystem::directory_iterator(dir)) {
        /// Break in case exit was requested by the user
        if (Signals::shouldQuit) break;

        /// Don't process the entry if it matches any of the ignore rules
        const auto name = dirEntry.path().filename().u8string();
        const bool ignored = std::any_of(ignoreRules.begin(), ignoreRules.end(), [&name](const std::regex &rule) {
            return std::regex_match(name, rule);
        });
        if (ignored) continue;

        /// If it's a directory, spawn a task to process the entries in it
        if (!dirEntry.is_symlink() && dirEntry.is_directory()) {
            spawner([dirEntry, ignore, spawner, processFile]() {
                processDirWithIgnore(dirEntry.path(), ignore, spawner, processFile);
            });
            /// Don't save the dir if it has a .nobackup file
            if (std::filesystem::exists(dirEntry.path() / ".nobackup")) continue;
        }

        /// Spawn a task to process each individual file
        spawner([processFile, dirEntry]() {
            processFile(dirEntry);
        });
    }
}

View File

@@ -0,0 +1,48 @@
//
// Created by Stepan Usatiuk on 23.05.2023.
//
#ifndef SEMBACKUP_COMMANDSCOMMON_H
#define SEMBACKUP_COMMANDSCOMMON_H
#include <atomic>
#include <filesystem>
#include <functional>
namespace CommandsCommon {
    /// Stats callback type; the arguments are (bytes written, bytes skipped, files written)
    using workerStatsFunction = std::function<void(unsigned long long, unsigned long long, unsigned long long)>;

    /// Internal function for recursive directory processing, taking into account ".ignore" and ".nobackup" files
    /// \param dir Const reference to the path of directory to iterate through
    /// \param ignore List of files to ignore
    /// \param spawner Function to spawn other tasks
    /// \param processFile Task to spawn on found files
    void processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile);

    /// Atomic counters of what has been written or skipped so far
    struct WorkerStats {
        std::atomic<unsigned long long> bytesWritten{0};
        std::atomic<unsigned long long> bytesSkipped{0};
        std::atomic<unsigned long long> filesWritten{0};
    };

    /// Atomic counters of what is to be saved and how many files were skipped
    struct RunnerStats {
        std::atomic<unsigned long long> bytesToSave{0};
        std::atomic<unsigned long long> filesToSaveCount{0};
        std::atomic<unsigned long long> filesSkipped{0};
    };

    /// Checks if \p p has \p prefix as prefix
    /// \param prefix Constant reference to the prefix path
    /// \param p Constant reference to the checked path
    /// \return True if \p p contains \p prefix at its prefix, False otherwise
    bool isSubpath(const std::filesystem::path &prefix, const std::filesystem::path &p);

    /// Adds the given amounts to the counters of \p to
    /// \param bytesWritten Number of bytes written
    /// \param bytesSkipped Number of bytes skipped
    /// \param filesWritten Number of files written
    /// \param to Reference to the stats to update
    void workerCallback(unsigned long long bytesWritten, unsigned long long bytesSkipped, unsigned long long filesWritten, WorkerStats &to);
}// namespace CommandsCommon
#endif//SEMBACKUP_COMMANDSCOMMON_H

82
src/crypto/AES.cpp Normal file
View File

@@ -0,0 +1,82 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#include "AES.h"
#include <openssl/aes.h>
#include <openssl/evp.h>
#include <openssl/rand.h>
#include "../Exception.h"
/// Derives a key from \p password and \p salt, then encrypts \p in with it
std::vector<char> AES::encrypt(const std::vector<char> &in, const std::string &password, const std::string &salt) {
    const auto derivedKey = AES::deriveKey(password, salt);
    return AES::encrypt(in, derivedKey);
}
/// Derives a key from \p password and \p salt, then decrypts \p in with it
std::vector<char> AES::decrypt(const std::vector<char> &in, const std::string &password, const std::string &salt) {
    const auto derivedKey = AES::deriveKey(password, salt);
    return AES::decrypt(in, derivedKey);
}
/// Encrypts \p in with AES-256-CBC using \p key.
/// The result starts with 32 freshly generated random bytes (passed to OpenSSL as the IV), followed by the ciphertext.
/// \param in  Constant reference to to-be-encrypted vector
/// \param key Constant reference to the key
/// \return Encrypted vector of size at most original + 48 (16 for possible padding, 32 for the IV)
/// \throws Exception on any error
std::vector<char> AES::encrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key) {
    std::unique_ptr<EVP_CIPHER_CTX, decltype(&EVP_CIPHER_CTX_free)> ctx(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
    if (!ctx) throw Exception("Error initializing encryption context!");

    std::vector<char> out(in.size() + AES_BLOCK_SIZE + 32);

    /// RAND_bytes reports success only with 1; it can return 0 or -1 on failure, so compare against 1 explicitly
    if (RAND_bytes(reinterpret_cast<unsigned char *>(out.data()), 32) != 1)
        throw Exception("Error generating IV!");

    if (!EVP_EncryptInit_ex(ctx.get(), EVP_aes_256_cbc(), nullptr, key.data(), reinterpret_cast<const unsigned char *>(out.data())))
        throw Exception("Error encrypting!");

    /// The ciphertext is written right after the 32-byte prefix
    int outlen = static_cast<int>(out.size()) - 32;
    if (!EVP_EncryptUpdate(ctx.get(), reinterpret_cast<unsigned char *>(out.data() + 32), &outlen, reinterpret_cast<const unsigned char *>(in.data()), static_cast<int>(in.size())))
        throw Exception("Error encrypting!");

    int finlen = 0;
    if (!EVP_EncryptFinal_ex(ctx.get(), reinterpret_cast<unsigned char *>(out.data() + outlen + 32), &finlen))
        throw Exception("Error encrypting!");

    /// Trim the buffer to what was actually produced
    out.resize(outlen + finlen + 32);
    return out;
}
std::vector<char> AES::decrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key) {
if (in.size() < 32) throw Exception("Array to decrypt is too small!");
std::unique_ptr<EVP_CIPHER_CTX, decltype(&EVP_CIPHER_CTX_free)> ctx(EVP_CIPHER_CTX_new(), &EVP_CIPHER_CTX_free);
if (!ctx) throw Exception("Error initializing encryption context!");
std::vector<char> out(in.size() - 32);
int outlen = static_cast<int>(out.size());
if (!EVP_DecryptInit_ex(ctx.get(), EVP_aes_256_cbc(), nullptr, key.data(), reinterpret_cast<const unsigned char *>(in.data())))
throw Exception("Error decrypting!");
if (!EVP_DecryptUpdate(ctx.get(), reinterpret_cast<unsigned char *>(out.data()), &outlen, reinterpret_cast<const unsigned char *>(in.data() + 32), static_cast<int>(in.size() - 32)))
throw Exception("Error decrypting!");
int finlen = 0;
if (!EVP_DecryptFinal_ex(ctx.get(), (unsigned char *) (out.data() + outlen), &finlen))
throw Exception("Error decrypting!");
out.resize(outlen + finlen);
return out;
}
std::array<uint8_t, 32> AES::deriveKey(const std::string &password, const std::string &salt) {
std::array<uint8_t, 32> key;//NOLINT
if (!PKCS5_PBKDF2_HMAC_SHA1(password.data(),
static_cast<int>(password.length()),
reinterpret_cast<const unsigned char *>(salt.data()),
static_cast<int>(salt.length()),
10000,
32,
key.data()))
throw Exception("Error deriving key!");
return key;
}

59
src/crypto/AES.h Normal file
View File

@@ -0,0 +1,59 @@
//
// Created by Stepan Usatiuk on 30.04.2023.
//
#ifndef SEMBACKUP_AES_H
#define SEMBACKUP_AES_H
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>
/// Utility class to handle encryption/decryption of byte vectors
/**
* Based on: https://wiki.openssl.org/index.php/EVP_Symmetric_Encryption_and_Decryption
*
* The cipher used is AES-256-CBC; encrypted vectors start with a 32-byte random prefix that is passed to OpenSSL as the IV.
*/
class AES {
public:
/// Encrypts the provided \p in vector using \p password and \p salt
/// \param in Constant reference to to-be-encrypted vector
/// \param password Constant reference to the password
/// \param salt Constant reference to the salt
/// \return Encrypted vector of size at most original + 48 (16 for possible padding, 32 for the IV)
/// \throws Exception on any error
static std::vector<char> encrypt(const std::vector<char> &in, const std::string &password, const std::string &salt);
/// Decrypts the provided \p in vector using \p password and \p salt
/// \param in Constant reference to to-be-decrypted vector
/// \param password Constant reference to the password
/// \param salt Constant reference to the salt
/// \return Decrypted vector
/// \throws Exception on any error
static std::vector<char> decrypt(const std::vector<char> &in, const std::string &password, const std::string &salt);
/// Encrypts the provided \p in vector using \p key
/// \param in Constant reference to to-be-encrypted vector
/// \param key Constant reference to the key
/// \return Encrypted vector of size at most original + 48 (16 for possible padding, 32 for the IV)
/// \throws Exception on any error
static std::vector<char> encrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key);
/// Decrypts the provided \p in vector using \p key
/// \param in Constant reference to to-be-decrypted vector, must be at least 32 bytes long (the IV prefix)
/// \param key Constant reference to the key
/// \return Decrypted vector
/// \throws Exception on any error, including \p in being shorter than 32 bytes
static std::vector<char> decrypt(const std::vector<char> &in, const std::array<uint8_t, 32> &key);
/// Generates a key for the encryption using \p password and \p salt using PKCS5_PBKDF2_HMAC_SHA1
/// \param password Constant reference to the password
/// \param salt Constant reference to the salt
/// \return Derived key (32 bytes, 10000 iterations)
/// \throws Exception on any error
static std::array<uint8_t, 32> deriveKey(const std::string &password, const std::string &salt);
};
#endif//SEMBACKUP_AES_H

11
src/crypto/CRC32.cpp Normal file
View File

@@ -0,0 +1,11 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "CRC32.h"
/// Computes the CRC32 of \p in with zlib, starting from zlib's initial CRC value
CRC32::crcType CRC32::calculate(const std::vector<char> &in) {
    const auto *bytes = reinterpret_cast<const Bytef *>(in.data());
    const crcType initial = crc32(0L, nullptr, 0);
    return crc32(initial, bytes, in.size());
}

25
src/crypto/CRC32.h Normal file
View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_CRC32_H
#define SEMBACKUP_CRC32_H
#include <cstdint>
#include <vector>
#include <zlib.h>
/// Utility class to compute CRC32 values of vectors of chars
/// The checksum is computed with zlib's crc32 function
class CRC32 {
public:
/// Type of the checksum, zlib's uLong
using crcType = uLong;
/// Calculates the CRC32 of given vector
/// \param in Constant reference to a vector of chars
/// \return CRC32 result
static crcType calculate(const std::vector<char> &in);
};
#endif//SEMBACKUP_CRC32_H

48
src/crypto/MD5.cpp Normal file
View File

@@ -0,0 +1,48 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#include "MD5.h"
#include "../Exception.h"
std::string MD5::calculate(const std::vector<char> &in) {
MD5 hasher;
hasher.feedData(in);
return hasher.getHash();
}
/// Checks that the digest context was allocated and initializes it for MD5
/// \throws Exception if the context is null or the initialization fails
MD5::MD5() {
    const bool ready = mdctx && EVP_DigestInit_ex(mdctx.get(), EVP_md5(), nullptr);
    if (!ready) throw Exception("Can't create hashing context!");
}
/// Feeds the bytes of \p in into the running hash; an empty vector is a no-op
/// \throws Exception if the digest update fails
void MD5::feedData(const std::vector<char> &in) {
    if (!in.empty() && !EVP_DigestUpdate(mdctx.get(), in.data(), in.size())) {
        throw Exception("Error hashing!");
    }
}
std::string MD5::getHash() {
std::array<char, 16> out;
unsigned int s = 0;
if (!EVP_DigestFinal_ex(mdctx.get(), reinterpret_cast<unsigned char *>(out.data()), &s))
throw Exception("Error hashing!");
if (s != out.size())
throw Exception("Error hashing!");
if (!EVP_MD_CTX_reset(mdctx.get()))
throw Exception("Error hashing!");
return {out.begin(), out.end()};
}
/// Copies the characters of \p in into a byte vector and hashes that
std::string MD5::calculate(const std::string &in) {
    const std::vector<char> bytes(in.begin(), in.end());
    return MD5::calculate(bytes);
}

48
src/crypto/MD5.h Normal file
View File

@@ -0,0 +1,48 @@
//
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_MD5_H
#define SEMBACKUP_MD5_H
#include <array>
#include <memory>
#include <vector>
#include <openssl/evp.h>
/// Class to handle MD5 hashing
/**
* Based on: https://wiki.openssl.org/index.php/EVP_Message_Digests
*/
class MD5 {
public:
/// Constructs an empty MD5 hasher instance
/// \throws Exception on initialization error
MD5();
/// Calculates the hash for a given \p in char vector
/// \param in Constant reference to an input vector
/// \return MD5 hash of \p in, as 16 raw bytes (not hex-encoded)
static std::string calculate(const std::vector<char> &in);
/// Calculates the hash for a given \p in string
/// \param in Constant reference to an input string
/// \return MD5 hash of \p in, as 16 raw bytes (not hex-encoded)
static std::string calculate(const std::string &in);
/// Append a vector of chars to the current hash
/// \param in Constant reference to an input vector, an empty vector is ignored
/// \throws Exception on any error
void feedData(const std::vector<char> &in);
/// Returns the hash, resets the hashing context
/// \return The hash of everything fed so far, as 16 raw bytes (not hex-encoded)
/// \throws Exception on any error
std::string getHash();
private:
const std::unique_ptr<EVP_MD_CTX, decltype(&EVP_MD_CTX_free)> mdctx{EVP_MD_CTX_new(), &EVP_MD_CTX_free};///< Current hashing context
};
#endif//SEMBACKUP_MD5_H

View File

@@ -0,0 +1,47 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#include "CheckFilter.h"
#include "../crypto/CRC32.h"
#include "../repo/Serialize.h"
/// Forwards to filterWriteStatic
std::vector<char> CheckFilter::filterWrite(std::vector<char> from) const {
    auto wrapped = filterWriteStatic(std::move(from));
    return wrapped;
}
/// Forwards to filterReadStatic
std::vector<char> CheckFilter::filterRead(std::vector<char> from) const {
    auto unwrapped = filterReadStatic(std::move(from));
    return unwrapped;
}
/// Builds the checked form of \p from: the magic prefix, then the serialized payload, then the serialized CRC32 of the payload
std::vector<char> CheckFilter::filterWriteStatic(std::vector<char> from) {
    std::vector<char> result(magic);
    Serialize::serialize(from, result);

    auto checksum = CRC32::calculate(from);
    Serialize::serialize(checksum, result);

    return result;
}
/// Validates the magic prefix of \p from, extracts the payload and verifies it against the recorded CRC32
/// \throws Exception if the input is shorter than the magic prefix, the prefix doesn't match, or the CRC differs
std::vector<char> CheckFilter::filterReadStatic(std::vector<char> from) {
    if (from.size() < magic.size()) throw Exception("Input is corrupted (too small)!");

    /// Walk the input and the magic prefix side by side; the cursor ends up right after the prefix
    auto cursor = from.cbegin();
    for (auto expected = magic.cbegin(); expected != magic.cend(); ++expected, ++cursor) {
        if (*cursor != *expected) throw Exception("Magic prefix is wrong!");
    }

    /// The payload comes first, the recorded checksum follows it
    auto payload = Serialize::deserialize<std::vector<char>>(cursor, from.cend());
    auto actualCrc = CRC32::calculate(payload);
    auto recordedCrc = Serialize::deserialize<CRC32::crcType>(cursor, from.cend());

    if (actualCrc != recordedCrc) throw Exception("CRC mismatch!");

    return payload;
}

36
src/filters/CheckFilter.h Normal file
View File

@@ -0,0 +1,36 @@
//
// Created by Stepan Usatiuk on 12.05.2023.
//
#ifndef SEMBACKUP_CHECKFILTER_H
#define SEMBACKUP_CHECKFILTER_H
#include "Filter.h"
/// Filter implementation that checks the input for corruption using CRC
/**
* Additionally, it has static methods for work outside FilterContainer%s
*/
class CheckFilter : public Filter {
public:
/// \copydoc Filter::filterWrite
/// \copydoc CheckFilter::filterWriteStatic
std::vector<char> filterWrite(std::vector<char> from) const override;
/// \copydoc Filter::filterRead
/// \copydoc CheckFilter::filterReadStatic
std::vector<char> filterRead(std::vector<char> from) const override;
/// Adds CRC hash and magic string to the \p from vector
/// \return Vector consisting of the magic string, the serialized \p from and its serialized CRC32
static std::vector<char> filterWriteStatic(std::vector<char> from);
/// Checks the \p from vector and removes the metadata
/// \return The original payload, if the magic string and the CRC32 are correct
/// \throws Exception on any error
static std::vector<char> filterReadStatic(std::vector<char> from);
private:
/// Magic string that every checked vector starts with
static const inline std::vector<char> magic{'s', 'e', 'm', 'b', 'a'};
};
#endif//SEMBACKUP_CHECKFILTER_H

6
src/filters/Filter.cpp Normal file
View File

@@ -0,0 +1,6 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#include "Filter.h"
/// Out-of-line definition of the defaulted virtual destructor of Filter
Filter::~Filter() = default;

30
src/filters/Filter.h Normal file
View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifndef SEMBACKUP_FILTER_H
#define SEMBACKUP_FILTER_H
#include <vector>
/// Interface class for I/O filters
/// filterRead is expected to undo what filterWrite of the same filter did
class Filter {
public:
/// Applies the filter to \p from vector and returns the result
/// Note: the vector is passed by value, as it allows to avoid copying with std::move in case the filter modifies the \p from vector in-place
/// \param from Source vector of chars
/// \return Filtered vector of chars
virtual std::vector<char> filterWrite(std::vector<char> from) const = 0;
/// Reverses the applied filter from \p from vector and returns the result
/// Note: the vector is passed by value, as it allows to avoid copying with std::move in case the filter modifies the \p from vector in-place
/// \param from Source vector of chars
/// \return Filtered vector of chars
virtual std::vector<char> filterRead(std::vector<char> from) const = 0;
/// Default virtual destructor
virtual ~Filter();
};
#endif//SEMBACKUP_FILTER_H

17
src/filters/FilterAES.cpp Normal file
View File

@@ -0,0 +1,17 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#include "FilterAES.h"
#include "../crypto/AES.h"
/// Encrypts \p from with the key derived at construction
std::vector<char> FilterAES::filterWrite(std::vector<char> from) const {
    auto encrypted = AES::encrypt(from, key);
    return encrypted;
}
/// Decrypts \p from with the key derived at construction
std::vector<char> FilterAES::filterRead(std::vector<char> from) const {
    auto decrypted = AES::decrypt(from, key);
    return decrypted;
}
/// Derives the encryption key from \p password and \p salt once, at construction
FilterAES::FilterAES(const std::string &password, const std::string &salt)
    : key(AES::deriveKey(password, salt)) {
}

37
src/filters/FilterAES.h Normal file
View File

@@ -0,0 +1,37 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifndef SEMBACKUP_FILTERAES_H
#define SEMBACKUP_FILTERAES_H
#include <array>
#include <cstdint>
#include <string>
#include "Filter.h"
/// Filter implementation that encrypts/decrypts data using provided password and salt
class FilterAES : public Filter {
public:
/// Constructs the filter, using \p password and \p salt to generate the encryption key
/// \param password Constant reference to password string
/// \param salt Constant reference to salt string
/// \throws Exception if the key derivation fails
FilterAES(const std::string &password, const std::string &salt);
/// Encrypts the \p from vector
/// \copydoc Filter::filterWrite
/// \throws Exception on any error
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Decrypts the \p from vector
/// \copydoc Filter::filterRead
/// \throws Exception on any error
std::vector<char> filterRead(std::vector<char> from) const override;
private:
const std::array<uint8_t, 32> key;///< Key used for encryption, derived from \p password and \p salt
};
#endif//SEMBACKUP_FILTERAES_H

View File

@@ -0,0 +1,23 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#include "FilterContainer.h"
/// Creates a container with no filters in it
FilterContainer::FilterContainer() = default;
/// Takes ownership of \p f by appending it to the list of filters; returns *this so that calls can be chained
FilterContainer &FilterContainer::addFilter(std::unique_ptr<Filter> &&f) {
    filters.push_back(std::move(f));
    return *this;
}
/// Passes \p from through filterWrite of every filter, first added filter first
std::vector<char> FilterContainer::filterWrite(std::vector<char> from) const {
    for (auto it = filters.cbegin(); it != filters.cend(); ++it) {
        from = (*it)->filterWrite(std::move(from));
    }
    return from;
}
/// Passes \p from through filterRead of every filter, last added filter first
std::vector<char> FilterContainer::filterRead(std::vector<char> from) const {
    for (auto remaining = filters.size(); remaining > 0; --remaining) {
        from = filters[remaining - 1]->filterRead(std::move(from));
    }
    return from;
}

View File

@@ -0,0 +1,37 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifndef SEMBACKUP_FILTERCONTAINER_H
#define SEMBACKUP_FILTERCONTAINER_H
#include <memory>
#include <vector>
#include "Filter.h"
/// Convenience Filter implementation, that applies multiple Filter%s in succession
class FilterContainer : public Filter {
public:
/// Constructs an empty FilterContainer
FilterContainer();
/// Adds a Filter into itself
/// The container takes ownership of the filter
/// \param f Rvalue reference to a unique pointer to Filter
/// \return Reference to itself
FilterContainer &addFilter(std::unique_ptr<Filter> &&f);
/// Applies the filters in order of insertion
/// \copydoc Filter::filterWrite
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Applies the filters in reverse order of insertion
/// \copydoc Filter::filterRead
std::vector<char> filterRead(std::vector<char> from) const override;
private:
std::vector<std::unique_ptr<Filter>> filters;///< Vector of unique pointers to Filter%s
};
#endif//SEMBACKUP_FILTERCONTAINER_H

View File

@@ -0,0 +1,32 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#include "FilterFactory.h"
#include "../Exception.h"
#include "CheckFilter.h"
#include "FilterAES.h"
#include "FilterShift.h"
#include "FilterShiftSecret.h"
#include "FilterZlib.h"
/// Creates the Filter matching \p type, reading its parameters from \p config.
/// Known types are "aes", "zlib" and "crc" (plus "shiftC" and "shiftE" when built with TEST defined).
/// \throws Exception if \p type is "none" or unknown
std::unique_ptr<Filter> FilterFactory::makeFilter(const std::string &type, const Config &config) {
    if (type == "none") throw Exception("Trying to make a \"none\" filter!");

    if (type == "aes")
        return std::make_unique<FilterAES>(config.getStr("password"), config.getStr("salt"));

    if (type == "zlib")
        return std::make_unique<FilterZlib>(config.getInt("compression-level"));

    if (type == "crc")
        return std::make_unique<CheckFilter>();

#ifdef TEST
    if (type == "shiftC")
        return std::make_unique<FilterShift>(config.getInt("compression-level"));

    if (type == "shiftE")
        return std::make_unique<FilterShiftSecret>(config.getStr("password"), config.getStr("salt"));
#endif

    throw Exception("Unknown filter value");
}

View File

@@ -0,0 +1,25 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifndef SEMBACKUP_FILTERFACTORY_H
#define SEMBACKUP_FILTERFACTORY_H
#include <memory>
#include <string>
#include "../Config.h"
#include "Filter.h"
/// Utility factory class for Filter%s
class FilterFactory {
public:
/// Constructs a Filter of type \p type according to \p config
/// \param type Constant reference to a string containing the type of filter to construct ("aes", "zlib" or "crc")
/// \param config Constant reference to Config which will be used to determine constructed Filter%'s parameters
/// \return Unique pointer to the constructed Filter
/// \throws Exception if \p type is "none" or not a known filter type
static std::unique_ptr<Filter> makeFilter(const std::string &type, const Config &config);
};
#endif//SEMBACKUP_FILTERFACTORY_H

View File

@@ -0,0 +1,18 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifdef TEST
#include "FilterShift.h"
/// Adds shiftVal to every byte of \p from
std::vector<char> FilterShift::filterWrite(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it) {
        *it += shiftVal;
    }
    return from;
}
/// Subtracts shiftVal from every byte of \p from
std::vector<char> FilterShift::filterRead(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it) {
        *it -= shiftVal;
    }
    return from;
}
/// Stores \p level as the value to shift bytes by
FilterShift::FilterShift(int level)
    : shiftVal(level) {
}
#endif

30
src/filters/FilterShift.h Normal file
View File

@@ -0,0 +1,30 @@
//
// Created by Stepan Usatiuk on 22.04.2023.
//
#ifdef TEST
#ifndef SEMBACKUP_FILTERSHIFT_H
#define SEMBACKUP_FILTERSHIFT_H
#include "Filter.h"
/// Filter implementation that shifts every byte in input vector using provided value
/// \warning For testing purposes only!
class FilterShift : public Filter {
public:
/// Constructs the filter using \p level as shift value
/// \param level Number that will be added to each input byte
FilterShift(int level);
/// Adds the shift value to every byte of \p from
/// \copydoc Filter::filterWrite
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Subtracts the shift value from every byte of \p from
/// \copydoc Filter::filterRead
std::vector<char> filterRead(std::vector<char> from) const override;
private:
int shiftVal;///< Value to add to input bytes
};
#endif//SEMBACKUP_FILTERSHIFT_H
#endif//TEST

View File

@@ -0,0 +1,23 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifdef TEST
#include "FilterShiftSecret.h"
#include <string>
/// Adds shiftVal to every byte of \p from
std::vector<char> FilterShiftSecret::filterWrite(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it) {
        *it += shiftVal;
    }
    return from;
}
/// Subtracts shiftVal from every byte of \p from
std::vector<char> FilterShiftSecret::filterRead(std::vector<char> from) const {
    for (auto it = from.begin(); it != from.end(); ++it) {
        *it -= shiftVal;
    }
    return from;
}
/// Sets the shift value to the sum of the first characters of \p password and \p salt
FilterShiftSecret::FilterShiftSecret(const std::string &password, const std::string &salt)
    : shiftVal(password[0] + salt[0]) {
}
#endif

View File

@@ -0,0 +1,33 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifdef TEST
#ifndef SEMBACKUP_FILTERSHIFTSECRET_H
#define SEMBACKUP_FILTERSHIFTSECRET_H
#include <string>
#include "Filter.h"
/// Filter implementation that shifts every byte in input vector using the sum of two provided values
/// \warning For testing purposes only!
class FilterShiftSecret : public Filter {
public:
/// Constructs the filter using the sum of first bytes of \p password and \p salt to initialize shiftVal
/// \param password Constant reference to "password" string
/// \param salt Constant reference to "salt" string
FilterShiftSecret(const std::string &password, const std::string &salt);
/// Adds the shift value to every byte of \p from
/// \copydoc Filter::filterWrite
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Subtracts the shift value from every byte of \p from
/// \copydoc Filter::filterRead
std::vector<char> filterRead(std::vector<char> from) const override;
private:
int shiftVal = 0;///< Value to add to input bytes
};
#endif//SEMBACKUP_FILTERSHIFTSECRET_H
#endif//TEST

View File

@@ -0,0 +1,50 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#include "FilterZlib.h"
#include <zlib.h>
#include "../repo/Serialize.h"
std::vector<char> FilterZlib::filterWrite(std::vector<char> from) const {
uLongf outSize = compressBound(from.size());
std::vector<char> out;
Serialize::serialize('C', out);
Serialize::serialize(static_cast<unsigned long long>(from.size()), out);
uLongf sizeSize = out.size();
out.resize(sizeSize + outSize);
if (compress2(reinterpret_cast<Bytef *>(out.data() + sizeSize), &outSize, reinterpret_cast<const Bytef *>(from.data()), from.size(), level) !=
Z_OK)
throw Exception("Error compressing!");
out.resize(outSize + sizeSize);
return out;
}
/// Decompresses a vector produced by filterWrite.
/// Expects the serialized character 'C', then the serialized uncompressed size, then the zlib-compressed bytes.
/// \param from Vector to decompress
/// \return Decompressed bytes
/// \throws Exception if the prefix is wrong, there is no compressed data after the header, or the decompression fails
std::vector<char> FilterZlib::filterRead(std::vector<char> from) const {
    auto desI = from.cbegin();

    char C = Serialize::deserialize<char>(desI, from.cend());
    if (C != 'C') throw Exception("Bad compression prefix!");

    uLongf size = Serialize::deserialize<unsigned long long>(desI, from.cend());

    /// Make sure there is compressed data at all before allocating the output buffer
    if (desI >= from.cend()) throw Exception("Unexpected end of archive!");

    std::vector<char> out(size);

    if (uncompress(reinterpret_cast<Bytef *>(out.data()), &size, reinterpret_cast<const Bytef *>(&(*desI)), std::distance(desI, from.cend())) !=
        Z_OK)
        throw Exception("Error decompressing!");

    /// uncompress stores the actual number of decompressed bytes in size; if it is smaller than the recorded
    /// size, don't return the unused tail of the buffer as if it was data
    out.resize(size);
    return out;
}
/// Stores the compression level that filterWrite passes to compress2
/// \param level Compression level
FilterZlib::FilterZlib(int level) {
    this->level = level;
}

31
src/filters/FilterZlib.h Normal file
View File

@@ -0,0 +1,31 @@
//
// Created by Stepan Usatiuk on 23.04.2023.
//
#ifndef SEMBACKUP_FILTERZLIB_H
#define SEMBACKUP_FILTERZLIB_H
#include "Filter.h"
/// Filter implementation that uses Zlib to compress data
class FilterZlib : public Filter {
public:
/// Creates the filter using \p level as compression level
/// \param level Compression level handed to Zlib when compressing, -1 is the Zlib default
FilterZlib(int level);
/// Compresses the \p from vector
/// \copydoc Filter::filterWrite
/// \throws Exception on any error
std::vector<char> filterWrite(std::vector<char> from) const override;
/// Decompresses the \p from vector
/// \copydoc Filter::filterRead
/// \throws Exception on any error
std::vector<char> filterRead(std::vector<char> from) const override;
private:
int level = -1;///< Compression level to use, -1 is the Zlib default
};
#endif//SEMBACKUP_FILTERZLIB_H

129
src/main.cpp Normal file
View File

@@ -0,0 +1,129 @@
#include <iostream>
#include "BytesFormatter.h"
#include "Config.h"
#include "Context.h"
#include "Exception.h"
#include "Logger.h"
#include "Signals.h"
#include "commands/Command.h"
#include "commands/CommandDiff.h"
#include "commands/CommandList.h"
#include "commands/CommandListFiles.h"
#include "commands/CommandRestore.h"
#include "commands/CommandRun.h"
#include "repo/FileRepository.h"
#include "repo/Repository.h"
#include "repo/Serialize.h"
#include "repo/objects/Archive.h"
#include "repo/objects/File.h"
/// Builds a Config from command line arguments given as `--key value` pairs
/// \param argc Number of entries in \p argv
/// \param argv Arguments to parse, starting with the first option
/// \return Config with every parsed key/value pair added
/// \throws Exception if a key does not start with "--" or has no value after it
Config getConf(int argc, char *argv[]) {
    Config parsed;
    int pos = 0;
    while (pos < argc) {
        const std::string flag = argv[pos];
        if (flag.compare(0, 2, "--") != 0)
            throw Exception("Options should start with --");
        std::string name = flag.substr(2);

        // The value is the argument right after the key
        const int valuePos = pos + 1;
        if (valuePos == argc)
            throw Exception("Option not specified for " + name);
        std::string value = argv[valuePos];

        parsed.add(name, value);
        pos += 2;
    }
    return parsed;
}
/// Prints every entry of Config::keys to stdout: name, type, default value (if any),
/// whether it is saved in the repository, and its description
/// \return Always 0
int help() {
    for (const auto &[name, spec]: Config::keys) {
        const auto &typeName = Config::KeyTypeToStr.at(spec.type);
        std::cout << "--" << name << " <" << typeName << ">" << std::endl;
        if (spec.defaultval.has_value()) {
            std::cout << "          Default: " << spec.defaultval.value() << std::endl;
        }
        const char *saved = spec.remember ? "yes" : "no";
        std::cout << "          Is saved in repository: " << saved << std::endl;
        std::cout << "          Info: " << spec.info << std::endl;
    }
    return 0;
}
/// Creates a FileRepository from \p conf and opens it
/// \param conf Config to construct the repository with
/// \return The opened repository, or nullptr if construction or opening threw a std::exception
///         (the error is printed to stdout)
std::unique_ptr<Repository> openRepo(Config &conf) {
    std::unique_ptr<Repository> opened;
    try {
        auto candidate = std::make_unique<FileRepository>(conf);
        candidate->open();
        opened = std::move(candidate);
    } catch (std::exception &e) {
        std::cout << "Error opening repo: " << e.what() << std::endl;
        return nullptr;
    }
    return opened;
}
/// Creates a new repository described by \p conf
/// \param conf Config to construct the repository with
/// \return 0 on success, -1 if constructing or initializing the repository threw a std::exception
///         (the error is printed to stdout)
int init(Config &conf) {
    try {
        // Construct inside the try block so that constructor failures are reported like init()
        // failures. std::make_unique never returns nullptr, so no null check is needed.
        auto repo = std::make_unique<FileRepository>(conf);
        repo->init();
    } catch (const std::exception &e) {
        std::cout << "Error initializing repo: " << e.what() << std::endl;
        return -1;
    }
    return 0;
}
/// Program entry point
/// `argv[1]` selects the action ("help", "init" or the name of a registered Command),
/// the remaining arguments are parsed by getConf() as `--key value` pairs.
/// \return 0 on success, -1 on any error (including exceptions caught here)
int main(int argc, char *argv[]) {
    try {
        Signals::setup();
        if (argc < 2) {
            std::cerr << "No argument specified" << std::endl;
            help();
            return -1;
        }
        std::string opt = argv[1];
        if (opt == "help") {
            return help();
        }
        Config conf;
        try {
            conf = getConf(argc - 2, argv + 2);
        } catch (const std::exception &e) {
            std::cerr << "Error reading config!" << std::endl
                      << e.what() << std::endl;
            return -1;
        }
        if (opt == "init") {
            return init(conf);
        }
        auto repo = openRepo(conf);
        if (repo == nullptr) {
            std::cerr << "Can't open repo!" << std::endl;
            return -1;
        }
        Logger logger(conf.getInt("verbose"));
        Context ctx{&logger, repo.get()};
        // Registry of available commands, keyed by their name
        std::unordered_map<std::string, std::unique_ptr<Command>> commands;
        commands.emplace(CommandDiff().name, std::make_unique<CommandDiff>());
        commands.emplace(CommandRestore().name, std::make_unique<CommandRestore>());
        commands.emplace(CommandRun().name, std::make_unique<CommandRun>());
        commands.emplace(CommandListFiles().name, std::make_unique<CommandListFiles>());
        commands.emplace(CommandList().name, std::make_unique<CommandList>());
        if (commands.count(opt) == 0) {
            std::cerr << "Unknown argument" << std::endl;
            return -1;
        }
        commands.at(opt)->run(ctx);
        return 0;
    } catch (const std::exception &e) {
        std::cerr << "Error!" << std::endl
                  << e.what() << std::endl;
    } catch (...) {
        std::cerr << "Something very bad happened!" << std::endl;
    }
    // Only reached from the catch handlers: report the failure through the exit status
    return -1;
}

246
src/repo/FileRepository.cpp Normal file
View File

@@ -0,0 +1,246 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "FileRepository.h"
#include <exception>
#include <iterator>
#include <mutex>
#include "../filters/CheckFilter.h"
#include "../filters/FilterFactory.h"
#include "Object.h"
#include "Serialize.h"
/// Constructs a new FileRepository
/// The repository root is taken from the `repo` config value and the write cache target size
/// from `repo-target` (in MB).
/// \param config Config to use; it is moved into the Repository base class
FileRepository::FileRepository(Config config)
    : Repository(std::move(config)),
      // The `config` parameter has been moved from above, so both values below are read from
      // the stored this->config
      root(std::filesystem::path(this->config.getStr("repo"))),
      // Widen before multiplying so the MB -> bytes conversion is done in unsigned long long
      writeCacheMax(static_cast<unsigned long long>(this->config.getInt("repo-target")) * 1024 * 1024) {}
/// Checks whether a repository is present at root
/// \return True if root is a directory and contains an `info` entry
bool FileRepository::exists() {
    if (!std::filesystem::is_directory(root)) return false;
    return std::filesystem::exists(root / "info");
}
/// Takes the write cache lock and flushes the write cache
/// \return Always true
bool FileRepository::flush() {
    std::unique_lock cacheGuard(writeCacheLock);
    flushWriteCache(std::move(cacheGuard));
    return true;
}
bool FileRepository::open() {
if (!exists()) throw Exception("Repository doesn't exist!");
auto readConf = Serialize::deserialize<Config>(CheckFilter::filterReadStatic(readFile(root / "info")));
std::swap(config, readConf);
config.merge(readConf);
if (config.getStr("compression") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("compression"), config));
if (config.getStr("encryption") != "none") filters.addFilter(FilterFactory::makeFilter(config.getStr("encryption"), config));
filters.addFilter(FilterFactory::makeFilter("crc", config));
ready = true;
try {
std::tie(maxFileId, offsetIndex) = Serialize::deserialize<std::pair<decltype(maxFileId), decltype(offsetIndex)>>(filters.filterRead(readFile(root / "offsets")));
std::tie(keyIndex, largestUnusedId) = Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>(filters.filterRead(readFile(root / "index")));
} catch (const std::exception &e) {
ready = false;
throw;
}
return true;
}
/// Initializes a new repository at root
/// Creates the root directory if needed, writes the config to `info` and sets up the filter chain.
/// \return Always true
/// \throws Exception if this object is already open/initialized, if a repository already exists
///         at root, or if the root directory can't be created
bool FileRepository::init() {
    if (ready) throw Exception("Trying to initialize already initialized repository!");
    if (exists()) throw Exception("Trying to initialize already existing repository!");

    const bool haveRoot = std::filesystem::is_directory(root) || std::filesystem::create_directories(root);
    if (!haveRoot)
        throw Exception("Can't create directory " + root.u8string());

    writeFile(root / "info", CheckFilter::filterWriteStatic(Serialize::serialize(config)));

    // Filter chain: optional compression, optional encryption, then crc
    for (const char *option: {"compression", "encryption"}) {
        const auto kind = config.getStr(option);
        if (kind != "none")
            filters.addFilter(FilterFactory::makeFilter(kind, config));
    }
    filters.addFilter(FilterFactory::makeFilter("crc", config));

    ready = true;
    return true;
}
/// Flushes the write cache and writes the `offsets` and `index` metadata files,
/// but only if the repository was opened or initialized
/// NOTE(review): the calls below can throw; an exception leaving a destructor normally
/// terminates the program - confirm this is acceptable
FileRepository::~FileRepository() {
    if (!ready) return;
    ready = false;

    flushWriteCache(std::unique_lock(writeCacheLock));

    const auto offsetsData = filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex)));
    writeFile(root / "offsets", offsetsData);

    const auto indexData = filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId)));
    writeFile(root / "index", indexData);
}
/// Reads the raw data of the object with the given id from its storage file
/// \param id ID of the object
/// \return Object data after it has been passed through the read filters
/// \throws Exception if the repository is not ready or the id is not in the offset index
std::vector<char> FileRepository::getObject(Object::idType id) const {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    std::unique_lock lock(repoLock);
    const auto found = offsetIndex.find(id);
    if (found == offsetIndex.end())
        throw Exception("Object with id " + std::to_string(id) + " doesn't exist!");
    // Copy the entry so the file can be read without holding the lock
    const auto entry = found->second;
    lock.unlock();

    const auto storageFile = root / std::to_string(entry.fileId);
    return filters.filterRead(readFile(storageFile, entry.offset, entry.length));
}
/// Serializes and filters \p obj and puts the result into the write cache,
/// flushing the cache once it has reached writeCacheMax bytes
/// \param obj Object to write
/// \return Always true
/// \throws Exception if the repository is not ready
bool FileRepository::writeObject(const Object &obj) {
    if (!ready) throw Exception("Tried working with uninitialized repo!");

    // Filtering happens before the lock is taken
    auto encoded = filters.filterWrite(Serialize::serialize(obj));

    std::unique_lock cacheGuard(writeCacheLock);
    writeCacheSize += encoded.size();
    writeCache[obj.id] = std::move(encoded);

    // Below the target file size: keep collecting
    if (writeCacheSize < writeCacheMax) return true;

    flushWriteCache(std::move(cacheGuard));
    return true;
}
void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) {
if (writeCache.empty()) {
lockW.unlock();
return;
}
// Swap the cache for a new one and unlock the mutex so other threads can continue working
decltype(writeCache) objs;
std::swap(writeCache, objs);
writeCacheSize = 0;
decltype(maxFileId) currentFileId;
{
std::lock_guard lockI(repoLock);
currentFileId = maxFileId;
maxFileId++;
}
lockW.unlock();
unsigned long long offset = 0;
std::ofstream ofstream(root / std::to_string(currentFileId), std::ios::binary | std::ios::trunc | std::ios::out);
for (auto &i: objs) {
{
std::lock_guard lockI(repoLock);
offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size()));
}
offset += i.second.size();
ofstream.rdbuf()->sputn(i.second.data(), i.second.size());
}
}
/// Registers \p obj in the key index under its type and key, then writes it to the storage
/// \param obj Object to store
/// \return Always true
bool FileRepository::putObject(const Object &obj) {
    // Index first, the lock is released before the (potentially slow) write
    std::unique_lock indexGuard(repoLock);
    keyIndex[obj.type][obj.getKey()] = obj.id;
    indexGuard.unlock();

    writeObject(obj);
    return true;
}
/// Deletion is not implemented, this function always throws
/// \param obj Unused
/// \throws Exception always; the message tells whether the repository was ready
bool FileRepository::deleteObject(const Object &obj) {
    throw Exception(ready ? "Deletion not implemented!" : "Tried working with uninitialized repo!");
}
/// Reads \p size bytes of \p file starting at \p offset
/// \param file   Path of the file to read
/// \param offset Position of the first byte to read
/// \param size   Amount of bytes to read (no more than absoluteMaxFileLimit)
/// \return Vector with exactly \p size bytes
/// \throws Exception if \p size exceeds absoluteMaxFileLimit, the file can't be opened,
///         seeking fails or fewer than \p size bytes could be read
std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const {
    if (size > absoluteMaxFileLimit) {
        throw Exception("Tried to read " + std::to_string(size) +
                        " bytes from " + file.u8string() +
                        " which is more than absoluteMaxFileLimit");
    }

    std::ifstream ifstream(file, std::ios::binary | std::ios::in);
    if (!ifstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for reading!");

    std::vector<char> buf(size);
    auto *raw = ifstream.rdbuf();

    const auto seekFailed = std::streampos(std::streamoff(-1));
    if (raw->pubseekpos(offset) == seekFailed) throw Exception("Unexpected end of file " + file.u8string());

    const auto got = raw->sgetn(buf.data(), size);
    if (static_cast<unsigned long long>(got) != size) throw Exception("Unexpected end of file " + file.u8string());

    return buf;
}
/// Reads the whole \p file
/// \param file Path of the file to read
/// \return All bytes of the file, an empty vector for an empty file
/// \throws Exception if \p file is not a regular file, or on any read error
std::vector<char> FileRepository::readFile(const std::filesystem::path &file) const {
    if (!std::filesystem::is_regular_file(file)) {
        throw Exception("File " + file.u8string() + " is not a regular file!");
    }
    const auto bytes = std::filesystem::file_size(file);
    if (bytes != 0) {
        return readFile(file, 0, bytes);
    }
    return {};
}
/// Writes \p data to \p file, replacing any previous contents
/// \param file Path of the file to write
/// \param data Bytes to write
/// \return Always true
/// \throws Exception if the file can't be opened, or if not all the data could be written
bool FileRepository::writeFile(const std::filesystem::path &file, const std::vector<char> &data) {
    std::ofstream ofstream(file, std::ios::binary | std::ios::trunc | std::ios::out);
    if (!ofstream.is_open()) throw Exception("Can't open file " + file.u8string() + " for writing!");

    const auto size = static_cast<std::streamsize>(data.size());
    if (ofstream.rdbuf()->sputn(data.data(), size) != size)
        throw Exception("Couldn't write all the data for " + file.u8string());

    // sputn may only have filled the stream buffer; close() flushes it and sets failbit
    // if the data could not be written out
    ofstream.close();
    if (ofstream.fail())
        throw Exception("Couldn't write all the data for " + file.u8string());
    return true;
}
/// Looks up the id registered for \p type and \p key and reads that object
/// \param type Type of the object
/// \param key  Key of the object
/// \return Raw data of the object
std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const {
    const auto id = getObjectId(type, key);
    return getObject(id);
}
/// Returns the id registered in the key index for \p type and \p key
/// \param type Type of the object
/// \param key  Key of the object
/// \throws Exception if there are no objects of \p type
/// \throws std::out_of_range if \p key is not registered for \p type
Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const {
    std::lock_guard lock(repoLock);
    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) throw Exception("No objects of requested type!");
    return ofType->second.at(key);
}
/// Lists the keys and ids of all objects of \p type in the key index
/// \param type Type of the objects
/// \return Vector of (key, id) pairs, empty if there are no objects of \p type
std::vector<std::pair<std::string, Object::idType>> FileRepository::getObjects(Object::ObjectType type) const {
    using Listing = std::vector<std::pair<std::string, Object::idType>>;

    std::lock_guard lock(repoLock);
    const auto ofType = keyIndex.find(type);
    if (ofType == keyIndex.end()) return Listing();
    return Listing(ofType->second.begin(), ofType->second.end());
}
/// Checks whether \p key is registered in the key index for \p type
/// \param type Type of the object
/// \param key  Key of the object
/// \return True if such an entry exists
bool FileRepository::exists(Object::ObjectType type, const std::string &key) const {
    std::lock_guard lock(repoLock);
    const auto ofType = keyIndex.find(type);
    return ofType != keyIndex.end() && ofType->second.count(key) > 0;
}
/// Hands out the current value of largestUnusedId and advances the counter, under repoLock
/// \return The id to use for a new object
Object::idType FileRepository::getId() {
    std::lock_guard lock(repoLock);
    const auto handedOut = largestUnusedId;
    ++largestUnusedId;
    return handedOut;
}
/// Deserialization constructor: reads fileId, offset and length, in this order
/// \param in  Iterator to the start of the serialized entry, advanced by Serialize::deserialize
/// \param end End of the serialized data
FileRepository::OffsetEntry::OffsetEntry(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end) {
    fileId = Serialize::deserialize<decltype(fileId)>(in, end);
    offset = Serialize::deserialize<decltype(offset)>(in, end);
    length = Serialize::deserialize<decltype(length)>(in, end);
}
void FileRepository::OffsetEntry::serialize(std::vector<char> &out) const {
Serialize::serialize(fileId, out);
Serialize::serialize(offset, out);
Serialize::serialize(length, out);
}
/// Constructs an entry from its three fields
/// \param fileId ID of the file where the object is located
/// \param offset Offset in the file where the object starts
/// \param length Length of the object
FileRepository::OffsetEntry::OffsetEntry(unsigned long long fileId, unsigned long long offset, unsigned long long length) {
    this->fileId = fileId;
    this->offset = offset;
    this->length = length;
}
/// Removes all key index entries of \p type
/// \param type Type of the objects whose entries should be dropped
/// \return Always true
bool FileRepository::clearCache(Object::ObjectType type) {
    // keyIndex is shared with the other index accessors, which all hold repoLock
    std::lock_guard lock(repoLock);
    keyIndex[type] = {};
    return true;
}
/// Registers an already stored object in the key index under its type and key
/// \param obj Object to register; its id must be present in the offset index
/// \return Always true
/// \throws Exception if no object with this id is in the offset index
bool FileRepository::addToCache(const Object &obj) {
    // One critical section, so the existence check and the insertion happen atomically
    std::lock_guard lock(repoLock);
    if (offsetIndex.find(obj.id) == offsetIndex.end())
        throw Exception("Object with id " + std::to_string(obj.id) + " doesn't exist!");
    keyIndex[obj.type][obj.getKey()] = obj.id;
    return true;
}

125
src/repo/FileRepository.h Normal file
View File

@@ -0,0 +1,125 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#ifndef SEMBACKUP_FILEREPOSITORY_H
#define SEMBACKUP_FILEREPOSITORY_H
#include <filesystem>
#include <fstream>
#include <map>
#include <mutex>
#include "Object.h"
#include "Repository.h"
/// Repository implementation in the local filesystem
/**
* `repo` Config value is used as the root directory
* Objects are stored concatenated in files with approximate size of `repo-target` MB (from Config)
* The object key/object id index is stored as a hash map, in the `index` file outside of the object storage structure
* Hints for the location of objects inside of files are also stored as a hash map in the `offsets` file
* Config is stored in the `info` file, merged with the supplied Config on open()
*
* Thread safe, approx. max memory usage is `number of threads` * `repo-target`,
* as every thread can be flushing its write cache at the same time
*/
class FileRepository final : public Repository {
public:
/// Constructs a new FileRepository
/// \param config Config to use
FileRepository(Config config);
bool exists() override;
bool open() override;
bool init() override;
bool flush() override;
std::vector<char> getObject(Object::idType id) const override;
bool putObject(const Object &obj) override;
bool deleteObject(const Object &obj) override;
std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override;
Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override;
std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override;
bool clearCache(Object::ObjectType type) override;
bool addToCache(const Object &obj) override;
bool exists(Object::ObjectType type, const std::string &key) const override;
Object::idType getId() override;
/// FileRepository destructor
/// Flushes write cache, and writes the metadata
~FileRepository() override;
FileRepository(const FileRepository &r) = delete;
FileRepository &operator=(const FileRepository &r) = delete;
private:
const std::filesystem::path root;///< Root of the repository in the filesystem
/// Puts the Object raw data into write cache
bool writeObject(const Object &obj);
bool ready = false;///< Indicates whether the FileRepository was opened or initialized
/// Reads the file and returns its raw data
/// \param file Constant reference to the absolute path of the file
/// \return Vector of bytes of the file
std::vector<char> readFile(const std::filesystem::path &file) const;
/// Reads \p size bytes of the file starting at \p offset and returns its raw data
/// \param file Constant reference to the absolute path of the file
/// \param offset First byte of the file to read
/// \param size Amount of bytes to read (no more than absoluteMaxFileLimit)
/// \return Vector of bytes of the file
/// \throws Exception on any error, or when absoluteMaxFileLimit is reached
std::vector<char> readFile(const std::filesystem::path &file, unsigned long long offset, unsigned long long size) const;
static constexpr unsigned long long absoluteMaxFileLimit{4ULL * 1024 * 1024 * 1024};///<Max file read size (4GB)
/// Writes \p data to \p file
/// \param file Constant reference to the absolute path of the file
/// \param data Constant reference to the vector of bytes to write
/// \return True
/// \throws Exception on any error
bool writeFile(const std::filesystem::path &file, const std::vector<char> &data);
mutable std::mutex repoLock;///< Lock for any operations on the Repository
/// Helper struct to store the location of objects in the filesystem
struct OffsetEntry {
unsigned long long fileId;///< ID of file where the object is located
unsigned long long offset;///< Offset in the file where the object starts
unsigned long long length;///< Length of the object
using serializable = std::true_type;
/// Constructs an entry from its three fields
OffsetEntry(unsigned long long fileId, unsigned long long offset, unsigned long long length);
/// Deserialization constructor
OffsetEntry(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
/// Serializes the entry to \p out
void serialize(std::vector<char> &out) const;
};
unsigned long long maxFileId = 1; ///< ID that the next object storage file will get
std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem
std::mutex writeCacheLock; ///< Write cache lock
std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data
unsigned long long writeCacheSize = 0; ///< Current byte size of the write cache
const unsigned long long writeCacheMax; ///< Target size of the write cache, it is automatically flushed after this is reached
/// Flushes the write cache
/// Expects the cache lock to be held, swaps the cache with an empty one and unlocks it
/// \param lockW Write cache lock
void flushWriteCache(std::unique_lock<std::mutex> &&lockW);
Object::idType largestUnusedId = 1; ///< Next object ID to be handed out by getId()
std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>> keyIndex;///< Maps Object%'s keys to their ID's
};
#endif//SEMBACKUP_FILEREPOSITORY_H

21
src/repo/Object.cpp Normal file
View File

@@ -0,0 +1,21 @@
//
// Created by Stepan Usatiuk on 14.04.2023.
//
#include "Object.h"
#include "Serialize.h"
/// Constructs an Object with the given id and type
/// \param id   ID of the object
/// \param type Type of the object
Object::Object(idType id, ObjectType type) : id(id), type(type) {}
/// Deserialization constructor: reads the id and then the type
/// \param in  Iterator to the start of the serialized object, passed on to Serialize::deserialize
/// \param end End of the serialized data
Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
: id(Serialize::deserialize<idType>(in, end)),
type(Serialize::deserialize<ObjectType>(in, end)) {
}
/// Appends the id and then the type to \p out, the same order the deserialization constructor reads them in
/// \param out Vector to append the serialized data to
void Object::serialize(std::vector<char> &out) const {
Serialize::serialize(id, out);
Serialize::serialize(type, out);
}
/// Defaulted destructor, defined out of line in this translation unit
Object::~Object() = default;

Some files were not shown because too many files have changed in this diff Show More