sembackup: use sha

This commit is contained in:
2023-06-07 11:57:26 +02:00
parent fdcb0cf0c4
commit 97cc03770e
16 changed files with 67 additions and 62 deletions

View File

@@ -44,7 +44,7 @@ private:
public:
/// Returns the next chunk of the file
/// Returns a single empty chunk if a file is empty
/// \return Pair consisting of chunk's bytes and its MD5 hash
/// \return Pair consisting of the chunk's SHA hash (first) and the chunk's bytes (second)
/// \throws Exception if EOF was already reached
virtual std::pair<std::string, std::vector<char>> getNext() = 0;

View File

@@ -4,7 +4,7 @@
#include "../includes/BuzhashChunker.h"
#include "../../crypto/includes/MD5.h"
#include "../../crypto/includes/SHA.h"
#include "../../utils/includes/Exception.h"
BuzhashChunker::BuzhashChunker(std::streambuf *buf, unsigned long long minBytes, unsigned long long maxBytes, unsigned long long mask, uint32_t window) : Chunker(buf, maxBytes), window(window), minBytes(minBytes), mask(mask), buzhash(window) {}
@@ -18,7 +18,7 @@ std::pair<std::string, std::vector<char>> BuzhashChunker::getNext() {
if (read != minBytes) {
eof = true;
rbuf.resize(read);
return {MD5::calculate(rbuf), rbuf};
return {SHA::calculate(rbuf), rbuf};
}
for (auto c: rbuf) {
@@ -38,5 +38,5 @@ std::pair<std::string, std::vector<char>> BuzhashChunker::getNext() {
}
}
return {MD5::calculate(rbuf), rbuf};
return {SHA::calculate(rbuf), rbuf};
}

View File

@@ -4,7 +4,7 @@
#include "../includes/ConstChunker.h"
#include "../../crypto/includes/MD5.h"
#include "../../crypto/includes/SHA.h"
#include "../../utils/includes/Exception.h"
ConstChunker::ConstChunker(std::streambuf *buf, unsigned long long maxBytes) : Chunker(buf, maxBytes) {}
@@ -21,7 +21,7 @@ std::pair<std::string, std::vector<char>> ConstChunker::getNext() {
rbuf.resize(read);
}
auto md5 = MD5::calculate(rbuf);
auto SHA = SHA::calculate(rbuf);
return {md5, rbuf};
return {SHA, rbuf};
}

View File

@@ -18,7 +18,7 @@ namespace CommandsCommon {
/// \param ignore List of files to ignore
/// \param spawner Function to spawn other tasks
/// \param processFile Task to spawn on found files
void processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile);
void processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, const std::function<void(std::function<void()>)> &spawner, std::function<void(std::filesystem::directory_entry)> processFile);
struct WorkerStats {
public:

View File

@@ -10,7 +10,7 @@
#include "../../change_detectors/includes/ChangeDetectorFactory.h"
#include "../../chunkers/includes/ChunkerFactory.h"
#include "../../crypto/includes/MD5.h"
#include "../../crypto/includes/SHA.h"
#include "../../repo/includes/Serialize.h"
#include "../../repo/includes/objects/Archive.h"
#include "../../repo/includes/objects/Chunk.h"
@@ -187,8 +187,8 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
/// The order of checks is important, because is_directory follows the symlink
if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) {
auto contents = File::getFileContents(orig);
Chunk c(ctx.repo->getId(), MD5::calculate(contents), contents);
File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.md5, {c.id}, File::getFileType(orig));
Chunk c(ctx.repo->getId(), SHA::calculate(contents), contents);
File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.SHA, {c.id}, File::getFileType(orig));
ctx.repo->putObject(c);
ctx.repo->putObject(f);
return f.id;
@@ -200,7 +200,7 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co
if (!ifstream) throw Exception("Couldn't open " + orig.u8string() + " for reading");
std::unique_ptr<Chunker> chunker = ChunkerFactory::getChunker(ctx.repo->getConfig(), ifstream.rdbuf());
MD5 fileHash;
SHA fileHash;
std::vector<Object::idType> fileChunks;
unsigned long long size = 0;

View File

@@ -23,7 +23,7 @@ bool CommandsCommon::isSubpath(const std::filesystem::path &prefix, const std::f
return true;
}
void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, std::function<void(std::function<void()>)> spawner, std::function<void(std::filesystem::directory_entry)> processFile) {
void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector<std::string> ignore, const std::function<void(std::function<void()>)> &spawner, std::function<void(std::filesystem::directory_entry)> processFile) {
if (!std::filesystem::is_directory(dir)) throw Exception(dir.u8string() + " is not a directory!");
/// Don't process the directory if it has a ".nobackup" file

View File

@@ -74,20 +74,20 @@ std::string Diff::diffPercent(const ComparableFile &c1, const ComparableFile &c2
/// Exit when asked to
if (Signals::shouldQuit) throw Exception("Quitting");
if (chunkp.second.empty()) continue;
std::string md5(chunkp.first.begin(), chunkp.first.end());
ch1hashes.emplace(md5);
hashsize[md5] = chunkp.second.size();
std::string SHA(chunkp.first.begin(), chunkp.first.end());
ch1hashes.emplace(SHA);
hashsize[SHA] = chunkp.second.size();
}
for (auto chunkp: ch2) {
/// Exit when asked to
if (Signals::shouldQuit) throw Exception("Quitting");
if (chunkp.second.empty()) continue;
std::string md5(chunkp.first.begin(), chunkp.first.end());
hashsize[md5] = chunkp.second.size();
if (ch1hashes.count(md5) > 0) ch1hashes.erase(md5);
else if (ch1hashes.count(md5) == 0)
ch2diff.emplace(md5);
std::string SHA(chunkp.first.begin(), chunkp.first.end());
hashsize[SHA] = chunkp.second.size();
if (ch1hashes.count(SHA) > 0) ch1hashes.erase(SHA);
else if (ch1hashes.count(SHA) == 0)
ch2diff.emplace(SHA);
}
unsigned long long diff = 0;

View File

@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.22)
find_package(OpenSSL REQUIRED)
add_library(crypto srcs/AES.cpp srcs/CRC32.cpp srcs/MD5.cpp)
add_library(crypto srcs/AES.cpp srcs/CRC32.cpp srcs/SHA.cpp)
target_include_directories(crypto PUBLIC includes)

View File

@@ -2,8 +2,8 @@
// Created by Stepan Usatiuk on 15.04.2023.
//
#ifndef SEMBACKUP_MD5_H
#define SEMBACKUP_MD5_H
#ifndef SEMBACKUP_SHA_H
#define SEMBACKUP_SHA_H
#include <array>
#include <memory>
@@ -11,24 +11,24 @@
#include <openssl/evp.h>
/// Class to handle MD5 hashing
/// Class to handle SHA-256 hashing
/**
* Based on: https://wiki.openssl.org/index.php/EVP_Message_Digests
*/
class MD5 {
class SHA {
public:
/// Constructs an empty MD5 hasher instance
/// Constructs an empty SHA hasher instance
/// \throws Exception on initialization error
MD5();
SHA();
/// Calculates the hash for a given \p in char vector
/// \param in Constant reference to an input vector
/// \return MD5 hash of \p in
/// \return SHA-256 hash of \p in, as 32 raw bytes
static std::string calculate(const std::vector<char> &in);
/// Calculates the hash for a given \p in string
/// \param in Constant reference to an input string
/// \return MD5 hash of \p in
/// \return SHA-256 hash of \p in, as 32 raw bytes
static std::string calculate(const std::string &in);
/// Append a vector of chars to the current hash
@@ -45,4 +45,4 @@ private:
};
#endif//SEMBACKUP_MD5_H
#endif//SEMBACKUP_SHA_H

View File

@@ -2,32 +2,32 @@
// Created by Stepan Usatiuk on 15.04.2023.
//
#include "../includes/MD5.h"
#include "../includes/SHA.h"
#include "../../utils/includes/Exception.h"
std::string MD5::calculate(const std::vector<char> &in) {
MD5 hasher;
std::string SHA::calculate(const std::vector<char> &in) {
SHA hasher;
hasher.feedData(in);
return hasher.getHash();
}
MD5::MD5() {
SHA::SHA() {
if (!mdctx)
throw Exception("Can't create hashing context!");
if (!EVP_DigestInit_ex(mdctx.get(), EVP_md5(), nullptr))
if (!EVP_DigestInit_ex(mdctx.get(), EVP_sha256(), nullptr))
throw Exception("Can't create hashing context!");
}
void MD5::feedData(const std::vector<char> &in) {
void SHA::feedData(const std::vector<char> &in) {
if (in.empty()) return;
if (!EVP_DigestUpdate(mdctx.get(), in.data(), in.size()))
throw Exception("Error hashing!");
}
std::string MD5::getHash() {
std::array<char, 16> out;
std::string SHA::getHash() {
std::array<char, 32> out;
unsigned int s = 0;
if (!EVP_DigestFinal_ex(mdctx.get(), reinterpret_cast<unsigned char *>(out.data()), &s))
@@ -42,7 +42,7 @@ std::string MD5::getHash() {
return {out.begin(), out.end()};
}
std::string MD5::calculate(const std::string &in) {
std::string SHA::calculate(const std::string &in) {
std::vector<char> tmp(in.begin(), in.end());
return MD5::calculate(tmp);
return SHA::calculate(tmp);
}

View File

@@ -21,10 +21,10 @@ public:
/// \copydoc Object::serialize
void serialize(std::vector<char> &out) const override;
/// Returns the MD5 of the chunk
/// Returns the SHA of the chunk
std::string getKey() const override;
const std::string md5; ///< MD5 hash of the chunk
const std::string SHA; ///< SHA hash of the chunk
const std::vector<char> data; ///< Raw chunk data
const unsigned long long length;///< Size of chunk in bytes
};

View File

@@ -26,7 +26,7 @@ public:
static inline const std::unordered_map<Type, std::string> TypeToStr{{Type::Normal, "normal"}, {Type::Symlink, "symlink"}, {Type::Directory, "directory"}};
File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType);
File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector<idType> chunks, Type fileType);
/// Deserialization constructor
File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end);
@@ -64,7 +64,7 @@ public:
const std::string name; ///< Relative path to backup root, as UTF-8 string
const unsigned long long bytes;///< Amount of bytes in the file
const unsigned long long mtime;///< Last modification time as timestamp
const std::string md5; ///< Hash of the file
const std::string SHA; ///< Hash of the file
const Type fileType; ///< File type
/// List of the chunks in file

View File

@@ -7,11 +7,11 @@
#include "../../../utils/includes/Exception.h"
#include "../../includes/Serialize.h"
Chunk::Chunk(idType id, std::string md5, std::vector<char> data) : Object(id, ObjectType::Chunk), data(std::move(data)), md5(std::move(md5)), length(this->data.size()) {}
Chunk::Chunk(idType id, std::string SHA, std::vector<char> data) : Object(id, ObjectType::Chunk), data(std::move(data)), SHA(std::move(SHA)), length(this->data.size()) {}
Chunk::Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
: Object(in, end),
md5(Serialize::deserialize<std::remove_const<decltype(md5)>::type>(in, end)),
SHA(Serialize::deserialize<std::remove_const<decltype(SHA)>::type>(in, end)),
data(Serialize::deserialize<std::remove_const<decltype(data)>::type>(in, end)),
length(Serialize::deserialize<std::remove_const<decltype(length)>::type>(in, end)) {
if (type != ObjectType::Chunk) throw Exception("Type mismatch for Chunk!");
@@ -20,11 +20,11 @@ Chunk::Chunk(std::vector<char>::const_iterator &in, const std::vector<char>::con
void Chunk::serialize(std::vector<char> &out) const {
Object::serialize(out);
Serialize::serialize(md5, out);
Serialize::serialize(SHA, out);
Serialize::serialize(data, out);
Serialize::serialize(length, out);
}
std::string Chunk::getKey() const {
return md5;
return SHA;
}

View File

@@ -11,15 +11,15 @@
#include "../../../utils/includes/Exception.h"
#include "../../includes/Serialize.h"
File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector<idType> chunks, Type fileType)
: Object(id, ObjectType::File), name(name), bytes(bytes), mtime(mtime), md5(md5), fileType(fileType), chunks(chunks) {}
File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector<idType> chunks, Type fileType)
: Object(id, ObjectType::File), name(name), bytes(bytes), mtime(mtime), SHA(SHA), fileType(fileType), chunks(chunks) {}
File::File(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end)
: Object(in, end),
name(Serialize::deserialize<std::string>(in, end)),
bytes(Serialize::deserialize<unsigned long long>(in, end)),
mtime(Serialize::deserialize<unsigned long long>(in, end)),
md5(Serialize::deserialize<std::remove_const<decltype(md5)>::type>(in, end)),
SHA(Serialize::deserialize<std::remove_const<decltype(SHA)>::type>(in, end)),
fileType(Serialize::deserialize<std::remove_const<decltype(fileType)>::type>(in, end)),
chunks(Serialize::deserialize<std::remove_const<decltype(chunks)>::type>(in, end)) {
if (type != ObjectType::File) throw Exception("Type mismatch for File!");
@@ -30,7 +30,7 @@ void File::serialize(std::vector<char> &out) const {
Serialize::serialize(name, out);
Serialize::serialize(bytes, out);
Serialize::serialize(mtime, out);
Serialize::serialize(md5, out);
Serialize::serialize(SHA, out);
Serialize::serialize(fileType, out);
Serialize::serialize(chunks, out);
}

View File

@@ -3,7 +3,7 @@
//
#include "AES.h"
#include "MD5.h"
#include "SHA.h"
#include <gtest/gtest.h>
@@ -13,6 +13,11 @@ TEST(CryptoTests, AES) {
auto dec = AES::decrypt(enc, "p1", "e");
EXPECT_EQ(in, std::string(dec.begin(), dec.end()));
in = "";
enc = AES::encrypt(std::vector<char>(in.begin(), in.end()), "p1", "e");
dec = AES::decrypt(enc, "p1", "e");
EXPECT_EQ(in, std::string(dec.begin(), dec.end()));
in = "1234567890asdfg";
enc = AES::encrypt(std::vector<char>(in.begin(), in.end()), "p1", "e");
dec = AES::decrypt(enc, "p1", "e");
@@ -34,13 +39,13 @@ TEST(CryptoTests, AES) {
EXPECT_EQ(in, std::string(dec.begin(), dec.end()));
}
TEST(CryptoTests, MD5) {
TEST(CryptoTests, SHA) {
std::vector<char> data{'h', 'e', 'l', 'l', 'o'};
std::array<unsigned char, 16> excepted{0x5d, 0x41, 0x40, 0x2a, 0xbc, 0x4b, 0x2a, 0x76, 0xb9, 0x71, 0x9d, 0x91, 0x10, 0x17, 0xc5, 0x92};
std::array<unsigned char, 32> excepted{0x2c, 0xf2, 0x4d, 0xba, 0x5f, 0xb0, 0xa3, 0x0e, 0x26, 0xe8, 0x3b, 0x2a, 0xc5, 0xb9, 0xe2, 0x9e, 0x1b, 0x16, 0x1e, 0x5c, 0x1f, 0xa7, 0x42, 0x5e, 0x73, 0x04, 0x33, 0x62, 0x93, 0x8b, 0x98, 0x24};
auto out = MD5::calculate(data);
auto out = SHA::calculate(data);
EXPECT_EQ(out.size(), 16);
EXPECT_EQ(out.size(), 32);
for (int i = 0; i < out.size(); i++) {
EXPECT_EQ(static_cast<uint8_t>(out[i]), excepted[i]);
}

View File

@@ -43,11 +43,11 @@ TEST(Chunk, Deserialize) {
EXPECT_EQ(o2.data[i], o2e.data[i]);
}
for (int i = 0; i < o1.md5.size(); i++) {
EXPECT_EQ(o1.md5[i], o1e.md5[i]);
for (int i = 0; i < o1.SHA.size(); i++) {
EXPECT_EQ(o1.SHA[i], o1e.SHA[i]);
}
for (int i = 0; i < o2.md5.size(); i++) {
EXPECT_EQ(o2.md5[i], o2e.md5[i]);
for (int i = 0; i < o2.SHA.size(); i++) {
EXPECT_EQ(o2.SHA[i], o2e.SHA[i]);
}
}
}