diff --git a/src/chunkers/includes/Chunker.h b/src/chunkers/includes/Chunker.h index 6e1e224..a203759 100644 --- a/src/chunkers/includes/Chunker.h +++ b/src/chunkers/includes/Chunker.h @@ -44,7 +44,7 @@ private: public: /// Returns the next chunk of the file /// Returns a single empty chunk if a file is empty - /// \return Pair consisting of chunk's bytes and its MD5 hash + /// \return Pair consisting of chunk's bytes and its SHA hash /// \throws Exception if EOF was already reached virtual std::pair> getNext() = 0; diff --git a/src/chunkers/srcs/BuzhashChunker.cpp b/src/chunkers/srcs/BuzhashChunker.cpp index 0b02f7e..84b8af5 100644 --- a/src/chunkers/srcs/BuzhashChunker.cpp +++ b/src/chunkers/srcs/BuzhashChunker.cpp @@ -4,7 +4,7 @@ #include "../includes/BuzhashChunker.h" -#include "../../crypto/includes/MD5.h" +#include "../../crypto/includes/SHA.h" #include "../../utils/includes/Exception.h" BuzhashChunker::BuzhashChunker(std::streambuf *buf, unsigned long long minBytes, unsigned long long maxBytes, unsigned long long mask, uint32_t window) : Chunker(buf, maxBytes), window(window), minBytes(minBytes), mask(mask), buzhash(window) {} @@ -18,7 +18,7 @@ std::pair> BuzhashChunker::getNext() { if (read != minBytes) { eof = true; rbuf.resize(read); - return {MD5::calculate(rbuf), rbuf}; + return {SHA::calculate(rbuf), rbuf}; } for (auto c: rbuf) { @@ -38,5 +38,5 @@ std::pair> BuzhashChunker::getNext() { } } - return {MD5::calculate(rbuf), rbuf}; + return {SHA::calculate(rbuf), rbuf}; } diff --git a/src/chunkers/srcs/ConstChunker.cpp b/src/chunkers/srcs/ConstChunker.cpp index f41e1b5..bcfa515 100644 --- a/src/chunkers/srcs/ConstChunker.cpp +++ b/src/chunkers/srcs/ConstChunker.cpp @@ -4,7 +4,7 @@ #include "../includes/ConstChunker.h" -#include "../../crypto/includes/MD5.h" +#include "../../crypto/includes/SHA.h" #include "../../utils/includes/Exception.h" ConstChunker::ConstChunker(std::streambuf *buf, unsigned long long maxBytes) : Chunker(buf, maxBytes) {} @@ -21,7 +21,7 @@ std::pair> ConstChunker::getNext() { rbuf.resize(read); } - auto md5 = MD5::calculate(rbuf); + auto SHA = SHA::calculate(rbuf); - return {md5, rbuf}; + return {SHA, rbuf}; } diff --git a/src/commands/includes/CommandsCommon.h b/src/commands/includes/CommandsCommon.h index 8ff19ee..61cd259 100644 --- a/src/commands/includes/CommandsCommon.h +++ b/src/commands/includes/CommandsCommon.h @@ -18,7 +18,7 @@ namespace CommandsCommon { /// \param ignore List of files to ignore /// \param spawner Function to spawn other tasks /// \param processFile Task to spawn on found files - void processDirWithIgnore(const std::filesystem::path &dir, std::vector ignore, std::function)> spawner, std::function processFile); + void processDirWithIgnore(const std::filesystem::path &dir, std::vector ignore, const std::function)> &spawner, std::function processFile); struct WorkerStats { public: diff --git a/src/commands/srcs/CommandRun.cpp b/src/commands/srcs/CommandRun.cpp index 6e9a0a3..64ce5f6 100644 --- a/src/commands/srcs/CommandRun.cpp +++ b/src/commands/srcs/CommandRun.cpp @@ -10,7 +10,7 @@ #include "../../change_detectors/includes/ChangeDetectorFactory.h" #include "../../chunkers/includes/ChunkerFactory.h" -#include "../../crypto/includes/MD5.h" +#include "../../crypto/includes/SHA.h" #include "../../repo/includes/Serialize.h" #include "../../repo/includes/objects/Archive.h" #include "../../repo/includes/objects/Chunk.h" @@ -187,8 +187,8 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co /// The order of checks is important, because is_directory follows the symlink if (std::filesystem::is_symlink(orig) || std::filesystem::is_directory(orig)) { auto contents = File::getFileContents(orig); - Chunk c(ctx.repo->getId(), MD5::calculate(contents), contents); - File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.md5, {c.id}, File::getFileType(orig)); + Chunk c(ctx.repo->getId(), SHA::calculate(contents), contents); + File f(ctx.repo->getId(), saveAs, c.length, File::getFileMtime(orig), c.SHA, {c.id}, File::getFileType(orig)); ctx.repo->putObject(c); ctx.repo->putObject(f); return f.id; @@ -200,7 +200,7 @@ Object::idType CommandRun::backupChunkFile(const std::filesystem::path &orig, co if (!ifstream) throw Exception("Couldn't open " + orig.u8string() + " for reading"); std::unique_ptr chunker = ChunkerFactory::getChunker(ctx.repo->getConfig(), ifstream.rdbuf()); - MD5 fileHash; + SHA fileHash; std::vector fileChunks; unsigned long long size = 0; diff --git a/src/commands/srcs/CommandsCommon.cpp b/src/commands/srcs/CommandsCommon.cpp index 1e14d74..af7b61d 100644 --- a/src/commands/srcs/CommandsCommon.cpp +++ b/src/commands/srcs/CommandsCommon.cpp @@ -23,7 +23,7 @@ bool CommandsCommon::isSubpath(const std::filesystem::path &prefix, const std::f return true; } -void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector ignore, std::function)> spawner, std::function processFile) { +void CommandsCommon::processDirWithIgnore(const std::filesystem::path &dir, std::vector ignore, const std::function)> &spawner, std::function processFile) { if (!std::filesystem::is_directory(dir)) throw Exception(dir.u8string() + " is not a directory!"); /// Don't process the directory if it has a ".nobackup" file diff --git a/src/commands/srcs/Diff.cpp b/src/commands/srcs/Diff.cpp index 9b07270..3da909c 100644 --- a/src/commands/srcs/Diff.cpp +++ b/src/commands/srcs/Diff.cpp @@ -74,20 +74,20 @@ std::string Diff::diffPercent(const ComparableFile &c1, const ComparableFile &c2 /// Exit when asked to if (Signals::shouldQuit) throw Exception("Quitting"); if (chunkp.second.empty()) continue; - std::string md5(chunkp.first.begin(), chunkp.first.end()); - ch1hashes.emplace(md5); - hashsize[md5] = chunkp.second.size(); + std::string SHA(chunkp.first.begin(), chunkp.first.end()); + ch1hashes.emplace(SHA); + hashsize[SHA] = chunkp.second.size(); } for (auto chunkp: ch2) { /// Exit when asked to if (Signals::shouldQuit) throw Exception("Quitting"); if (chunkp.second.empty()) continue; - std::string md5(chunkp.first.begin(), chunkp.first.end()); - hashsize[md5] = chunkp.second.size(); - if (ch1hashes.count(md5) > 0) ch1hashes.erase(md5); - else if (ch1hashes.count(md5) == 0) - ch2diff.emplace(md5); + std::string SHA(chunkp.first.begin(), chunkp.first.end()); + hashsize[SHA] = chunkp.second.size(); + if (ch1hashes.count(SHA) > 0) ch1hashes.erase(SHA); + else if (ch1hashes.count(SHA) == 0) + ch2diff.emplace(SHA); } unsigned long long diff = 0; diff --git a/src/crypto/CMakeLists.txt b/src/crypto/CMakeLists.txt index 142f644..82fccb9 100644 --- a/src/crypto/CMakeLists.txt +++ b/src/crypto/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.22) find_package(OpenSSL REQUIRED) -add_library(crypto srcs/AES.cpp srcs/CRC32.cpp srcs/MD5.cpp) +add_library(crypto srcs/AES.cpp srcs/CRC32.cpp srcs/SHA.cpp) target_include_directories(crypto PUBLIC includes) diff --git a/src/crypto/includes/MD5.h b/src/crypto/includes/SHA.h similarity index 81% rename from src/crypto/includes/MD5.h rename to src/crypto/includes/SHA.h index 87d6a9f..83c1188 100644 --- a/src/crypto/includes/MD5.h +++ b/src/crypto/includes/SHA.h @@ -2,8 +2,8 @@ // Created by Stepan Usatiuk on 15.04.2023. // -#ifndef SEMBACKUP_MD5_H -#define SEMBACKUP_MD5_H +#ifndef SEMBACKUP_SHA_H +#define SEMBACKUP_SHA_H #include #include @@ -11,24 +11,24 @@ #include -/// Class to handle MD5 hashing +/// Class to handle SHA hashing /** * Based on: https://wiki.openssl.org/index.php/EVP_Message_Digests */ -class MD5 { +class SHA { public: - /// Constructs an empty MD5 hasher instance + /// Constructs an empty SHA hasher instance /// \throws Exception on initialization error - MD5(); + SHA(); /// Calculates the hash for a given \p in char vector /// \param in Constant reference to an input vector - /// \return MD5 hash of \p in + /// \return SHA hash of \p in static std::string calculate(const std::vector &in); /// Calculates the hash for a given \p in string /// \param in Constant reference to an input string - /// \return MD5 hash of \p in + /// \return SHA hash of \p in static std::string calculate(const std::string &in); /// Append a vector of chars to the current hash @@ -45,4 +45,4 @@ private: }; -#endif//SEMBACKUP_MD5_H +#endif//SEMBACKUP_SHA_H diff --git a/src/crypto/srcs/MD5.cpp b/src/crypto/srcs/SHA.cpp similarity index 69% rename from src/crypto/srcs/MD5.cpp rename to src/crypto/srcs/SHA.cpp index 1c9b553..bd7743c 100644 --- a/src/crypto/srcs/MD5.cpp +++ b/src/crypto/srcs/SHA.cpp @@ -2,32 +2,32 @@ // Created by Stepan Usatiuk on 15.04.2023. // -#include "../includes/MD5.h" +#include "../includes/SHA.h" #include "../../utils/includes/Exception.h" -std::string MD5::calculate(const std::vector &in) { - MD5 hasher; +std::string SHA::calculate(const std::vector &in) { + SHA hasher; hasher.feedData(in); return hasher.getHash(); } -MD5::MD5() { +SHA::SHA() { if (!mdctx) throw Exception("Can't create hashing context!"); - if (!EVP_DigestInit_ex(mdctx.get(), EVP_md5(), nullptr)) + if (!EVP_DigestInit_ex(mdctx.get(), EVP_sha256(), nullptr)) throw Exception("Can't create hashing context!"); } -void MD5::feedData(const std::vector &in) { +void SHA::feedData(const std::vector &in) { if (in.empty()) return; if (!EVP_DigestUpdate(mdctx.get(), in.data(), in.size())) throw Exception("Error hashing!"); } -std::string MD5::getHash() { - std::array out; +std::string SHA::getHash() { + std::array out; unsigned int s = 0; if (!EVP_DigestFinal_ex(mdctx.get(), reinterpret_cast(out.data()), &s)) @@ -42,7 +42,7 @@ std::string MD5::getHash() { return {out.begin(), out.end()}; } -std::string MD5::calculate(const std::string &in) { +std::string SHA::calculate(const std::string &in) { std::vector tmp(in.begin(), in.end()); - return MD5::calculate(tmp); + return SHA::calculate(tmp); } diff --git a/src/repo/includes/objects/Chunk.h b/src/repo/includes/objects/Chunk.h index 87a4c99..79eb26e 100644 --- a/src/repo/includes/objects/Chunk.h +++ b/src/repo/includes/objects/Chunk.h @@ -21,10 +21,10 @@ public: /// \copydoc Object::serialize void serialize(std::vector &out) const override; - /// Returns the MD5 of the chunk + /// Returns the SHA of the chunk std::string getKey() const override; - const std::string md5; ///< MD5 hash of the chunk + const std::string SHA; ///< SHA hash of the chunk const std::vector data; ///< Raw chunk data const unsigned long long length;///< Size of chunk in bytes }; diff --git a/src/repo/includes/objects/File.h b/src/repo/includes/objects/File.h index e57b119..6107b5e 100644 --- a/src/repo/includes/objects/File.h +++ b/src/repo/includes/objects/File.h @@ -26,7 +26,7 @@ public: static inline const std::unordered_map TypeToStr{{Type::Normal, "normal"}, {Type::Symlink, "symlink"}, {Type::Directory, "directory"}}; - File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector chunks, Type fileType); + File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector chunks, Type fileType); /// Deserialization constructor File(std::vector::const_iterator &in, const std::vector::const_iterator &end); @@ -64,7 +64,7 @@ public: const std::string name; ///< Relative path to backup root, as UTF-8 string const unsigned long long bytes;///< Amount of bytes in the file const unsigned long long mtime;///< Last modification time as timestamp - const std::string md5; ///< Hash of the file + const std::string SHA; ///< Hash of the file const Type fileType; ///< File type /// List of the chunks in file diff --git a/src/repo/srcs/objects/Chunk.cpp b/src/repo/srcs/objects/Chunk.cpp index 550cb61..ff4ddcd 100644 --- a/src/repo/srcs/objects/Chunk.cpp +++ b/src/repo/srcs/objects/Chunk.cpp @@ -7,11 +7,11 @@ #include "../../../utils/includes/Exception.h" #include "../../includes/Serialize.h" -Chunk::Chunk(idType id, std::string md5, std::vector data) : Object(id, ObjectType::Chunk), data(std::move(data)), md5(std::move(md5)), length(this->data.size()) {} +Chunk::Chunk(idType id, std::string SHA, std::vector data) : Object(id, ObjectType::Chunk), data(std::move(data)), SHA(std::move(SHA)), length(this->data.size()) {} Chunk::Chunk(std::vector::const_iterator &in, const std::vector::const_iterator &end) : Object(in, end), - md5(Serialize::deserialize::type>(in, end)), + SHA(Serialize::deserialize::type>(in, end)), data(Serialize::deserialize::type>(in, end)), length(Serialize::deserialize::type>(in, end)) { if (type != ObjectType::Chunk) throw Exception("Type mismatch for Chunk!"); @@ -20,11 +20,11 @@ Chunk::Chunk(std::vector::const_iterator &in, const std::vector::con void Chunk::serialize(std::vector &out) const { Object::serialize(out); - Serialize::serialize(md5, out); + Serialize::serialize(SHA, out); Serialize::serialize(data, out); Serialize::serialize(length, out); } std::string Chunk::getKey() const { - return md5; + return SHA; } diff --git a/src/repo/srcs/objects/File.cpp b/src/repo/srcs/objects/File.cpp index 6725b3b..f77ca1c 100644 --- a/src/repo/srcs/objects/File.cpp +++ b/src/repo/srcs/objects/File.cpp @@ -11,15 +11,15 @@ #include "../../../utils/includes/Exception.h" #include "../../includes/Serialize.h" -File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string md5, std::vector chunks, Type fileType) - : Object(id, ObjectType::File), name(name), bytes(bytes), mtime(mtime), md5(md5), fileType(fileType), chunks(chunks) {} +File::File(Object::idType id, std::string name, unsigned long long bytes, unsigned long long mtime, std::string SHA, std::vector chunks, Type fileType) + : Object(id, ObjectType::File), name(name), bytes(bytes), mtime(mtime), SHA(SHA), fileType(fileType), chunks(chunks) {} File::File(std::vector::const_iterator &in, const std::vector::const_iterator &end) : Object(in, end), name(Serialize::deserialize(in, end)), bytes(Serialize::deserialize(in, end)), mtime(Serialize::deserialize(in, end)), - md5(Serialize::deserialize::type>(in, end)), + SHA(Serialize::deserialize::type>(in, end)), fileType(Serialize::deserialize::type>(in, end)), chunks(Serialize::deserialize::type>(in, end)) { if (type != ObjectType::File) throw Exception("Type mismatch for File!"); @@ -30,7 +30,7 @@ void File::serialize(std::vector &out) const { Serialize::serialize(name, out); Serialize::serialize(bytes, out); Serialize::serialize(mtime, out); - Serialize::serialize(md5, out); + Serialize::serialize(SHA, out); Serialize::serialize(fileType, out); Serialize::serialize(chunks, out); } diff --git a/tests/crypto/srcs/CryptoTests.cpp b/tests/crypto/srcs/CryptoTests.cpp index 33089e6..4873169 100644 --- a/tests/crypto/srcs/CryptoTests.cpp +++ b/tests/crypto/srcs/CryptoTests.cpp @@ -3,7 +3,7 @@ // #include "AES.h" -#include "MD5.h" +#include "SHA.h" #include @@ -12,6 +12,11 @@ TEST(CryptoTests, AES) { auto enc = AES::encrypt(std::vector(in.begin(), in.end()), "p1", "e"); auto dec = AES::decrypt(enc, "p1", "e"); EXPECT_EQ(in, std::string(dec.begin(), dec.end())); + + in = ""; + enc = AES::encrypt(std::vector(in.begin(), in.end()), "p1", "e"); + dec = AES::decrypt(enc, "p1", "e"); + EXPECT_EQ(in, std::string(dec.begin(), dec.end())); in = "1234567890asdfg"; enc = AES::encrypt(std::vector(in.begin(), in.end()), "p1", "e"); @@ -34,13 +39,13 @@ TEST(CryptoTests, AES) { EXPECT_EQ(in, std::string(dec.begin(), dec.end())); } -TEST(CryptoTests, MD5) { +TEST(CryptoTests, SHA) { std::vector data{'h', 'e', 'l', 'l', 'o'}; - std::array excepted{0x5d, 0x41, 0x40, 0x2a, 0xbc, 0x4b, 0x2a, 0x76, 0xb9, 0x71, 0x9d, 0x91, 0x10, 0x17, 0xc5, 0x92}; + std::array excepted{0x2c, 0xf2, 0x4d, 0xba, 0x5f, 0xb0, 0xa3, 0x0e, 0x26, 0xe8, 0x3b, 0x2a, 0xc5, 0xb9, 0xe2, 0x9e, 0x1b, 0x16, 0x1e, 0x5c, 0x1f, 0xa7, 0x42, 0x5e, 0x73, 0x04, 0x33, 0x62, 0x93, 0x8b, 0x98, 0x24}; - auto out = MD5::calculate(data); + auto out = SHA::calculate(data); - EXPECT_EQ(out.size(), 16); + EXPECT_EQ(out.size(), 32); for (int i = 0; i < out.size(); i++) { EXPECT_EQ(static_cast(out[i]), excepted[i]); } diff --git a/tests/repo/srcs/ChunkTest.cpp b/tests/repo/srcs/ChunkTest.cpp index 6e598d5..25d711c 100644 --- a/tests/repo/srcs/ChunkTest.cpp +++ b/tests/repo/srcs/ChunkTest.cpp @@ -43,11 +43,11 @@ TEST(Chunk, Deserialize) { EXPECT_EQ(o2.data[i], o2e.data[i]); } - for (int i = 0; i < o1.md5.size(); i++) { - EXPECT_EQ(o1.md5[i], o1e.md5[i]); + for (int i = 0; i < o1.SHA.size(); i++) { + EXPECT_EQ(o1.SHA[i], o1e.SHA[i]); } - for (int i = 0; i < o2.md5.size(); i++) { - EXPECT_EQ(o2.md5[i], o2e.md5[i]); + for (int i = 0; i < o2.SHA.size(); i++) { + EXPECT_EQ(o2.SHA[i], o2e.SHA[i]); } } }