diff --git a/src/commands/CMakeLists.txt b/src/commands/CMakeLists.txt index 7e73911..79ea5be 100644 --- a/src/commands/CMakeLists.txt +++ b/src/commands/CMakeLists.txt @@ -1,6 +1,16 @@ cmake_minimum_required(VERSION 3.18) -add_library(commands srcs/CommandDiff.cpp srcs/CommandList.cpp srcs/CommandListFiles.cpp srcs/CommandRestore.cpp srcs/CommandRun.cpp srcs/CommandsCommon.cpp srcs/Diff.cpp srcs/CommandMount.cpp) +add_library(commands + srcs/CommandDiff.cpp + srcs/CommandList.cpp + srcs/CommandListFiles.cpp + srcs/CommandRestore.cpp + srcs/CommandRun.cpp + srcs/CommandsCommon.cpp + srcs/Diff.cpp + srcs/CommandMount.cpp + srcs/CommandDelete.cpp +) target_include_directories(commands PUBLIC includes) diff --git a/src/commands/includes/CommandDelete.h b/src/commands/includes/CommandDelete.h new file mode 100644 index 0000000..ffa39bf --- /dev/null +++ b/src/commands/includes/CommandDelete.h @@ -0,0 +1,18 @@ +// +// Created by Stepan Usatiuk on 06.08.2023. +// + +#ifndef BACKUP_COMMANDDELETE_H +#define BACKUP_COMMANDDELETE_H + +#include "Command.h" + +class CommandDelete : public Command { +public: + CommandDelete(); + void run(Context ctx) override; + static constexpr std::string_view name{"delete"}; +}; + + +#endif//BACKUP_COMMANDDELETE_H diff --git a/src/commands/srcs/CommandDelete.cpp b/src/commands/srcs/CommandDelete.cpp new file mode 100644 index 0000000..488f3b9 --- /dev/null +++ b/src/commands/srcs/CommandDelete.cpp @@ -0,0 +1,15 @@ +// +// Created by Stepan Usatiuk on 06.08.2023. +// + +#include "CommandDelete.h" + +#include "CommandsCommon.h" + +using namespace CommandsCommon; + +CommandDelete::CommandDelete() {} + +void CommandDelete::run(Context ctx) { + ctx.repo->deleteObjects({static_cast(ctx.repo->getConfig().getInt("aid"))}); +} diff --git a/src/main.cpp b/src/main.cpp index d26b377..8367241 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,6 +3,7 @@ #include "BytesFormatter.h" #include "Command.h" +#include "CommandDelete.h" #include "CommandDiff.h" #include "CommandList.h" #include "CommandListFiles.h" @@ -109,6 +110,7 @@ int main(int argc, char *argv[]) { commands.emplace(CommandListFiles::name, std::make_unique()); commands.emplace(CommandList::name, std::make_unique()); commands.emplace(CommandMount::name, std::make_unique()); + commands.emplace(CommandDelete::name, std::make_unique()); if (commands.count(opt) == 0) { std::cerr << "Unknown argument" << std::endl; diff --git a/src/repo/includes/FileRepository.h b/src/repo/includes/FileRepository.h index d83ea1f..127740c 100644 --- a/src/repo/includes/FileRepository.h +++ b/src/repo/includes/FileRepository.h @@ -37,7 +37,7 @@ public: std::vector getObjectRaw(Object::idType id) const override; bool putObject(const Object &obj) override; - bool deleteObject(const Object &obj) override; + bool deleteObjects(const std::vector &objs) override; std::vector getObjectRaw(Object::ObjectType type, const std::string &key) const override; Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override; @@ -104,6 +104,8 @@ private: unsigned long long maxFileId = 1; ///< Largest ID of object storage file std::unordered_map offsetIndex;///< Used to locate Object%s in the filesystem + std::unordered_map> + fileToObjs;///< Used to locate Object%s in the filesystem std::mutex writeCacheLock; ///< Write cache lock std::map> writeCache;///< Write cache, map of Object ids and their serialized data @@ -116,9 +118,12 @@ private: /// \param lockW Write cache lock void flushWriteCache(std::unique_lock &&lockW); - Object::idType largestUnusedId = 1;///< Largest available objectID + Object::idType largestUnusedId = 1; ///< Largest available objectID + std::vector unusedIds;///< Vector of unused IDs std::unordered_map> keyIndex;///< Maps Object%'s keys to their ID's + + std::unordered_map refCounts;///< Count of references to an object per its id }; diff --git a/src/repo/includes/Object.h b/src/repo/includes/Object.h index e18709c..e6f92cb 100644 --- a/src/repo/includes/Object.h +++ b/src/repo/includes/Object.h @@ -37,6 +37,10 @@ public: const idType id; ///< Unique numerical of the object const ObjectType type;///< Type of the object + static std::unique_ptr deserialize(const std::vector &src); + static std::unique_ptr deserialize(std::vector::const_iterator &in, + const std::vector::const_iterator &end); + protected: /// Default constructor /// \param id Object ID diff --git a/src/repo/includes/Repository.h b/src/repo/includes/Repository.h index a21f8f0..e92f0ab 100644 --- a/src/repo/includes/Repository.h +++ b/src/repo/includes/Repository.h @@ -47,6 +47,12 @@ public: /// \throws Exception on any error or if object doesn't exist virtual std::vector getObjectRaw(Object::idType id) const = 0; + /// Returns the Object with id \p id + /// \param id ID of object to return + /// \return Serialized object + /// \throws Exception on any error or if object doesn't exist + std::unique_ptr getObject(Object::idType id) const; + /// Adds the Object \p obj to the Repository /// \param obj Constant reference to the object /// \return True @@ -54,10 +60,10 @@ public: virtual bool putObject(const Object &obj) = 0; /// Deletes Object \p obj from the Repository - /// \param obj Constant reference to the object + /// \param obj Constant reference to the vector with ids of objects to delete /// \return True if successful, False if it didn't exist /// \throws Exception on any error - virtual bool deleteObject(const Object &obj) = 0; + virtual bool deleteObjects(const std::vector &objs) = 0; /// Returns the Object of type \p type and with key \p key /// \param type Type of the object diff --git a/src/repo/srcs/FileRepository.cpp b/src/repo/srcs/FileRepository.cpp index 68a285d..912085e 100644 --- a/src/repo/srcs/FileRepository.cpp +++ b/src/repo/srcs/FileRepository.cpp @@ -5,8 +5,10 @@ #include "FileRepository.h" #include +#include #include #include +#include #include "CheckFilter.h" #include "FilterFactory.h" @@ -45,6 +47,9 @@ bool FileRepository::open() { std::tie(keyIndex, largestUnusedId) = Serialize::deserialize>( filters.filterRead(readFile(root / "index"))); + refCounts = Serialize::deserialize(filters.filterRead(readFile(root / "refcounts"))); + unusedIds = Serialize::deserialize(filters.filterRead(readFile(root / "unusedIds"))); + fileToObjs = Serialize::deserialize(filters.filterRead(readFile(root / "fileToObjs"))); } catch (const std::exception &e) { ready = false; throw; @@ -79,6 +84,9 @@ FileRepository::~FileRepository() { writeFile(root / "offsets", filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex)))); writeFile(root / "index", filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId)))); + writeFile(root / "unusedIds", filters.filterWrite(Serialize::serialize(unusedIds))); + writeFile(root / "refcounts", filters.filterWrite(Serialize::serialize(refCounts))); + writeFile(root / "fileToObjs", filters.filterWrite(Serialize::serialize(fileToObjs))); } } @@ -133,6 +141,7 @@ void FileRepository::flushWriteCache(std::unique_lock &&lockW) { { std::lock_guard lockI(repoLock); offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size())); + fileToObjs[currentFileId].emplace(i.first); } offset += i.second.size(); ofstream.rdbuf()->sputn(i.second.data(), i.second.size()); @@ -144,14 +153,81 @@ bool FileRepository::putObject(const Object &obj) { { std::lock_guard lock(repoLock); keyIndex[obj.type][obj.getKey()] = obj.id; + for (auto const &i: obj.getRefs()) refCounts[i]++; } writeObject(obj); return true; } -bool FileRepository::deleteObject(const Object &obj) { +bool FileRepository::deleteObjects(const std::vector &objs) { if (!ready) throw Exception("Tried working with uninitialized repo!"); - throw Exception("Deletion not implemented!"); + + std::queue toVisit; + std::set toDelete; + + for (auto const &o: objs) { + toVisit.emplace(o); + toDelete.emplace(o); + } + + std::cout << "Scanning for objects" << std::endl; + + while (!toVisit.empty()) { + auto o = toVisit.back(); + toVisit.pop(); + + auto obj = getObject(o); + for (const auto &id: obj->getRefs()) { + std::unique_lock lock(repoLock); + refCounts[id]--; + if (refCounts.at(id) == 0) { + toDelete.emplace(id); + toVisit.emplace(id); + refCounts.erase(id); + } + } + } + + std::cout << "Found " << toDelete.size() << " to delete " << std::endl; + + + std::unordered_map fileToObj; + std::set touchedFiles; + + for (auto const &id: toDelete) { + fileToObj.emplace(offsetIndex.at(id).fileId, id); + touchedFiles.emplace(offsetIndex.at(id).fileId); + } + + std::cout << "Will rewrite " << touchedFiles.size() << " files" << std::endl; + + for (auto const &f: touchedFiles) { + std::cout << "Rewriting file " << f << std::endl; + const auto &objs = fileToObjs.at(f); + std::vector> objects; + for (auto const &o: objs) { + auto obj = getObject(o); + { + std::unique_lock lock(repoLock); + offsetIndex.erase(o); + } + if (toDelete.find(o) == toDelete.end()) putObject(*obj); + } + { + std::unique_lock lock(repoLock); + fileToObjs.erase(f); + } + std::filesystem::remove(root / std::to_string(f)); + } + { + std::unique_lock lock(repoLock); + for (auto const &id: toDelete) { + unusedIds.emplace_back(id); + // FIXME: this is a bit inefficient + for (auto &m: keyIndex) erase_if(m.second, [&](const auto &t) { return toDelete.contains(t.second); }); + } + } + return true; } std::vector FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset, @@ -214,6 +290,11 @@ bool FileRepository::exists(Object::ObjectType type, const std::string &key) con Object::idType FileRepository::getId() { std::lock_guard lock(repoLock); + if (!unusedIds.empty()) { + auto ret = unusedIds.back(); + unusedIds.pop_back(); + return ret; + } return largestUnusedId++; } diff --git a/src/repo/srcs/Object.cpp b/src/repo/srcs/Object.cpp index 1b69264..0717837 100644 --- a/src/repo/srcs/Object.cpp +++ b/src/repo/srcs/Object.cpp @@ -6,6 +6,10 @@ #include "Serialize.h" +#include "objects/Archive.h" +#include "objects/Chunk.h" +#include "objects/File.h" + Object::Object(idType id, ObjectType type) : id(id), type(type) {} Object::Object(std::vector::const_iterator &in, const std::vector::const_iterator &end) @@ -21,3 +25,28 @@ Object::~Object() = default; static std::vector emptyRef{}; const std::vector &Object::getRefs() const { return emptyRef; } + +std::unique_ptr Object::deserialize(std::vector::const_iterator &in, + const std::vector::const_iterator &end) { + auto inCpy = in; + auto id = Serialize::deserialize(in, end); + auto type = Serialize::deserialize(in, end); + + switch (type) { + case ObjectType::Archive: + return std::make_unique(Serialize::deserialize(inCpy, end)); + case ObjectType::File: + return std::make_unique(Serialize::deserialize(inCpy, end)); + case ObjectType::Chunk: + return std::make_unique(Serialize::deserialize(inCpy, end)); + case ObjectType::END: + break; + default: + throw Exception("Bad object!"); + } +} + +std::unique_ptr Object::deserialize(const std::vector &src) { + auto srcIterator = src.cbegin(); + return deserialize(srcIterator, src.end()); +} diff --git a/src/repo/srcs/Repository.cpp b/src/repo/srcs/Repository.cpp index dc7a887..df84ad3 100644 --- a/src/repo/srcs/Repository.cpp +++ b/src/repo/srcs/Repository.cpp @@ -8,3 +8,7 @@ Repository::~Repository() = default; Repository::Repository(Config config) : config(std::move(config)) {} const Config &Repository::getConfig() const { return config; } + +std::unique_ptr Repository::getObject(Object::idType id) const { + return Object::deserialize(this->getObjectRaw(id)); +} diff --git a/tests/repo/srcs/FileRepositoryTest.cpp b/tests/repo/srcs/FileRepositoryTest.cpp index d80f302..9fab58f 100644 --- a/tests/repo/srcs/FileRepositoryTest.cpp +++ b/tests/repo/srcs/FileRepositoryTest.cpp @@ -192,7 +192,6 @@ TEST(FileRepository, Filters) { } TEST(FileRepository, IDsDisabled) { - GTEST_SKIP(); Cleaner c({"IDS/testrepo"}); { Config conf; @@ -257,7 +256,7 @@ TEST(FileRepository, IDsDisabled) { ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 2); - repo.deleteObject(o1); + repo.deleteObjects({o1.id}); } { Config conf;