mirror of
				https://github.com/usatiuk/backup.git
				synced 2025-10-26 17:37:47 +01:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 10d570f3ea | 
							
								
								
									
										9
									
								
								.github/workflows/cmake.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										9
									
								
								.github/workflows/cmake.yml
									
									
									
									
										vendored
									
									
								
							| @@ -19,11 +19,18 @@ jobs: | ||||
|     runs-on: ubuntu-latest | ||||
|  | ||||
|     steps: | ||||
|       - uses: actions/checkout@v3 | ||||
|       - uses: actions/checkout@v4 | ||||
|        | ||||
|       - run: apt-get update && apt-get install -y sudo | ||||
|         if: env.ACT=='true' | ||||
|  | ||||
|       - name: Fix kernel mmap rnd bits | ||||
|         # ASan in LLVM 14, as shipped with Ubuntu 22.04, is incompatible with the | ||||
|         # high-entropy ASLR of the much newer kernels that GitHub runners use, | ||||
|         # which leads to random crashes: https://reviews.llvm.org/D148280 | ||||
|         run: sudo sysctl vm.mmap_rnd_bits=28 | ||||
|         if: env.ACT!='true' | ||||
|       | ||||
|       - name: install everything | ||||
|         run: sudo apt-get update && sudo apt-get install -y fuse libfuse-dev cmake build-essential gcc g++ libssl-dev zlib1g-dev | ||||
|  | ||||
|   | ||||
| @@ -1,16 +1,6 @@ | ||||
| cmake_minimum_required(VERSION 3.18) | ||||
|  | ||||
| add_library(commands | ||||
|         srcs/CommandDiff.cpp | ||||
|         srcs/CommandList.cpp | ||||
|         srcs/CommandListFiles.cpp | ||||
|         srcs/CommandRestore.cpp | ||||
|         srcs/CommandRun.cpp | ||||
|         srcs/CommandsCommon.cpp | ||||
|         srcs/Diff.cpp | ||||
|         srcs/CommandMount.cpp | ||||
|         srcs/CommandDelete.cpp | ||||
| ) | ||||
| add_library(commands srcs/CommandDiff.cpp srcs/CommandList.cpp srcs/CommandListFiles.cpp srcs/CommandRestore.cpp srcs/CommandRun.cpp srcs/CommandsCommon.cpp srcs/Diff.cpp srcs/CommandMount.cpp) | ||||
|  | ||||
| target_include_directories(commands PUBLIC includes) | ||||
|  | ||||
|   | ||||
| @@ -1,18 +0,0 @@ | ||||
| // | ||||
| // Created by Stepan Usatiuk on 06.08.2023. | ||||
| // | ||||
|  | ||||
| #ifndef BACKUP_COMMANDDELETE_H | ||||
| #define BACKUP_COMMANDDELETE_H | ||||
|  | ||||
| #include "Command.h" | ||||
|  | ||||
| class CommandDelete : public Command { | ||||
| public: | ||||
|     CommandDelete(); | ||||
|     void run(Context ctx) override; | ||||
|     static constexpr std::string_view name{"delete"}; | ||||
| }; | ||||
|  | ||||
|  | ||||
| #endif//BACKUP_COMMANDDELETE_H | ||||
| @@ -1,15 +0,0 @@ | ||||
| // | ||||
| // Created by Stepan Usatiuk on 06.08.2023. | ||||
| // | ||||
|  | ||||
| #include "CommandDelete.h" | ||||
|  | ||||
| #include "CommandsCommon.h" | ||||
|  | ||||
| using namespace CommandsCommon; | ||||
|  | ||||
| CommandDelete::CommandDelete() {} | ||||
|  | ||||
| void CommandDelete::run(Context ctx) { | ||||
|     ctx.repo->deleteObjects({static_cast<unsigned long long>(ctx.repo->getConfig().getInt("aid"))}); | ||||
| } | ||||
| @@ -38,11 +38,11 @@ void CommandDiff::run(Context ctx) { | ||||
|                           ctx.repo->getConfig().exists("threads") ? ctx.repo->getConfig().getInt("threads") | ||||
|                                                                   : std::thread::hardware_concurrency()); | ||||
|  | ||||
|     auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive1)); | ||||
|     auto archiveO1 = Serialize::deserialize<Archive>(ctx.repo->getObject(archive1)); | ||||
|     std::mutex filesLock; | ||||
|     std::map<std::filesystem::path, File> files;///< Files in the first archive | ||||
|     for (auto id: archiveO1.files) { | ||||
|         auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id)); | ||||
|         auto file = Serialize::deserialize<File>(ctx.repo->getObject(id)); | ||||
|         auto path = std::filesystem::path(file.name); | ||||
|         if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path)) files.emplace(file.getKey(), std::move(file)); | ||||
|     } | ||||
| @@ -76,13 +76,13 @@ void CommandDiff::run(Context ctx) { | ||||
|         /// If a second archive is given, run the task for each of its files, otherwise use the "from" config option | ||||
|         if (ctx.repo->getConfig().exists("aid2")) { | ||||
|             archiveO2.emplace( | ||||
|                     Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2")))); | ||||
|                     Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2")))); | ||||
|  | ||||
|             threadPool.push([&]() { | ||||
|                 for (auto id: archiveO2.value().files) { | ||||
|                     /// Exit when asked to | ||||
|                     if (Signals::shouldQuit) throw Exception("Quitting"); | ||||
|                     auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id)); | ||||
|                     auto file = Serialize::deserialize<File>(ctx.repo->getObject(id)); | ||||
|                     if (isSubpath(ctx.repo->getConfig().getStr("prefix"), std::filesystem::path(file.name))) | ||||
|                         threadPool.push([&, file]() { processFile(ComparableFile{file, ctx.repo}); }); | ||||
|                     if (Signals::shouldQuit) break; | ||||
| @@ -111,10 +111,10 @@ void CommandDiff::run(Context ctx) { | ||||
|  | ||||
|         if (ctx.repo->getConfig().exists("aid2")) { | ||||
|             archiveO2.emplace( | ||||
|                     Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid2")))); | ||||
|                     Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid2")))); | ||||
|             std::map<std::filesystem::path, File> files2;///< Files in the second archive | ||||
|             for (auto id: archiveO2->files) { | ||||
|                 auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(id)); | ||||
|                 auto file = Serialize::deserialize<File>(ctx.repo->getObject(id)); | ||||
|                 auto path = std::filesystem::path(file.name); | ||||
|                 if (isSubpath(ctx.repo->getConfig().getStr("prefix"), path)) | ||||
|                     files2.emplace(file.getKey(), std::move(file)); | ||||
|   | ||||
| @@ -13,9 +13,9 @@ | ||||
| CommandListFiles::CommandListFiles() : Command() {} | ||||
|  | ||||
| void CommandListFiles::run(Context ctx) { | ||||
|     auto archive = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(ctx.repo->getConfig().getInt("aid"))); | ||||
|     auto archive = Serialize::deserialize<Archive>(ctx.repo->getObject(ctx.repo->getConfig().getInt("aid"))); | ||||
|     for (auto const &fid: archive.files) { | ||||
|         auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid)); | ||||
|         auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid)); | ||||
|         std::cout << "Name: " << file.name << " type: " << File::TypeToStr.at(file.fileType) | ||||
|                   << " size: " << BytesFormatter::formatStr(file.bytes) << std::endl; | ||||
|     } | ||||
|   | ||||
| @@ -68,14 +68,14 @@ void CommandRestore::run(Context ctx) { | ||||
|         /// Add the main restore task | ||||
|         threadPool.push([&, this]() { | ||||
|             /// Get the archive and its file IDs | ||||
|             auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObjectRaw(archive)); | ||||
|             auto archiveO = Serialize::deserialize<Archive>(ctx.repo->getObject(archive)); | ||||
|             std::vector<Object::idType> files = archiveO.files; | ||||
|             /// For each file... | ||||
|             for (const auto fid: files) { | ||||
|                 /// Stop when asked to | ||||
|                 if (Signals::shouldQuit) break; | ||||
|  | ||||
|                 auto file = Serialize::deserialize<File>(ctx.repo->getObjectRaw(fid)); | ||||
|                 auto file = Serialize::deserialize<File>(ctx.repo->getObject(fid)); | ||||
|                 filesToRestoreCount++; | ||||
|                 bytesToRestore += file.bytes; | ||||
|                 /// Spawn a restore task | ||||
| @@ -105,7 +105,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files | ||||
|         return fullpath.string(); | ||||
|     } | ||||
|     if (file.fileType == File::Type::Symlink) { | ||||
|         auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(file.chunks.at(0))); | ||||
|         auto dest = Serialize::deserialize<Chunk>(ctx.repo->getObject(file.chunks.at(0))); | ||||
|         std::filesystem::create_symlink(std::filesystem::path(std::string{dest.data.begin(), dest.data.end()}), | ||||
|                                         fullpath); | ||||
|         callback(0, 0, 1); | ||||
| @@ -116,7 +116,7 @@ std::string CommandRestore::backupRestoreFile(const File &file, const std::files | ||||
|     for (const auto cid: file.chunks) { | ||||
|         if (Signals::shouldQuit) throw Exception("Quitting!"); | ||||
|  | ||||
|         Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObjectRaw(cid.second)); | ||||
|         Chunk c = Serialize::deserialize<Chunk>(ctx.repo->getObject(cid.second)); | ||||
|         if (!c.data.empty()) { | ||||
|             ostream.rdbuf()->sputn(c.data.data(), c.data.size()); | ||||
|             callback(c.data.size(), 0, 0); | ||||
|   | ||||
| @@ -89,7 +89,7 @@ void CommandRun::run(Context ctx) { | ||||
|             auto relPath = p.lexically_relative(from).string(); | ||||
|  | ||||
|             if (ctx.repo->exists(Object::ObjectType::File, relPath) != 0) { | ||||
|                 File repoFile = Serialize::deserialize<File>(ctx.repo->getObjectRaw(Object::ObjectType::File, relPath)); | ||||
|                 File repoFile = Serialize::deserialize<File>(ctx.repo->getObject(Object::ObjectType::File, relPath)); | ||||
|                 if (!changeDetector.check({repoFile, ctx.repo}, {p, from})) { | ||||
|                     addFile(repoFile.id); | ||||
|                     progress.print("Skipped: " + relPath, 1); | ||||
|   | ||||
| @@ -12,11 +12,11 @@ FilterContainer &FilterContainer::addFilter(std::unique_ptr<Filter> &&f) { | ||||
| } | ||||
|  | ||||
| std::vector<char> FilterContainer::filterWrite(std::vector<char> from) const { | ||||
|     for (auto const &f: filters) from = std::move(f->filterWrite(std::move(from))); | ||||
|     for (auto const &f: filters) from = f->filterWrite(std::move(from)); | ||||
|     return from; | ||||
| } | ||||
|  | ||||
| std::vector<char> FilterContainer::filterRead(std::vector<char> from) const { | ||||
|     for (auto f = filters.crbegin(); f != filters.crend(); f++) from = std::move((*f)->filterRead(std::move(from))); | ||||
|     for (auto f = filters.crbegin(); f != filters.crend(); f++) from = (*f)->filterRead(std::move(from)); | ||||
|     return from; | ||||
| } | ||||
|   | ||||
| @@ -101,7 +101,7 @@ static int rfsRead(const char *path, char *buf, size_t size, off_t offset, struc | ||||
|         size_t curInBuf = 0; | ||||
|         size_t curInChunk = offset - curchunk->first; | ||||
|         while (curInBuf < size) { | ||||
|             auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(curchunk->second)); | ||||
|             auto chunk = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(curchunk->second)); | ||||
|             size_t read = std::min((size_t) chunk.length - curInChunk, size - curInBuf); | ||||
|             memcpy(buf + curInBuf, chunk.data.data() + curInChunk, read); | ||||
|             curInBuf += read; | ||||
| @@ -121,7 +121,7 @@ static int rfsReadlink(const char *path, char *buf, size_t size) { | ||||
|         } catch (...) { return -ENOENT; } | ||||
|  | ||||
|     if (entry->file->fileType != File::Type::Symlink) return -ENOENT; | ||||
|     auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObjectRaw(entry->file->chunks.at(0))); | ||||
|     auto dst = Serialize::deserialize<Chunk>(RepoFS::repo->getObject(entry->file->chunks.at(0))); | ||||
|     strncpy(buf, dst.data.data(), std::min(dst.data.size(), size)); | ||||
|  | ||||
|     return 0; | ||||
| @@ -139,9 +139,9 @@ void RepoFS::start(Repository *repo, std::string path) { | ||||
|     RepoFS::repo = repo; | ||||
|     auto ars = repo->getObjects(Object::ObjectType::Archive); | ||||
|     for (auto const &r: ars) { | ||||
|         auto a = Serialize::deserialize<Archive>(repo->getObjectRaw(r.second)); | ||||
|         auto a = Serialize::deserialize<Archive>(repo->getObject(r.second)); | ||||
|         for (auto const &f: a.files) { | ||||
|             auto file = Serialize::deserialize<File>(repo->getObjectRaw(f)); | ||||
|             auto file = Serialize::deserialize<File>(repo->getObject(f)); | ||||
|             auto path = std::filesystem::path(file.name); | ||||
|             DirEntry *entry = root->children[std::to_string(a.id)].get() | ||||
|                                       ? root->children[std::to_string(a.id)].get() | ||||
|   | ||||
| @@ -3,7 +3,6 @@ | ||||
|  | ||||
| #include "BytesFormatter.h" | ||||
| #include "Command.h" | ||||
| #include "CommandDelete.h" | ||||
| #include "CommandDiff.h" | ||||
| #include "CommandList.h" | ||||
| #include "CommandListFiles.h" | ||||
| @@ -110,7 +109,6 @@ int main(int argc, char *argv[]) { | ||||
|         commands.emplace(CommandListFiles::name, std::make_unique<CommandListFiles>()); | ||||
|         commands.emplace(CommandList::name, std::make_unique<CommandList>()); | ||||
|         commands.emplace(CommandMount::name, std::make_unique<CommandMount>()); | ||||
|         commands.emplace(CommandDelete::name, std::make_unique<CommandDelete>()); | ||||
|  | ||||
|         if (commands.count(opt) == 0) { | ||||
|             std::cerr << "Unknown argument" << std::endl; | ||||
|   | ||||
| @@ -35,11 +35,11 @@ public: | ||||
|     bool init() override; | ||||
|     bool flush() override; | ||||
|  | ||||
|     std::vector<char> getObjectRaw(Object::idType id) const override; | ||||
|     std::vector<char> getObject(Object::idType id) const override; | ||||
|     bool putObject(const Object &obj) override; | ||||
|     bool deleteObjects(const std::vector<Object::idType> &objs) override; | ||||
|     bool deleteObject(const Object &obj) override; | ||||
|  | ||||
|     std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const override; | ||||
|     std::vector<char> getObject(Object::ObjectType type, const std::string &key) const override; | ||||
|     Object::idType getObjectId(Object::ObjectType type, const std::string &key) const override; | ||||
|     std::vector<std::pair<std::string, Object::idType>> getObjects(Object::ObjectType type) const override; | ||||
|  | ||||
| @@ -104,8 +104,6 @@ private: | ||||
|  | ||||
|     unsigned long long maxFileId = 1;                           ///< Largest ID of object storage file | ||||
|     std::unordered_map<Object::idType, OffsetEntry> offsetIndex;///< Used to locate Object%s in the filesystem | ||||
|     std::unordered_map<Object::idType, std::set<Object::idType>> | ||||
|             fileToObjs;///< Used to locate Object%s in the filesystem | ||||
|  | ||||
|     std::mutex writeCacheLock;                             ///< Write cache lock | ||||
|     std::map<Object::idType, std::vector<char>> writeCache;///< Write cache, map of Object ids and their serialized data | ||||
| @@ -119,11 +117,8 @@ private: | ||||
|     void flushWriteCache(std::unique_lock<std::mutex> &&lockW); | ||||
|  | ||||
|     Object::idType largestUnusedId = 1;///< Largest available objectID | ||||
|     std::vector<Object::idType> unusedIds;///< Vector of unused IDs | ||||
|     std::unordered_map<Object::ObjectType, std::unordered_map<std::string, Object::idType>> | ||||
|             keyIndex;///< Maps Object%'s keys to their IDs | ||||
|  | ||||
|     std::unordered_map<Object::idType, uint64_t> refCounts;///< Count of references to an object per its id | ||||
| }; | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -31,16 +31,9 @@ public: | ||||
|     /// All derived objects should implement this method | ||||
|     virtual std::string getKey() const = 0; | ||||
|  | ||||
|     /// Returns the keys of that this object refers to | ||||
|     virtual const std::vector<idType> &getRefs() const; | ||||
|  | ||||
|     const idType id;      ///< Unique numerical ID of the object | ||||
|     const ObjectType type;///< Type of the object | ||||
|  | ||||
|     static std::unique_ptr<Object> deserialize(const std::vector<char> &src); | ||||
|     static std::unique_ptr<Object> deserialize(std::vector<char>::const_iterator &in, | ||||
|                                                const std::vector<char>::const_iterator &end); | ||||
|  | ||||
| protected: | ||||
|     /// Default constructor | ||||
|     /// \param id   Object ID | ||||
|   | ||||
| @@ -45,13 +45,7 @@ public: | ||||
|     /// \param id ID of object to return | ||||
|     /// \return   Serialized object | ||||
|     /// \throws   Exception on any error or if object doesn't exist | ||||
|     virtual std::vector<char> getObjectRaw(Object::idType id) const = 0; | ||||
|  | ||||
|     /// Returns the Object with id \p id | ||||
|     /// \param id ID of object to return | ||||
|     /// \return   Serialized object | ||||
|     /// \throws   Exception on any error or if object doesn't exist | ||||
|     std::unique_ptr<Object> getObject(Object::idType id) const; | ||||
|     virtual std::vector<char> getObject(Object::idType id) const = 0; | ||||
|  | ||||
|     /// Adds the Object \p obj to the Repository | ||||
|     /// \param obj  Constant reference to the object | ||||
| @@ -60,17 +54,17 @@ public: | ||||
|     virtual bool putObject(const Object &obj) = 0; | ||||
|  | ||||
|     /// Deletes Object \p obj from the Repository | ||||
|     /// \param obj  Constant reference to the vector with ids of objects to delete | ||||
|     /// \param obj  Constant reference to the object | ||||
|     /// \return     True if successful, False if it didn't exist | ||||
|     /// \throws     Exception on any error | ||||
|     virtual bool deleteObjects(const std::vector<Object::idType> &objs) = 0; | ||||
|     virtual bool deleteObject(const Object &obj) = 0; | ||||
|  | ||||
|     /// Returns the Object of type \p type and with key \p key | ||||
|     /// \param type Type of the object | ||||
|     /// \param key  Constant reference to the key of the object | ||||
|     /// \return     Serialized object | ||||
|     /// \throws   Exception on any error or if object doesn't exist | ||||
|     virtual std::vector<char> getObjectRaw(Object::ObjectType type, const std::string &key) const = 0; | ||||
|     virtual std::vector<char> getObject(Object::ObjectType type, const std::string &key) const = 0; | ||||
|  | ||||
|     /// Returns the id of an Object of type \p type and with key \p key | ||||
|     /// \param type Type of the object | ||||
|   | ||||
| @@ -10,7 +10,7 @@ | ||||
| #include "../Object.h" | ||||
|  | ||||
| /// Object representing a backup | ||||
| class Archive final : public Object { | ||||
| class Archive : public Object { | ||||
| public: | ||||
|     Archive(Object::idType id, std::string name, unsigned long long mtime, std::vector<idType> files); | ||||
|  | ||||
| @@ -22,9 +22,6 @@ public: | ||||
|     /// Returns the name of the archive | ||||
|     std::string getKey() const override; | ||||
|  | ||||
|     /// Returns the files in this archive | ||||
|     const std::vector<Object::idType> &getRefs() const override; | ||||
|  | ||||
|     const std::string name;         ///< Archive name | ||||
|     const unsigned long long mtime; ///< Time of creation | ||||
|     const std::vector<idType> files;///< List of ids of File objects in the Archive | ||||
|   | ||||
| @@ -11,7 +11,7 @@ | ||||
| #include "../Object.h" | ||||
|  | ||||
| /// Object representing a part of a File | ||||
| class Chunk final : public Object { | ||||
| class Chunk : public Object { | ||||
| public: | ||||
|     Chunk(idType id, std::string, std::vector<char> data); | ||||
|  | ||||
|   | ||||
| @@ -15,7 +15,7 @@ | ||||
| #include "../Object.h" | ||||
|  | ||||
| /// Object representing a saved file | ||||
| class File final : public Object { | ||||
| class File : public Object { | ||||
| public: | ||||
|     enum class Type { Normal, Symlink, Directory, END }; | ||||
|  | ||||
| @@ -68,12 +68,6 @@ public: | ||||
|     /// List of the chunks in file | ||||
|     /// Normal file has normal chunks as its contents, for Directory it's empty, Symlink has a chunk with its target path | ||||
|     const std::map<size_t, idType> chunks; | ||||
|  | ||||
|     const std::vector<idType> &getRefs() const override; | ||||
|  | ||||
| private: | ||||
|     void makeChunksList() const; | ||||
|     mutable std::optional<std::vector<idType>> chunksList{std::nullopt}; | ||||
| }; | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -5,10 +5,8 @@ | ||||
| #include "FileRepository.h" | ||||
|  | ||||
| #include <exception> | ||||
| #include <iostream> | ||||
| #include <iterator> | ||||
| #include <mutex> | ||||
| #include <queue> | ||||
|  | ||||
| #include "CheckFilter.h" | ||||
| #include "FilterFactory.h" | ||||
| @@ -47,9 +45,6 @@ bool FileRepository::open() { | ||||
|         std::tie(keyIndex, largestUnusedId) = | ||||
|                 Serialize::deserialize<std::pair<decltype(keyIndex), decltype(largestUnusedId)>>( | ||||
|                         filters.filterRead(readFile(root / "index"))); | ||||
|         refCounts = Serialize::deserialize<decltype(refCounts)>(filters.filterRead(readFile(root / "refcounts"))); | ||||
|         unusedIds = Serialize::deserialize<decltype(unusedIds)>(filters.filterRead(readFile(root / "unusedIds"))); | ||||
|         fileToObjs = Serialize::deserialize<decltype(fileToObjs)>(filters.filterRead(readFile(root / "fileToObjs"))); | ||||
|     } catch (const std::exception &e) { | ||||
|         ready = false; | ||||
|         throw; | ||||
| @@ -84,13 +79,10 @@ FileRepository::~FileRepository() { | ||||
|  | ||||
|         writeFile(root / "offsets", filters.filterWrite(Serialize::serialize(std::make_pair(maxFileId, offsetIndex)))); | ||||
|         writeFile(root / "index", filters.filterWrite(Serialize::serialize(std::make_pair(keyIndex, largestUnusedId)))); | ||||
|         writeFile(root / "unusedIds", filters.filterWrite(Serialize::serialize(unusedIds))); | ||||
|         writeFile(root / "refcounts", filters.filterWrite(Serialize::serialize(refCounts))); | ||||
|         writeFile(root / "fileToObjs", filters.filterWrite(Serialize::serialize(fileToObjs))); | ||||
|     } | ||||
| } | ||||
|  | ||||
| std::vector<char> FileRepository::getObjectRaw(Object::idType id) const { | ||||
| std::vector<char> FileRepository::getObject(Object::idType id) const { | ||||
|     if (!ready) throw Exception("Tried working with uninitialized repo!"); | ||||
|  | ||||
|     std::unique_lock lock(repoLock); | ||||
| @@ -141,7 +133,6 @@ void FileRepository::flushWriteCache(std::unique_lock<std::mutex> &&lockW) { | ||||
|         { | ||||
|             std::lock_guard lockI(repoLock); | ||||
|             offsetIndex.emplace(i.first, OffsetEntry(currentFileId, offset, i.second.size())); | ||||
|             fileToObjs[currentFileId].emplace(i.first); | ||||
|         } | ||||
|         offset += i.second.size(); | ||||
|         ofstream.rdbuf()->sputn(i.second.data(), i.second.size()); | ||||
| @@ -153,81 +144,14 @@ bool FileRepository::putObject(const Object &obj) { | ||||
|     { | ||||
|         std::lock_guard lock(repoLock); | ||||
|         keyIndex[obj.type][obj.getKey()] = obj.id; | ||||
|         for (auto const &i: obj.getRefs()) refCounts[i]++; | ||||
|     } | ||||
|     writeObject(obj); | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| bool FileRepository::deleteObjects(const std::vector<Object::idType> &objs) { | ||||
| bool FileRepository::deleteObject(const Object &obj) { | ||||
|     if (!ready) throw Exception("Tried working with uninitialized repo!"); | ||||
|  | ||||
|     std::queue<Object::idType> toVisit; | ||||
|     std::set<Object::idType> toDelete; | ||||
|  | ||||
|     for (auto const &o: objs) { | ||||
|         toVisit.emplace(o); | ||||
|         toDelete.emplace(o); | ||||
|     } | ||||
|  | ||||
|     std::cout << "Scanning for objects" << std::endl; | ||||
|  | ||||
|     while (!toVisit.empty()) { | ||||
|         auto o = toVisit.back(); | ||||
|         toVisit.pop(); | ||||
|  | ||||
|         auto obj = getObject(o); | ||||
|         for (const auto &id: obj->getRefs()) { | ||||
|             std::unique_lock lock(repoLock); | ||||
|             refCounts[id]--; | ||||
|             if (refCounts.at(id) == 0) { | ||||
|                 toDelete.emplace(id); | ||||
|                 toVisit.emplace(id); | ||||
|                 refCounts.erase(id); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     std::cout << "Found " << toDelete.size() << " to delete " << std::endl; | ||||
|  | ||||
|  | ||||
|     std::unordered_map<uint64_t, Object::idType> fileToObj; | ||||
|     std::set<uint64_t> touchedFiles; | ||||
|  | ||||
|     for (auto const &id: toDelete) { | ||||
|         fileToObj.emplace(offsetIndex.at(id).fileId, id); | ||||
|         touchedFiles.emplace(offsetIndex.at(id).fileId); | ||||
|     } | ||||
|  | ||||
|     std::cout << "Will rewrite " << touchedFiles.size() << " files" << std::endl; | ||||
|  | ||||
|     for (auto const &f: touchedFiles) { | ||||
|         std::cout << "Rewriting file " << f << std::endl; | ||||
|         const auto &objs = fileToObjs.at(f); | ||||
|         std::vector<std::unique_ptr<Object>> objects; | ||||
|         for (auto const &o: objs) { | ||||
|             auto obj = getObject(o); | ||||
|             { | ||||
|                 std::unique_lock lock(repoLock); | ||||
|                 offsetIndex.erase(o); | ||||
|             } | ||||
|             if (toDelete.find(o) == toDelete.end()) putObject(*obj); | ||||
|         } | ||||
|         { | ||||
|             std::unique_lock lock(repoLock); | ||||
|             fileToObjs.erase(f); | ||||
|         } | ||||
|         std::filesystem::remove(root / std::to_string(f)); | ||||
|     } | ||||
|     { | ||||
|         std::unique_lock lock(repoLock); | ||||
|         for (auto const &id: toDelete) { | ||||
|             unusedIds.emplace_back(id); | ||||
|             // FIXME: this is a bit inefficient | ||||
|             for (auto &m: keyIndex) erase_if(m.second, [&](const auto &t) { return toDelete.contains(t.second); }); | ||||
|         } | ||||
|     } | ||||
|     return true; | ||||
|     throw Exception("Deletion not implemented!"); | ||||
| } | ||||
|  | ||||
| std::vector<char> FileRepository::readFile(const std::filesystem::path &file, unsigned long long offset, | ||||
| @@ -264,8 +188,8 @@ bool FileRepository::writeFile(const std::filesystem::path &file, const std::vec | ||||
|     return true; | ||||
| } | ||||
|  | ||||
| std::vector<char> FileRepository::getObjectRaw(Object::ObjectType type, const std::string &key) const { | ||||
|     return getObjectRaw(getObjectId(type, key)); | ||||
| std::vector<char> FileRepository::getObject(Object::ObjectType type, const std::string &key) const { | ||||
|     return getObject(getObjectId(type, key)); | ||||
| } | ||||
|  | ||||
| Object::idType FileRepository::getObjectId(Object::ObjectType type, const std::string &key) const { | ||||
| @@ -290,11 +214,6 @@ bool FileRepository::exists(Object::ObjectType type, const std::string &key) con | ||||
|  | ||||
| Object::idType FileRepository::getId() { | ||||
|     std::lock_guard lock(repoLock); | ||||
|     if (!unusedIds.empty()) { | ||||
|         auto ret = unusedIds.back(); | ||||
|         unusedIds.pop_back(); | ||||
|         return ret; | ||||
|     } | ||||
|     return largestUnusedId++; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -6,10 +6,6 @@ | ||||
|  | ||||
| #include "Serialize.h" | ||||
|  | ||||
| #include "objects/Archive.h" | ||||
| #include "objects/Chunk.h" | ||||
| #include "objects/File.h" | ||||
|  | ||||
| Object::Object(idType id, ObjectType type) : id(id), type(type) {} | ||||
|  | ||||
| Object::Object(std::vector<char>::const_iterator &in, const std::vector<char>::const_iterator &end) | ||||
| @@ -21,32 +17,3 @@ void Object::serialize(std::vector<char> &out) const { | ||||
| } | ||||
|  | ||||
| Object::~Object() = default; | ||||
|  | ||||
| static std::vector<Object::idType> emptyRef{}; | ||||
|  | ||||
| const std::vector<Object::idType> &Object::getRefs() const { return emptyRef; } | ||||
|  | ||||
| std::unique_ptr<Object> Object::deserialize(std::vector<char>::const_iterator &in, | ||||
|                                             const std::vector<char>::const_iterator &end) { | ||||
|     auto inCpy = in; | ||||
|     auto id = Serialize::deserialize<idType>(in, end); | ||||
|     auto type = Serialize::deserialize<ObjectType>(in, end); | ||||
|  | ||||
|     switch (type) { | ||||
|         case ObjectType::Archive: | ||||
|             return std::make_unique<Archive>(Serialize::deserialize<Archive>(inCpy, end)); | ||||
|         case ObjectType::File: | ||||
|             return std::make_unique<File>(Serialize::deserialize<File>(inCpy, end)); | ||||
|         case ObjectType::Chunk: | ||||
|             return std::make_unique<Chunk>(Serialize::deserialize<Chunk>(inCpy, end)); | ||||
|         case ObjectType::END: | ||||
|             break; | ||||
|         default: | ||||
|             throw Exception("Bad object!"); | ||||
|     } | ||||
| } | ||||
|  | ||||
| std::unique_ptr<Object> Object::deserialize(const std::vector<char> &src) { | ||||
|     auto srcIterator = src.cbegin(); | ||||
|     return deserialize(srcIterator, src.end()); | ||||
| } | ||||
|   | ||||
| @@ -8,7 +8,3 @@ Repository::~Repository() = default; | ||||
| Repository::Repository(Config config) : config(std::move(config)) {} | ||||
|  | ||||
| const Config &Repository::getConfig() const { return config; } | ||||
|  | ||||
| std::unique_ptr<Object> Repository::getObject(Object::idType id) const { | ||||
|     return Object::deserialize(this->getObjectRaw(id)); | ||||
| } | ||||
|   | ||||
| @@ -28,5 +28,3 @@ void Archive::serialize(std::vector<char> &out) const { | ||||
| } | ||||
|  | ||||
| std::string Archive::getKey() const { return name; } | ||||
|  | ||||
| const std::vector<Object::idType> &Archive::getRefs() const { return files; } | ||||
|   | ||||
| @@ -83,16 +83,3 @@ unsigned long long File::getFileSize(const std::filesystem::path &p) { | ||||
|     else | ||||
|         return getFileContents(p).size(); | ||||
| } | ||||
|  | ||||
| void File::makeChunksList() const { | ||||
|     if (chunksList) return; | ||||
|     chunksList.emplace(); | ||||
|  | ||||
|     chunksList->reserve(chunks.size()); | ||||
|     for (auto const &c: chunks) chunksList->emplace_back(c.second); | ||||
| } | ||||
|  | ||||
| const std::vector<Object::idType> &File::getRefs() const { | ||||
|     if (!chunksList) makeChunksList(); | ||||
|     return *chunksList; | ||||
| } | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
| #include "Serialize.h" | ||||
|  | ||||
| FileBuffer::FileBuffer(const Repository *repo, Object::idType fileId) | ||||
|     : repo(repo), file(Serialize::deserialize<File>(repo->getObjectRaw(fileId))), chunksQueue() { | ||||
|     : repo(repo), file(Serialize::deserialize<File>(repo->getObject(fileId))), chunksQueue() { | ||||
|     for (auto const &id: file.chunks) chunksQueue.emplace(id.second); | ||||
| }; | ||||
|  | ||||
| @@ -35,7 +35,7 @@ int FileBuffer::underflow() { | ||||
|     if (getBuf.empty() || curGetBufPos == getBuf.size()) { | ||||
|         if (chunksQueue.empty()) return traits_type::eof(); | ||||
|         else { | ||||
|             auto chunk = Serialize::deserialize<Chunk>(repo->getObjectRaw(chunksQueue.front())); | ||||
|             auto chunk = Serialize::deserialize<Chunk>(repo->getObject(chunksQueue.front())); | ||||
|             getBuf = chunk.data; | ||||
|             chunksQueue.pop(); | ||||
|             curGetBufPos = 0; | ||||
|   | ||||
| @@ -48,8 +48,8 @@ TEST(FileRepository, Deserialize) { | ||||
|         ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666); | ||||
|         ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777); | ||||
|  | ||||
|         auto o1o = repo.getObjectRaw(666); | ||||
|         auto o2o = repo.getObjectRaw(777); | ||||
|         auto o1o = repo.getObject(666); | ||||
|         auto o2o = repo.getObject(777); | ||||
|  | ||||
|         auto o1ob = o1o.cbegin(); | ||||
|         auto o2ob = o2o.cbegin(); | ||||
| @@ -126,7 +126,7 @@ TEST(FileRepository, Filters) { | ||||
|  | ||||
|  | ||||
|         try { | ||||
|             auto o1o = repo.getObjectRaw(666); | ||||
|             auto o1o = repo.getObject(666); | ||||
|             auto o1ob = o1o.cbegin(); | ||||
|  | ||||
|             Chunk o1(o1ob, o1o.cend()); | ||||
| @@ -134,7 +134,7 @@ TEST(FileRepository, Filters) { | ||||
|         } catch (...) {} | ||||
|  | ||||
|         try { | ||||
|             auto o2o = repo.getObjectRaw(777); | ||||
|             auto o2o = repo.getObject(777); | ||||
|             auto o2ob = o2o.cbegin(); | ||||
|  | ||||
|             Chunk o2(o2ob, o2o.cend()); | ||||
| @@ -163,8 +163,8 @@ TEST(FileRepository, Filters) { | ||||
|         ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o1k), 666); | ||||
|         ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 777); | ||||
|  | ||||
|         auto o1o = repo.getObjectRaw(666); | ||||
|         auto o2o = repo.getObjectRaw(777); | ||||
|         auto o1o = repo.getObject(666); | ||||
|         auto o2o = repo.getObject(777); | ||||
|  | ||||
|         auto o1ob = o1o.cbegin(); | ||||
|         auto o2ob = o2o.cbegin(); | ||||
| @@ -192,6 +192,7 @@ TEST(FileRepository, Filters) { | ||||
| } | ||||
|  | ||||
| TEST(FileRepository, IDsDisabled) { | ||||
|     GTEST_SKIP(); | ||||
|     Cleaner c({"IDS/testrepo"}); | ||||
|     { | ||||
|         Config conf; | ||||
| @@ -223,8 +224,8 @@ TEST(FileRepository, IDsDisabled) { | ||||
|         conf.add("repo", "IDS/testrepo"); | ||||
|         FileRepository repo(conf); | ||||
|         repo.open(); | ||||
|         auto o1o = repo.getObjectRaw(1); | ||||
|         auto o2o = repo.getObjectRaw(2); | ||||
|         auto o1o = repo.getObject(1); | ||||
|         auto o2o = repo.getObject(2); | ||||
|  | ||||
|         auto o1ob = o1o.cbegin(); | ||||
|         auto o2ob = o2o.cbegin(); | ||||
| @@ -256,7 +257,7 @@ TEST(FileRepository, IDsDisabled) { | ||||
|         ASSERT_EQ(repo.getObjectId(Object::ObjectType::Chunk, o2k), 2); | ||||
|  | ||||
|  | ||||
|         repo.deleteObjects({o1.id}); | ||||
|         repo.deleteObject(o1); | ||||
|     } | ||||
|     { | ||||
|         Config conf; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user