55 Commits

Author SHA1 Message Date
83ceefa041 Sync-base: more javadocs 2 2025-05-13 23:46:07 +02:00
838405fb46 Sync-base: more javadocs 2025-05-13 22:20:13 +02:00
dbad8a2b22 Objects: javadocs 2025-05-13 20:53:44 +02:00
66dabdef25 a couple more javadocs 2025-05-13 20:13:28 +02:00
87e127bdfb KleppmannTree javadocs
and some more
2025-05-13 15:55:33 +02:00
fd62543687 CI: reenable rest of CI 2025-05-12 16:15:51 +02:00
757a0bbc8a javadocs github pages (#7)
* javadocs github pages

* fix

* fix

* proper pages
2025-05-12 16:13:42 +02:00
0c3524851e Some javadocs + CI 2025-05-12 12:49:10 +02:00
3eb7164c0f Dhfs-fuse: fix LazyFsIT tests 2025-05-10 16:49:58 +02:00
f544a67fb5 Objects: cleanup AutoCloseableNoThrow 2025-05-10 13:49:42 +02:00
964b3da951 Objects: remove getUsableSpace 2025-05-10 11:20:34 +02:00
cb33472dc5 Utils: remove VoidFn 2025-05-10 11:07:40 +02:00
de211bb2d2 Objects: remove prepareTx 2025-05-07 16:12:47 +02:00
56ab3bad4c Objects: remove TransactionPrivate 2025-05-07 15:00:15 +02:00
9403556220 Objects: remove TransactionFactory 2025-05-07 14:39:55 +02:00
469a6b9011 Objects: remove lockmanager 2025-05-07 11:21:48 +02:00
52ccbb99bc Sync-base: rename ConnectedPeerManager to ReachablePeerManager
tests check for "connected" in logs
2025-05-06 20:28:21 +02:00
d972cd1562 Objects: remove LockingStrategy 2025-05-06 20:21:29 +02:00
80151bcca5 Dhfs-fuse: less parallel e2e tests 2025-05-06 20:07:03 +02:00
289a2b880e Sync-base: rename ConnectedPeerManager 2025-05-05 22:18:00 +02:00
0849df60ae Dhfs-fs: remove DhfsFileService 2025-05-05 21:58:37 +02:00
9cb5c226f9 remove dhfs-app 2025-05-05 21:20:07 +02:00
87c404828c add powershell run scripts 2025-05-04 17:44:57 +02:00
b074e8eb44 Dhfs-fs: proper not found unlink exception 2025-05-04 17:13:43 +02:00
eb5b0ae03c cleanup run wrapper 2025-05-03 17:18:28 +02:00
c329c1f982 Objects: nested transactions 2025-05-03 13:57:44 +02:00
4e7b13227b Sync-base: "kick out" inactive peers 2025-05-03 13:14:06 +02:00
db51d7280c Revert "Sync-base: get rid of JDataRemotePush"
This reverts commit 07133a71
2025-05-03 11:25:23 +02:00
70fecb389b Objects: cleanup transaction put a little 2025-05-02 12:50:43 +02:00
6e9a2b25f6 Utils: cleanup UnsafeAccessor a little 2025-05-02 12:39:50 +02:00
b84ef95703 Revert "Objects: simplify tx commit hooks"
This reverts commit c0735801b9.
2025-05-01 13:42:06 +02:00
c0735801b9 Objects: simplify tx commit hooks 2025-05-01 13:27:39 +02:00
b506ced9d5 Objects: simplify WritebackObjectPersistentStore 2025-05-01 10:29:11 +02:00
46bc9fa810 Objects: remove transactionobject 2025-05-01 09:14:50 +02:00
8ab034402d Revert "Objects: simplify cache"
This reverts commit d94d11ec8b.
2025-04-29 20:36:19 +02:00
d94d11ec8b Objects: simplify cache 2025-04-29 20:22:21 +02:00
5beaad2d32 Objects: better iterators 2025-04-29 16:53:26 +02:00
c4484d21e5 Objects: simplify tx commit a little 2025-04-29 16:50:41 +02:00
2766ef1bae Add --enable-preview to run.xml and run 2025-04-29 12:49:03 +02:00
58de85c078 Sync-base: WritebackObjectPersistentStore cleanup 2025-04-29 12:46:39 +02:00
cc9da86440 Sync-base: JObjectKeyImpl import fix 2025-04-29 12:44:41 +02:00
e6c9e6aee9 Dhfs-fuse: implement write_buf for one less copy 2025-04-29 12:44:18 +02:00
62265355c4 Dhfs-fs: a little cleanup in DhfsFileServiceImpl 2025-04-29 12:44:00 +02:00
854bce1627 Utils: UninitializedByteBuffer 2025-04-29 12:43:18 +02:00
1b19c77bb6 Utils: slightly faster add in HashSetDelayedBlockingQueue 2025-04-29 12:40:45 +02:00
7aa968a569 Dhfs-fuse: fix import 2025-04-29 00:45:34 +02:00
e348c39be1 Utils: add UnsafeAccessor to JnrPtrByteOutput
oops
2025-04-28 23:50:31 +02:00
1b54830651 Objects: don't lock some objects twice for no reason 2025-04-28 23:49:45 +02:00
bc5f0b816c Objects: add putNew
to avoid searching for nonexistent objects
2025-04-28 23:47:53 +02:00
9ff914bdaa Utils: move UnsafeAccessor to utils 2025-04-28 23:36:42 +02:00
1cee6f62b8 Utils: less dumb DataLocker 2025-04-28 23:34:30 +02:00
81703a9406 Sync-base: some microoptimizations 2025-04-28 15:44:36 +02:00
1757034e0b Sync-base: speed up RemoteObjPusherTxHook
they are immutable, no need to do real equals, they can't be same if different
2025-04-28 15:09:23 +02:00
d9765a51d8 Sync-base: freeze JKleppmannTreeNodeHolder root nodes 2025-04-28 13:00:50 +02:00
99ef560b95 Sync-base: static final hooks
so that compiler can fold them
2025-04-28 12:59:21 +02:00
221 changed files with 4524 additions and 3497 deletions

View File

@@ -21,7 +21,7 @@ jobs:
- name: Checkout
uses: actions/checkout@v4
with:
submodules: 'recursive'
submodules: "recursive"
- name: Install sudo for ACT
run: apt-get update && apt-get install -y sudo
@@ -32,7 +32,7 @@ jobs:
- name: User allow other for fuse
run: echo "user_allow_other" | sudo tee -a /etc/fuse.conf
- name: Dump fuse.conf
run: cat /etc/fuse.conf
@@ -47,7 +47,7 @@ jobs:
run: cd thirdparty/lazyfs/ && ./build.sh
- name: Test with Maven
run: cd dhfs-parent && mvn -T $(nproc) --batch-mode --update-snapshots package verify
run: cd dhfs-parent && mvn -T $(nproc) --batch-mode --update-snapshots package verify javadoc:aggregate
# - name: Build with Maven
# run: cd dhfs-parent && mvn --batch-mode --update-snapshots package # -Dquarkus.log.category.\"com.usatiuk.dhfs\".min-level=DEBUG
@@ -55,7 +55,12 @@ jobs:
- uses: actions/upload-artifact@v4
with:
name: DHFS Server Package
path: dhfs-parent/dhfs-app/target/quarkus-app
path: dhfs-parent/dhfs-fuse/target/quarkus-app
- uses: actions/upload-artifact@v4
with:
name: DHFS Javadocs
path: dhfs-parent/target/reports/apidocs/
- uses: actions/upload-artifact@v4
if: ${{ always() }}
@@ -212,7 +217,7 @@ jobs:
run: mkdir -p run-wrapper-out/dhfs/data && mkdir -p run-wrapper-out/dhfs/fuse && mkdir -p run-wrapper-out/dhfs/app
- name: Copy DHFS
run: cp -r ./dhfs-package-downloaded "run-wrapper-out/dhfs/app/DHFS Package"
run: cp -r ./dhfs-package-downloaded "run-wrapper-out/dhfs/app/Server"
- name: Copy Webui
run: cp -r ./webui-dist-downloaded "run-wrapper-out/dhfs/app/Webui"
@@ -231,3 +236,37 @@ jobs:
with:
name: Run wrapper
path: ~/run-wrapper.tar.gz
publish-javadoc:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
permissions:
contents: read
pages: write
id-token: write
needs: [build-webui, build-dhfs]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- uses: actions/download-artifact@v4
with:
name: DHFS Javadocs
path: dhfs-javadocs-downloaded
- name: Setup Pages
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
# Upload entire repository
path: 'dhfs-javadocs-downloaded'
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

View File

@@ -41,3 +41,5 @@ nb-configuration.xml
# Plugin directory
/.quarkus/cli/plugins/
.jqwik-database

View File

@@ -1,17 +1,16 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Main 2" type="QsApplicationConfigurationType" factoryName="QuarkusApplication">
<option name="MAIN_CLASS_NAME" value="com.usatiuk.dhfsapp.Main"/>
<module name="dhfs-app"/>
<option name="VM_PARAMETERS"
value="-XX:+UnlockDiagnosticVMOptions -XX:+DebugNonSafepoints --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-exports java.base/jdk.internal.access=ALL-UNNAMED -ea -Xmx2G -Ddhfs.webui.root=$ProjectFileDir$/../webui/dist -Ddhfs.fuse.root=${HOME}/dhfs_test/2/fuse -Ddhfs.objects.persistence.files.root=${HOME}/dhfs_test/2/data -Ddhfs.objects.persistence.stuff.root=${HOME}/dhfs_test/2/data/stuff -Ddhfs.objects.peerdiscovery.broadcast=false -Dquarkus.http.port=9020 -Dquarkus.http.ssl-port=9021 -Ddhfs.peerdiscovery.preset-uuid=22000000-0000-0000-0000-000000000000 -Ddhfs.peerdiscovery.static-peers=11000000-0000-0000-0000-000000000000:127.0.0.1:9010:9011"/>
<extension name="coverage">
<pattern>
<option name="PATTERN" value="com.usatiuk.dhfs.remoteobj.*"/>
<option name="ENABLED" value="true"/>
</pattern>
</extension>
<method v="2">
<option name="Make" enabled="true"/>
</method>
</configuration>
<configuration default="false" name="Main 2" type="QsApplicationConfigurationType" factoryName="QuarkusApplication">
<option name="MAIN_CLASS_NAME" value="com.usatiuk.dhfsfuse.Main" />
<module name="dhfs-fuse" />
<option name="VM_PARAMETERS" value="-XX:+UnlockDiagnosticVMOptions -XX:+UseParallelGC -XX:+DebugNonSafepoints --enable-preview --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-exports java.base/jdk.internal.access=ALL-UNNAMED -ea -Xmx512M -Ddhfs.webui.root=$ProjectFileDir$/../webui/dist -Ddhfs.fuse.root=${HOME}/dhfs_test/2/fuse -Ddhfs.objects.persistence.files.root=${HOME}/dhfs_test/2/data -Ddhfs.objects.persistence.stuff.root=${HOME}/dhfs_test/2/data/stuff -Ddhfs.objects.peerdiscovery.broadcast=false -Dquarkus.http.port=9020 -Dquarkus.http.ssl-port=9021 -Ddhfs.peerdiscovery.preset-uuid=22000000-0000-0000-0000-000000000000 -Ddhfs.peerdiscovery.static-peers=11000000-0000-0000-0000-000000000000:127.0.0.1:9010:9011" />
<extension name="coverage">
<pattern>
<option name="PATTERN" value="com.usatiuk.dhfs.remoteobj.*" />
<option name="ENABLED" value="true" />
</pattern>
</extension>
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

View File

@@ -1,18 +1,16 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="Main" type="QsApplicationConfigurationType" factoryName="QuarkusApplication"
nameIsGenerated="true">
<option name="MAIN_CLASS_NAME" value="com.usatiuk.dhfsapp.Main"/>
<module name="dhfs-app"/>
<option name="VM_PARAMETERS"
value="-XX:+UnlockDiagnosticVMOptions -XX:+UseParallelGC -XX:+DebugNonSafepoints --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-exports java.base/jdk.internal.access=ALL-UNNAMED -ea -Xmx2G -Ddhfs.webui.root=$ProjectFileDir$/../webui/dist -Ddhfs.fuse.root=${HOME}/dhfs_test/1/fuse -Ddhfs.objects.persistence.files.root=${HOME}/dhfs_test/1/data -Ddhfs.objects.persistence.stuff.root=${HOME}/dhfs_test/1/data/stuff -Ddhfs.objects.peerdiscovery.broadcast=true -Dquarkus.http.port=9010 -Dquarkus.http.ssl-port=9011 -Ddhfs.peerdiscovery.preset-uuid=11000000-0000-0000-0000-000000000000 -Ddhfs.peerdiscovery.static-peers=22000000-0000-0000-0000-000000000000:127.0.0.1:9020:9021 -Dquarkus.http.host=0.0.0.0"/>
<extension name="coverage">
<pattern>
<option name="PATTERN" value="com.usatiuk.dhfs.remoteobj.*"/>
<option name="ENABLED" value="true"/>
</pattern>
</extension>
<method v="2">
<option name="Make" enabled="true"/>
</method>
</configuration>
<configuration default="false" name="Main" type="QsApplicationConfigurationType" factoryName="QuarkusApplication" nameIsGenerated="true">
<option name="MAIN_CLASS_NAME" value="com.usatiuk.dhfsfuse.Main" />
<module name="dhfs-fuse" />
<option name="VM_PARAMETERS" value="-XX:+UnlockDiagnosticVMOptions -XX:+UseZGC -XX:+ZGenerational --enable-preview -XX:+DebugNonSafepoints --add-exports java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-exports java.base/jdk.internal.access=ALL-UNNAMED -ea -Xmx1G -Ddhfs.webui.root=$ProjectFileDir$/../webui/dist -Ddhfs.fuse.root=${HOME}/dhfs_test/1/fuse -Ddhfs.objects.persistence.files.root=${HOME}/dhfs_test/1/data -Ddhfs.objects.persistence.stuff.root=${HOME}/dhfs_test/1/data/stuff -Ddhfs.objects.peerdiscovery.broadcast=true -Dquarkus.http.port=9010 -Dquarkus.http.ssl-port=9011 -Ddhfs.peerdiscovery.preset-uuid=11000000-0000-0000-0000-000000000000 -Ddhfs.peerdiscovery.static-peers=22000000-0000-0000-0000-000000000000:127.0.0.1:9020:9021 -Dquarkus.http.host=0.0.0.0" />
<extension name="coverage">
<pattern>
<option name="PATTERN" value="com.usatiuk.dhfs.remoteobj.*" />
<option name="ENABLED" value="true" />
</pattern>
</extension>
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

View File

@@ -1,5 +0,0 @@
*
!target/*-runner
!target/*-runner.jar
!target/lib/*
!target/quarkus-app/*

View File

@@ -1,43 +0,0 @@
#Maven
target/
pom.xml.tag
pom.xml.releaseBackup
pom.xml.versionsBackup
release.properties
.flattened-pom.xml
# Eclipse
.project
.classpath
.settings/
bin/
# IntelliJ
.idea
*.ipr
*.iml
*.iws
# NetBeans
nb-configuration.xml
# Visual Studio Code
.vscode
.factorypath
# OSX
.DS_Store
# Vim
*.swp
*.swo
# patch
*.orig
*.rej
# Local environment
.env
# Plugin directory
/.quarkus/cli/plugins/

View File

@@ -1,2 +0,0 @@
FROM azul/zulu-openjdk-debian:21-jre-latest
RUN apt update && apt install -y libfuse2 curl

View File

@@ -1,43 +0,0 @@
version: "3.2"
services:
dhfs1:
build: .
privileged: true
devices:
- /dev/fuse
volumes:
- $HOME/dhfs/dhfs1:/dhfs_root
- $HOME/dhfs/dhfs1_f:/dhfs_root/fuse:rshared
- ./target/quarkus-app:/app
command: "java --add-exports java.base/sun.nio.ch=ALL-UNNAMED
-Ddhfs.objects.persistence.files.root=/dhfs_root/p
-Ddhfs.objects.root=/dhfs_root/d
-Ddhfs.fuse.root=/dhfs_root/fuse -Dquarkus.http.host=0.0.0.0
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005
-jar /app/quarkus-run.jar"
ports:
- 8080:8080
- 8081:8443
- 5005:5005
dhfs2:
build: .
privileged: true
devices:
- /dev/fuse
volumes:
- $HOME/dhfs/dhfs2:/dhfs_root
- $HOME/dhfs/dhfs2_f:/dhfs_root/fuse:rshared
- ./target/quarkus-app:/app
command: "java --add-exports java.base/sun.nio.ch=ALL-UNNAMED
--add-exports java.base/jdk.internal.access=ALL-UNNAMED
--add-opens=java.base/java.nio=ALL-UNNAMED
-Ddhfs.objects.persistence.files.root=/dhfs_root/p
-Ddhfs.objects.root=/dhfs_root/d
-Ddhfs.fuse.root=/dhfs_root/fuse -Dquarkus.http.host=0.0.0.0
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5010
-jar /app/quarkus-run.jar"
ports:
- 8090:8080
- 8091:8443
- 5010:5010

View File

@@ -1,172 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.usatiuk.dhfs</groupId>
<artifactId>dhfs-app</artifactId>
<version>1.0-SNAPSHOT</version>
<parent>
<groupId>com.usatiuk.dhfs</groupId>
<artifactId>parent</artifactId>
<version>1.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>testcontainers</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.awaitility</groupId>
<artifactId>awaitility</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcprov-jdk18on</artifactId>
</dependency>
<dependency>
<groupId>org.bouncycastle</groupId>
<artifactId>bcpkix-jdk18on</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-security</artifactId>
</dependency>
<dependency>
<groupId>net.openhft</groupId>
<artifactId>zero-allocation-hashing</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-grpc</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-arc</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-rest</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-rest-client</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-rest-client-jsonb</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-rest-jsonb</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-scheduler</artifactId>
</dependency>
<dependency>
<groupId>io.quarkus</groupId>
<artifactId>quarkus-junit5</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>org.jboss.slf4j</groupId>
<artifactId>slf4j-jboss-logmanager</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
</dependency>
<dependency>
<groupId>org.pcollections</groupId>
<artifactId>pcollections</artifactId>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
<dependency>
<groupId>com.usatiuk.dhfs</groupId>
<artifactId>dhfs-fuse</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.usatiuk.dhfs</groupId>
<artifactId>utils</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<forkCount>1C</forkCount>
<reuseForks>false</reuseForks>
<parallel>classes</parallel>
<systemPropertyVariables>
<junit.jupiter.execution.parallel.enabled>
false
</junit.jupiter.execution.parallel.enabled>
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<configuration>
<forkCount>1C</forkCount>
<reuseForks>false</reuseForks>
<parallel>classes</parallel>
<systemPropertyVariables>
<junit.jupiter.execution.parallel.enabled>
false
</junit.jupiter.execution.parallel.enabled>
<junit.platform.output.capture.stdout>true</junit.platform.output.capture.stdout>
<junit.platform.output.capture.stderr>true</junit.platform.output.capture.stderr>
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>${quarkus.platform.group-id}</groupId>
<artifactId>quarkus-maven-plugin</artifactId>
<version>${quarkus.platform.version}</version>
<extensions>true</extensions>
<executions>
<execution>
<id>quarkus-plugin</id>
<goals>
<goal>build</goal>
<goal>generate-code</goal>
<goal>generate-code-tests</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

View File

@@ -1,97 +0,0 @@
####
# This Dockerfile is used in order to build a container that runs the Quarkus application in JVM mode
#
# Before building the container image run:
#
# ./mvnw package
#
# Then, build the image with:
#
# docker build -f src/main/docker/Dockerfile.jvm -t quarkus/server-jvm .
#
# Then run the container using:
#
# docker run -i --rm -p 8080:8080 quarkus/server-jvm
#
# If you want to include the debug port into your docker image
# you will have to expose the debug port (default 5005 being the default) like this : EXPOSE 8080 5005.
# Additionally you will have to set -e JAVA_DEBUG=true and -e JAVA_DEBUG_PORT=*:5005
# when running the container
#
# Then run the container using :
#
# docker run -i --rm -p 8080:8080 quarkus/server-jvm
#
# This image uses the `run-java.sh` script to run the application.
# This scripts computes the command line to execute your Java application, and
# includes memory/GC tuning.
# You can configure the behavior using the following environment properties:
# - JAVA_OPTS: JVM options passed to the `java` command (example: "-verbose:class")
# - JAVA_OPTS_APPEND: User specified Java options to be appended to generated options
# in JAVA_OPTS (example: "-Dsome.property=foo")
# - JAVA_MAX_MEM_RATIO: Is used when no `-Xmx` option is given in JAVA_OPTS. This is
# used to calculate a default maximal heap memory based on a containers restriction.
# If used in a container without any memory constraints for the container then this
# option has no effect. If there is a memory constraint then `-Xmx` is set to a ratio
# of the container available memory as set here. The default is `50` which means 50%
# of the available memory is used as an upper boundary. You can skip this mechanism by
# setting this value to `0` in which case no `-Xmx` option is added.
# - JAVA_INITIAL_MEM_RATIO: Is used when no `-Xms` option is given in JAVA_OPTS. This
# is used to calculate a default initial heap memory based on the maximum heap memory.
# If used in a container without any memory constraints for the container then this
# option has no effect. If there is a memory constraint then `-Xms` is set to a ratio
# of the `-Xmx` memory as set here. The default is `25` which means 25% of the `-Xmx`
# is used as the initial heap size. You can skip this mechanism by setting this value
# to `0` in which case no `-Xms` option is added (example: "25")
# - JAVA_MAX_INITIAL_MEM: Is used when no `-Xms` option is given in JAVA_OPTS.
# This is used to calculate the maximum value of the initial heap memory. If used in
# a container without any memory constraints for the container then this option has
# no effect. If there is a memory constraint then `-Xms` is limited to the value set
# here. The default is 4096MB which means the calculated value of `-Xms` never will
# be greater than 4096MB. The value of this variable is expressed in MB (example: "4096")
# - JAVA_DIAGNOSTICS: Set this to get some diagnostics information to standard output
# when things are happening. This option, if set to true, will set
# `-XX:+UnlockDiagnosticVMOptions`. Disabled by default (example: "true").
# - JAVA_DEBUG: If set remote debugging will be switched on. Disabled by default (example:
# true").
# - JAVA_DEBUG_PORT: Port used for remote debugging. Defaults to 5005 (example: "8787").
# - CONTAINER_CORE_LIMIT: A calculated core limit as described in
# https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt. (example: "2")
# - CONTAINER_MAX_MEMORY: Memory limit given to the container (example: "1024").
# - GC_MIN_HEAP_FREE_RATIO: Minimum percentage of heap free after GC to avoid expansion.
# (example: "20")
# - GC_MAX_HEAP_FREE_RATIO: Maximum percentage of heap free after GC to avoid shrinking.
# (example: "40")
# - GC_TIME_RATIO: Specifies the ratio of the time spent outside the garbage collection.
# (example: "4")
# - GC_ADAPTIVE_SIZE_POLICY_WEIGHT: The weighting given to the current GC time versus
# previous GC times. (example: "90")
# - GC_METASPACE_SIZE: The initial metaspace size. (example: "20")
# - GC_MAX_METASPACE_SIZE: The maximum metaspace size. (example: "100")
# - GC_CONTAINER_OPTIONS: Specify Java GC to use. The value of this variable should
# contain the necessary JRE command-line options to specify the required GC, which
# will override the default of `-XX:+UseParallelGC` (example: -XX:+UseG1GC).
# - HTTPS_PROXY: The location of the https proxy. (example: "myuser@127.0.0.1:8080")
# - HTTP_PROXY: The location of the http proxy. (example: "myuser@127.0.0.1:8080")
# - NO_PROXY: A comma separated lists of hosts, IP addresses or domains that can be
# accessed directly. (example: "foo.example.com,bar.example.com")
#
###
FROM registry.access.redhat.com/ubi8/openjdk-21:1.18
ENV LANGUAGE='en_US:en'
# We make four distinct layers so if there are application changes the library layers can be re-used
COPY --chown=185 target/quarkus-app/lib/ /deployments/lib/
COPY --chown=185 target/quarkus-app/*.jar /deployments/
COPY --chown=185 target/quarkus-app/app/ /deployments/app/
COPY --chown=185 target/quarkus-app/quarkus/ /deployments/quarkus/
EXPOSE 8080
USER 185
ENV JAVA_OPTS_APPEND="-Dquarkus.http.host=0.0.0.0 -Djava.util.logging.manager=org.jboss.logmanager.LogManager"
ENV JAVA_APP_JAR="/deployments/quarkus-run.jar"
ENTRYPOINT [ "/opt/jboss/container/java/run/run-java.sh" ]

View File

@@ -1,93 +0,0 @@
####
# This Dockerfile is used in order to build a container that runs the Quarkus application in JVM mode
#
# Before building the container image run:
#
# ./mvnw package -Dquarkus.package.jar.type=legacy-jar
#
# Then, build the image with:
#
# docker build -f src/main/docker/Dockerfile.legacy-jar -t quarkus/server-legacy-jar .
#
# Then run the container using:
#
# docker run -i --rm -p 8080:8080 quarkus/server-legacy-jar
#
# If you want to include the debug port into your docker image
# you will have to expose the debug port (default 5005 being the default) like this : EXPOSE 8080 5005.
# Additionally you will have to set -e JAVA_DEBUG=true and -e JAVA_DEBUG_PORT=*:5005
# when running the container
#
# Then run the container using :
#
# docker run -i --rm -p 8080:8080 quarkus/server-legacy-jar
#
# This image uses the `run-java.sh` script to run the application.
# This scripts computes the command line to execute your Java application, and
# includes memory/GC tuning.
# You can configure the behavior using the following environment properties:
# - JAVA_OPTS: JVM options passed to the `java` command (example: "-verbose:class")
# - JAVA_OPTS_APPEND: User specified Java options to be appended to generated options
# in JAVA_OPTS (example: "-Dsome.property=foo")
# - JAVA_MAX_MEM_RATIO: Is used when no `-Xmx` option is given in JAVA_OPTS. This is
# used to calculate a default maximal heap memory based on a containers restriction.
# If used in a container without any memory constraints for the container then this
# option has no effect. If there is a memory constraint then `-Xmx` is set to a ratio
# of the container available memory as set here. The default is `50` which means 50%
# of the available memory is used as an upper boundary. You can skip this mechanism by
# setting this value to `0` in which case no `-Xmx` option is added.
# - JAVA_INITIAL_MEM_RATIO: Is used when no `-Xms` option is given in JAVA_OPTS. This
# is used to calculate a default initial heap memory based on the maximum heap memory.
# If used in a container without any memory constraints for the container then this
# option has no effect. If there is a memory constraint then `-Xms` is set to a ratio
# of the `-Xmx` memory as set here. The default is `25` which means 25% of the `-Xmx`
# is used as the initial heap size. You can skip this mechanism by setting this value
# to `0` in which case no `-Xms` option is added (example: "25")
# - JAVA_MAX_INITIAL_MEM: Is used when no `-Xms` option is given in JAVA_OPTS.
# This is used to calculate the maximum value of the initial heap memory. If used in
# a container without any memory constraints for the container then this option has
# no effect. If there is a memory constraint then `-Xms` is limited to the value set
# here. The default is 4096MB which means the calculated value of `-Xms` never will
# be greater than 4096MB. The value of this variable is expressed in MB (example: "4096")
# - JAVA_DIAGNOSTICS: Set this to get some diagnostics information to standard output
# when things are happening. This option, if set to true, will set
# `-XX:+UnlockDiagnosticVMOptions`. Disabled by default (example: "true").
# - JAVA_DEBUG: If set remote debugging will be switched on. Disabled by default (example:
# true").
# - JAVA_DEBUG_PORT: Port used for remote debugging. Defaults to 5005 (example: "8787").
# - CONTAINER_CORE_LIMIT: A calculated core limit as described in
# https://www.kernel.org/doc/Documentation/scheduler/sched-bwc.txt. (example: "2")
# - CONTAINER_MAX_MEMORY: Memory limit given to the container (example: "1024").
# - GC_MIN_HEAP_FREE_RATIO: Minimum percentage of heap free after GC to avoid expansion.
# (example: "20")
# - GC_MAX_HEAP_FREE_RATIO: Maximum percentage of heap free after GC to avoid shrinking.
# (example: "40")
# - GC_TIME_RATIO: Specifies the ratio of the time spent outside the garbage collection.
# (example: "4")
# - GC_ADAPTIVE_SIZE_POLICY_WEIGHT: The weighting given to the current GC time versus
# previous GC times. (example: "90")
# - GC_METASPACE_SIZE: The initial metaspace size. (example: "20")
# - GC_MAX_METASPACE_SIZE: The maximum metaspace size. (example: "100")
# - GC_CONTAINER_OPTIONS: Specify Java GC to use. The value of this variable should
# contain the necessary JRE command-line options to specify the required GC, which
# will override the default of `-XX:+UseParallelGC` (example: -XX:+UseG1GC).
# - HTTPS_PROXY: The location of the https proxy. (example: "myuser@127.0.0.1:8080")
# - HTTP_PROXY: The location of the http proxy. (example: "myuser@127.0.0.1:8080")
# - NO_PROXY: A comma separated lists of hosts, IP addresses or domains that can be
# accessed directly. (example: "foo.example.com,bar.example.com")
#
###
FROM registry.access.redhat.com/ubi8/openjdk-21:1.18
ENV LANGUAGE='en_US:en'
COPY target/lib/* /deployments/lib/
COPY target/*-runner.jar /deployments/quarkus-run.jar
EXPOSE 8080
USER 185
ENV JAVA_OPTS_APPEND="-Dquarkus.http.host=0.0.0.0 -Djava.util.logging.manager=org.jboss.logmanager.LogManager"
ENV JAVA_APP_JAR="/deployments/quarkus-run.jar"
ENTRYPOINT [ "/opt/jboss/container/java/run/run-java.sh" ]

View File

@@ -1,27 +0,0 @@
####
# This Dockerfile is used in order to build a container that runs the Quarkus application in native (no JVM) mode.
#
# Before building the container image run:
#
# ./mvnw package -Dnative
#
# Then, build the image with:
#
# docker build -f src/main/docker/Dockerfile.native -t quarkus/server .
#
# Then run the container using:
#
# docker run -i --rm -p 8080:8080 quarkus/server
#
###
FROM registry.access.redhat.com/ubi8/ubi-minimal:8.9
WORKDIR /work/
RUN chown 1001 /work \
&& chmod "g+rwX" /work \
&& chown 1001:root /work
COPY --chown=1001:root target/*-runner /work/application
EXPOSE 8080
USER 1001
ENTRYPOINT ["./application", "-Dquarkus.http.host=0.0.0.0"]

View File

@@ -1,30 +0,0 @@
####
# This Dockerfile is used in order to build a container that runs the Quarkus application in native (no JVM) mode.
# It uses a micro base image, tuned for Quarkus native executables.
# It reduces the size of the resulting container image.
# Check https://quarkus.io/guides/quarkus-runtime-base-image for further information about this image.
#
# Before building the container image run:
#
# ./mvnw package -Dnative
#
# Then, build the image with:
#
# docker build -f src/main/docker/Dockerfile.native-micro -t quarkus/server .
#
# Then run the container using:
#
# docker run -i --rm -p 8080:8080 quarkus/server
#
###
FROM quay.io/quarkus/quarkus-micro-image:2.0
WORKDIR /work/
RUN chown 1001 /work \
&& chmod "g+rwX" /work \
&& chown 1001:root /work
COPY --chown=1001:root target/*-runner /work/application
EXPOSE 8080
USER 1001
ENTRYPOINT ["./application", "-Dquarkus.http.host=0.0.0.0"]

View File

@@ -1,34 +0,0 @@
quarkus.grpc.server.use-separate-server=false
dhfs.objects.peerdiscovery.port=42069
dhfs.objects.peerdiscovery.interval=4s
dhfs.objects.peerdiscovery.broadcast=true
dhfs.objects.sync.timeout=30
dhfs.objects.sync.ping.timeout=5
dhfs.objects.invalidation.threads=16
dhfs.objects.invalidation.delay=1000
dhfs.objects.reconnect_interval=5s
dhfs.objects.write_log=false
dhfs.objects.periodic-push-op-interval=5m
dhfs.fuse.root=${HOME}/dhfs_default/fuse
dhfs.objects.persistence.stuff.root=${HOME}/dhfs_default/data/stuff
dhfs.fuse.debug=false
dhfs.fuse.enabled=true
dhfs.files.allow_recursive_delete=false
dhfs.files.target_chunk_size=2097152
dhfs.files.target_chunk_alignment=19
dhfs.objects.deletion.delay=1000
dhfs.objects.deletion.can-delete-retry-delay=10000
dhfs.objects.ref_verification=true
dhfs.files.use_hash_for_chunks=false
dhfs.objects.autosync.threads=16
dhfs.objects.autosync.download-all=false
dhfs.objects.move-processor.threads=16
dhfs.objects.ref-processor.threads=16
dhfs.objects.opsender.batch-size=100
dhfs.objects.lock_timeout_secs=2
dhfs.local-discovery=true
dhfs.peerdiscovery.timeout=10000
quarkus.log.category."com.usatiuk".min-level=TRACE
quarkus.log.category."com.usatiuk".level=TRACE
quarkus.http.insecure-requests=enabled
quarkus.http.ssl.client-auth=required

View File

@@ -1,29 +0,0 @@
package com.usatiuk.dhfsapp;
import io.quarkus.test.junit.QuarkusTestProfile;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
abstract public class TempDataProfile implements QuarkusTestProfile {
protected void getConfigOverrides(Map<String, String> toPut) {
}
@Override
final public Map<String, String> getConfigOverrides() {
Path tempDirWithPrefix;
try {
tempDirWithPrefix = Files.createTempDirectory("dhfs-test");
} catch (IOException e) {
throw new RuntimeException(e);
}
var ret = new HashMap<String, String>();
ret.put("dhfs.objects.persistence.files.root", tempDirWithPrefix.resolve("dhfs_root_test").toString());
ret.put("dhfs.fuse.root", tempDirWithPrefix.resolve("dhfs_fuse_root_test").toString());
getConfigOverrides(ret);
return ret;
}
}

View File

@@ -1,44 +0,0 @@
package com.usatiuk.dhfsapp;
import io.quarkus.logging.Log;
import io.quarkus.runtime.ShutdownEvent;
import io.quarkus.runtime.StartupEvent;
import jakarta.annotation.Priority;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.event.Observes;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.util.Objects;
@ApplicationScoped
public class TestDataCleaner {
@ConfigProperty(name = "dhfs.objects.persistence.files.root")
String tempDirectory;
public static void purgeDirectory(File dir) {
try {
for (File file : Objects.requireNonNull(dir.listFiles())) {
if (file.isDirectory())
purgeDirectory(file);
file.delete();
}
} catch (Exception e) {
Log.error("Couldn't purge directory " + dir, e);
}
}
void init(@Observes @Priority(1) StartupEvent event) throws IOException {
try {
purgeDirectory(Path.of(tempDirectory).toFile());
} catch (Exception ignored) {
Log.warn("Couldn't cleanup test data on init");
}
}
void shutdown(@Observes @Priority(1000000000) ShutdownEvent event) throws IOException {
purgeDirectory(Path.of(tempDirectory).toFile());
}
}

View File

@@ -1,11 +0,0 @@
dhfs.objects.persistence.files.root=${HOME}/dhfs_data/dhfs_root_test
dhfs.objects.root=${HOME}/dhfs_data/dhfs_root_d_test
dhfs.fuse.root=${HOME}/dhfs_data/dhfs_fuse_root_test
dhfs.objects.ref_verification=true
dhfs.objects.deletion.delay=0
quarkus.log.category."com.usatiuk.dhfs".level=TRACE
quarkus.log.category."com.usatiuk.dhfs".min-level=TRACE
quarkus.http.test-port=0
quarkus.http.test-ssl-port=0
dhfs.local-discovery=false
dhfs.objects.persistence.snapshot-extra-checks=true

View File

@@ -5,6 +5,11 @@ import com.usatiuk.dhfs.remoteobj.JDataRemote;
import com.usatiuk.dhfs.remoteobj.JDataRemoteDto;
import com.usatiuk.objects.JObjectKey;
/**
* ChunkData is a data structure that represents an immutable binary blob
* @param key unique key
* @param data binary data
*/
public record ChunkData(JObjectKey key, ByteString data) implements JDataRemote, JDataRemoteDto {
@Override
public int estimateSize() {

View File

@@ -1,26 +0,0 @@
package com.usatiuk.dhfsfs.objects;
import com.usatiuk.dhfs.ProtoSerializer;
import com.usatiuk.dhfs.persistence.ChunkDataP;
import com.usatiuk.dhfs.persistence.JObjectKeyP;
import com.usatiuk.objects.JObjectKey;
import jakarta.inject.Singleton;
@Singleton
public class ChunkDataProtoSerializer implements ProtoSerializer<ChunkDataP, ChunkData> {
@Override
public ChunkData deserialize(ChunkDataP message) {
return new ChunkData(
JObjectKey.of(message.getKey().getName()),
message.getData()
);
}
@Override
public ChunkDataP serialize(ChunkData object) {
return ChunkDataP.newBuilder()
.setKey(JObjectKeyP.newBuilder().setName(object.key().value()).build())
.setData(object.data())
.build();
}
}

View File

@@ -9,6 +9,14 @@ import com.usatiuk.objects.JObjectKey;
import java.util.Collection;
import java.util.Set;
/**
* File is a data structure that represents a file in the file system
* @param key unique key
* @param mode file mode
* @param cTime creation time
* @param mTime modification time
* @param symlink true if the file is a symlink, false otherwise
*/
public record File(JObjectKey key, long mode, long cTime, long mTime,
boolean symlink
) implements JDataRemote, JMapHolder<JMapLongKey> {

View File

@@ -7,6 +7,11 @@ import org.apache.commons.lang3.tuple.Pair;
import java.util.List;
/**
* FileDto is a data transfer object that contains a file and its chunks.
* @param file the file
* @param chunks the list of chunks, each represented as a pair of a long and a JObjectKey
*/
public record FileDto(File file, List<Pair<Long, JObjectKey>> chunks) implements JDataRemoteDto {
@Override
public Class<? extends JDataRemote> objClass() {

View File

@@ -5,6 +5,9 @@ import com.usatiuk.dhfs.syncmap.DtoMapper;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
/**
* Maps a {@link File} object to a {@link FileDto} object and vice versa.
*/
@ApplicationScoped
public class FileDtoMapper implements DtoMapper<File, FileDto> {
@Inject

View File

@@ -10,11 +10,20 @@ import org.apache.commons.lang3.tuple.Pair;
import java.util.ArrayList;
import java.util.List;
/**
* Helper class for working with files.
*/
@ApplicationScoped
public class FileHelper {
@Inject
JMapHelper jMapHelper;
/**
* Get the chunks of a file.
* Transaction is expected to be already started.
* @param file the file to get chunks from
* @return a list of pairs of chunk offset and chunk key
*/
public List<Pair<Long, JObjectKey>> getChunks(File file) {
ArrayList<Pair<Long, JObjectKey>> chunks = new ArrayList<>();
try (var it = jMapHelper.getIterator(file)) {
@@ -26,6 +35,13 @@ public class FileHelper {
return List.copyOf(chunks);
}
/**
* Replace the chunks of a file.
* All previous chunks will be deleted.
* Transaction is expected to be already started.
* @param file the file to replace chunks in
* @param chunks the list of pairs of chunk offset and chunk key
*/
public void replaceChunks(File file, List<Pair<Long, JObjectKey>> chunks) {
jMapHelper.deleteAll(file);

View File

@@ -1,25 +0,0 @@
package com.usatiuk.dhfsfs.objects;
import com.usatiuk.dhfs.ProtoSerializer;
import com.usatiuk.dhfs.persistence.FileDtoP;
import com.usatiuk.utils.SerializationHelper;
import jakarta.inject.Singleton;
import java.io.IOException;
@Singleton
public class FileProtoSerializer implements ProtoSerializer<FileDtoP, FileDto> {
@Override
public FileDto deserialize(FileDtoP message) {
try (var is = message.getSerializedData().newInput()) {
return SerializationHelper.deserialize(is);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
@Override
public FileDtoP serialize(FileDto object) {
return FileDtoP.newBuilder().setSerializedData(SerializationHelper.serialize(object)).build();
}
}

View File

@@ -8,7 +8,6 @@ import com.usatiuk.dhfs.remoteobj.*;
import com.usatiuk.dhfsfs.service.DhfsFileService;
import com.usatiuk.kleppmanntree.AlreadyExistsException;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.transaction.LockingStrategy;
import com.usatiuk.objects.transaction.Transaction;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
@@ -24,6 +23,9 @@ import javax.annotation.Nullable;
import java.util.List;
import java.util.Objects;
/**
* Handles synchronization of file objects.
*/
@ApplicationScoped
public class FileSyncHandler implements ObjSyncHandler<File, FileDto> {
@Inject
@@ -42,14 +44,18 @@ public class FileSyncHandler implements ObjSyncHandler<File, FileDto> {
@Inject
DhfsFileService fileService;
private JKleppmannTreeManager.JKleppmannTree getTreeW() {
private JKleppmannTreeManager.JKleppmannTree getTree() {
return jKleppmannTreeManager.getTree(JObjectKey.of("fs")).orElseThrow();
}
private JKleppmannTreeManager.JKleppmannTree getTreeR() {
return jKleppmannTreeManager.getTree(JObjectKey.of("fs"), LockingStrategy.OPTIMISTIC).orElseThrow();
}
/**
* Resolve conflict between two file versions, update the file in storage and create a conflict file.
*
* @param from the peer that sent the update
* @param key the key of the file
* @param receivedChangelog the changelog of the received file
* @param receivedData the received file data
*/
private void resolveConflict(PeerId from, JObjectKey key, PMap<PeerId, Long> receivedChangelog,
@Nullable FileDto receivedData) {
var oursCurMeta = curTx.get(RemoteObjectMeta.class, key).orElse(null);
@@ -131,12 +137,12 @@ public class FileSyncHandler implements ObjSyncHandler<File, FileDto> {
do {
try {
getTreeW().move(parent.getRight(),
getTree().move(parent.getRight(),
new JKleppmannTreeNodeMetaFile(
parent.getLeft() + ".fconflict." + persistentPeerDataService.getSelfUuid() + "." + otherHostname.toString() + "." + i,
newFile.key()
),
getTreeW().getNewNodeId()
getTree().getNewNodeId()
);
} catch (AlreadyExistsException aex) {
i++;

View File

@@ -6,6 +6,10 @@ import com.usatiuk.objects.JObjectKey;
import java.util.Collection;
import java.util.List;
/**
* JKleppmannTreeNodeMetaDirectory is a record that represents a directory in the JKleppmann tree.
* @param name the name of the directory
*/
public record JKleppmannTreeNodeMetaDirectory(String name) implements JKleppmannTreeNodeMeta {
public JKleppmannTreeNodeMeta withName(String name) {
return new JKleppmannTreeNodeMetaDirectory(name);

View File

@@ -6,6 +6,11 @@ import com.usatiuk.objects.JObjectKey;
import java.util.Collection;
import java.util.List;
/**
* JKleppmannTreeNodeMetaFile is a record that represents a file in the JKleppmann tree.
* @param name the name of the file
* @param fileIno a reference to the `File` object
*/
public record JKleppmannTreeNodeMetaFile(String name, JObjectKey fileIno) implements JKleppmannTreeNodeMeta {
@Override
public JKleppmannTreeNodeMeta withName(String name) {

View File

@@ -2,47 +2,783 @@ package com.usatiuk.dhfsfs.service;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import com.usatiuk.dhfs.jkleppmanntree.JKleppmannTreeManager;
import com.usatiuk.dhfs.jkleppmanntree.structs.JKleppmannTreeNode;
import com.usatiuk.dhfs.jkleppmanntree.structs.JKleppmannTreeNodeHolder;
import com.usatiuk.dhfs.jkleppmanntree.structs.JKleppmannTreeNodeMeta;
import com.usatiuk.dhfs.jmap.JMapEntry;
import com.usatiuk.dhfs.jmap.JMapHelper;
import com.usatiuk.dhfs.jmap.JMapLongKey;
import com.usatiuk.dhfs.remoteobj.JDataRemote;
import com.usatiuk.dhfs.remoteobj.RemoteObjectMeta;
import com.usatiuk.dhfs.remoteobj.RemoteTransaction;
import com.usatiuk.dhfsfs.objects.ChunkData;
import com.usatiuk.dhfsfs.objects.File;
import com.usatiuk.dhfsfs.objects.JKleppmannTreeNodeMetaDirectory;
import com.usatiuk.dhfsfs.objects.JKleppmannTreeNodeMetaFile;
import com.usatiuk.objects.JData;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.iterators.IteratorStart;
import com.usatiuk.objects.transaction.Transaction;
import com.usatiuk.objects.transaction.TransactionManager;
import com.usatiuk.utils.StatusRuntimeExceptionNoStacktrace;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.quarkus.logging.Log;
import io.quarkus.runtime.StartupEvent;
import jakarta.annotation.Priority;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.event.Observes;
import jakarta.inject.Inject;
import org.apache.commons.lang3.tuple.Pair;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import java.util.Optional;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.StreamSupport;
public interface DhfsFileService {
Optional<JObjectKey> open(String name);
/**
* Actual filesystem implementation.
*/
@ApplicationScoped
public class DhfsFileService {
@ConfigProperty(name = "dhfs.files.target_chunk_alignment")
int targetChunkAlignment;
@ConfigProperty(name = "dhfs.files.target_chunk_size")
int targetChunkSize;
@ConfigProperty(name = "dhfs.files.max_chunk_size", defaultValue = "524288")
int maxChunkSize;
@ConfigProperty(name = "dhfs.files.use_hash_for_chunks")
boolean useHashForChunks;
@ConfigProperty(name = "dhfs.files.allow_recursive_delete")
boolean allowRecursiveDelete;
@ConfigProperty(name = "dhfs.objects.ref_verification")
boolean refVerification;
@ConfigProperty(name = "dhfs.objects.write_log")
boolean writeLogging;
Optional<JObjectKey> create(String name, long mode);
@Inject
Transaction curTx;
@Inject
RemoteTransaction remoteTx;
@Inject
TransactionManager jObjectTxManager;
@Inject
JKleppmannTreeManager jKleppmannTreeManager;
@Inject
JMapHelper jMapHelper;
Pair<String, JObjectKey> inoToParent(JObjectKey ino);
void mkdir(String name, long mode);
Optional<GetattrRes> getattr(JObjectKey name);
Boolean chmod(JObjectKey name, long mode);
void unlink(String name);
Boolean rename(String from, String to);
Boolean setTimes(JObjectKey fileUuid, long atimeMs, long mtimeMs);
Iterable<String> readDir(String name);
long size(JObjectKey fileUuid);
ByteString read(JObjectKey fileUuid, long offset, int length);
Long write(JObjectKey fileUuid, long offset, ByteString data);
default Long write(JObjectKey fileUuid, long offset, byte[] data) {
return write(fileUuid, offset, UnsafeByteOperations.unsafeWrap(data));
private JKleppmannTreeManager.JKleppmannTree getTree() {
return jKleppmannTreeManager.getTree(JObjectKey.of("fs"), () -> new JKleppmannTreeNodeMetaDirectory(""));
}
Boolean truncate(JObjectKey fileUuid, long length);
/**
* Create a new chunk with the given data and a new unique ID.
*
* @param bytes the data to store in the chunk
* @return the created chunk
*/
private ChunkData createChunk(ByteString bytes) {
var newChunk = new ChunkData(JObjectKey.of(UUID.randomUUID().toString()), bytes);
remoteTx.putDataNew(newChunk);
return newChunk;
}
String readlink(JObjectKey uuid);
void init(@Observes @Priority(500) StartupEvent event) {
Log.info("Initializing file service");
getTree();
}
ByteString readlinkBS(JObjectKey uuid);
private JKleppmannTreeNode getDirEntry(String name) {
var res = getTree().traverse(StreamSupport.stream(Path.of(name).spliterator(), false).map(p -> p.toString()).toList());
if (res == null) throw new StatusRuntimeExceptionNoStacktrace(Status.NOT_FOUND);
var ret = curTx.get(JKleppmannTreeNodeHolder.class, res).map(JKleppmannTreeNodeHolder::node).orElseThrow(() -> new StatusRuntimeException(Status.NOT_FOUND.withDescription("Tree node exists but not found as jObject: " + name)));
return ret;
}
JObjectKey symlink(String oldpath, String newpath);
private Optional<JKleppmannTreeNode> getDirEntryOpt(String name) {
var res = getTree().traverse(StreamSupport.stream(Path.of(name).spliterator(), false).map(p -> p.toString()).toList());
if (res == null) return Optional.empty();
var ret = curTx.get(JKleppmannTreeNodeHolder.class, res).map(JKleppmannTreeNodeHolder::node);
return ret;
}
/**
* Get the attributes of a file or directory.
*
* @param uuid the UUID of the file or directory
* @return the attributes of the file or directory
*/
public Optional<GetattrRes> getattr(JObjectKey uuid) {
return jObjectTxManager.executeTx(() -> {
var ref = curTx.get(JData.class, uuid).orElse(null);
if (ref == null) return Optional.empty();
GetattrRes ret;
if (ref instanceof RemoteObjectMeta r) {
var remote = remoteTx.getData(JDataRemote.class, uuid).orElse(null);
if (remote instanceof File f) {
ret = new GetattrRes(f.mTime(), f.cTime(), f.mode(), f.symlink() ? GetattrType.SYMLINK : GetattrType.FILE);
} else {
throw new StatusRuntimeException(Status.DATA_LOSS.withDescription("FsNode is not an FsNode: " + ref.key()));
}
} else if (ref instanceof JKleppmannTreeNodeHolder) {
ret = new GetattrRes(100, 100, 0700, GetattrType.DIRECTORY);
} else {
throw new StatusRuntimeException(Status.DATA_LOSS.withDescription("FsNode is not an FsNode: " + ref.key()));
}
return Optional.of(ret);
});
}
/**
* Try to resolve a path to a file or directory.
*
* @param name the path to resolve
* @return the key of the file or directory, or an empty optional if it does not exist
*/
public Optional<JObjectKey> open(String name) {
return jObjectTxManager.executeTx(() -> {
try {
var ret = getDirEntry(name);
return switch (ret.meta()) {
case JKleppmannTreeNodeMetaFile f -> Optional.of(f.fileIno());
case JKleppmannTreeNodeMetaDirectory f -> Optional.of(ret.key());
default -> Optional.empty();
};
} catch (StatusRuntimeException e) {
if (e.getStatus().getCode() == Status.Code.NOT_FOUND) {
return Optional.empty();
}
throw e;
}
});
}
private void ensureDir(JKleppmannTreeNode entry) {
if (!(entry.meta() instanceof JKleppmannTreeNodeMetaDirectory))
throw new StatusRuntimeExceptionNoStacktrace(Status.INVALID_ARGUMENT.withDescription("Not a directory: " + entry.key()));
}
/**
* Create a new file with the given name and mode.
*
* @param name the name of the file
* @param mode the mode of the file
* @return the key of the created file
*/
public Optional<JObjectKey> create(String name, long mode) {
return jObjectTxManager.executeTx(() -> {
Path path = Path.of(name);
var parent = getDirEntry(path.getParent().toString());
ensureDir(parent);
String fname = path.getFileName().toString();
var fuuid = UUID.randomUUID();
Log.debug("Creating file " + fuuid);
File f = new File(JObjectKey.of(fuuid.toString()), mode, System.currentTimeMillis(), System.currentTimeMillis(), false);
remoteTx.putData(f);
try {
getTree().move(parent.key(), new JKleppmannTreeNodeMetaFile(fname, f.key()), getTree().getNewNodeId());
} catch (Exception e) {
// fobj.getMeta().removeRef(newNodeId);
throw e;
}
return Optional.of(f.key());
});
}
/**
* Get the parent directory of a file or directory.
*
* @param ino the key of the file or directory
* @return the parent directory
*/
public Pair<String, JObjectKey> inoToParent(JObjectKey ino) {
return jObjectTxManager.executeTx(() -> {
// FIXME: Slow
return getTree().findParent(w -> {
if (w.meta() instanceof JKleppmannTreeNodeMetaFile f)
return f.fileIno().equals(ino);
return false;
});
});
}
/**
* Create a new directory with the given name and mode.
*
* @param name the name of the directory
* @param mode the mode of the directory
*/
public void mkdir(String name, long mode) {
jObjectTxManager.executeTx(() -> {
Path path = Path.of(name);
var parent = getDirEntry(path.getParent().toString());
ensureDir(parent);
String dname = path.getFileName().toString();
Log.debug("Creating directory " + name);
// TODO: No modes for directories yet
getTree().move(parent.key(), new JKleppmannTreeNodeMetaDirectory(dname), getTree().getNewNodeId());
});
}
/**
* Unlink a file or directory.
*
* @param name the name of the file or directory
* @throws DirectoryNotEmptyException if the directory is not empty and recursive delete is not allowed
*/
public void unlink(String name) {
jObjectTxManager.executeTx(() -> {
var node = getDirEntryOpt(name).orElse(null);
if (node == null)
throw new StatusRuntimeException(Status.NOT_FOUND.withDescription("File not found when trying to unlink: " + name));
if (node.meta() instanceof JKleppmannTreeNodeMetaDirectory f) {
if (!allowRecursiveDelete && !node.children().isEmpty())
throw new DirectoryNotEmptyException();
}
getTree().trash(node.meta(), node.key());
});
}
/**
* Rename a file or directory.
*
* @param from the old name
* @param to the new name
* @return true if the rename was successful, false otherwise
*/
public Boolean rename(String from, String to) {
return jObjectTxManager.executeTx(() -> {
var node = getDirEntry(from);
JKleppmannTreeNodeMeta meta = node.meta();
var toPath = Path.of(to);
var toDentry = getDirEntry(toPath.getParent().toString());
ensureDir(toDentry);
getTree().move(toDentry.key(), meta.withName(toPath.getFileName().toString()), node.key());
return true;
});
}
/**
* Change the mode of a file or directory.
*
* @param uuid the ID of the file or directory
* @param mode the new mode
* @return true if the mode was changed successfully, false otherwise
*/
public Boolean chmod(JObjectKey uuid, long mode) {
return jObjectTxManager.executeTx(() -> {
var dent = curTx.get(JData.class, uuid).orElseThrow(() -> new StatusRuntimeExceptionNoStacktrace(Status.NOT_FOUND));
if (dent instanceof JKleppmannTreeNodeHolder) {
return true;
} else if (dent instanceof RemoteObjectMeta) {
var remote = remoteTx.getData(JDataRemote.class, uuid).orElse(null);
if (remote instanceof File f) {
remoteTx.putData(f.withMode(mode).withCurrentMTime());
return true;
} else {
throw new IllegalArgumentException(uuid + " is not a file");
}
} else {
throw new IllegalArgumentException(uuid + " is not a file");
}
});
}
/**
* Read the contents of a directory.
*
* @param name the path of the directory
* @return an iterable of the names of the files in the directory
*/
public Iterable<String> readDir(String name) {
return jObjectTxManager.executeTx(() -> {
var found = getDirEntry(name);
if (!(found.meta() instanceof JKleppmannTreeNodeMetaDirectory md))
throw new StatusRuntimeException(Status.INVALID_ARGUMENT);
return found.children().keySet();
});
}
/**
* Read the contents of a file.
*
* @param fileUuid the ID of the file
* @param offset the offset to start reading from
* @param length the number of bytes to read
* @return the contents of the file as a ByteString
*/
public ByteString read(JObjectKey fileUuid, long offset, int length) {
return jObjectTxManager.executeTx(() -> {
if (length < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Length should be more than zero: " + length));
if (offset < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Offset should be more than zero: " + offset));
var file = remoteTx.getData(File.class, fileUuid).orElse(null);
if (file == null) {
Log.error("File not found when trying to read: " + fileUuid);
throw new StatusRuntimeException(Status.NOT_FOUND.withDescription("File not found when trying to read: " + fileUuid));
}
try (var it = jMapHelper.getIterator(file, IteratorStart.LE, JMapLongKey.of(offset))) {
if (!it.hasNext())
return ByteString.empty();
// if (it.peekNextKey().key() != offset) {
// Log.warnv("Read over the end of file: {0} {1} {2}, next chunk: {3}", fileUuid, offset, length, it.peekNextKey());
// return Optional.of(ByteString.empty());
// }
long curPos = offset;
ByteString buf = ByteString.empty();
var chunk = it.next();
while (curPos < offset + length) {
var chunkPos = chunk.getKey().key();
long offInChunk = curPos - chunkPos;
long toReadInChunk = (offset + length) - curPos;
var chunkBytes = readChunk(chunk.getValue().ref());
long readableLen = chunkBytes.size() - offInChunk;
var toReadReally = Math.min(readableLen, toReadInChunk);
if (toReadReally < 0) break;
buf = buf.concat(chunkBytes.substring((int) offInChunk, (int) (offInChunk + toReadReally)));
curPos += toReadReally;
if (readableLen > toReadInChunk)
break;
if (!it.hasNext()) break;
chunk = it.next();
}
return buf;
} catch (Exception e) {
Log.error("Error reading file: " + fileUuid, e);
throw new StatusRuntimeException(Status.INTERNAL.withDescription("Error reading file: " + fileUuid));
}
});
}
/**
* Get the size of a file.
*
* @param uuid the ID of the file
* @return the size of the file
*/
private ByteString readChunk(JObjectKey uuid) {
var chunkRead = remoteTx.getData(ChunkData.class, uuid).orElse(null);
if (chunkRead == null) {
Log.error("Chunk requested not found: " + uuid);
throw new StatusRuntimeException(Status.NOT_FOUND);
}
return chunkRead.data();
}
/**
* Get the size of a chunk.
*
* @param uuid the ID of the chunk
* @return the size of the chunk
*/
private int getChunkSize(JObjectKey uuid) {
return readChunk(uuid).size();
}
private long alignDown(long num, long n) {
return num & -(1L << n);
}
/**
* Write data to a file.
*
* @param fileUuid the ID of the file
* @param offset the offset to write to
* @param data the data to write
* @return the number of bytes written
*/
public Long write(JObjectKey fileUuid, long offset, ByteString data) {
return jObjectTxManager.executeTx(() -> {
if (offset < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Offset should be more than zero: " + offset));
var file = remoteTx.getData(File.class, fileUuid).orElse(null);
if (file == null) {
throw new StatusRuntimeException(Status.NOT_FOUND.withDescription("File not found when trying to write: " + fileUuid));
}
Map<Long, JObjectKey> removedChunks = new HashMap<>();
long realOffset = targetChunkAlignment >= 0 ? alignDown(offset, targetChunkAlignment) : offset;
long writeEnd = offset + data.size();
long start = realOffset;
long existingEnd = 0;
ByteString pendingPrefix = ByteString.empty();
ByteString pendingSuffix = ByteString.empty();
try (var it = jMapHelper.getIterator(file, IteratorStart.LE, JMapLongKey.of(realOffset))) {
while (it.hasNext()) {
var curEntry = it.next();
long curChunkStart = curEntry.getKey().key();
var curChunkId = curEntry.getValue().ref();
long curChunkEnd = it.hasNext() ? it.peekNextKey().key() : curChunkStart + getChunkSize(curChunkId);
existingEnd = curChunkEnd;
if (curChunkEnd <= realOffset) break;
removedChunks.put(curEntry.getKey().key(), curChunkId);
if (curChunkStart < offset) {
if (curChunkStart < start)
start = curChunkStart;
var readChunk = readChunk(curChunkId);
pendingPrefix = pendingPrefix.concat(readChunk.substring(0, Math.min(readChunk.size(), (int) (offset - curChunkStart))));
}
if (curChunkEnd > writeEnd) {
var readChunk = readChunk(curChunkId);
pendingSuffix = pendingSuffix.concat(readChunk.substring((int) (writeEnd - curChunkStart), readChunk.size()));
}
if (curChunkEnd >= writeEnd) break;
}
}
Map<Long, JObjectKey> newChunks = new HashMap<>();
if (existingEnd < offset) {
if (!pendingPrefix.isEmpty()) {
int diff = Math.toIntExact(offset - existingEnd);
pendingPrefix = pendingPrefix.concat(UnsafeByteOperations.unsafeWrap(ByteBuffer.allocateDirect(diff)));
} else {
fillZeros(existingEnd, offset, newChunks);
start = offset;
}
}
ByteString pendingWrites = pendingPrefix.concat(data).concat(pendingSuffix);
int combinedSize = pendingWrites.size();
{
int cur = 0;
while (cur < combinedSize) {
int end;
if (combinedSize - cur < maxChunkSize)
end = combinedSize;
else if (targetChunkAlignment < 0)
end = combinedSize;
else
end = Math.min(cur + targetChunkSize, combinedSize);
var thisChunk = pendingWrites.substring(cur, end);
ChunkData newChunkData = createChunk(thisChunk);
newChunks.put(start, newChunkData.key());
start += thisChunk.size();
cur = end;
}
}
for (var e : removedChunks.entrySet()) {
// Log.tracev("Removing chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.delete(file, JMapLongKey.of(e.getKey()));
}
for (var e : newChunks.entrySet()) {
// Log.tracev("Adding chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.put(file, JMapLongKey.of(e.getKey()), e.getValue());
}
remoteTx.putData(file.withCurrentMTime());
return (long) data.size();
});
}
/**
* Truncate a file to the given length.
*
* @param fileUuid the ID of the file
* @param length the new length of the file
* @return true if the truncate was successful, false otherwise
*/
public Boolean truncate(JObjectKey fileUuid, long length) {
return jObjectTxManager.executeTx(() -> {
if (length < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Length should be more than zero: " + length));
var file = remoteTx.getData(File.class, fileUuid).orElse(null);
if (file == null) {
Log.error("File not found when trying to write: " + fileUuid);
return false;
}
if (length == 0) {
jMapHelper.deleteAll(file);
remoteTx.putData(file);
return true;
}
var curSize = size(fileUuid);
if (curSize == length) return true;
NavigableMap<Long, JObjectKey> removedChunks = new TreeMap<>();
NavigableMap<Long, JObjectKey> newChunks = new TreeMap<>();
if (curSize < length) {
fillZeros(curSize, length, newChunks);
} else {
// Pair<JMapLongKey, JMapEntry<JMapLongKey>> first;
Pair<JMapLongKey, JMapEntry<JMapLongKey>> last;
try (var it = jMapHelper.getIterator(file, IteratorStart.LT, JMapLongKey.of(length))) {
last = it.hasNext() ? it.next() : null;
while (it.hasNext()) {
var next = it.next();
removedChunks.put(next.getKey().key(), next.getValue().ref());
}
}
removedChunks.put(last.getKey().key(), last.getValue().ref());
//
// NavigableMap<Long, JObjectKey> removedChunks = new TreeMap<>();
//
// long start = 0;
//
// try (var it = jMapHelper.getIterator(file, IteratorStart.LE, JMapLongKey.of(offset))) {
// first = it.hasNext() ? it.next() : null;
// boolean empty = last == null;
// if (first != null && getChunkSize(first.getValue().ref()) + first.getKey().key() <= offset) {
// first = null;
// last = null;
// start = offset;
// } else if (!empty) {
// assert first != null;
// removedChunks.put(first.getKey().key(), first.getValue().ref());
// while (it.hasNext() && it.peekNextKey() != last.getKey()) {
// var next = it.next();
// removedChunks.put(next.getKey().key(), next.getValue().ref());
// }
// removedChunks.put(last.getKey().key(), last.getValue().ref());
// }
// }
//
// var tail = chunksAll.lowerEntry(length);
// var afterTail = chunksAll.tailMap(tail.getKey(), false);
//
// removedChunks.put(tail.getKey(), tail.getValue());
// removedChunks.putAll(afterTail);
var tailBytes = readChunk(last.getValue().ref());
var newChunk = tailBytes.substring(0, (int) (length - last.getKey().key()));
ChunkData newChunkData = createChunk(newChunk);
newChunks.put(last.getKey().key(), newChunkData.key());
}
// file = file.withChunks(file.chunks().minusAll(removedChunks.keySet()).plusAll(newChunks)).withMTime(System.currentTimeMillis());
for (var e : removedChunks.entrySet()) {
// Log.tracev("Removing chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.delete(file, JMapLongKey.of(e.getKey()));
}
for (var e : newChunks.entrySet()) {
// Log.tracev("Adding chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.put(file, JMapLongKey.of(e.getKey()), e.getValue());
}
remoteTx.putData(file.withCurrentMTime());
return true;
});
}
/**
* Fill the given range with zeroes.
*
* @param fillStart the start of the range
* @param length the end of the range
* @param newChunks the map to store the new chunks in
*/
private void fillZeros(long fillStart, long length, Map<Long, JObjectKey> newChunks) {
long combinedSize = (length - fillStart);
long start = fillStart;
// Hack
HashMap<Long, ChunkData> zeroCache = new HashMap<>();
{
long cur = 0;
while (cur < combinedSize) {
long end;
if (targetChunkSize <= 0)
end = combinedSize;
else {
if ((combinedSize - cur) > (targetChunkSize * 1.5)) {
end = cur + targetChunkSize;
} else {
end = combinedSize;
}
}
if (!zeroCache.containsKey(end - cur))
zeroCache.put(end - cur, createChunk(UnsafeByteOperations.unsafeWrap(ByteBuffer.allocateDirect(Math.toIntExact(end - cur)))));
ChunkData newChunkData = zeroCache.get(end - cur);
newChunks.put(start, newChunkData.key());
start += newChunkData.data().size();
cur = end;
}
}
}
/**
* Read the contents of a symlink.
*
* @param uuid the ID of the symlink
* @return the contents of the symlink as a string
*/
public String readlink(JObjectKey uuid) {
return jObjectTxManager.executeTx(() -> {
return readlinkBS(uuid).toStringUtf8();
});
}
/**
* Read the contents of a symlink as a ByteString.
*
* @param uuid the ID of the symlink
* @return the contents of the symlink as a ByteString
*/
public ByteString readlinkBS(JObjectKey uuid) {
return jObjectTxManager.executeTx(() -> {
var fileOpt = remoteTx.getData(File.class, uuid).orElseThrow(() -> new StatusRuntimeException(Status.NOT_FOUND.withDescription("File not found when trying to readlink: " + uuid)));
return read(uuid, 0, Math.toIntExact(size(uuid)));
});
}
/**
* Create a symlink.
*
* @param oldpath the target of the symlink
* @param newpath the path of the symlink
* @return the key of the created symlink
*/
public JObjectKey symlink(String oldpath, String newpath) {
return jObjectTxManager.executeTx(() -> {
Path path = Path.of(newpath);
var parent = getDirEntry(path.getParent().toString());
ensureDir(parent);
String fname = path.getFileName().toString();
var fuuid = UUID.randomUUID();
Log.debug("Creating file " + fuuid);
ChunkData newChunkData = createChunk(UnsafeByteOperations.unsafeWrap(oldpath.getBytes(StandardCharsets.UTF_8)));
File f = new File(JObjectKey.of(fuuid.toString()), 0, System.currentTimeMillis(), System.currentTimeMillis(), true);
jMapHelper.put(f, JMapLongKey.of(0), newChunkData.key());
remoteTx.putData(f);
getTree().move(parent.key(), new JKleppmannTreeNodeMetaFile(fname, f.key()), getTree().getNewNodeId());
return f.key();
});
}
/**
* Set the access and modification times of a file.
*
* @param fileUuid the ID of the file
* @param atimeMs the access time in milliseconds
* @param mtimeMs the modification time in milliseconds
* @return true if the times were set successfully, false otherwise
*/
public Boolean setTimes(JObjectKey fileUuid, long atimeMs, long mtimeMs) {
return jObjectTxManager.executeTx(() -> {
var dent = curTx.get(JData.class, fileUuid).orElseThrow(() -> new StatusRuntimeExceptionNoStacktrace(Status.NOT_FOUND));
// FIXME:
if (dent instanceof JKleppmannTreeNodeHolder) {
return true;
} else if (dent instanceof RemoteObjectMeta) {
var remote = remoteTx.getData(JDataRemote.class, fileUuid).orElse(null);
if (remote instanceof File f) {
remoteTx.putData(f.withCTime(atimeMs).withMTime(mtimeMs));
return true;
} else {
throw new IllegalArgumentException(fileUuid + " is not a file");
}
} else {
throw new IllegalArgumentException(fileUuid + " is not a file");
}
});
}
/**
* Get the size of a file.
*
* @param fileUuid the ID of the file
* @return the size of the file
*/
public long size(JObjectKey fileUuid) {
return jObjectTxManager.executeTx(() -> {
long realSize = 0;
var file = remoteTx.getData(File.class, fileUuid)
.orElseThrow(() -> new StatusRuntimeException(Status.NOT_FOUND));
Pair<JMapLongKey, JMapEntry<JMapLongKey>> last;
try (var it = jMapHelper.getIterator(file, IteratorStart.LT, JMapLongKey.max())) {
last = it.hasNext() ? it.next() : null;
}
if (last != null) {
realSize = last.getKey().key() + getChunkSize(last.getValue().ref());
}
return realSize;
});
}
/**
* Write data to a file.
*
* @param fileUuid the ID of the file
* @param offset the offset to write to
* @param data the data to write
* @return the number of bytes written
*/
public Long write(JObjectKey fileUuid, long offset, byte[] data) {
return write(fileUuid, offset, UnsafeByteOperations.unsafeWrap(data));
}
}

View File

@@ -1,667 +0,0 @@
package com.usatiuk.dhfsfs.service;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import com.usatiuk.dhfs.jkleppmanntree.JKleppmannTreeManager;
import com.usatiuk.dhfs.jkleppmanntree.structs.JKleppmannTreeNode;
import com.usatiuk.dhfs.jkleppmanntree.structs.JKleppmannTreeNodeHolder;
import com.usatiuk.dhfs.jkleppmanntree.structs.JKleppmannTreeNodeMeta;
import com.usatiuk.dhfs.jmap.JMapEntry;
import com.usatiuk.dhfs.jmap.JMapHelper;
import com.usatiuk.dhfs.jmap.JMapLongKey;
import com.usatiuk.dhfs.remoteobj.JDataRemote;
import com.usatiuk.dhfs.remoteobj.RemoteObjectMeta;
import com.usatiuk.dhfs.remoteobj.RemoteTransaction;
import com.usatiuk.dhfsfs.objects.ChunkData;
import com.usatiuk.dhfsfs.objects.File;
import com.usatiuk.dhfsfs.objects.JKleppmannTreeNodeMetaDirectory;
import com.usatiuk.dhfsfs.objects.JKleppmannTreeNodeMetaFile;
import com.usatiuk.objects.JData;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.iterators.IteratorStart;
import com.usatiuk.objects.transaction.LockingStrategy;
import com.usatiuk.objects.transaction.Transaction;
import com.usatiuk.objects.transaction.TransactionManager;
import com.usatiuk.utils.StatusRuntimeExceptionNoStacktrace;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.quarkus.logging.Log;
import io.quarkus.runtime.StartupEvent;
import jakarta.annotation.Priority;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.event.Observes;
import jakarta.inject.Inject;
import org.apache.commons.lang3.tuple.Pair;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.StreamSupport;
@ApplicationScoped
public class DhfsFileServiceImpl implements DhfsFileService {
@Inject
Transaction curTx;
@Inject
RemoteTransaction remoteTx;
@Inject
TransactionManager jObjectTxManager;
@ConfigProperty(name = "dhfs.files.target_chunk_alignment")
int targetChunkAlignment;
@ConfigProperty(name = "dhfs.files.target_chunk_size")
int targetChunkSize;
@ConfigProperty(name = "dhfs.files.use_hash_for_chunks")
boolean useHashForChunks;
@ConfigProperty(name = "dhfs.files.allow_recursive_delete")
boolean allowRecursiveDelete;
@ConfigProperty(name = "dhfs.objects.ref_verification")
boolean refVerification;
@ConfigProperty(name = "dhfs.objects.write_log")
boolean writeLogging;
@Inject
JKleppmannTreeManager jKleppmannTreeManager;
@Inject
JMapHelper jMapHelper;
private JKleppmannTreeManager.JKleppmannTree getTreeW() {
return jKleppmannTreeManager.getTree(JObjectKey.of("fs"), () -> new JKleppmannTreeNodeMetaDirectory(""));
}
private JKleppmannTreeManager.JKleppmannTree getTreeR() {
return jKleppmannTreeManager.getTree(JObjectKey.of("fs"), LockingStrategy.OPTIMISTIC, () -> new JKleppmannTreeNodeMetaDirectory(""));
}
private ChunkData createChunk(ByteString bytes) {
var newChunk = new ChunkData(JObjectKey.of(UUID.randomUUID().toString()), bytes);
remoteTx.putData(newChunk);
return newChunk;
}
void init(@Observes @Priority(500) StartupEvent event) {
Log.info("Initializing file service");
getTreeW();
}
private JKleppmannTreeNode getDirEntryW(String name) {
var res = getTreeW().traverse(StreamSupport.stream(Path.of(name).spliterator(), false).map(p -> p.toString()).toList());
if (res == null) throw new StatusRuntimeExceptionNoStacktrace(Status.NOT_FOUND);
var ret = curTx.get(JKleppmannTreeNodeHolder.class, res).map(JKleppmannTreeNodeHolder::node).orElseThrow(() -> new StatusRuntimeException(Status.NOT_FOUND.withDescription("Tree node exists but not found as jObject: " + name)));
return ret;
}
private JKleppmannTreeNode getDirEntryR(String name) {
var res = getTreeR().traverse(StreamSupport.stream(Path.of(name).spliterator(), false).map(p -> p.toString()).toList());
if (res == null) throw new StatusRuntimeExceptionNoStacktrace(Status.NOT_FOUND);
var ret = curTx.get(JKleppmannTreeNodeHolder.class, res).map(JKleppmannTreeNodeHolder::node).orElseThrow(() -> new StatusRuntimeException(Status.NOT_FOUND.withDescription("Tree node exists but not found as jObject: " + name)));
return ret;
}
private Optional<JKleppmannTreeNode> getDirEntryOpt(String name) {
var res = getTreeW().traverse(StreamSupport.stream(Path.of(name).spliterator(), false).map(p -> p.toString()).toList());
if (res == null) return Optional.empty();
var ret = curTx.get(JKleppmannTreeNodeHolder.class, res).map(JKleppmannTreeNodeHolder::node);
return ret;
}
@Override
public Optional<GetattrRes> getattr(JObjectKey uuid) {
return jObjectTxManager.executeTx(() -> {
var ref = curTx.get(JData.class, uuid).orElse(null);
if (ref == null) return Optional.empty();
GetattrRes ret;
if (ref instanceof RemoteObjectMeta r) {
var remote = remoteTx.getData(JDataRemote.class, uuid).orElse(null);
if (remote instanceof File f) {
ret = new GetattrRes(f.mTime(), f.cTime(), f.mode(), f.symlink() ? GetattrType.SYMLINK : GetattrType.FILE);
} else {
throw new StatusRuntimeException(Status.DATA_LOSS.withDescription("FsNode is not an FsNode: " + ref.key()));
}
} else if (ref instanceof JKleppmannTreeNodeHolder) {
ret = new GetattrRes(100, 100, 0700, GetattrType.DIRECTORY);
} else {
throw new StatusRuntimeException(Status.DATA_LOSS.withDescription("FsNode is not an FsNode: " + ref.key()));
}
return Optional.of(ret);
});
}
@Override
public Optional<JObjectKey> open(String name) {
return jObjectTxManager.executeTx(() -> {
try {
var ret = getDirEntryR(name);
return switch (ret.meta()) {
case JKleppmannTreeNodeMetaFile f -> Optional.of(f.fileIno());
case JKleppmannTreeNodeMetaDirectory f -> Optional.of(ret.key());
default -> Optional.empty();
};
} catch (StatusRuntimeException e) {
if (e.getStatus().getCode() == Status.Code.NOT_FOUND) {
return Optional.empty();
}
throw e;
}
});
}
private void ensureDir(JKleppmannTreeNode entry) {
if (!(entry.meta() instanceof JKleppmannTreeNodeMetaDirectory))
throw new StatusRuntimeExceptionNoStacktrace(Status.INVALID_ARGUMENT.withDescription("Not a directory: " + entry.key()));
}
@Override
public Optional<JObjectKey> create(String name, long mode) {
return jObjectTxManager.executeTx(() -> {
Path path = Path.of(name);
var parent = getDirEntryW(path.getParent().toString());
ensureDir(parent);
String fname = path.getFileName().toString();
var fuuid = UUID.randomUUID();
Log.debug("Creating file " + fuuid);
File f = new File(JObjectKey.of(fuuid.toString()), mode, System.currentTimeMillis(), System.currentTimeMillis(), false);
remoteTx.putData(f);
try {
getTreeW().move(parent.key(), new JKleppmannTreeNodeMetaFile(fname, f.key()), getTreeW().getNewNodeId());
} catch (Exception e) {
// fobj.getMeta().removeRef(newNodeId);
throw e;
}
return Optional.of(f.key());
});
}
//FIXME: Slow..
@Override
public Pair<String, JObjectKey> inoToParent(JObjectKey ino) {
return jObjectTxManager.executeTx(() -> {
return getTreeW().findParent(w -> {
if (w.meta() instanceof JKleppmannTreeNodeMetaFile f)
return f.fileIno().equals(ino);
return false;
});
});
}
@Override
public void mkdir(String name, long mode) {
jObjectTxManager.executeTx(() -> {
Path path = Path.of(name);
var parent = getDirEntryW(path.getParent().toString());
ensureDir(parent);
String dname = path.getFileName().toString();
Log.debug("Creating directory " + name);
getTreeW().move(parent.key(), new JKleppmannTreeNodeMetaDirectory(dname), getTreeW().getNewNodeId());
});
}
@Override
public void unlink(String name) {
jObjectTxManager.executeTx(() -> {
var node = getDirEntryOpt(name).orElse(null);
if (node.meta() instanceof JKleppmannTreeNodeMetaDirectory f) {
if (!allowRecursiveDelete && !node.children().isEmpty())
throw new DirectoryNotEmptyException();
}
getTreeW().trash(node.meta(), node.key());
});
}
@Override
public Boolean rename(String from, String to) {
return jObjectTxManager.executeTx(() -> {
var node = getDirEntryW(from);
JKleppmannTreeNodeMeta meta = node.meta();
var toPath = Path.of(to);
var toDentry = getDirEntryW(toPath.getParent().toString());
ensureDir(toDentry);
getTreeW().move(toDentry.key(), meta.withName(toPath.getFileName().toString()), node.key());
return true;
});
}
@Override
public Boolean chmod(JObjectKey uuid, long mode) {
return jObjectTxManager.executeTx(() -> {
var dent = curTx.get(JData.class, uuid).orElseThrow(() -> new StatusRuntimeExceptionNoStacktrace(Status.NOT_FOUND));
if (dent instanceof JKleppmannTreeNodeHolder) {
return true;
} else if (dent instanceof RemoteObjectMeta) {
var remote = remoteTx.getData(JDataRemote.class, uuid).orElse(null);
if (remote instanceof File f) {
remoteTx.putData(f.withMode(mode).withCurrentMTime());
return true;
} else {
throw new IllegalArgumentException(uuid + " is not a file");
}
} else {
throw new IllegalArgumentException(uuid + " is not a file");
}
});
}
@Override
public Iterable<String> readDir(String name) {
return jObjectTxManager.executeTx(() -> {
var found = getDirEntryW(name);
if (!(found.meta() instanceof JKleppmannTreeNodeMetaDirectory md))
throw new StatusRuntimeException(Status.INVALID_ARGUMENT);
return found.children().keySet();
});
}
@Override
public ByteString read(JObjectKey fileUuid, long offset, int length) {
return jObjectTxManager.executeTx(() -> {
if (length < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Length should be more than zero: " + length));
if (offset < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Offset should be more than zero: " + offset));
var file = remoteTx.getData(File.class, fileUuid).orElse(null);
if (file == null) {
Log.error("File not found when trying to read: " + fileUuid);
throw new StatusRuntimeException(Status.NOT_FOUND.withDescription("File not found when trying to read: " + fileUuid));
}
try (var it = jMapHelper.getIterator(file, IteratorStart.LE, JMapLongKey.of(offset))) {
if (!it.hasNext())
return ByteString.empty();
// if (it.peekNextKey().key() != offset) {
// Log.warnv("Read over the end of file: {0} {1} {2}, next chunk: {3}", fileUuid, offset, length, it.peekNextKey());
// return Optional.of(ByteString.empty());
// }
long curPos = offset;
ByteString buf = ByteString.empty();
var chunk = it.next();
while (curPos < offset + length) {
var chunkPos = chunk.getKey().key();
long offInChunk = curPos - chunkPos;
long toReadInChunk = (offset + length) - curPos;
var chunkBytes = readChunk(chunk.getValue().ref());
long readableLen = chunkBytes.size() - offInChunk;
var toReadReally = Math.min(readableLen, toReadInChunk);
if (toReadReally < 0) break;
buf = buf.concat(chunkBytes.substring((int) offInChunk, (int) (offInChunk + toReadReally)));
curPos += toReadReally;
if (readableLen > toReadInChunk)
break;
if (!it.hasNext()) break;
chunk = it.next();
}
return buf;
} catch (Exception e) {
Log.error("Error reading file: " + fileUuid, e);
throw new StatusRuntimeException(Status.INTERNAL.withDescription("Error reading file: " + fileUuid));
}
});
}
private ByteString readChunk(JObjectKey uuid) {
var chunkRead = remoteTx.getData(ChunkData.class, uuid).orElse(null);
if (chunkRead == null) {
Log.error("Chunk requested not found: " + uuid);
throw new StatusRuntimeException(Status.NOT_FOUND);
}
return chunkRead.data();
}
private int getChunkSize(JObjectKey uuid) {
return readChunk(uuid).size();
}
private long alignDown(long num, long n) {
return num & -(1L << n);
}
@Override
public Long write(JObjectKey fileUuid, long offset, ByteString data) {
return jObjectTxManager.executeTx(() -> {
if (offset < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Offset should be more than zero: " + offset));
var file = remoteTx.getData(File.class, fileUuid, LockingStrategy.WRITE).orElse(null);
if (file == null) {
Log.error("File not found when trying to write: " + fileUuid);
return -1L;
}
if (writeLogging) {
Log.info("Writing to file: " + file.key() + " size=" + size(fileUuid) + " "
+ offset + " " + data.size());
}
NavigableMap<Long, JObjectKey> removedChunks = new TreeMap<>();
long realOffset = targetChunkAlignment >= 0 ? alignDown(offset, targetChunkAlignment) : offset;
long writeEnd = offset + data.size();
long start = realOffset;
long existingEnd = 0;
ByteString pendingPrefix = ByteString.empty();
ByteString pendingSuffix = ByteString.empty();
try (var it = jMapHelper.getIterator(file, IteratorStart.LE, JMapLongKey.of(realOffset))) {
while (it.hasNext()) {
var curEntry = it.next();
long curChunkStart = curEntry.getKey().key();
var curChunkId = curEntry.getValue().ref();
long curChunkEnd = it.hasNext() ? it.peekNextKey().key() : curChunkStart + getChunkSize(curChunkId);
existingEnd = curChunkEnd;
if (curChunkEnd <= realOffset) break;
removedChunks.put(curEntry.getKey().key(), curChunkId);
if (curChunkStart < offset) {
if (curChunkStart < start)
start = curChunkStart;
var readChunk = readChunk(curChunkId);
pendingPrefix = pendingPrefix.concat(readChunk.substring(0, Math.min(readChunk.size(), (int) (offset - curChunkStart))));
}
if (curChunkEnd > writeEnd) {
var readChunk = readChunk(curChunkId);
pendingSuffix = pendingSuffix.concat(readChunk.substring((int) (writeEnd - curChunkStart), readChunk.size()));
}
if (curChunkEnd >= writeEnd) break;
}
}
NavigableMap<Long, JObjectKey> newChunks = new TreeMap<>();
if (existingEnd < offset) {
if (!pendingPrefix.isEmpty()) {
int diff = Math.toIntExact(offset - existingEnd);
pendingPrefix = pendingPrefix.concat(UnsafeByteOperations.unsafeWrap(ByteBuffer.allocateDirect(diff)));
} else {
fillZeros(existingEnd, offset, newChunks);
start = offset;
}
}
ByteString pendingWrites = pendingPrefix.concat(data).concat(pendingSuffix);
int combinedSize = pendingWrites.size();
{
int targetChunkSize = 1 << targetChunkAlignment;
int cur = 0;
while (cur < combinedSize) {
int end;
if (targetChunkAlignment < 0)
end = combinedSize;
else
end = Math.min(cur + targetChunkSize, combinedSize);
var thisChunk = pendingWrites.substring(cur, end);
ChunkData newChunkData = createChunk(thisChunk);
newChunks.put(start, newChunkData.key());
start += thisChunk.size();
cur = end;
}
}
for (var e : removedChunks.entrySet()) {
// Log.tracev("Removing chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.delete(file, JMapLongKey.of(e.getKey()));
}
for (var e : newChunks.entrySet()) {
// Log.tracev("Adding chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.put(file, JMapLongKey.of(e.getKey()), e.getValue());
}
remoteTx.putData(file.withCurrentMTime());
return (long) data.size();
});
}
@Override
public Boolean truncate(JObjectKey fileUuid, long length) {
return jObjectTxManager.executeTx(() -> {
if (length < 0)
throw new StatusRuntimeException(Status.INVALID_ARGUMENT.withDescription("Length should be more than zero: " + length));
var file = remoteTx.getData(File.class, fileUuid).orElse(null);
if (file == null) {
Log.error("File not found when trying to write: " + fileUuid);
return false;
}
if (length == 0) {
jMapHelper.deleteAll(file);
remoteTx.putData(file);
return true;
}
var curSize = size(fileUuid);
if (curSize == length) return true;
NavigableMap<Long, JObjectKey> removedChunks = new TreeMap<>();
NavigableMap<Long, JObjectKey> newChunks = new TreeMap<>();
if (curSize < length) {
fillZeros(curSize, length, newChunks);
} else {
// Pair<JMapLongKey, JMapEntry<JMapLongKey>> first;
Pair<JMapLongKey, JMapEntry<JMapLongKey>> last;
try (var it = jMapHelper.getIterator(file, IteratorStart.LT, JMapLongKey.of(length))) {
last = it.hasNext() ? it.next() : null;
while (it.hasNext()) {
var next = it.next();
removedChunks.put(next.getKey().key(), next.getValue().ref());
}
}
removedChunks.put(last.getKey().key(), last.getValue().ref());
//
// NavigableMap<Long, JObjectKey> removedChunks = new TreeMap<>();
//
// long start = 0;
//
// try (var it = jMapHelper.getIterator(file, IteratorStart.LE, JMapLongKey.of(offset))) {
// first = it.hasNext() ? it.next() : null;
// boolean empty = last == null;
// if (first != null && getChunkSize(first.getValue().ref()) + first.getKey().key() <= offset) {
// first = null;
// last = null;
// start = offset;
// } else if (!empty) {
// assert first != null;
// removedChunks.put(first.getKey().key(), first.getValue().ref());
// while (it.hasNext() && it.peekNextKey() != last.getKey()) {
// var next = it.next();
// removedChunks.put(next.getKey().key(), next.getValue().ref());
// }
// removedChunks.put(last.getKey().key(), last.getValue().ref());
// }
// }
//
// var tail = chunksAll.lowerEntry(length);
// var afterTail = chunksAll.tailMap(tail.getKey(), false);
//
// removedChunks.put(tail.getKey(), tail.getValue());
// removedChunks.putAll(afterTail);
var tailBytes = readChunk(last.getValue().ref());
var newChunk = tailBytes.substring(0, (int) (length - last.getKey().key()));
ChunkData newChunkData = createChunk(newChunk);
newChunks.put(last.getKey().key(), newChunkData.key());
}
// file = file.withChunks(file.chunks().minusAll(removedChunks.keySet()).plusAll(newChunks)).withMTime(System.currentTimeMillis());
for (var e : removedChunks.entrySet()) {
// Log.tracev("Removing chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.delete(file, JMapLongKey.of(e.getKey()));
}
for (var e : newChunks.entrySet()) {
// Log.tracev("Adding chunk {0}-{1}", e.getKey(), e.getValue());
jMapHelper.put(file, JMapLongKey.of(e.getKey()), e.getValue());
}
remoteTx.putData(file.withCurrentMTime());
return true;
});
}
private void fillZeros(long fillStart, long length, NavigableMap<Long, JObjectKey> newChunks) {
long combinedSize = (length - fillStart);
long start = fillStart;
// Hack
HashMap<Long, ChunkData> zeroCache = new HashMap<>();
{
long cur = 0;
while (cur < combinedSize) {
long end;
if (targetChunkSize <= 0)
end = combinedSize;
else {
if ((combinedSize - cur) > (targetChunkSize * 1.5)) {
end = cur + targetChunkSize;
} else {
end = combinedSize;
}
}
if (!zeroCache.containsKey(end - cur))
zeroCache.put(end - cur, createChunk(UnsafeByteOperations.unsafeWrap(ByteBuffer.allocateDirect(Math.toIntExact(end - cur)))));
ChunkData newChunkData = zeroCache.get(end - cur);
newChunks.put(start, newChunkData.key());
start += newChunkData.data().size();
cur = end;
}
}
}
@Override
public String readlink(JObjectKey uuid) {
return jObjectTxManager.executeTx(() -> {
return readlinkBS(uuid).toStringUtf8();
});
}
@Override
public ByteString readlinkBS(JObjectKey uuid) {
return jObjectTxManager.executeTx(() -> {
var fileOpt = remoteTx.getData(File.class, uuid).orElseThrow(() -> new StatusRuntimeException(Status.NOT_FOUND.withDescription("File not found when trying to readlink: " + uuid)));
return read(uuid, 0, Math.toIntExact(size(uuid)));
});
}
@Override
public JObjectKey symlink(String oldpath, String newpath) {
return jObjectTxManager.executeTx(() -> {
Path path = Path.of(newpath);
var parent = getDirEntryW(path.getParent().toString());
ensureDir(parent);
String fname = path.getFileName().toString();
var fuuid = UUID.randomUUID();
Log.debug("Creating file " + fuuid);
ChunkData newChunkData = createChunk(UnsafeByteOperations.unsafeWrap(oldpath.getBytes(StandardCharsets.UTF_8)));
File f = new File(JObjectKey.of(fuuid.toString()), 0, System.currentTimeMillis(), System.currentTimeMillis(), true);
jMapHelper.put(f, JMapLongKey.of(0), newChunkData.key());
remoteTx.putData(f);
getTreeW().move(parent.key(), new JKleppmannTreeNodeMetaFile(fname, f.key()), getTreeW().getNewNodeId());
return f.key();
});
}
@Override
public Boolean setTimes(JObjectKey fileUuid, long atimeMs, long mtimeMs) {
return jObjectTxManager.executeTx(() -> {
var dent = curTx.get(JData.class, fileUuid).orElseThrow(() -> new StatusRuntimeExceptionNoStacktrace(Status.NOT_FOUND));
// FIXME:
if (dent instanceof JKleppmannTreeNodeHolder) {
return true;
} else if (dent instanceof RemoteObjectMeta) {
var remote = remoteTx.getData(JDataRemote.class, fileUuid).orElse(null);
if (remote instanceof File f) {
remoteTx.putData(f.withCTime(atimeMs).withMTime(mtimeMs));
return true;
} else {
throw new IllegalArgumentException(fileUuid + " is not a file");
}
} else {
throw new IllegalArgumentException(fileUuid + " is not a file");
}
});
}
@Override
public long size(JObjectKey fileUuid) {
return jObjectTxManager.executeTx(() -> {
long realSize = 0;
var file = remoteTx.getData(File.class, fileUuid)
.orElseThrow(() -> new StatusRuntimeException(Status.NOT_FOUND));
Pair<JMapLongKey, JMapEntry<JMapLongKey>> last;
try (var it = jMapHelper.getIterator(file, IteratorStart.LT, JMapLongKey.max())) {
last = it.hasNext() ? it.next() : null;
}
if (last != null) {
realSize = last.getKey().key() + getChunkSize(last.getValue().ref());
}
return realSize;
});
}
}

View File

@@ -1,5 +1,10 @@
package com.usatiuk.dhfsfs.service;
/**
* DirectoryNotEmptyException is thrown when a directory is not empty.
* This exception is used to indicate that a directory cannot be deleted
* because it contains files or subdirectories.
*/
public class DirectoryNotEmptyException extends RuntimeException {
@Override
public synchronized Throwable fillInStackTrace() {

View File

@@ -1,4 +1,11 @@
package com.usatiuk.dhfsfs.service;
public record GetattrRes(long mtime, long ctime, long mode, GetattrType type) {
/**
* GetattrRes is a record that represents the result of a getattr operation.
* @param mtime File modification time
* @param ctime File creation time
* @param mode File mode
* @param type File type
*/
public record GetattrRes(long mtime, long ctime, long mode, GetattrType type) {
}

View File

@@ -30,7 +30,7 @@ public class TestDataCleaner {
purgeDirectory(Path.of(tempDirectory).toFile());
}
void purgeDirectory(File dir) {
public void purgeDirectory(File dir) {
for (File file : Objects.requireNonNull(dir.listFiles())) {
if (file.isDirectory())
purgeDirectory(file);

View File

@@ -139,16 +139,13 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-failsafe-plugin</artifactId>
<configuration>
<forkCount>1C</forkCount>
<reuseForks>false</reuseForks>
<parallel>classes</parallel>
<systemPropertyVariables>
<junit.jupiter.execution.parallel.enabled>
true
false
</junit.jupiter.execution.parallel.enabled>
<junit.jupiter.execution.parallel.mode.default>
concurrent
</junit.jupiter.execution.parallel.mode.default>
<junit.jupiter.execution.parallel.config.dynamic.factor>
0.5
</junit.jupiter.execution.parallel.config.dynamic.factor>
<junit.platform.output.capture.stdout>true</junit.platform.output.capture.stdout>
<junit.platform.output.capture.stderr>true</junit.platform.output.capture.stderr>
</systemPropertyVariables>

View File

@@ -1,12 +1,15 @@
package com.usatiuk.dhfsfuse;
import com.google.protobuf.UnsafeByteOperations;
import com.kenai.jffi.MemoryIO;
import com.sun.security.auth.module.UnixSystem;
import com.usatiuk.dhfsfs.service.DhfsFileService;
import com.usatiuk.dhfsfs.service.DirectoryNotEmptyException;
import com.usatiuk.dhfsfs.service.GetattrRes;
import com.usatiuk.kleppmanntree.AlreadyExistsException;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.utils.UninitializedByteBuffer;
import com.usatiuk.utils.UnsafeAccessor;
import io.grpc.Status;
import io.grpc.StatusRuntimeException;
import io.quarkus.logging.Log;
@@ -17,15 +20,15 @@ import jakarta.enterprise.context.ApplicationScoped;
import jakarta.enterprise.event.Observes;
import jakarta.inject.Inject;
import jnr.ffi.Pointer;
import jnr.ffi.Runtime;
import jnr.ffi.Struct;
import jnr.ffi.types.off_t;
import org.apache.commons.lang3.SystemUtils;
import org.eclipse.microprofile.config.inject.ConfigProperty;
import ru.serce.jnrfuse.ErrorCodes;
import ru.serce.jnrfuse.FuseFillDir;
import ru.serce.jnrfuse.FuseStubFS;
import ru.serce.jnrfuse.struct.FileStat;
import ru.serce.jnrfuse.struct.FuseFileInfo;
import ru.serce.jnrfuse.struct.Statvfs;
import ru.serce.jnrfuse.struct.Timespec;
import ru.serce.jnrfuse.struct.*;
import java.nio.ByteBuffer;
import java.nio.file.Paths;
@@ -36,6 +39,9 @@ import java.util.concurrent.atomic.AtomicLong;
import static jnr.posix.FileStat.*;
/**
* FUSE file system implementation.
*/
@ApplicationScoped
public class DhfsFuse extends FuseStubFS {
private static final int blksize = 1048576;
@@ -51,10 +57,13 @@ public class DhfsFuse extends FuseStubFS {
@ConfigProperty(name = "dhfs.files.target_chunk_size")
int targetChunkSize;
@Inject
JnrPtrByteOutputAccessors jnrPtrByteOutputAccessors;
@Inject
DhfsFileService fileService;
/**
* Allocate a handle for the given key.
* @param key the key to allocate a handle for
* @return the allocated handle, not 0
*/
private long allocateHandle(JObjectKey key) {
while (true) {
var newFh = _fh.getAndIncrement();
@@ -65,8 +74,14 @@ public class DhfsFuse extends FuseStubFS {
}
}
/**
* Get the key from the handle.
* @param handle the handle to get the key from
* @return the key, or null if not found
*/
private JObjectKey getFromHandle(long handle) {
assert handle != 0;
if(handle == 0)
throw new IllegalStateException("Handle is 0");
return _openHandles.get(handle);
}
@@ -231,7 +246,7 @@ public class DhfsFuse extends FuseStubFS {
var fileKey = getFromHandle(fi.fh.get());
var read = fileService.read(fileKey, offset, (int) size);
if (read.isEmpty()) return 0;
UnsafeByteOperations.unsafeWriteTo(read, new JnrPtrByteOutput(jnrPtrByteOutputAccessors, buf, size));
UnsafeByteOperations.unsafeWriteTo(read, new JnrPtrByteOutput(buf, size));
return read.size();
} catch (Throwable e) {
Log.error("When reading " + path, e);
@@ -241,24 +256,22 @@ public class DhfsFuse extends FuseStubFS {
@Override
public int write(String path, Pointer buf, long size, long offset, FuseFileInfo fi) {
var buffer = UninitializedByteBuffer.allocate((int) size);
UnsafeAccessor.UNSAFE.copyMemory(
buf.address(),
UnsafeAccessor.NIO.getBufferAddress(buffer),
size
);
return write(path, buffer, offset, fi);
}
public int write(String path, ByteBuffer buffer, long offset, FuseFileInfo fi) {
if (offset < 0) return -ErrorCodes.EINVAL();
try {
var fileKey = getFromHandle(fi.fh.get());
var buffer = ByteBuffer.allocateDirect((int) size);
if (buffer.isDirect()) {
jnrPtrByteOutputAccessors.getUnsafe().copyMemory(
buf.address(),
jnrPtrByteOutputAccessors.getNioAccess().getBufferAddress(buffer),
size
);
} else {
buf.get(0, buffer.array(), 0, (int) size);
}
var written = fileService.write(fileKey, offset, UnsafeByteOperations.unsafeWrap(buffer));
return written.intValue();
} catch (Throwable e) {
} catch (Exception e) {
Log.error("When writing " + path, e);
return -ErrorCodes.EIO();
}
@@ -394,7 +407,7 @@ public class DhfsFuse extends FuseStubFS {
var file = fileOpt.get();
var read = fileService.readlinkBS(fileOpt.get());
if (read.isEmpty()) return 0;
UnsafeByteOperations.unsafeWriteTo(read, new JnrPtrByteOutput(jnrPtrByteOutputAccessors, buf, size));
UnsafeByteOperations.unsafeWriteTo(read, new JnrPtrByteOutput(buf, size));
buf.putByte(Math.min(size - 1, read.size()), (byte) 0);
return 0;
} catch (Throwable e) {
@@ -426,4 +439,29 @@ public class DhfsFuse extends FuseStubFS {
return -ErrorCodes.EIO();
}
}
@Override
public int write_buf(String path, FuseBufvec buf, @off_t long off, FuseFileInfo fi) {
int size = (int) libFuse.fuse_buf_size(buf);
FuseBufvec tmpVec = new FuseBufvec(Runtime.getSystemRuntime());
long tmpVecAddr = MemoryIO.getInstance().allocateMemory(Struct.size(tmpVec), false);
try {
tmpVec.useMemory(Pointer.wrap(Runtime.getSystemRuntime(), tmpVecAddr));
FuseBufvec.init(tmpVec, size);
var bb = UninitializedByteBuffer.allocate(size);
var mem = UninitializedByteBuffer.getAddress(bb);
tmpVec.buf.mem.set(mem);
tmpVec.buf.size.set(size);
int res = (int) libFuse.fuse_buf_copy(tmpVec, buf, 0);
if (res != size) {
Log.errorv("fuse_buf_copy failed: {0} != {1}", res, size);
return -ErrorCodes.ENOMEM();
}
return write(path, bb, off, fi);
} finally {
if (tmpVecAddr != 0) {
MemoryIO.getInstance().freeMemory(tmpVecAddr);
}
}
}
}

View File

@@ -1,22 +1,24 @@
package com.usatiuk.dhfsfuse;
import com.google.protobuf.ByteOutput;
import com.usatiuk.utils.UnsafeAccessor;
import jnr.ffi.Pointer;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
/**
* JnrPtrByteOutput is a ByteOutput implementation that writes to a `jnr.ffi.Pointer`.
*/
public class JnrPtrByteOutput extends ByteOutput {
private final Pointer _backing;
private final long _size;
private final JnrPtrByteOutputAccessors _accessors;
private long _pos;
public JnrPtrByteOutput(JnrPtrByteOutputAccessors accessors, Pointer backing, long size) {
public JnrPtrByteOutput(Pointer backing, long size) {
_backing = backing;
_size = size;
_pos = 0;
_accessors = accessors;
}
@Override
@@ -47,9 +49,9 @@ public class JnrPtrByteOutput extends ByteOutput {
if (value instanceof MappedByteBuffer mb) {
mb.load();
}
long addr = _accessors.getNioAccess().getBufferAddress(value) + value.position();
long addr = UnsafeAccessor.NIO.getBufferAddress(value) + value.position();
var out = _backing.address() + _pos;
_accessors.getUnsafe().copyMemory(addr, out, rem);
UnsafeAccessor.UNSAFE.copyMemory(addr, out, rem);
} else {
_backing.put(_pos, value.array(), value.arrayOffset() + value.position(), rem);
}

View File

@@ -1,29 +0,0 @@
package com.usatiuk.dhfsfuse;
import jakarta.inject.Singleton;
import jdk.internal.access.JavaNioAccess;
import jdk.internal.access.SharedSecrets;
import sun.misc.Unsafe;
import java.lang.reflect.Field;
@Singleton
class JnrPtrByteOutputAccessors {
JavaNioAccess _nioAccess;
Unsafe _unsafe;
JnrPtrByteOutputAccessors() throws NoSuchFieldException, IllegalAccessException {
_nioAccess = SharedSecrets.getJavaNioAccess();
Field f = Unsafe.class.getDeclaredField("theUnsafe");
f.setAccessible(true);
_unsafe = (Unsafe) f.get(null);
}
public JavaNioAccess getNioAccess() {
return _nioAccess;
}
public Unsafe getUnsafe() {
return _unsafe;
}
}

View File

@@ -1,4 +1,4 @@
package com.usatiuk.dhfsapp;
package com.usatiuk.dhfsfuse;
import io.quarkus.runtime.Quarkus;
import io.quarkus.runtime.QuarkusApplication;

View File

@@ -14,8 +14,9 @@ dhfs.objects.persistence.stuff.root=${HOME}/dhfs_default/data/stuff
dhfs.fuse.debug=false
dhfs.fuse.enabled=true
dhfs.files.allow_recursive_delete=false
dhfs.files.target_chunk_size=2097152
dhfs.files.target_chunk_alignment=19
dhfs.files.target_chunk_size=524288
dhfs.files.max_chunk_size=524288
dhfs.files.target_chunk_alignment=17
dhfs.objects.deletion.delay=1000
dhfs.objects.deletion.can-delete-retry-delay=10000
dhfs.objects.ref_verification=true

View File

@@ -30,7 +30,7 @@ public class TestDataCleaner {
purgeDirectory(Path.of(tempDirectory).toFile());
}
void purgeDirectory(File dir) {
public static void purgeDirectory(File dir) {
for (File file : Objects.requireNonNull(dir.listFiles())) {
if (file.isDirectory())
purgeDirectory(file);

View File

@@ -1,4 +1,4 @@
package com.usatiuk.dhfsapp.integration;
package com.usatiuk.dhfsfuse.integration;
import com.github.dockerjava.api.model.Device;
import io.quarkus.logging.Log;
@@ -168,35 +168,6 @@ public class DhfsFuseIT {
"rewritten\n".equals(container2.execInContainer("/bin/sh", "-c", "cat /dhfs_test/fuse/testf1").getStdout()));
}
// TODO: How this fits with the tree?
@Test
@Disabled
void deleteDelayedTest() throws IOException, InterruptedException, TimeoutException {
await().atMost(45, TimeUnit.SECONDS).until(() -> 0 == container1.execInContainer("/bin/sh", "-c", "echo tesempty > /dhfs_test/fuse/testf1").getExitCode());
await().atMost(45, TimeUnit.SECONDS).until(() -> "tesempty\n".equals(container2.execInContainer("/bin/sh", "-c", "cat /dhfs_test/fuse/testf1").getStdout()));
await().atMost(45, TimeUnit.SECONDS).until(() -> "tesempty\n".equals(container1.execInContainer("/bin/sh", "-c", "cat /dhfs_test/fuse/testf1").getStdout()));
var client = DockerClientFactory.instance().client();
client.pauseContainerCmd(container2.getContainerId()).exec();
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Lost connection to"), 60, TimeUnit.SECONDS);
await().atMost(45, TimeUnit.SECONDS).until(() -> 0 == container1.execInContainer("/bin/sh", "-c", "rm /dhfs_test/fuse/testf1").getExitCode());
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Delaying deletion check"), 60, TimeUnit.SECONDS, 1);
client.unpauseContainerCmd(container2.getContainerId()).exec();
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Connected"), 60, TimeUnit.SECONDS);
await().atMost(45, TimeUnit.SECONDS).until(() -> 0 == container2.execInContainer("/bin/sh", "-c", "ls /dhfs_test/fuse").getExitCode());
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Deleting from persistent"), 60, TimeUnit.SECONDS, 1);
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("Deleting from persistent"), 60, TimeUnit.SECONDS, 3);
await().atMost(45, TimeUnit.SECONDS).until(() -> 1 == container2.execInContainer("/bin/sh", "-c", "test -f /dhfs_test/fuse/testf1").getExitCode());
await().atMost(45, TimeUnit.SECONDS).until(() -> 1 == container1.execInContainer("/bin/sh", "-c", "test -f /dhfs_test/fuse/testf1").getExitCode());
}
@Test
void deleteTest() throws IOException, InterruptedException, TimeoutException {
await().atMost(45, TimeUnit.SECONDS).until(() -> 0 == container1.execInContainer("/bin/sh", "-c", "echo tesempty > /dhfs_test/fuse/testf1").getExitCode());
@@ -221,6 +192,28 @@ public class DhfsFuseIT {
1 == container1.execInContainer("/bin/sh", "-c", "test -f /dhfs_test/fuse/testf1").getExitCode());
}
@Test
void deleteTestKickedOut() throws IOException, InterruptedException, TimeoutException {
await().atMost(45, TimeUnit.SECONDS).until(() -> 0 == container1.execInContainer("/bin/sh", "-c", "echo tesempty > /dhfs_test/fuse/testf1").getExitCode());
await().atMost(45, TimeUnit.SECONDS).until(() ->
"tesempty\n".equals(container2.execInContainer("/bin/sh", "-c", "cat /dhfs_test/fuse/testf1").getStdout()));
await().atMost(45, TimeUnit.SECONDS).until(() ->
"tesempty\n".equals(container1.execInContainer("/bin/sh", "-c", "cat /dhfs_test/fuse/testf1").getStdout()));
container2.stop();
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("kicked"), 60, TimeUnit.SECONDS, 1);
Log.info("Deleting");
await().atMost(45, TimeUnit.SECONDS).until(() -> 0 == container1.execInContainer("/bin/sh", "-c", "rm /dhfs_test/fuse/testf1").getExitCode());
Log.info("Deleted");
// FIXME?
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Deleting from persistent"), 60, TimeUnit.SECONDS, 3);
await().atMost(45, TimeUnit.SECONDS).until(() ->
1 == container1.execInContainer("/bin/sh", "-c", "test -f /dhfs_test/fuse/testf1").getExitCode());
}
@Test
void moveFileTest() throws IOException, InterruptedException, TimeoutException {
Log.info("Creating");

View File

@@ -1,4 +1,4 @@
package com.usatiuk.dhfsapp.integration;
package com.usatiuk.dhfsfuse.integration;
import com.github.dockerjava.api.model.Device;
import io.quarkus.logging.Log;

View File

@@ -1,4 +1,4 @@
package com.usatiuk.dhfsapp.integration;
package com.usatiuk.dhfsfuse.integration;
import io.quarkus.logging.Log;
import org.jetbrains.annotations.NotNull;
@@ -70,6 +70,7 @@ public class DhfsImage implements Future<String> {
"--add-exports", "java.base/sun.nio.ch=ALL-UNNAMED",
"--add-exports", "java.base/jdk.internal.access=ALL-UNNAMED",
"--add-opens=java.base/java.nio=ALL-UNNAMED",
"--enable-preview",
"-Ddhfs.objects.peerdiscovery.interval=1s",
"-Ddhfs.objects.invalidation.delay=100",
"-Ddhfs.objects.deletion.delay=0",
@@ -78,6 +79,8 @@ public class DhfsImage implements Future<String> {
"-Ddhfs.objects.sync.timeout=30",
"-Ddhfs.objects.sync.ping.timeout=5",
"-Ddhfs.objects.reconnect_interval=1s",
"-Ddhfs.objects.last-seen.timeout=30",
"-Ddhfs.objects.last-seen.update=10",
"-Ddhfs.sync.cert-check=false",
"-Dquarkus.log.category.\"com.usatiuk\".level=TRACE",
"-Dquarkus.log.category.\"com.usatiuk.dhfs\".level=TRACE",

View File

@@ -1,7 +1,7 @@
package com.usatiuk.dhfsapp.integration;
package com.usatiuk.dhfsfuse.integration;
import com.github.dockerjava.api.model.Device;
import com.usatiuk.dhfsapp.TestDataCleaner;
import com.usatiuk.dhfsfuse.TestDataCleaner;
import io.quarkus.logging.Log;
import org.junit.jupiter.api.*;
import org.slf4j.LoggerFactory;

View File

@@ -1,4 +1,4 @@
package com.usatiuk.dhfsapp.integration;
package com.usatiuk.dhfsfuse.integration;
import io.quarkus.logging.Log;

View File

@@ -1,7 +1,7 @@
package com.usatiuk.dhfsapp.integration;
package com.usatiuk.dhfsfuse.integration;
import com.github.dockerjava.api.model.Device;
import com.usatiuk.dhfsapp.TestDataCleaner;
import com.usatiuk.dhfsfuse.TestDataCleaner;
import io.quarkus.logging.Log;
import org.junit.jupiter.api.*;
import org.junit.jupiter.params.ParameterizedTest;
@@ -154,7 +154,7 @@ public class LazyFsIT {
Thread.sleep(3000);
Log.info("Killing");
lazyFs1.crash();
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
var client = DockerClientFactory.instance().client();
client.killContainerCmd(container1.getContainerId()).exec();
container1.stop();
@@ -195,7 +195,7 @@ public class LazyFsIT {
lazyFs1.crash();
}
try {
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
} catch (TimeoutException e) {
// Sometimes crash doesn't work
Log.info("Failed to crash: " + testInfo.getDisplayName());
@@ -237,7 +237,7 @@ public class LazyFsIT {
Thread.sleep(3000);
Log.info("Killing");
lazyFs1.crash();
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
var client = DockerClientFactory.instance().client();
client.killContainerCmd(container1.getContainerId()).exec();
container1.stop();
@@ -279,7 +279,7 @@ public class LazyFsIT {
lazyFs1.crash();
}
try {
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer1.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
} catch (TimeoutException e) {
// Sometimes crash doesn't work
Log.info("Failed to crash: " + testInfo.getDisplayName());
@@ -322,7 +322,7 @@ public class LazyFsIT {
Log.info("Killing");
lazyFs2.crash();
container1.execInContainer("/bin/sh", "-c", "touch /tmp/stopprinting1");
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
var client = DockerClientFactory.instance().client();
client.killContainerCmd(container2.getContainerId()).exec();
container2.stop();
@@ -366,7 +366,7 @@ public class LazyFsIT {
}
container1.execInContainer("/bin/sh", "-c", "touch /tmp/stopprinting2");
try {
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
} catch (TimeoutException e) {
// Sometimes crash doesn't work
Log.info("Failed to crash: " + testInfo.getDisplayName());
@@ -409,7 +409,7 @@ public class LazyFsIT {
Log.info("Killing");
lazyFs2.crash();
container1.execInContainer("/bin/sh", "-c", "touch /tmp/stopprinting1");
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
var client = DockerClientFactory.instance().client();
client.killContainerCmd(container2.getContainerId()).exec();
container2.stop();
@@ -453,7 +453,7 @@ public class LazyFsIT {
}
container1.execInContainer("/bin/sh", "-c", "touch /tmp/stopprinting2");
try {
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("Caused by: org.lmdbjava"), 60, TimeUnit.SECONDS);
waitingConsumer2.waitUntil(frame -> frame.getUtf8String().contains("org.lmdbjava.LmdbNativeException"), 60, TimeUnit.SECONDS);
} catch (TimeoutException e) {
// Sometimes crash doesn't work
Log.info("Failed to crash: " + testInfo.getDisplayName());

View File

@@ -1,4 +1,4 @@
package com.usatiuk.dhfsapp.integration;
package com.usatiuk.dhfsfuse.integration;
import com.github.dockerjava.api.model.Device;
import org.junit.jupiter.api.*;

View File

@@ -1,5 +1,8 @@
package com.usatiuk.kleppmanntree;
/**
* Exception thrown when an attempt is made to create a new tree node as a child with a name that already exists.
*/
public class AlreadyExistsException extends RuntimeException {
public AlreadyExistsException(String message) {
super(message);

View File

@@ -1,32 +0,0 @@
package com.usatiuk.kleppmanntree;
import java.io.Serializable;
public class AtomicClock implements Clock<Long>, Serializable {
private long _max = 0;
public AtomicClock(long counter) {
_max = counter;
}
@Override
public Long getTimestamp() {
return ++_max;
}
public void setTimestamp(Long timestamp) {
_max = timestamp;
}
@Override
public Long peekTimestamp() {
return _max;
}
@Override
public Long updateTimestamp(Long receivedTimestamp) {
var old = _max;
_max = Math.max(_max, receivedTimestamp) + 1;
return old;
}
}

View File

@@ -1,9 +1,26 @@
package com.usatiuk.kleppmanntree;
/**
* Clock interface
*/
public interface Clock<TimestampT extends Comparable<TimestampT>> {
/**
* Increment and get the current timestamp.
* @return the incremented timestamp
*/
TimestampT getTimestamp();
/**
* Get the current timestamp without incrementing it.
* @return the current timestamp
*/
TimestampT peekTimestamp();
/**
* Update the timestamp with an externally received timestamp.
* Will set the currently stored timestamp to <code>max(receivedTimestamp, currentTimestamp) + 1</code>
* @param receivedTimestamp the received timestamp
* @return the previous timestamp
*/
TimestampT updateTimestamp(TimestampT receivedTimestamp);
}

View File

@@ -3,6 +3,13 @@ package com.usatiuk.kleppmanntree;
import java.io.Serializable;
import java.util.Comparator;
/**
* CombinedTimestamp is a record that represents a timestamp and a node ID, ordered first by timestamp and then by node ID.
* @param timestamp the timestamp
* @param nodeId the node ID. If null, then only the timestamp is used for ordering.
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the node ID
*/
public record CombinedTimestamp<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>>
(TimestampT timestamp,
PeerIdT nodeId) implements Comparable<CombinedTimestamp<TimestampT, PeerIdT>>, Serializable {

View File

@@ -8,6 +8,14 @@ import java.util.function.Function;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* An implementation of a tree as described in <a href="https://martin.kleppmann.com/papers/move-op.pdf">A highly-available move operation for replicated trees</a>
*
* @param <TimestampT> Type of the timestamp
* @param <PeerIdT> Type of the peer ID
* @param <MetaT> Type of the node metadata
* @param <NodeIdT> Type of the node ID
*/
public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>, MetaT extends NodeMeta, NodeIdT> {
private static final Logger LOGGER = Logger.getLogger(KleppmannTree.class.getName());
@@ -16,6 +24,14 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
private final Clock<TimestampT> _clock;
private final OpRecorder<TimestampT, PeerIdT, MetaT, NodeIdT> _opRecorder;
/**
* Constructor with all the dependencies
*
* @param storage Storage interface
* @param peers Peer interface
* @param clock Clock interface
* @param opRecorder Operation recorder interface
*/
public KleppmannTree(StorageInterface<TimestampT, PeerIdT, MetaT, NodeIdT> storage,
PeerInterface<PeerIdT> peers,
Clock<TimestampT> clock,
@@ -26,6 +42,13 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
_opRecorder = opRecorder;
}
/**
* Traverse the tree from the given node ID using the given list of names
*
* @param fromId The starting node ID
* @param names The list of names to traverse
* @return The resulting node ID or null if not found
*/
private NodeIdT traverseImpl(NodeIdT fromId, List<String> names) {
if (names.isEmpty()) return fromId;
@@ -39,14 +62,21 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
return traverseImpl(childId, names.subList(1, names.size()));
}
public NodeIdT traverse(NodeIdT fromId, List<String> names) {
return traverseImpl(fromId, names.subList(1, names.size()));
}
/**
* Traverse the tree from its root node using the given list of names
*
* @param names The list of names to traverse
* @return The resulting node ID or null if not found
*/
public NodeIdT traverse(List<String> names) {
return traverseImpl(_storage.getRootId(), names);
}
/**
* Undo the effect of a log effect
*
* @param effect The log effect to undo
*/
private void undoEffect(LogEffect<TimestampT, PeerIdT, MetaT, NodeIdT> effect) {
if (effect.oldInfo() != null) {
var node = _storage.getById(effect.childId());
@@ -89,6 +119,11 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
}
}
/**
* Undo the effects of a log record
*
* @param op The log record to undo
*/
private void undoOp(LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT> op) {
LOGGER.finer(() -> "Will undo op: " + op);
if (op.effects() != null)
@@ -96,16 +131,32 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
undoEffect(e);
}
/**
* Redo the operation in a log record
*
* @param entry The log record to redo
*/
private void redoOp(Map.Entry<CombinedTimestamp<TimestampT, PeerIdT>, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT>> entry) {
var newEffects = doOp(entry.getValue().op(), false);
_storage.getLog().replace(entry.getKey(), newEffects);
}
/**
* Perform the operation and put it in the log
*
* @param op The operation to perform
* @param failCreatingIfExists Whether to fail if there is a name conflict,
* otherwise replace the existing node
* @throws AlreadyExistsException If the node already exists and failCreatingIfExists is true
*/
private void doAndPut(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op, boolean failCreatingIfExists) {
var res = doOp(op, failCreatingIfExists);
_storage.getLog().put(res.op().timestamp(), res);
}
/**
* Try to trim the log to the causality threshold
*/
private void tryTrimLog() {
var log = _storage.getLog();
var timeLog = _storage.getPeerTimestampLog();
@@ -161,22 +212,52 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
}
}
/**
* Move a node to a new parent with new metadata
*
* @param newParent The new parent node ID
* @param newMeta The new metadata
* @param child The child node ID
* @throws AlreadyExistsException If the node already exists and failCreatingIfExists is true
*/
public <LocalMetaT extends MetaT> void move(NodeIdT newParent, LocalMetaT newMeta, NodeIdT child) {
move(newParent, newMeta, child, true);
}
/**
* Move a node to a new parent with new metadata
*
* @param newParent The new parent node ID
* @param newMeta The new metadata
* @param child The child node ID
* @param failCreatingIfExists Whether to fail if there is a name conflict,
* otherwise replace the existing node
* @throws AlreadyExistsException If the node already exists and failCreatingIfExists is true
*/
public void move(NodeIdT newParent, MetaT newMeta, NodeIdT child, boolean failCreatingIfExists) {
var createdMove = createMove(newParent, newMeta, child);
applyOp(_peers.getSelfId(), createdMove, failCreatingIfExists);
_opRecorder.recordOp(createdMove);
}
/**
* Apply an external operation from a remote peer
*
* @param from The peer ID
* @param op The operation to apply
*/
public void applyExternalOp(PeerIdT from, OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op) {
_clock.updateTimestamp(op.timestamp().timestamp());
applyOp(from, op, false);
}
// Returns true if the timestamp is newer than what's seen, false otherwise
/**
* Update the causality threshold timestamp for a peer
*
* @param from The peer ID
* @param newTimestamp The timestamp received from it
* @return True if the timestamp was updated, false otherwise
*/
private boolean updateTimestampImpl(PeerIdT from, TimestampT newTimestamp) {
TimestampT oldRef = _storage.getPeerTimestampLog().getForPeer(from);
if (oldRef != null && oldRef.compareTo(newTimestamp) >= 0) { // FIXME?
@@ -187,6 +268,12 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
return true;
}
/**
* Update the causality threshold timestamp for a peer
*
* @param from The peer ID
* @param timestamp The timestamp received from it
*/
public void updateExternalTimestamp(PeerIdT from, TimestampT timestamp) {
var gotExt = _storage.getPeerTimestampLog().getForPeer(from);
var gotSelf = _storage.getPeerTimestampLog().getForPeer(_peers.getSelfId());
@@ -197,6 +284,15 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
tryTrimLog();
}
/**
* Apply an operation from a peer
*
* @param from The peer ID
* @param op The operation to apply
* @param failCreatingIfExists Whether to fail if there is a name conflict,
* otherwise replace the existing node
* @throws AlreadyExistsException If the node already exists and failCreatingIfExists is true
*/
private void applyOp(PeerIdT from, OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op, boolean failCreatingIfExists) {
if (!updateTimestampImpl(op.timestamp().nodeId(), op.timestamp().timestamp())) return;
@@ -229,14 +325,36 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
}
}
/**
* Get a new timestamp, incrementing the one in storage
*
* @return A new timestamp
*/
private CombinedTimestamp<TimestampT, PeerIdT> getTimestamp() {
return new CombinedTimestamp<>(_clock.getTimestamp(), _peers.getSelfId());
}
/**
* Create a new move operation
*
* @param newParent The new parent node ID
* @param newMeta The new metadata
* @param node The child node ID
* @return A new move operation
*/
private <LocalMetaT extends MetaT> OpMove<TimestampT, PeerIdT, LocalMetaT, NodeIdT> createMove(NodeIdT newParent, LocalMetaT newMeta, NodeIdT node) {
return new OpMove<>(getTimestamp(), newParent, newMeta, node);
}
/**
* Perform the operation and return the log record
*
* @param op The operation to perform
* @param failCreatingIfExists Whether to fail if there is a name conflict,
* otherwise replace the existing node
* @return The log record
* @throws AlreadyExistsException If the node already exists and failCreatingIfExists is true
*/
private LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT> doOp(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op, boolean failCreatingIfExists) {
LOGGER.finer(() -> "Doing op: " + op);
LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT> computed;
@@ -253,10 +371,24 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
return computed;
}
/**
* Get a new node from storage
*
* @param key The node ID
* @param parent The parent node ID
* @param meta The metadata
* @return A new tree node
*/
private TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> getNewNode(NodeIdT key, NodeIdT parent, MetaT meta) {
return _storage.createNewNode(key, parent, meta);
}
/**
* Apply the effects of a log record
*
* @param sourceOp The source operation
* @param effects The list of log effects
*/
private void applyEffects(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> sourceOp, List<LogEffect<TimestampT, PeerIdT, MetaT, NodeIdT>> effects) {
for (var effect : effects) {
LOGGER.finer(() -> "Applying effect: " + effect + " from op " + sourceOp);
@@ -297,6 +429,15 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
}
}
/**
* Compute the effects of a move operation
*
* @param op The operation to process
* @param failCreatingIfExists Whether to fail if there is a name conflict,
* otherwise replace the existing node
* @return The log record with the computed effects
* @throws AlreadyExistsException If the node already exists and failCreatingIfExists is true
*/
private LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT> computeEffects(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op, boolean failCreatingIfExists) {
var node = _storage.getById(op.childId());
@@ -380,6 +521,13 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
));
}
/**
* Check if a node is an ancestor of another node
*
* @param child The child node ID
* @param parent The parent node ID
* @return True if the child is an ancestor of the parent, false otherwise
*/
private boolean isAncestor(NodeIdT child, NodeIdT parent) {
var node = _storage.getById(parent);
NodeIdT curParent;
@@ -390,6 +538,11 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
return false;
}
/**
* Walk the tree and apply the given consumer to each node
*
* @param consumer The consumer to apply to each node
*/
public void walkTree(Consumer<TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT>> consumer) {
ArrayDeque<NodeIdT> queue = new ArrayDeque<>();
queue.push(_storage.getRootId());
@@ -403,6 +556,12 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
}
}
/**
* Find the parent of a node that matches the given predicate
*
* @param kidPredicate The predicate to match the child node
* @return A pair containing the name of the child and the ID of the parent, or null if not found
*/
public Pair<String, NodeIdT> findParent(Function<TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT>, Boolean> kidPredicate) {
ArrayDeque<NodeIdT> queue = new ArrayDeque<>();
queue.push(_storage.getRootId());
@@ -423,6 +582,13 @@ public class KleppmannTree<TimestampT extends Comparable<TimestampT>, PeerIdT ex
return null;
}
/**
* Record the bootstrap operations for a given peer
* Will visit all nodes of the tree and add their effective operations to both the queue to be sent to the peer,
* and to the global operation log.
*
* @param host The peer ID
*/
public void recordBoostrapFor(PeerIdT host) {
TreeMap<CombinedTimestamp<TimestampT, PeerIdT>, OpMove<TimestampT, PeerIdT, MetaT, NodeIdT>> result = new TreeMap<>();

View File

@@ -2,6 +2,18 @@ package com.usatiuk.kleppmanntree;
import java.io.Serializable;
/**
* LogEffect is a record that represents the effect of a log entry on a tree node.
* @param oldInfo the old information about the node, before it was moved. Null if the node did not exist before
* @param effectiveOp the operation that had caused this effect to be applied
* @param newParentId the ID of the new parent node
* @param newMeta the new metadata of the node
* @param childId the ID of the child node
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public record LogEffect<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>, MetaT extends NodeMeta, NodeIdT>(
LogEffectOld<TimestampT, PeerIdT, MetaT, NodeIdT> oldInfo,
OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> effectiveOp,

View File

@@ -2,6 +2,16 @@ package com.usatiuk.kleppmanntree;
import java.io.Serializable;
/**
* Represents the old information about a node before it was moved.
* @param oldEffectiveMove the old effective move that had caused this effect to be applied
* @param oldParent the ID of the old parent node
* @param oldMeta the old metadata of the node
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public record LogEffectOld<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>, MetaT extends NodeMeta, NodeIdT>
(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> oldEffectiveMove,
NodeIdT oldParent,

View File

@@ -4,29 +4,82 @@ import org.apache.commons.lang3.tuple.Pair;
import java.util.List;
/**
* LogInterface is an interface that allows accessing the log of operations
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public interface LogInterface<
TimestampT extends Comparable<TimestampT>,
PeerIdT extends Comparable<PeerIdT>,
MetaT extends NodeMeta,
NodeIdT> {
/**
* Peek the oldest log entry.
* @return the oldest log entry
*/
Pair<CombinedTimestamp<TimestampT, PeerIdT>, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT>> peekOldest();
/**
* Take the oldest log entry.
* @return the oldest log entry
*/
Pair<CombinedTimestamp<TimestampT, PeerIdT>, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT>> takeOldest();
/**
* Peek the newest log entry.
* @return the newest log entry
*/
Pair<CombinedTimestamp<TimestampT, PeerIdT>, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT>> peekNewest();
/**
* Return all log entries that are newer than the given timestamp.
* @param since the timestamp to compare with
* @param inclusive if true, include the log entry with the given timestamp
* @return a list of log entries that are newer than the given timestamp
*/
List<Pair<CombinedTimestamp<TimestampT, PeerIdT>, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT>>>
newestSlice(CombinedTimestamp<TimestampT, PeerIdT> since, boolean inclusive);
/**
* Return all the log entries
* @return a list of all log entries
*/
List<Pair<CombinedTimestamp<TimestampT, PeerIdT>, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT>>> getAll();
/**
* Checks if the log is empty.
* @return true if the log is empty, false otherwise
*/
boolean isEmpty();
/**
* Checks if the log contains the given timestamp.
* @param timestamp the timestamp to check
* @return true if the log contains the given timestamp, false otherwise
*/
boolean containsKey(CombinedTimestamp<TimestampT, PeerIdT> timestamp);
/**
* Get the size of the log.
* @return the size of the log (number of entries)
*/
long size();
/**
* Add a log entry to the log.
* @param timestamp the timestamp of the log entry
* @param record the log entry
* @throws IllegalStateException if the log entry already exists
*/
void put(CombinedTimestamp<TimestampT, PeerIdT> timestamp, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT> record);
/**
* Replace a log entry in the log.
* @param timestamp the timestamp of the log entry
* @param record the log entry
*/
void replace(CombinedTimestamp<TimestampT, PeerIdT> timestamp, LogRecord<TimestampT, PeerIdT, MetaT, NodeIdT> record);
}

View File

@@ -3,6 +3,15 @@ package com.usatiuk.kleppmanntree;
import java.io.Serializable;
import java.util.List;
/**
* Represents a log record in the Kleppmann tree.
* @param op the operation that is stored in this log record
* @param effects the effects of the operation (resulting moves)
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public record LogRecord<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>, MetaT extends NodeMeta, NodeIdT>
(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op,
List<LogEffect<TimestampT, PeerIdT, MetaT, NodeIdT>> effects) implements Serializable {

View File

@@ -2,8 +2,24 @@ package com.usatiuk.kleppmanntree;
import java.io.Serializable;
/**
* Represents metadata associated with a node in the Kleppmann tree.
* This interface is used to define the metadata that can be associated with nodes in the tree.
* Implementations of this interface should provide a name for the node and a method to create a copy of it with a new name.
*/
public interface NodeMeta extends Serializable {
/**
* Returns the name of the node.
*
* @return the name of the node
*/
String name();
/**
* Creates a copy of the metadata with a new name.
*
* @param name the new name for the metadata
* @return a new instance of NodeMeta with the specified name
*/
NodeMeta withName(String name);
}

View File

@@ -2,9 +2,27 @@ package com.usatiuk.kleppmanntree;
import java.io.Serializable;
/**
* Operation that moves a child node to a new parent node.
*
* @param timestamp the timestamp of the operation
* @param newParentId the ID of the new parent node
* @param newMeta the new metadata of the node, can be null
* @param childId the ID of the child node (the node that is being moved)
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public record OpMove<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>, MetaT extends NodeMeta, NodeIdT>
(CombinedTimestamp<TimestampT, PeerIdT> timestamp, NodeIdT newParentId, MetaT newMeta,
NodeIdT childId) implements Serializable {
/**
* Returns the new name of the node: name extracted from the new metadata if available,
* otherwise the child ID converted to string.
*
* @return the new name of the node
*/
public String newName() {
if (newMeta != null)
return newMeta.name();

View File

@@ -1,7 +1,26 @@
package com.usatiuk.kleppmanntree;
/**
* Interface to provide recording operations to be sent to peers asynchronously.
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public interface OpRecorder<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>, MetaT extends NodeMeta, NodeIdT> {
/**
* Records an operation to be sent to peers asynchronously.
* The operation will be sent to all known peers in the system.
*
* @param op the operation to be recorded
*/
void recordOp(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op);
/**
* Records an operation to be sent to a specific peer asynchronously.
*
* @param peer the ID of the peer to send the operation to
* @param op the operation to be recorded
*/
void recordOpForPeer(PeerIdT peer, OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> op);
}

View File

@@ -2,8 +2,22 @@ package com.usatiuk.kleppmanntree;
import java.util.Collection;
/**
* Interface providing access to a list of known peers.
* @param <PeerIdT> the type of the peer ID
*/
public interface PeerInterface<PeerIdT extends Comparable<PeerIdT>> {
/**
* Returns the ID of the current peer.
*
* @return the ID of the current peer
*/
PeerIdT getSelfId();
/**
* Returns a collection of all known peers.
*
* @return a collection of all known peers
*/
Collection<PeerIdT> getAllPeers();
}

View File

@@ -1,11 +1,26 @@
package com.usatiuk.kleppmanntree;
/**
* Interface providing a map of newest received timestamps for each peer. (causality thresholds)
* If a peer has some timestamp recorded in this map,
* it means that all messages coming from this peer will have a newer timestamp.
* @param <TimestampT>
* @param <PeerIdT>
*/
public interface PeerTimestampLogInterface<
TimestampT extends Comparable<TimestampT>,
PeerIdT extends Comparable<PeerIdT>> {
/**
* Get the timestamp for a specific peer.
* @param peerId the ID of the peer
* @return the timestamp for the peer
*/
TimestampT getForPeer(PeerIdT peerId);
/**
* Get the timestamp for the current peer.
*/
void putForPeer(PeerIdT peerId, TimestampT timestamp);
}

View File

@@ -1,28 +1,89 @@
package com.usatiuk.kleppmanntree;
/**
* Storage interface for the Kleppmann tree.
*
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public interface StorageInterface<
TimestampT extends Comparable<TimestampT>,
PeerIdT extends Comparable<PeerIdT>,
MetaT extends NodeMeta,
NodeIdT> {
/**
* Get the root node ID.
*
* @return the root node IDx
*/
NodeIdT getRootId();
/**
* Get the trash node ID.
*
* @return the trash node ID
*/
NodeIdT getTrashId();
/**
* Get the lost and found node ID.
*
* @return the lost and found node ID
*/
NodeIdT getLostFoundId();
/**
* Get the new node ID.
*
* @return the new node ID
*/
NodeIdT getNewNodeId();
/**
* Get the node by its ID.
*
* @param id the ID of the node
* @return the node with the specified ID, or null if not found
*/
TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> getById(NodeIdT id);
// Creates a node, returned wrapper is RW-locked
/**
* Create a new node with the specified key, parent, and metadata.
*
* @param key the ID of the new node
* @param parent the ID of the parent node
* @param meta the metadata of the new node
* @return the new node
*/
TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> createNewNode(NodeIdT key, NodeIdT parent, MetaT meta);
/**
* Put a node into the storage.
*
* @param node the node to put into the storage
*/
void putNode(TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> node);
/**
* Remove a node from the storage.
*
* @param id the ID of the node to remove
*/
void removeNode(NodeIdT id);
/**
* Get the log interface.
*
* @return the log interface
*/
LogInterface<TimestampT, PeerIdT, MetaT, NodeIdT> getLog();
/**
* Get the peer timestamp log interface.
*
* @return the peer timestamp log interface
*/
PeerTimestampLogInterface<TimestampT, PeerIdT> getPeerTimestampLog();
}

View File

@@ -5,29 +5,92 @@ import org.pcollections.PMap;
import java.io.Serializable;
/**
* Represents a node in the Kleppmann tree.
*
* @param <TimestampT> the type of the timestamp
* @param <PeerIdT> the type of the peer ID
* @param <MetaT> the type of the node metadata
* @param <NodeIdT> the type of the node ID
*/
public interface TreeNode<TimestampT extends Comparable<TimestampT>, PeerIdT extends Comparable<PeerIdT>, MetaT extends NodeMeta, NodeIdT> extends Serializable {
/**
* Get the ID of the node.
*
* @return the ID of the node
*/
NodeIdT key();
/**
* Get the ID of the parent node.
*
* @return the ID of the parent node
*/
NodeIdT parent();
/**
* Get the last effective operation that moved this node.
*
* @return the last effective operation
*/
OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> lastEffectiveOp();
/**
* Get the metadata stored in this node.
*
* @return the metadata of the node
*/
@Nullable
MetaT meta();
/**
* Get the name of the node.
* If the node has metadata, the name is extracted from it, otherwise the key is converted to string.
*
* @return the name of the node
*/
default String name() {
var meta = meta();
if (meta != null) return meta.name();
return key().toString();
}
/**
* Get the children of this node.
*
* @return a map of child IDs to their respective nodes
*/
PMap<String, NodeIdT> children();
/**
* Make a copy of this node with a new parent.
*
* @param parent the ID of the new parent node
* @return a new TreeNode instance with the updated parent
*/
TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> withParent(NodeIdT parent);
/**
* Make a copy of this node with a new last effective operation.
*
* @param lastEffectiveOp the new last effective operation
* @return a new TreeNode instance with the updated last effective operation
*/
TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> withLastEffectiveOp(OpMove<TimestampT, PeerIdT, MetaT, NodeIdT> lastEffectiveOp);
/**
* Make a copy of this node with new metadata.
*
* @param meta the new metadata
* @return a new TreeNode instance with the updated metadata
*/
TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> withMeta(MetaT meta);
/**
* Make a copy of this node with new children.
*
* @param children the new children
* @return a new TreeNode instance with the updated children
*/
TreeNode<TimestampT, PeerIdT, MetaT, NodeIdT> withChildren(PMap<String, NodeIdT> children);
}

View File

@@ -18,11 +18,6 @@
</properties>
<dependencies>
<dependency>
<groupId>org.apache.fury</groupId>
<artifactId>fury-core</artifactId>
<version>0.10.1</version>
</dependency>
<dependency>
<groupId>net.jqwik</groupId>
<artifactId>jqwik</artifactId>

View File

@@ -2,9 +2,20 @@ package com.usatiuk.objects;
import java.io.Serializable;
/**
* JData is a marker interface for all objects that can be stored in the object store.
*/
public interface JData extends Serializable {
/**
* Returns the key of the object.
* @return the key of the object
*/
JObjectKey key();
/**
* Returns the estimated size of the object in bytes.
* @return the estimated size of the object in bytes
*/
default int estimateSize() {
return 100;
}

View File

@@ -2,15 +2,34 @@ package com.usatiuk.objects;
import com.usatiuk.objects.iterators.Data;
/**
* JDataVersionedWrapper is a wrapper for JData that contains its version number
* (the id of the transaction that had changed it last)
*/
public sealed interface JDataVersionedWrapper extends Data<JDataVersionedWrapper> permits JDataVersionedWrapperLazy, JDataVersionedWrapperImpl {
@Override
default JDataVersionedWrapper value() {
return this;
}
/**
* Returns the wrapped object.
*
* @return the wrapped object
*/
JData data();
/**
* Returns the version number of the object.
*
* @return the version number of the object
*/
long version();
/**
* Returns the estimated size of the object in bytes.
*
* @return the estimated size of the object in bytes
*/
int estimateSize();
}

View File

@@ -4,6 +4,9 @@ import jakarta.annotation.Nonnull;
import java.io.Serializable;
/**
* Simple wrapper for an already-existing JData object with a version.
*/
public record JDataVersionedWrapperImpl(@Nonnull JData data,
long version) implements Serializable, JDataVersionedWrapper {
@Override

View File

@@ -2,18 +2,35 @@ package com.usatiuk.objects;
import java.util.function.Supplier;
/**
* Lazy JDataVersionedWrapper implementation.
* The object is deserialized only when data() is called for the first time.
* Also allows to set a callback to be called when the data is loaded (e.g. to cache it).
*/
public final class JDataVersionedWrapperLazy implements JDataVersionedWrapper {
private final long _version;
private final int _estimatedSize;
private JData _data;
private Supplier<JData> _producer;
/**
* Creates a new JDataVersionedWrapperLazy object.
*
* @param version the version number of the object
* @param estimatedSize the estimated size of the object in bytes
* @param producer a supplier that produces the wrapped object
*/
public JDataVersionedWrapperLazy(long version, int estimatedSize, Supplier<JData> producer) {
_version = version;
_estimatedSize = estimatedSize;
_producer = producer;
}
/**
* Set a callback to be called when the data is loaded.
*
* @param cacheCallback the callback to be called
*/
public void setCacheCallback(Runnable cacheCallback) {
if (_data != null) {
throw new IllegalStateException("Cache callback can be set only before data is loaded");

View File

@@ -7,11 +7,21 @@ import jakarta.inject.Singleton;
import java.nio.ByteBuffer;
/**
* Serializer for JDataVersionedWrapper objects.
* The objects are stored in a simple format: first is 8-byte long, then the serialized object.
*/
@Singleton
public class JDataVersionedWrapperSerializer {
@Inject
ObjectSerializer<JData> dataSerializer;
/**
* Serializes a JDataVersionedWrapper object to a ByteString.
*
* @param obj the object to serialize
* @return the serialized object as a ByteString
*/
public ByteString serialize(JDataVersionedWrapper obj) {
ByteBuffer buffer = ByteBuffer.allocate(Long.BYTES);
buffer.putLong(obj.version());
@@ -19,6 +29,13 @@ public class JDataVersionedWrapperSerializer {
return ByteString.copyFrom(buffer).concat(dataSerializer.serialize(obj.data()));
}
/**
* Deserializes a JDataVersionedWrapper object from a ByteBuffer.
* Returns a lazy wrapper (JDataVersionedWrapperLazy).
*
* @param data the ByteBuffer containing the serialized object
* @return the deserialized object
*/
public JDataVersionedWrapper deserialize(ByteBuffer data) {
var version = data.getLong();
return new JDataVersionedWrapperLazy(version, data.remaining(),

View File

@@ -5,30 +5,68 @@ import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
/**
* JObjectKey is an interface for object keys to be used in the object store.
*/
public sealed interface JObjectKey extends Serializable, Comparable<JObjectKey> permits JObjectKeyImpl, JObjectKeyMax, JObjectKeyMin {
JObjectKeyMin MIN = new JObjectKeyMin();
JObjectKeyMax MAX = new JObjectKeyMax();
/**
* Creates a new JObjectKey from a string value.
*
* @param value the string value of the key
* @return a new JObjectKey
*/
static JObjectKey of(String value) {
return new JObjectKeyImpl(value);
}
/**
* Creates a new JObjectKey with a random UUID.
*
* @return a new JObjectKey with a random UUID
*/
static JObjectKey random() {
return new JObjectKeyImpl(UUID.randomUUID().toString());
}
/**
* Returns a JObjectKey that compares less than all other keys.
* Calling value on this key will result in an exception.
*
* @return a JObjectKey that compares less than all other keys
*/
static JObjectKey first() {
return MIN;
}
/**
* Returns a JObjectKey that compares greater than all other keys.
* Calling value on this key will result in an exception.
*
* @return a JObjectKey that compares greater than all other keys
*/
static JObjectKey last() {
return MAX;
}
/**
* Creates a new JObjectKey from a byte array.
*
* @param bytes the byte array representing the key
* @return a new JObjectKey
*/
static JObjectKey fromBytes(byte[] bytes) {
return new JObjectKeyImpl(new String(bytes, StandardCharsets.ISO_8859_1));
}
/**
* Creates a new JObjectKey from a ByteBuffer.
*
* @param buff the ByteBuffer representing the key
* @return a new JObjectKey
*/
static JObjectKey fromByteBuffer(ByteBuffer buff) {
byte[] bytes = new byte[buff.remaining()];
buff.get(bytes);
@@ -41,7 +79,17 @@ public sealed interface JObjectKey extends Serializable, Comparable<JObjectKey>
@Override
String toString();
/**
* Returns the byte buffer representation of the key.
*
* @return the byte buffer representation of the key
*/
ByteBuffer toByteBuffer();
/**
* Returns the string value of the key.
*
* @return the string value of the key
*/
String value();
}

View File

@@ -1,10 +1,15 @@
package com.usatiuk.objects;
import com.usatiuk.utils.UninitializedByteBuffer;
import java.io.Serial;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
/**
* A "real" implementation of JObjectKey, containing an underlying string, and a cached lazily created byte buffer.
*/
public final class JObjectKeyImpl implements JObjectKey {
@Serial
private static final long serialVersionUID = 0L;
@@ -46,7 +51,7 @@ public final class JObjectKeyImpl implements JObjectKey {
synchronized (this) {
if (_bb != null) return _bb;
var bytes = value.getBytes(StandardCharsets.ISO_8859_1);
var directBb = ByteBuffer.allocateDirect(bytes.length);
var directBb = UninitializedByteBuffer.allocate(bytes.length);
directBb.put(bytes);
directBb.flip();
_bb = directBb;
@@ -69,7 +74,7 @@ public final class JObjectKeyImpl implements JObjectKey {
@Override
public int hashCode() {
return Objects.hash(value);
return value.hashCode();
}
}

View File

@@ -2,6 +2,9 @@ package com.usatiuk.objects;
import java.nio.ByteBuffer;
/**
* JObjectKey implementation that compares greater than all other keys.
*/
public record JObjectKeyMax() implements JObjectKey {
@Override
public int compareTo(JObjectKey o) {

View File

@@ -2,6 +2,9 @@ package com.usatiuk.objects;
import java.nio.ByteBuffer;
/**
* JObjectKey implementation that compares less than all other keys.
*/
public record JObjectKeyMin() implements JObjectKey {
@Override
public int compareTo(JObjectKey o) {

View File

@@ -3,30 +3,29 @@ package com.usatiuk.objects;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import com.usatiuk.utils.SerializationHelper;
import io.quarkus.arc.DefaultBean;
import jakarta.enterprise.context.ApplicationScoped;
import org.apache.fury.Fury;
import org.apache.fury.ThreadSafeFury;
import org.apache.fury.config.Language;
import java.io.IOException;
import java.nio.ByteBuffer;
/**
* Simple Java object serializer.
*/
@ApplicationScoped
@DefaultBean
public class JavaDataSerializer implements ObjectSerializer<JData> {
private static final ThreadSafeFury fury = Fury.builder().withLanguage(Language.JAVA)
// Allow to deserialize objects unknown types,
// more flexible but less secure.
.requireClassRegistration(false)
.buildThreadSafeFury();
@Override
public ByteString serialize(JData obj) {
return UnsafeByteOperations.unsafeWrap(fury.serialize(obj));
return SerializationHelper.serialize(obj);
}
@Override
public JData deserialize(ByteBuffer data) {
return (JData) fury.deserialize(data);
try (var is = UnsafeByteOperations.unsafeWrap(data).newInput()) {
return SerializationHelper.deserialize(is);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}

View File

@@ -4,8 +4,25 @@ import com.google.protobuf.ByteString;
import java.nio.ByteBuffer;
/**
* Interface for serializing and deserializing objects.
*
* @param <T> the type of object to serialize/deserialize
*/
public interface ObjectSerializer<T> {
/**
* Serialize an object to a ByteString.
*
* @param obj the object to serialize
* @return the serialized object as a ByteString
*/
ByteString serialize(T obj);
/**
* Deserialize an object from a ByteBuffer.
*
* @param data the ByteBuffer containing the serialized object
* @return the deserialized object
*/
T deserialize(ByteBuffer data);
}

View File

@@ -1,24 +1,70 @@
package com.usatiuk.objects.iterators;
import com.usatiuk.utils.AutoCloseableNoThrow;
import org.apache.commons.lang3.tuple.Pair;
import java.util.Iterator;
public interface CloseableKvIterator<K extends Comparable<? super K>, V> extends Iterator<Pair<K, V>>, AutoCloseableNoThrow {
/**
* An iterator over key-value pairs that can be closed and supports peek and skip operations, in both directions.
* @param <K> the type of the keys
* @param <V> the type of the values
*/
public interface CloseableKvIterator<K extends Comparable<? super K>, V> extends Iterator<Pair<K, V>>, AutoCloseable {
/**
* Returns the upcoming key in the forward direction without advancing the iterator.
*
* @return the current key
* @throws IllegalStateException if there is no next element
*/
K peekNextKey();
/**
* Skips the next element in the forward direction.
*
* @throws IllegalStateException if there is no next element
*/
void skip();
/**
* Checks if there is a next element in the forward direction.
*
* @return true if there is a next element, false otherwise
* @throws IllegalStateException if there is no next element
*/
K peekPrevKey();
/**
* Returns the key-value pair in the reverse direction, and advances the iterator.
*
* @return the previous key-value pair
* @throws IllegalStateException if there is no previous element
*/
Pair<K, V> prev();
/**
* Checks if there is a previous element in the reverse direction.
*
* @return true if there is a previous element, false otherwise
*/
boolean hasPrev();
/**
* Skips the previous element in the reverse direction.
*
* @throws IllegalStateException if there is no previous element
*/
void skipPrev();
/**
* Returns a reversed iterator that iterates in the reverse direction.
*
* @return a new CloseableKvIterator that iterates in the reverse direction
*/
default CloseableKvIterator<K, V> reversed() {
return new ReversedKvIterator<K, V>(this);
}
@Override
void close();
}

View File

@@ -1,5 +1,13 @@
package com.usatiuk.objects.iterators;
/**
* Interface indicating that data is present.
* @param <V> the type of the value
*/
public interface Data<V> extends MaybeTombstone<V> {
/**
* Get the value.
* @return the value
*/
V value();
}

View File

@@ -1,4 +1,9 @@
package com.usatiuk.objects.iterators;
/**
* Simple implementation of the Data interface.
* @param value the value
* @param <V> the type of the value
*/
public record DataWrapper<V>(V value) implements Data<V> {
}

View File

@@ -1,6 +0,0 @@
package com.usatiuk.objects.iterators;
@FunctionalInterface
public interface IterProdFn<K extends Comparable<K>, V> {
CloseableKvIterator<K, V> get(IteratorStart start, K key);
}

View File

@@ -1,5 +1,8 @@
package com.usatiuk.objects.iterators;
/**
* Allows to specify initial positioning of the iterator relative to the requested key.
*/
public enum IteratorStart {
LT,
LE,

View File

@@ -5,11 +5,25 @@ import org.apache.commons.lang3.tuple.Pair;
import java.util.NoSuchElementException;
import java.util.function.Function;
/**
* A key-value iterator that filters keys based on a predicate.
*
* @param <K> the type of the keys
* @param <V> the type of the values
*/
public class KeyPredicateKvIterator<K extends Comparable<K>, V> extends ReversibleKvIterator<K, V> {
private final CloseableKvIterator<K, V> _backing;
private final Function<K, Boolean> _filter;
private K _next;
/**
* Constructs a KeyPredicateKvIterator with the specified backing iterator, start position, and filter.
*
* @param backing the backing iterator
* @param start the starting position relative to the startKey
* @param startKey the starting key
* @param filter the filter function to apply to keys. Only keys for which this function returns true will be included in the iteration.
*/
public KeyPredicateKvIterator(CloseableKvIterator<K, V> backing, IteratorStart start, K startKey, Function<K, Boolean> filter) {
_goingForward = true;
_backing = backing;

View File

@@ -4,10 +4,23 @@ import org.apache.commons.lang3.tuple.Pair;
import java.util.function.Function;
/**
* A mapping key-value iterator that transforms the values of a backing iterator using a specified function.
*
* @param <K> the type of the keys
* @param <V> the type of the values in the backing iterator
* @param <V_T> the type of the transformed values
*/
public class MappingKvIterator<K extends Comparable<K>, V, V_T> implements CloseableKvIterator<K, V_T> {
private final CloseableKvIterator<K, V> _backing;
private final Function<V, V_T> _transformer;
/**
* Constructs a MappingKvIterator with the specified backing iterator and transformer function.
*
* @param backing the backing iterator
* @param transformer the function to transform values
*/
public MappingKvIterator(CloseableKvIterator<K, V> backing, Function<V, V_T> transformer) {
_backing = backing;
_transformer = transformer;

View File

@@ -1,4 +1,8 @@
package com.usatiuk.objects.iterators;
/**
* Optional-like interface, can either be {@link Data} or {@link Tombstone}.
* @param <T> the type of the value
*/
public interface MaybeTombstone<T> {
}

View File

@@ -9,19 +9,32 @@ import java.util.NavigableMap;
import java.util.NoSuchElementException;
import java.util.TreeMap;
/**
* A merging key-value iterator that combines multiple iterators into a single iterator.
*
* @param <K> the type of the keys
* @param <V> the type of the values
*/
public class MergingKvIterator<K extends Comparable<K>, V> extends ReversibleKvIterator<K, V> {
private final NavigableMap<K, IteratorEntry<K, V>> _sortedIterators = new TreeMap<>();
private final String _name;
private final List<IteratorEntry<K, V>> _iterators;
public MergingKvIterator(String name, IteratorStart startType, K startKey, List<IterProdFn<K, V>> iterators) {
_goingForward = true;
_name = name;
// Why streams are so slow?
/**
* Constructs a MergingKvIterator with the specified start type, start key, and list of iterators.
* The iterators have priority based on their order in the list: if two iterators have the same key,
* the one that is in the beginning of the list will be used.
*
* @param startType the starting position relative to the startKey
* @param startKey the starting key
* @param iterators the list of iterators to merge
*/
public MergingKvIterator(IteratorStart startType, K startKey, List<CloseableKvIterator<K, V>> iterators) {
_goingForward = true;
{
IteratorEntry<K, V>[] iteratorEntries = new IteratorEntry[iterators.size()];
for (int i = 0; i < iterators.size(); i++) {
iteratorEntries[i] = new IteratorEntry<>(i, iterators.get(i).get(startType, startKey));
iteratorEntries[i] = new IteratorEntry<>(i, iterators.get(i));
}
_iterators = List.of(iteratorEntries);
}
@@ -90,9 +103,18 @@ public class MergingKvIterator<K extends Comparable<K>, V> extends ReversibleKvI
// }
}
/**
* Constructs a MergingKvIterator with the specified start type, start key, and array of iterators.
* The iterators have priority based on their order in the array: if two iterators have the same key,
* the one that is in the beginning of the array will be used.
*
* @param startType the starting position relative to the startKey
* @param startKey the starting key
* @param iterators the array of iterators to merge
*/
@SafeVarargs
public MergingKvIterator(String name, IteratorStart startType, K startKey, IterProdFn<K, V>... iterators) {
this(name, startType, startKey, List.of(iterators));
public MergingKvIterator(IteratorStart startType, K startKey, CloseableKvIterator<K, V>... iterators) {
this(startType, startKey, List.of(iterators));
}
private void advanceIterator(IteratorEntry<K, V> iteratorEntry) {
@@ -151,7 +173,6 @@ public class MergingKvIterator<K extends Comparable<K>, V> extends ReversibleKvI
|| (!_goingForward && peekImpl().compareTo(cur.getKey()) >= 0))) {
skipImpl();
}
Log.tracev("{0} Reversed to {1}", _name, _sortedIterators);
}
@Override
@@ -199,28 +220,14 @@ public class MergingKvIterator<K extends Comparable<K>, V> extends ReversibleKvI
@Override
public String toString() {
return "MergingKvIterator{" +
"_name='" + _name + '\'' +
", _sortedIterators=" + _sortedIterators.keySet() +
", _iterators=" + _iterators +
'}';
}
private interface FirstMatchState<K extends Comparable<K>, V> {
}
private record IteratorEntry<K extends Comparable<K>, V>(int priority, CloseableKvIterator<K, V> iterator) {
public IteratorEntry<K, V> reversed() {
return new IteratorEntry<>(priority, iterator.reversed());
}
}
private record FirstMatchNone<K extends Comparable<K>, V>() implements FirstMatchState<K, V> {
}
private record FirstMatchFound<K extends Comparable<K>, V>(
CloseableKvIterator<K, V> iterator) implements FirstMatchState<K, V> {
}
private record FirstMatchConsumed<K extends Comparable<K>, V>() implements FirstMatchState<K, V> {
}
}

View File

@@ -4,11 +4,25 @@ import org.apache.commons.lang3.tuple.Pair;
import java.util.*;
/**
* A key-value iterator for a {@link NavigableMap}.
* It allows iterating over the keys and values in a sorted order.
*
* @param <K> the type of the keys
* @param <V> the type of the values
*/
public class NavigableMapKvIterator<K extends Comparable<K>, V> extends ReversibleKvIterator<K, V> {
private final NavigableMap<K, V> _map;
private Iterator<Map.Entry<K, V>> _iterator;
private Map.Entry<K, V> _next;
/**
* Constructs a NavigableMapKvIterator with the specified map, start type, and start key.
*
* @param map the map to iterate over
* @param start the starting position relative to the startKey
* @param key the starting key
*/
public NavigableMapKvIterator(NavigableMap<K, ? extends V> map, IteratorStart start, K key) {
_map = (NavigableMap<K, V>) map;
SortedMap<K, V> _view;

View File

@@ -2,9 +2,19 @@ package com.usatiuk.objects.iterators;
import org.apache.commons.lang3.tuple.Pair;
/**
* A wrapper for a key-value iterator that iterates in reverse order.
* @param <K> the type of the keys
* @param <V> the type of the values
*/
public class ReversedKvIterator<K extends Comparable<? super K>, V> implements CloseableKvIterator<K, V> {
private final CloseableKvIterator<K, V> _backing;
/**
* Constructs a ReversedKvIterator with the specified backing iterator.
*
* @param backing the backing iterator
*/
public ReversedKvIterator(CloseableKvIterator<K, V> backing) {
_backing = backing;
}

View File

@@ -2,9 +2,21 @@ package com.usatiuk.objects.iterators;
import org.apache.commons.lang3.tuple.Pair;
/**
* Base class for a reversible key-value iterator.
*
* @param <K> the type of the keys
* @param <V> the type of the values
*/
public abstract class ReversibleKvIterator<K extends Comparable<K>, V> implements CloseableKvIterator<K, V> {
/**
* The current direction of the iterator.
*/
protected boolean _goingForward;
/**
* Reverses the current direction of the iterator.
*/
protected abstract void reverse();
private void ensureForward() {
@@ -19,12 +31,33 @@ public abstract class ReversibleKvIterator<K extends Comparable<K>, V> implement
}
}
/**
* Fills the next element in the iterator, depending on the current direction.
*
* @throws IllegalStateException if there is no next element
*/
abstract protected K peekImpl();
/**
* Skips the next element in the iterator, depending on the current direction.
*
* @throws IllegalStateException if there is no next element
*/
abstract protected void skipImpl();
/**
* Checks if there is a next element in the iterator, depending on the current direction.
*
* @return true if there is a next element, false otherwise
*/
abstract protected boolean hasImpl();
/**
* Returns the next element in the iterator, depending on the current direction.
*
* @return the next element
* @throws IllegalStateException if there is no next element
*/
abstract protected Pair<K, V> nextImpl();
@Override

View File

@@ -1,4 +1,8 @@
package com.usatiuk.objects.iterators;
/**
* Indicates that the value is a tombstone.
* @param <V> the type of the value
*/
public interface Tombstone<V> extends MaybeTombstone<V> {
}

View File

@@ -1,4 +1,8 @@
package com.usatiuk.objects.iterators;
/**
* Simple implementation of the Tombstone interface.
* @param <V> the type of the value
*/
public record TombstoneImpl<V>() implements Tombstone<V> {
}

View File

@@ -1,22 +0,0 @@
package com.usatiuk.objects.iterators;
import java.util.List;
public abstract class TombstoneMergingKvIterator {
public static <K extends Comparable<K>, V> CloseableKvIterator<K, V> of(String name, IteratorStart startType, K startKey, List<IterProdFn<K, MaybeTombstone<V>>> iterators) {
return new PredicateKvIterator<K, MaybeTombstone<V>, V>(
new MergingKvIterator<K, MaybeTombstone<V>>(name + "-merging", startType, startKey, iterators),
startType, startKey,
pair -> {
// Log.tracev("{0} - Processing pair {1}", name, pair);
if (pair instanceof Tombstone<V>) {
return null;
}
return ((Data<V>) pair).value();
});
}
public static <K extends Comparable<K>, V> CloseableKvIterator<K, V> of(String name, IteratorStart startType, K startKey, IterProdFn<K, MaybeTombstone<V>>... iterators) {
return of(name, startType, startKey, List.of(iterators));
}
}

View File

@@ -2,26 +2,40 @@ package com.usatiuk.objects.iterators;
import org.apache.commons.lang3.tuple.Pair;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.function.Function;
public class PredicateKvIterator<K extends Comparable<K>, V, V_T> extends ReversibleKvIterator<K, V_T> {
private final CloseableKvIterator<K, V> _backing;
private final Function<V, V_T> _transformer;
private Pair<K, V_T> _next = null;
/**
* A key-value iterator that skips tombstones.
*
* @param <K> the type of the keys
* @param <V> the type of the values
*/
public class TombstoneSkippingIterator<K extends Comparable<K>, V> extends ReversibleKvIterator<K, V> {
private final MergingKvIterator<K, MaybeTombstone<V>> _backing;
private Pair<K, V> _next = null;
private boolean _checkedNext = false;
public PredicateKvIterator(CloseableKvIterator<K, V> backing, IteratorStart start, K startKey, Function<V, V_T> transformer) {
/**
* Constructs a TombstoneSkippingIterator with the specified start position, start key, and list of iterators.
* Like {@link MappingKvIterator}, iterators have a priority depending on their order in the list.
*
* @param start the starting position relative to the startKey
* @param startKey the starting key
* @param iterators the list of iterators to merge
*/
public TombstoneSkippingIterator(IteratorStart start, K startKey, List<CloseableKvIterator<K, MaybeTombstone<V>>> iterators) {
_goingForward = true;
_backing = backing;
_transformer = transformer;
_backing = new MergingKvIterator<>(start, startKey, iterators);
if (start == IteratorStart.GE || start == IteratorStart.GT)
return;
fillNext();
boolean shouldGoBack = false;
if (canHaveNext())
tryFillNext();
if (start == IteratorStart.LE) {
if (_next == null || _next.getKey().compareTo(startKey) > 0) {
shouldGoBack = true;
@@ -38,34 +52,27 @@ public class PredicateKvIterator<K extends Comparable<K>, V, V_T> extends Revers
_backing.skipPrev();
fillNext();
_goingForward = true;
_backing.skip();
if (_next != null)
_backing.skip();
fillNext();
}
}
private boolean canHaveNext() {
return (_goingForward ? _backing.hasNext() : _backing.hasPrev());
}
// switch (start) {
// case LT -> {
//// assert _next == null || _next.getKey().compareTo(startKey) < 0;
// }
// case LE -> {
//// assert _next == null || _next.getKey().compareTo(startKey) <= 0;
// }
// case GT -> {
// assert _next == null || _next.getKey().compareTo(startKey) > 0;
// }
// case GE -> {
// assert _next == null || _next.getKey().compareTo(startKey) >= 0;
// }
// }
private boolean tryFillNext() {
var next = _goingForward ? _backing.next() : _backing.prev();
if (next.getValue() instanceof Tombstone<?>)
return false;
_next = Pair.of(next.getKey(), ((Data<V>) next.getValue()).value());
return true;
}
private void fillNext() {
while ((_goingForward ? _backing.hasNext() : _backing.hasPrev()) && _next == null) {
var next = _goingForward ? _backing.next() : _backing.prev();
var transformed = _transformer.apply(next.getValue());
if (transformed == null)
continue;
_next = Pair.of(next.getKey(), transformed);
while (_next == null && canHaveNext()) {
tryFillNext();
}
_checkedNext = true;
}
@@ -80,9 +87,6 @@ public class PredicateKvIterator<K extends Comparable<K>, V, V_T> extends Revers
else if (!_goingForward && !wasAtEnd)
_backing.skipPrev();
// if (!wasAtEnd)
// Log.tracev("Skipped in reverse: {0}", _next);
_next = null;
_checkedNext = false;
}
@@ -117,7 +121,7 @@ public class PredicateKvIterator<K extends Comparable<K>, V, V_T> extends Revers
}
@Override
protected Pair<K, V_T> nextImpl() {
protected Pair<K, V> nextImpl() {
if (!_checkedNext)
fillNext();
@@ -137,7 +141,6 @@ public class PredicateKvIterator<K extends Comparable<K>, V, V_T> extends Revers
@Override
public String toString() {
return "PredicateKvIterator{" +
"_backing=" + _backing +
", _next=" + _next +
'}';
}

View File

@@ -2,16 +2,45 @@ package com.usatiuk.objects.snapshot;
import com.usatiuk.objects.iterators.CloseableKvIterator;
import com.usatiuk.objects.iterators.IteratorStart;
import com.usatiuk.utils.AutoCloseableNoThrow;
import com.usatiuk.objects.iterators.MaybeTombstone;
import javax.annotation.Nonnull;
import java.util.List;
import java.util.Optional;
public interface Snapshot<K extends Comparable<K>, V> extends AutoCloseableNoThrow {
CloseableKvIterator<K, V> getIterator(IteratorStart start, K key);
/**
* Interface for a snapshot of a database.
* Represents a point-in-time view of a storage, with a unique ID.
*
* @param <K> the type of the key
* @param <V> the type of the value
*/
public interface Snapshot<K extends Comparable<K>, V> extends AutoCloseable {
/**
* Get a list of iterators representing the snapshot.
* The iterators have priority: the first one in the list is the highest.
* The data type of the iterator is a tombstone: a tombstone represents a deleted value that does not exist anymore.
* The list of iterators is intended to be consumed by {@link com.usatiuk.objects.iterators.TombstoneSkippingIterator}
*
* @return a list of iterators
*/
List<CloseableKvIterator<K, MaybeTombstone<V>>> getIterator(IteratorStart start, K key);
/**
* Read an object from the snapshot.
* @param name the name of the object
* @return an optional containing the object if it exists, or an empty optional if it does not
*/
@Nonnull
Optional<V> readObject(K name);
/**
* Get the ID of the snapshot.
* @return the ID of the snapshot
*/
long id();
@Override
void close();
}

View File

@@ -5,6 +5,7 @@ import com.usatiuk.objects.JDataVersionedWrapperLazy;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.iterators.*;
import com.usatiuk.objects.snapshot.Snapshot;
import com.usatiuk.utils.ListUtils;
import io.quarkus.logging.Log;
import io.quarkus.runtime.StartupEvent;
import jakarta.annotation.Priority;
@@ -16,12 +17,18 @@ import org.eclipse.microprofile.config.inject.ConfigProperty;
import org.pcollections.TreePMap;
import javax.annotation.Nonnull;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
/**
* CachingObjectPersistentStore is a caching layer for the SerializingObjectPersistentStore
* It stores the already deserialized objects in memory.
* Not (yet) thread safe for writes.
*/
@ApplicationScoped
public class CachingObjectPersistentStore {
private final AtomicReference<Cache> _cache;
@@ -29,10 +36,10 @@ public class CachingObjectPersistentStore {
SerializingObjectPersistentStore delegate;
@ConfigProperty(name = "dhfs.objects.lru.print-stats")
boolean printStats;
private ExecutorService _commitExecutor;
private ExecutorService _statusExecutor;
private AtomicLong _cached = new AtomicLong();
private AtomicLong _cacheTries = new AtomicLong();
public CachingObjectPersistentStore(@ConfigProperty(name = "dhfs.objects.lru.limit") int sizeLimit) {
_cache = new AtomicReference<>(
new Cache(TreePMap.empty(), 0, -1, sizeLimit)
@@ -44,7 +51,6 @@ public class CachingObjectPersistentStore {
_cache.set(_cache.get().withVersion(s.id()));
}
_commitExecutor = Executors.newSingleThreadExecutor();
if (printStats) {
_statusExecutor = Executors.newSingleThreadExecutor();
_statusExecutor.submit(() -> {
@@ -61,11 +67,16 @@ public class CachingObjectPersistentStore {
}
}
/**
* Commit the transaction to the underlying store and update the cache.
* Once this function returns, the transaction is committed and the cache is updated.
* @param objs the transaction manifest object
* @param txId the transaction ID
*/
public void commitTx(TxManifestObj<? extends JDataVersionedWrapper> objs, long txId) {
Log.tracev("Committing: {0} writes, {1} deletes", objs.written().size(), objs.deleted().size());
var cache = _cache.get();
var commitFuture = _commitExecutor.submit(() -> delegate.prepareTx(objs, txId).run());
for (var write : objs.written()) {
cache = cache.withPut(write.getLeft(), Optional.of(write.getRight()));
}
@@ -73,16 +84,18 @@ public class CachingObjectPersistentStore {
cache = cache.withPut(del, Optional.empty());
}
cache = cache.withVersion(txId);
try {
commitFuture.get();
} catch (Exception e) {
throw new RuntimeException(e);
}
delegate.commitTx(objs, txId);
_cache.set(cache);
Log.tracev("Committed: {0} writes, {1} deletes", objs.written().size(), objs.deleted().size());
}
/**
* Get a snapshot of underlying store and the cache.
* Objects are read from the cache if possible, if not, they are read from the underlying store,
* then possibly lazily cached when their data is accessed.
* @return a snapshot of the cached store
*/
public Snapshot<JObjectKey, JDataVersionedWrapper> getSnapshot() {
while (true) {
var cache = _cache.get();
@@ -150,10 +163,12 @@ public class CachingObjectPersistentStore {
}
@Override
public CloseableKvIterator<JObjectKey, JDataVersionedWrapper> getIterator(IteratorStart start, JObjectKey key) {
return TombstoneMergingKvIterator.<JObjectKey, JDataVersionedWrapper>of("cache", start, key,
(mS, mK) -> new NavigableMapKvIterator<JObjectKey, MaybeTombstone<JDataVersionedWrapper>>(_curCache.map(), mS, mK),
(mS, mK) -> new CachingKvIterator(_backing.getIterator(start, key)));
public List<CloseableKvIterator<JObjectKey, MaybeTombstone<JDataVersionedWrapper>>> getIterator(IteratorStart start, JObjectKey key) {
return ListUtils.prependAndMap(
new NavigableMapKvIterator<JObjectKey, MaybeTombstone<JDataVersionedWrapper>>(_curCache.map(), start, key),
_backing.getIterator(start, key),
i -> new CachingKvIterator((CloseableKvIterator<JObjectKey, JDataVersionedWrapper>) (CloseableKvIterator<JObjectKey, ?>) i)
);
}
@Nonnull

View File

@@ -3,10 +3,7 @@ package com.usatiuk.objects.stores;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.JObjectKeyMax;
import com.usatiuk.objects.JObjectKeyMin;
import com.usatiuk.objects.iterators.CloseableKvIterator;
import com.usatiuk.objects.iterators.IteratorStart;
import com.usatiuk.objects.iterators.KeyPredicateKvIterator;
import com.usatiuk.objects.iterators.ReversibleKvIterator;
import com.usatiuk.objects.iterators.*;
import com.usatiuk.objects.snapshot.Snapshot;
import io.quarkus.arc.properties.IfBuildProperty;
import io.quarkus.logging.Log;
@@ -26,16 +23,22 @@ import java.lang.ref.Cleaner;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional;
import static org.lmdbjava.DbiFlags.MDB_CREATE;
import static org.lmdbjava.Env.create;
/**
* Persistent object storage using LMDB
*/
@ApplicationScoped
@IfBuildProperty(name = "dhfs.objects.persistence", stringValue = "lmdb")
public class LmdbObjectPersistentStore implements ObjectPersistentStore {
private static final String DB_NAME = "objects";
// LMDB object name for the transaction id
private static final String DB_VER_OBJ_NAME_STR = "__DB_VER_OBJ";
private static final ByteBuffer DB_VER_OBJ_NAME;
@@ -101,6 +104,12 @@ public class LmdbObjectPersistentStore implements ObjectPersistentStore {
if (!_ready) throw new IllegalStateException("Wrong service order!");
}
/**
* Get a snapshot of the database.
* Note that the ByteBuffers are invalid after the snapshot is closed.
*
* @return a snapshot of the database
*/
@Override
public Snapshot<JObjectKey, ByteBuffer> getSnapshot() {
var txn = _env.txnRead();
@@ -112,9 +121,9 @@ public class LmdbObjectPersistentStore implements ObjectPersistentStore {
private boolean _closed = false;
@Override
public CloseableKvIterator<JObjectKey, ByteBuffer> getIterator(IteratorStart start, JObjectKey key) {
public List<CloseableKvIterator<JObjectKey, MaybeTombstone<ByteBuffer>>> getIterator(IteratorStart start, JObjectKey key) {
assert !_closed;
return new KeyPredicateKvIterator<>(new LmdbKvIterator(_txn, start, key), start, key, (k) -> !k.value().equals(DB_VER_OBJ_NAME_STR));
return List.of(new KeyPredicateKvIterator<>(new LmdbKvIterator(_txn, start, key), start, key, (k) -> !k.value().equals(DB_VER_OBJ_NAME_STR)));
}
@Nonnull
@@ -146,10 +155,9 @@ public class LmdbObjectPersistentStore implements ObjectPersistentStore {
}
@Override
public Runnable prepareTx(TxManifestRaw names, long txId) {
public void commitTx(TxManifestRaw names, long txId) {
verifyReady();
var txn = _env.txnWrite();
try {
try (var txn = _env.txnWrite()) {
for (var written : names.written()) {
var putBb = _db.reserve(txn, written.getKey().toByteBuffer(), written.getValue().size());
written.getValue().copyTo(putBb);
@@ -164,17 +172,8 @@ public class LmdbObjectPersistentStore implements ObjectPersistentStore {
bbData.putLong(txId);
bbData.flip();
_db.put(txn, DB_VER_OBJ_NAME.asReadOnlyBuffer(), bbData);
} catch (Throwable t) {
txn.close();
throw t;
txn.commit();
}
return () -> {
try {
txn.commit();
} finally {
txn.close();
}
};
}
@Override
@@ -189,13 +188,7 @@ public class LmdbObjectPersistentStore implements ObjectPersistentStore {
return _root.toFile().getFreeSpace();
}
@Override
public long getUsableSpace() {
verifyReady();
return _root.toFile().getUsableSpace();
}
private class LmdbKvIterator extends ReversibleKvIterator<JObjectKey, ByteBuffer> {
private class LmdbKvIterator extends ReversibleKvIterator<JObjectKey, MaybeTombstone<ByteBuffer>> {
private static final Cleaner CLEANER = Cleaner.create();
private final Txn<ByteBuffer> _txn; // Managed by the snapshot
private final Cursor<ByteBuffer> _cursor;
@@ -350,13 +343,13 @@ public class LmdbObjectPersistentStore implements ObjectPersistentStore {
}
@Override
protected Pair<JObjectKey, ByteBuffer> nextImpl() {
protected Pair<JObjectKey, MaybeTombstone<ByteBuffer>> nextImpl() {
if (!_hasNext) {
throw new NoSuchElementException("No more elements");
}
// TODO: Right now with java serialization it doesn't matter, it's all copied to arrays anyway
var val = _cursor.val();
var ret = Pair.of(JObjectKey.fromByteBuffer(_cursor.key()), val.asReadOnlyBuffer());
Pair<JObjectKey, MaybeTombstone<ByteBuffer>> ret = Pair.of(JObjectKey.fromByteBuffer(_cursor.key()), new DataWrapper<>(val.asReadOnlyBuffer()));
if (_goingForward)
_hasNext = _cursor.next();
else

View File

@@ -2,10 +2,7 @@ package com.usatiuk.objects.stores;
import com.google.protobuf.ByteString;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.iterators.CloseableKvIterator;
import com.usatiuk.objects.iterators.IteratorStart;
import com.usatiuk.objects.iterators.MappingKvIterator;
import com.usatiuk.objects.iterators.NavigableMapKvIterator;
import com.usatiuk.objects.iterators.*;
import com.usatiuk.objects.snapshot.Snapshot;
import io.quarkus.arc.properties.IfBuildProperty;
import jakarta.enterprise.context.ApplicationScoped;
@@ -13,9 +10,15 @@ import org.pcollections.TreePMap;
import javax.annotation.Nonnull;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Stream;
/**
* In-memory implementation of the ObjectPersistentStore interface.
* For testing purposes.
*/
@ApplicationScoped
@IfBuildProperty(name = "dhfs.objects.persistence", stringValue = "memory")
public class MemoryObjectPersistentStore implements ObjectPersistentStore {
@@ -31,8 +34,8 @@ public class MemoryObjectPersistentStore implements ObjectPersistentStore {
private final long _lastCommitId = MemoryObjectPersistentStore.this._lastCommitId;
@Override
public CloseableKvIterator<JObjectKey, ByteBuffer> getIterator(IteratorStart start, JObjectKey key) {
return new MappingKvIterator<>(new NavigableMapKvIterator<>(_objects, start, key), ByteString::asReadOnlyByteBuffer);
public List<CloseableKvIterator<JObjectKey, MaybeTombstone<ByteBuffer>>> getIterator(IteratorStart start, JObjectKey key) {
return List.of(new MappingKvIterator<>(new NavigableMapKvIterator<>(_objects, start, key), s -> new DataWrapper<>(s.asReadOnlyByteBuffer())));
}
@Nonnull
@@ -54,19 +57,18 @@ public class MemoryObjectPersistentStore implements ObjectPersistentStore {
}
}
public Runnable prepareTx(TxManifestRaw names, long txId) {
return () -> {
synchronized (this) {
for (var written : names.written()) {
_objects = _objects.plus(written.getKey(), written.getValue());
}
for (JObjectKey key : names.deleted()) {
_objects = _objects.minus(key);
}
assert txId > _lastCommitId;
_lastCommitId = txId;
@Override
public void commitTx(TxManifestRaw names, long txId) {
synchronized (this) {
for (var written : names.written()) {
_objects = _objects.plus(written.getKey(), written.getValue());
}
};
for (JObjectKey key : names.deleted()) {
_objects = _objects.minus(key);
}
assert txId > _lastCommitId;
_lastCommitId = txId;
}
}
@Override
@@ -78,9 +80,4 @@ public class MemoryObjectPersistentStore implements ObjectPersistentStore {
public long getFreeSpace() {
return 0;
}
@Override
public long getUsableSpace() {
return 0;
}
}

View File

@@ -8,16 +8,33 @@ import javax.annotation.Nonnull;
import java.nio.ByteBuffer;
import java.util.Optional;
// Persistent storage of objects
// All changes are written as sequential transactions
/**
* Interface for a persistent store of objects.
* Does not have to be thread-safe! (yet), it is expected that all commits are done by the same thread.
*/
public interface ObjectPersistentStore {
/**
* Get a snapshot of the persistent store.
* @return a snapshot of the persistent store
*/
Snapshot<JObjectKey, ByteBuffer> getSnapshot();
Runnable prepareTx(TxManifestRaw names, long txId);
/**
* Commit a transaction to the persistent store.
* @param names the transaction manifest
* @param txId the transaction ID
*/
void commitTx(TxManifestRaw names, long txId);
/**
* Get the size of the persistent store.
* @return the size of the persistent store
*/
long getTotalSpace();
/**
* Get the free space of the persistent store.
* @return the free space of the persistent store
*/
long getFreeSpace();
long getUsableSpace();
}

View File

@@ -1,22 +1,26 @@
package com.usatiuk.objects.stores;
import com.google.protobuf.ByteString;
import com.usatiuk.objects.JDataVersionedWrapper;
import com.usatiuk.objects.JDataVersionedWrapperSerializer;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.ObjectSerializer;
import com.usatiuk.objects.iterators.CloseableKvIterator;
import com.usatiuk.objects.iterators.IteratorStart;
import com.usatiuk.objects.iterators.MappingKvIterator;
import com.usatiuk.objects.iterators.*;
import com.usatiuk.objects.snapshot.Snapshot;
import com.usatiuk.utils.ListUtils;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.apache.commons.lang3.tuple.Pair;
import javax.annotation.Nonnull;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Optional;
import java.util.stream.Stream;
/**
* Serializing wrapper for the ObjectPersistentStore.
* It serializes the objects before storing them in the persistent store.
* It deserializes the objects after reading them from the persistent store.
*/
@ApplicationScoped
public class SerializingObjectPersistentStore {
@Inject
@@ -25,13 +29,22 @@ public class SerializingObjectPersistentStore {
@Inject
ObjectPersistentStore delegateStore;
/**
* Get a snapshot of the persistent store, with deserialized objects.
*
* The objects are deserialized lazily, only when their data is accessed.
*
* @return a snapshot of the persistent store
*/
public Snapshot<JObjectKey, JDataVersionedWrapper> getSnapshot() {
return new Snapshot<JObjectKey, JDataVersionedWrapper>() {
private final Snapshot<JObjectKey, ByteBuffer> _backing = delegateStore.getSnapshot();
@Override
public CloseableKvIterator<JObjectKey, JDataVersionedWrapper> getIterator(IteratorStart start, JObjectKey key) {
return new MappingKvIterator<>(_backing.getIterator(start, key), d -> serializer.deserialize(d));
public List<CloseableKvIterator<JObjectKey, MaybeTombstone<JDataVersionedWrapper>>> getIterator(IteratorStart start, JObjectKey key) {
return ListUtils.map(_backing.getIterator(start, key),
i -> new MappingKvIterator<JObjectKey, MaybeTombstone<ByteBuffer>, MaybeTombstone<JDataVersionedWrapper>>(i,
d -> serializer.deserialize(((DataWrapper<ByteBuffer>) d).value())));
}
@Nonnull
@@ -53,6 +66,12 @@ public class SerializingObjectPersistentStore {
}
/**
* Serialize the objects, in parallel
* @param objs the objects to serialize
* @return the serialized objects
*/
private TxManifestRaw prepareManifest(TxManifestObj<? extends JDataVersionedWrapper> objs) {
return new TxManifestRaw(
objs.written().parallelStream()
@@ -61,7 +80,12 @@ public class SerializingObjectPersistentStore {
, objs.deleted());
}
Runnable prepareTx(TxManifestObj<? extends JDataVersionedWrapper> objects, long txId) {
return delegateStore.prepareTx(prepareManifest(objects), txId);
/**
* Commit a transaction to the persistent store.
* @param objects the transaction manifest
* @param txId the transaction ID
*/
void commitTx(TxManifestObj<? extends JDataVersionedWrapper> objects, long txId) {
delegateStore.commitTx(prepareManifest(objects), txId);
}
}

View File

@@ -6,7 +6,6 @@ import org.apache.commons.lang3.tuple.Pair;
import java.io.Serializable;
import java.util.Collection;
// FIXME: Serializable
public record TxManifestObj<T>(Collection<Pair<JObjectKey, T>> written,
Collection<JObjectKey> deleted) implements Serializable {
}

View File

@@ -7,7 +7,6 @@ import org.apache.commons.lang3.tuple.Pair;
import java.io.Serializable;
import java.util.Collection;
// FIXME: Serializable
public record TxManifestRaw(Collection<Pair<JObjectKey, ByteString>> written,
Collection<JObjectKey> deleted) implements Serializable {
}

View File

@@ -3,10 +3,13 @@ package com.usatiuk.objects.stores;
import com.usatiuk.objects.JDataVersionedWrapper;
import com.usatiuk.objects.JDataVersionedWrapperImpl;
import com.usatiuk.objects.JObjectKey;
import com.usatiuk.objects.iterators.*;
import com.usatiuk.objects.iterators.CloseableKvIterator;
import com.usatiuk.objects.iterators.IteratorStart;
import com.usatiuk.objects.iterators.MaybeTombstone;
import com.usatiuk.objects.iterators.NavigableMapKvIterator;
import com.usatiuk.objects.snapshot.Snapshot;
import com.usatiuk.objects.transaction.TxCommitException;
import com.usatiuk.objects.transaction.TxRecord;
import com.usatiuk.utils.ListUtils;
import io.quarkus.logging.Log;
import io.quarkus.runtime.ShutdownEvent;
import io.quarkus.runtime.StartupEvent;
@@ -26,24 +29,42 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Consumer;
/**
* Asynchronous write cache of objects.
* Objects are put into a write queue by commitTx, and written to the storage by a separate thread.
*/
@ApplicationScoped
public class WritebackObjectPersistentStore {
private final LinkedList<TxBundle> _pendingBundles = new LinkedList<>();
private final LinkedHashMap<Long, TxBundle> _notFlushedBundles = new LinkedHashMap<>();
private final AtomicReference<PendingWriteData> _pendingWrites = new AtomicReference<>(null);
private final Object _flushWaitSynchronizer = new Object();
private final AtomicLong _lastWrittenId = new AtomicLong(-1);
private final AtomicLong _lastCommittedId = new AtomicLong();
private final AtomicLong _waitedTotal = new AtomicLong(0);
@Inject
CachingObjectPersistentStore cachedStore;
@Inject
ExecutorService _callbackExecutor;
@ConfigProperty(name = "dhfs.objects.writeback.limit")
long sizeLimit;
private long currentSize = 0;
int sizeLimit;
private TxBundle _pendingBundle = null;
private int _curSize = 0;
private final AtomicReference<PendingWriteData> _pendingWrites = new AtomicReference<>(null);
private final ReentrantLock _pendingBundleLock = new ReentrantLock();
private final Condition _newBundleCondition = _pendingBundleLock.newCondition();
private final Condition _flushCondition = _pendingBundleLock.newCondition();
private final AtomicLong _lastFlushedId = new AtomicLong(-1);
private final AtomicLong _lastCommittedId = new AtomicLong(-1);
private final AtomicLong _waitedTotal = new AtomicLong(0);
private ExecutorService _writebackExecutor;
private ExecutorService _statusExecutor;
private volatile boolean _ready = false;
void init(@Observes @Priority(120) StartupEvent event) {
@@ -61,8 +82,8 @@ public class WritebackObjectPersistentStore {
try {
while (true) {
Thread.sleep(1000);
if (currentSize > 0)
Log.info("Tx commit status: size=" + currentSize / 1024 / 1024 + "MB");
if (_curSize > 0)
Log.info("Tx commit status: size=" + _curSize / 1024 / 1024 + "MB");
}
} catch (InterruptedException ignored) {
}
@@ -72,7 +93,7 @@ public class WritebackObjectPersistentStore {
lastTxId = s.id();
}
_lastCommittedId.set(lastTxId);
_lastWrittenId.set(lastTxId);
_lastFlushedId.set(lastTxId);
_pendingWrites.set(new PendingWriteData(TreePMap.empty(), lastTxId, lastTxId));
_ready = true;
}
@@ -80,11 +101,14 @@ public class WritebackObjectPersistentStore {
void shutdown(@Observes @Priority(890) ShutdownEvent event) throws InterruptedException {
Log.info("Waiting for all transactions to drain");
synchronized (_flushWaitSynchronizer) {
_ready = false;
while (currentSize > 0) {
_flushWaitSynchronizer.wait();
_ready = false;
_pendingBundleLock.lock();
try {
while (_curSize > 0) {
_flushCondition.await();
}
} finally {
_pendingBundleLock.unlock();
}
_writebackExecutor.shutdownNow();
@@ -98,21 +122,19 @@ public class WritebackObjectPersistentStore {
private void writeback() {
while (!Thread.interrupted()) {
try {
TxBundle bundle = new TxBundle(0);
synchronized (_pendingBundles) {
while (_pendingBundles.isEmpty() || !_pendingBundles.peek()._ready)
_pendingBundles.wait();
TxBundle bundle;
_pendingBundleLock.lock();
try {
while (_pendingBundle == null)
_newBundleCondition.await();
bundle = _pendingBundle;
_pendingBundle = null;
long diff = 0;
while (!_pendingBundles.isEmpty() && _pendingBundles.peek()._ready) {
var toCompress = _pendingBundles.poll();
diff -= toCompress.size();
bundle.compress(toCompress);
}
diff += bundle.size();
synchronized (_flushWaitSynchronizer) {
currentSize += diff;
}
_curSize -= bundle.size();
assert _curSize == 0;
_flushCondition.signal();
} finally {
_pendingBundleLock.unlock();
}
var toWrite = new ArrayList<Pair<JObjectKey, JDataVersionedWrapper>>();
@@ -132,15 +154,12 @@ public class WritebackObjectPersistentStore {
}
}
cachedStore.commitTx(
new TxManifestObj<>(
Collections.unmodifiableList(toWrite),
Collections.unmodifiableList(toDelete)
), bundle.id());
cachedStore.commitTx(new TxManifestObj<>(toWrite, toDelete), bundle.id());
Log.tracev("Bundle {0} committed", bundle.id());
while (true) {
_pendingBundleLock.lock();
try {
var curPw = _pendingWrites.get();
var curPwMap = curPw.pendingWrites();
for (var e : bundle._entries.values()) {
@@ -153,25 +172,16 @@ public class WritebackObjectPersistentStore {
bundle.id(),
curPw.lastCommittedId()
);
if (_pendingWrites.compareAndSet(curPw, newCurPw))
break;
_pendingWrites.compareAndSet(curPw, newCurPw);
} finally {
_pendingBundleLock.unlock();
}
List<List<Runnable>> callbacks = new ArrayList<>();
synchronized (_notFlushedBundles) {
_lastWrittenId.set(bundle.id());
while (!_notFlushedBundles.isEmpty() && _notFlushedBundles.firstEntry().getKey() <= bundle.id()) {
callbacks.add(_notFlushedBundles.pollFirstEntry().getValue().setCommitted());
}
}
callbacks.forEach(l -> l.forEach(Runnable::run));
synchronized (_flushWaitSynchronizer) {
currentSize -= bundle.size();
// FIXME:
if (currentSize <= sizeLimit || !_ready)
_flushWaitSynchronizer.notifyAll();
}
_lastFlushedId.set(bundle.id());
var callbacks = bundle.callbacks();
_callbackExecutor.submit(() -> {
callbacks.forEach(Runnable::run);
});
} catch (InterruptedException ignored) {
} catch (Exception e) {
Log.error("Uncaught exception in writeback", e);
@@ -182,134 +192,126 @@ public class WritebackObjectPersistentStore {
Log.info("Writeback thread exiting");
}
public long commitBundle(Collection<TxRecord.TxObjectRecord<?>> writes) {
private long commitBundle(Collection<TxRecord.TxObjectRecord<?>> writes) {
verifyReady();
boolean wait = false;
while (true) {
if (wait) {
synchronized (_flushWaitSynchronizer) {
long started = System.currentTimeMillis();
while (currentSize > sizeLimit) {
try {
_flushWaitSynchronizer.wait();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
long waited = System.currentTimeMillis() - started;
_waitedTotal.addAndGet(waited);
if (Log.isTraceEnabled())
Log.tracev("Thread {0} waited for tx bundle for {1} ms", Thread.currentThread().getName(), waited);
wait = false;
}
_pendingBundleLock.lock();
try {
boolean shouldWake = false;
if (_curSize > sizeLimit) {
shouldWake = true;
long started = System.currentTimeMillis();
while (_curSize > sizeLimit)
_flushCondition.await();
long waited = System.currentTimeMillis() - started;
_waitedTotal.addAndGet(waited);
if (Log.isTraceEnabled())
Log.tracev("Thread {0} waited for tx bundle for {1} ms", Thread.currentThread().getName(), waited);
}
synchronized (_pendingBundles) {
synchronized (_flushWaitSynchronizer) {
if (currentSize > sizeLimit) {
if (!_pendingBundles.isEmpty() && _pendingBundles.peek()._ready) {
var target = _pendingBundles.poll();
var oursId = _lastCommittedId.incrementAndGet();
long diff = -target.size();
while (!_pendingBundles.isEmpty() && _pendingBundles.peek()._ready) {
var toCompress = _pendingBundles.poll();
diff -= toCompress.size();
target.compress(toCompress);
}
diff += target.size();
currentSize += diff;
_pendingBundles.addFirst(target);
}
var curBundle = _pendingBundle;
int oldSize = 0;
if (curBundle != null) {
oldSize = curBundle.size();
curBundle.setId(oursId);
} else {
curBundle = new TxBundle(oursId);
}
var curPw = _pendingWrites.get();
var curPwMap = curPw.pendingWrites();
for (var action : writes) {
var key = action.key();
switch (action) {
case TxRecord.TxObjectRecordWrite<?> write -> {
// Log.tracev("Flushing object {0}", write.key());
var wrapper = new JDataVersionedWrapperImpl(write.data(), oursId);
curPwMap = curPwMap.plus(key, new PendingWrite(wrapper, oursId));
curBundle.commit(wrapper);
}
if (currentSize > sizeLimit) {
wait = true;
continue;
case TxRecord.TxObjectRecordDeleted deleted -> {
// Log.tracev("Deleting object {0}", deleted.key());
curPwMap = curPwMap.plus(key, new PendingDelete(key, oursId));
curBundle.delete(key);
}
}
TxBundle bundle;
synchronized (_notFlushedBundles) {
bundle = new TxBundle(_lastCommittedId.incrementAndGet());
_pendingBundles.addLast(bundle);
_notFlushedBundles.put(bundle.id(), bundle);
}
for (var action : writes) {
switch (action) {
case TxRecord.TxObjectRecordWrite<?> write -> {
Log.tracev("Flushing object {0}", write.key());
bundle.commit(new JDataVersionedWrapperImpl(write.data(), bundle.id()));
}
case TxRecord.TxObjectRecordDeleted deleted -> {
Log.tracev("Deleting object {0}", deleted.key());
bundle.delete(deleted.key());
}
default -> {
throw new TxCommitException("Unexpected value: " + action.key());
}
}
}
while (true) {
var curPw = _pendingWrites.get();
var curPwMap = curPw.pendingWrites();
for (var e : ((TxBundle) bundle)._entries.values()) {
switch (e) {
case TxBundle.CommittedEntry c -> {
curPwMap = curPwMap.plus(c.key(), new PendingWrite(c.data, bundle.id()));
}
case TxBundle.DeletedEntry d -> {
curPwMap = curPwMap.plus(d.key(), new PendingDelete(d.key, bundle.id()));
}
default -> throw new IllegalStateException("Unexpected value: " + e);
}
}
// Now, make the changes visible to new iterators
var newCurPw = new PendingWriteData(
curPwMap,
curPw.lastFlushedId(),
bundle.id()
);
if (!_pendingWrites.compareAndSet(curPw, newCurPw))
continue;
((TxBundle) bundle).setReady();
if (_pendingBundles.peek() == bundle)
_pendingBundles.notify();
synchronized (_flushWaitSynchronizer) {
currentSize += ((TxBundle) bundle).size();
}
return bundle.id();
}
}
// Now, make the changes visible to new iterators
var newCurPw = new PendingWriteData(
curPwMap,
curPw.lastFlushedId(),
oursId
);
_pendingWrites.compareAndSet(curPw, newCurPw);
_pendingBundle = curBundle;
_newBundleCondition.signalAll();
_curSize += (curBundle.size() - oldSize);
if (shouldWake && _curSize < sizeLimit) {
_flushCondition.signal();
}
return oursId;
} catch (InterruptedException e) {
throw new RuntimeException(e);
} finally {
_pendingBundleLock.unlock();
}
}
public void asyncFence(long bundleId, Runnable fn) {
/**
* Run a given callback after the transaction with id txId is committed.
* If the transaction is already committed, the callback is run immediately.
*
* @param txId transaction id to wait for
* @param fn callback to run
*/
public void asyncFence(long txId, Runnable fn) {
verifyReady();
if (bundleId < 0) throw new IllegalArgumentException("txId should be >0!");
if (_lastWrittenId.get() >= bundleId) {
if (txId < 0) throw new IllegalArgumentException("txId should be >0!");
if (_lastFlushedId.get() >= txId) {
fn.run();
return;
}
synchronized (_notFlushedBundles) {
if (_lastWrittenId.get() >= bundleId) {
_pendingBundleLock.lock();
try {
if (_lastFlushedId.get() >= txId) {
fn.run();
return;
}
_notFlushedBundles.get(bundleId).addCallback(fn);
var pendingBundle = _pendingBundle;
if (pendingBundle == null) {
fn.run();
return;
}
pendingBundle.addCallback(fn);
} finally {
_pendingBundleLock.unlock();
}
}
/**
* Commit a transaction to the persistent store.
*
* @param writes the transaction manifest
* @return a function that allows to add a callback to be run after the transaction is committed
*/
public Consumer<Runnable> commitTx(Collection<TxRecord.TxObjectRecord<?>> writes) {
long bundleId = commitBundle(writes);
return r -> asyncFence(bundleId, r);
}
/**
* Get a snapshot of the persistent store, including the pending writes.
*
* @return a snapshot of the store
*/
public Snapshot<JObjectKey, JDataVersionedWrapper> getSnapshot() {
Snapshot<JObjectKey, JDataVersionedWrapper> cache = null;
PendingWriteData pw = null;
@@ -341,10 +343,8 @@ public class WritebackObjectPersistentStore {
private final long txId = finalPw.lastCommittedId();
@Override
public CloseableKvIterator<JObjectKey, JDataVersionedWrapper> getIterator(IteratorStart start, JObjectKey key) {
return TombstoneMergingKvIterator.<JObjectKey, JDataVersionedWrapper>of("writeback-ps", start, key,
(tS, tK) -> new NavigableMapKvIterator<>(_pendingWrites, tS, tK),
(tS, tK) -> (CloseableKvIterator<JObjectKey, MaybeTombstone<JDataVersionedWrapper>>) (CloseableKvIterator<JObjectKey, ?>) _cache.getIterator(tS, tK));
public List<CloseableKvIterator<JObjectKey, MaybeTombstone<JDataVersionedWrapper>>> getIterator(IteratorStart start, JObjectKey key) {
return ListUtils.prepend(new NavigableMapKvIterator<>(_pendingWrites, start, key), _cache.getIterator(start, key));
}
@Nonnull
@@ -381,46 +381,40 @@ public class WritebackObjectPersistentStore {
}
}
public interface VerboseReadResult {
}
private record PendingWriteData(TreePMap<JObjectKey, PendingWriteEntry> pendingWrites,
long lastFlushedId,
long lastCommittedId) {
}
private static class TxBundle {
private final LinkedHashMap<JObjectKey, BundleEntry> _entries = new LinkedHashMap<>();
private final HashMap<JObjectKey, BundleEntry> _entries = new HashMap<>();
private final ArrayList<Runnable> _callbacks = new ArrayList<>();
private int _size = 0;
private long _txId;
private volatile boolean _ready = false;
private long _size = 0;
private boolean _wasCommitted = false;
ArrayList<Runnable> callbacks() {
return _callbacks;
}
private TxBundle(long txId) {
_txId = txId;
}
public long id() {
return _txId;
public void setId(long id) {
_txId = id;
}
public void setReady() {
_ready = true;
public long id() {
return _txId;
}
public void addCallback(Runnable callback) {
synchronized (_callbacks) {
if (_wasCommitted) throw new IllegalStateException();
_callbacks.add(callback);
}
_callbacks.add(callback);
}
public List<Runnable> setCommitted() {
synchronized (_callbacks) {
_wasCommitted = true;
return Collections.unmodifiableList(_callbacks);
}
public int size() {
return _size;
}
private void putEntry(BundleEntry entry) {
@@ -439,28 +433,7 @@ public class WritebackObjectPersistentStore {
putEntry(new DeletedEntry(obj));
}
public long size() {
return _size;
}
public void compress(TxBundle other) {
if (_txId >= other._txId)
throw new IllegalArgumentException("Compressing an older bundle into newer");
_txId = other._txId;
for (var entry : other._entries.values()) {
putEntry(entry);
}
synchronized (_callbacks) {
assert !_wasCommitted;
assert !other._wasCommitted;
_callbacks.addAll(other._callbacks);
}
}
private interface BundleEntry {
private sealed interface BundleEntry permits CommittedEntry, DeletedEntry {
JObjectKey key();
int size();
@@ -478,10 +451,4 @@ public class WritebackObjectPersistentStore {
}
}
}
public record VerboseReadResultPersisted(Optional<JDataVersionedWrapper> data) implements VerboseReadResult {
}
public record VerboseReadResultPending(PendingWriteEntry pending) implements VerboseReadResult {
}
}

Some files were not shown because too many files have changed in this diff Show More