about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author Eugen Rochko <eugen@zeonfederated.com> 2022-10-25 01:07:00 +0200
committer Eugen Rochko <eugen@zeonfederated.com> 2022-10-25 15:19:33 +0200
commit 1bfbfb0317263be46869150f6673f014e8ef0ae8 (patch)
tree a91cd724c7492ebee3b87860cda0b5a9f7a1de98 /lib
parent 30453fab80d55fc10766f0e067c31d96753ccfda (diff)
download mastodon-feature-argument-deduplication.tar
mastodon-feature-argument-deduplication.tar.gz
mastodon-feature-argument-deduplication.tar.bz2
mastodon-feature-argument-deduplication.zip
Add deduplication for JSON payloads in job queue [feature-argument-deduplication]
Diffstat (limited to 'lib')
-rw-r--r-- lib/argument_deduplication.rb 26
-rw-r--r-- lib/argument_deduplication/argument.rb 49
-rw-r--r-- lib/argument_deduplication/client.rb 25
-rw-r--r-- lib/argument_deduplication/server.rb 38
4 files changed, 138 insertions, 0 deletions
diff --git a/lib/argument_deduplication.rb b/lib/argument_deduplication.rb
new file mode 100644
index 000000000..f271b6f96
--- /dev/null
+++ b/lib/argument_deduplication.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+require_relative './argument_deduplication/argument'
+require_relative './argument_deduplication/server'
+require_relative './argument_deduplication/client'
+
+module ArgumentDeduplication
+ class CorruptedArgumentError < ::RuntimeError; end
+
+ PREFIX = 'argument_store'
+
+ # The time-to-live is based on the maximum amount of time
+ # a job can possibly spend in the retry queue, assuming
+ # the exponential backoff algorithm and a maximum number
+ # of 16 retries. It is intended as a safe-guard against
+ # any arguments being orphaned due to interruptions.
+ TTL = 50.days.to_i
+
+ DEATH_HANDLER = ->(job) {
+ Argument.new(job['args'][job['deduplicate_arguments']]).pop! if job['deduplicate_arguments']
+ }.freeze
+
+ def self.configure(config)
+ config.death_handlers << DEATH_HANDLER
+ end
+end
diff --git a/lib/argument_deduplication/argument.rb b/lib/argument_deduplication/argument.rb
new file mode 100644
index 000000000..60be6bca2
--- /dev/null
+++ b/lib/argument_deduplication/argument.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module ArgumentDeduplication
+ class Argument
+ def self.from_value(value)
+ new(Digest::SHA256.base64digest(value), value)
+ end
+
+ attr_reader :content_hash, :value
+
+ def initialize(content_hash, value)
+ @content_hash = content_hash
+ @value = value
+ end
+
+ def push!
+ with_redis do |redis|
+ redis.multi do |transaction|
+ transaction.set("#{PREFIX}:value:#{content_hash}", value, ex: TTL)
+ transaction.incr("#{PREFIX}:refcount:#{content_hash}")
+ transaction.expire("#{PREFIX}:refcount:#{content_hash}", TTL)
+ end
+ end
+ end
+
+ def pop!
+ with_redis do |redis|
+ redis.decr("#{PREFIX}:refcount:#{content_hash}")
+
+ redis.watch("#{PREFIX}:refcount:#{content_hash}") do
+ if redis.get("#{PREFIX}:refcount:#{content_hash}").to_i <= 0
+ redis.multi do |transaction|
+ transaction.del("#{PREFIX}:refcount:#{content_hash}")
+ transaction.del("#{PREFIX}:value:#{content_hash}")
+ end
+ else
+ redis.unwatch
+ end
+ end
+ end
+ end
+
+ private
+
+ def with_redis(&block)
+ Sidekiq.redis(&block)
+ end
+ end
+end
diff --git a/lib/argument_deduplication/client.rb b/lib/argument_deduplication/client.rb
new file mode 100644
index 000000000..b4c6f5999
--- /dev/null
+++ b/lib/argument_deduplication/client.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module ArgumentDeduplication
+ class Client
+ include Sidekiq::ClientMiddleware
+
+ def call(_worker, job, _queue, _redis_pool)
+ process_arguments!(job)
+ yield
+ end
+
+ private
+
+ def process_arguments!(job)
+ return unless job['deduplicate_arguments']
+
+ argument_index = job['deduplicate_arguments']
+ argument = Argument.from_value(job['args'][argument_index])
+
+ argument.push!
+
+ job['args'][argument_index] = argument.content_hash
+ end
+ end
+end
diff --git a/lib/argument_deduplication/server.rb b/lib/argument_deduplication/server.rb
new file mode 100644
index 000000000..0da4d1c62
--- /dev/null
+++ b/lib/argument_deduplication/server.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module ArgumentDeduplication
+ class Server
+ include Sidekiq::ServerMiddleware
+
+ def call(_worker, job, _queue)
+ argument = process_argument!(job)
+
+ yield
+
+ # If the job completes successfully, we can remove
+ # the argument from the store. If there is an exception,
+ # the job will be retried, so we can't remove the argument
+ # from the store yet. When retries are exhausted, or when
+ # retries are disabled for the worker, the configured death
+ # handler will remove it.
+
+ argument&.pop!
+ end
+
+ private
+
+ def process_argument!(job)
+ return unless job['deduplicate_arguments']
+
+ argument_index = job['deduplicate_arguments']
+ content_hash = job['args'][argument_index]
+ value = Sidekiq.redis { |redis| redis.get("#{PREFIX}:value:#{content_hash}") }
+
+ raise CorruptedArgumentError, "The argument for hash #{content_hash} could not be found" if value.nil?
+
+ job['args'][argument_index] = value
+
+ Argument.new(content_hash, value)
+ end
+ end
+end