From 61a3b793165e92d6f23a2e59fb9b95e06737cf25 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sat, 25 May 2024 14:20:47 -0400 Subject: [PATCH 1/6] Search backend healthcheck process --- changelog.d/search-healthcheck.add | 1 + config/config.exs | 2 +- lib/pleroma/application.ex | 3 +- lib/pleroma/search.ex | 5 ++ lib/pleroma/search/database_search.ex | 3 + lib/pleroma/search/healthcheck.ex | 85 +++++++++++++++++++++++++++ lib/pleroma/search/meilisearch.ex | 11 ++++ lib/pleroma/search/search_backend.ex | 8 +++ 8 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 changelog.d/search-healthcheck.add create mode 100644 lib/pleroma/search/healthcheck.ex diff --git a/changelog.d/search-healthcheck.add b/changelog.d/search-healthcheck.add new file mode 100644 index 000000000..4974925e7 --- /dev/null +++ b/changelog.d/search-healthcheck.add @@ -0,0 +1 @@ +Monitoring of search backend health to control the processing of jobs in the search indexing Oban queue diff --git a/config/config.exs b/config/config.exs index b69044a2b..8b9a588b7 100644 --- a/config/config.exs +++ b/config/config.exs @@ -579,7 +579,7 @@ config :pleroma, Oban, attachments_cleanup: 1, new_users_digest: 1, mute_expire: 5, - search_indexing: 10, + search_indexing: [limit: 10, paused: true], rich_media_expiration: 2 ], plugins: [Oban.Plugins.Pruner], diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index 649bb11c8..d266d1836 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -109,7 +109,8 @@ defmodule Pleroma.Application do streamer_registry() ++ background_migrators() ++ shout_child(shout_enabled?()) ++ - [Pleroma.Gopher.Server] + [Pleroma.Gopher.Server] ++ + [Pleroma.Search.Healthcheck] # See http://elixir-lang.org/docs/stable/elixir/Supervisor.html # for other strategies and supported options diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex index 3b266e59b..e8dbcca1f 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search.ex @@ 
-14,4 +14,9 @@ defmodule Pleroma.Search do search_module.search(options[:for_user], query, options) end + + def healthcheck_endpoints do + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + search_module.healthcheck_endpoints + end end diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex index 31bfc7e33..11e99e7f1 100644 --- a/lib/pleroma/search/database_search.ex +++ b/lib/pleroma/search/database_search.ex @@ -48,6 +48,9 @@ defmodule Pleroma.Search.DatabaseSearch do @impl true def remove_from_index(_object), do: :ok + @impl true + def healthcheck_endpoints, do: nil + def maybe_restrict_author(query, %User{} = author) do Activity.Queries.by_author(query, author) end diff --git a/lib/pleroma/search/healthcheck.ex b/lib/pleroma/search/healthcheck.ex new file mode 100644 index 000000000..495aee930 --- /dev/null +++ b/lib/pleroma/search/healthcheck.ex @@ -0,0 +1,85 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2024 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only +defmodule Pleroma.Search.Healthcheck do + @moduledoc """ + Monitors health of search backend to control processing of events based on health and availability. 
+ """ + use GenServer + require Logger + + @tick :timer.seconds(60) + @queue :search_indexing + + def start_link(_) do + GenServer.start_link(__MODULE__, [], name: __MODULE__) + end + + @impl true + def init(_) do + state = %{healthy: false} + {:ok, state, {:continue, :start}} + end + + @impl true + def handle_continue(:start, state) do + tick() + {:noreply, state} + end + + @impl true + def handle_info(:check, state) do + urls = Pleroma.Search.healthcheck_endpoints() + + new_state = + if healthy?(urls) do + Oban.resume_queue(queue: @queue) + Map.put(state, :healthy, true) + else + Oban.pause_queue(queue: @queue) + Map.put(state, :healthy, false) + end + + maybe_log_state_change(state, new_state) + + tick() + {:noreply, new_state} + end + + @impl true + def handle_call(:check, _from, state) do + status = Map.get(state, :healthy) + + {:reply, status, state, :hibernate} + end + + defp healthy?([]), do: true + + defp healthy?(urls) when is_list(urls) do + Enum.all?( + urls, + fn url -> + case Pleroma.HTTP.get(url) do + {:ok, %{status: 200}} -> true + _ -> false + end + end + ) + end + + defp healthy?(_), do: true + + defp tick do + Process.send_after(self(), :check, @tick) + end + + defp maybe_log_state_change(%{healthy: true}, %{healthy: false}) do + Logger.error("Pausing Oban queue #{@queue} due to search backend healthcheck failure") + end + + defp maybe_log_state_change(%{healthy: false}, %{healthy: true}) do + Logger.info("Resuming Oban queue #{@queue} due to search backend healthcheck pass") + end + + defp maybe_log_state_change(_, _), do: :ok +end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex index 2bff663e8..08c2f3d86 100644 --- a/lib/pleroma/search/meilisearch.ex +++ b/lib/pleroma/search/meilisearch.ex @@ -178,4 +178,15 @@ defmodule Pleroma.Search.Meilisearch do def remove_from_index(object) do meili_delete("/indexes/objects/documents/#{object.id}") end + + @impl true + def healthcheck_endpoints do + endpoint = + 
Config.get([Pleroma.Search.Meilisearch, :url]) + |> URI.parse() + |> Map.put(:path, "/health") + |> URI.to_string() + + [endpoint] + end end diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex index 68bc48cec..13c887bc2 100644 --- a/lib/pleroma/search/search_backend.ex +++ b/lib/pleroma/search/search_backend.ex @@ -21,4 +21,12 @@ defmodule Pleroma.Search.SearchBackend do from index. """ @callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()} + + @doc """ + Healthcheck endpoints of search backend infrastructure to monitor for controlling + processing of jobs in the Oban queue. + + It is expected a 200 response is healthy and other responses are unhealthy. + """ + @callback healthcheck_endpoints :: list() | nil end From 3474b42ce396150b21f26ed35bea46ad61f57d5f Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sat, 25 May 2024 16:55:29 -0400 Subject: [PATCH 2/6] Drop TTL to 5 seconds --- lib/pleroma/search/healthcheck.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/pleroma/search/healthcheck.ex b/lib/pleroma/search/healthcheck.ex index 495aee930..9a2d9fdd6 100644 --- a/lib/pleroma/search/healthcheck.ex +++ b/lib/pleroma/search/healthcheck.ex @@ -8,7 +8,7 @@ defmodule Pleroma.Search.Healthcheck do use GenServer require Logger - @tick :timer.seconds(60) + @tick :timer.seconds(5) @queue :search_indexing def start_link(_) do From f2b0d5f1d02e243a7a1a6f339b59e5abcb8e1bd8 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sun, 26 May 2024 14:11:41 -0400 Subject: [PATCH 3/6] Make it easier to read the state for debugging purposes and expose functions for testing --- lib/pleroma/search/healthcheck.ex | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/pleroma/search/healthcheck.ex b/lib/pleroma/search/healthcheck.ex index 9a2d9fdd6..170f29344 100644 --- a/lib/pleroma/search/healthcheck.ex +++ b/lib/pleroma/search/healthcheck.ex @@ -32,7 +32,7 @@ defmodule 
Pleroma.Search.Healthcheck do urls = Pleroma.Search.healthcheck_endpoints() new_state = - if healthy?(urls) do + if check(urls) do Oban.resume_queue(queue: @queue) Map.put(state, :healthy, true) else @@ -47,15 +47,15 @@ defmodule Pleroma.Search.Healthcheck do end @impl true - def handle_call(:check, _from, state) do - status = Map.get(state, :healthy) - - {:reply, status, state, :hibernate} + def handle_call(:state, _from, state) do + {:reply, state, state, :hibernate} end - defp healthy?([]), do: true + def state, do: GenServer.call(__MODULE__, :state) - defp healthy?(urls) when is_list(urls) do + def check([]), do: true + + def check(urls) when is_list(urls) do Enum.all?( urls, fn url -> @@ -67,7 +67,7 @@ defmodule Pleroma.Search.Healthcheck do ) end - defp healthy?(_), do: true + def check(_), do: true defp tick do Process.send_after(self(), :check, @tick) From 03f4b461895802259c895c81462a3e9d0d31c1e5 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sun, 26 May 2024 14:21:24 -0400 Subject: [PATCH 4/6] Test that healthchecks behave correctly for the expected HTTP responses --- test/pleroma/search/healthcheck_test.exs | 49 ++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 test/pleroma/search/healthcheck_test.exs diff --git a/test/pleroma/search/healthcheck_test.exs b/test/pleroma/search/healthcheck_test.exs new file mode 100644 index 000000000..e7649d949 --- /dev/null +++ b/test/pleroma/search/healthcheck_test.exs @@ -0,0 +1,49 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2024 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Search.HealthcheckTest do + use Pleroma.DataCase + + import Tesla.Mock + + alias Pleroma.Search.Healthcheck + + @good1 "http://good1.example.com/healthz" + @good2 "http://good2.example.com/health" + @bad "http://bad.example.com/healthy" + + setup do + mock(fn + %{method: :get, url: @good1} -> + %Tesla.Env{ + status: 200, + body: "" + } + + %{method: :get, 
url: @good2} -> + %Tesla.Env{ + status: 200, + body: "" + } + + %{method: :get, url: @bad} -> + %Tesla.Env{ + status: 503, + body: "" + } + end) + + :ok + end + + test "true for 200 responses" do + assert Healthcheck.check([@good1]) + assert Healthcheck.check([@good1, @good2]) + end + + test "false if any response is not a 200" do + refute Healthcheck.check([@bad]) + refute Healthcheck.check([@good1, @bad]) + end +end From d9b82255b9cf49176f8ef1d5a87abf7d80769a47 Mon Sep 17 00:00:00 2001 From: Mark Felder Date: Sun, 26 May 2024 15:23:12 -0400 Subject: [PATCH 5/6] Add an HTTP timeout for the healthcheck --- lib/pleroma/search/healthcheck.ex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/pleroma/search/healthcheck.ex b/lib/pleroma/search/healthcheck.ex index 170f29344..e562c8478 100644 --- a/lib/pleroma/search/healthcheck.ex +++ b/lib/pleroma/search/healthcheck.ex @@ -8,8 +8,9 @@ defmodule Pleroma.Search.Healthcheck do use GenServer require Logger - @tick :timer.seconds(5) @queue :search_indexing + @tick :timer.seconds(5) + @timeout :timer.seconds(2) def start_link(_) do GenServer.start_link(__MODULE__, [], name: __MODULE__) @@ -59,7 +60,7 @@ defmodule Pleroma.Search.Healthcheck do Enum.all?( urls, fn url -> - case Pleroma.HTTP.get(url) do + case Pleroma.HTTP.get(url, [], recv_timeout: @timeout) do {:ok, %{status: 200}} -> true _ -> false end From d35b69d2686e62cc5076bd7a33449f98f8a11a85 Mon Sep 17 00:00:00 2001 From: Lain Soykaf Date: Mon, 27 May 2024 13:18:02 +0400 Subject: [PATCH 6/6] Pleroma.Search: Remove wrong (but irrelevant) results --- lib/pleroma/search.ex | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex index e8dbcca1f..fd0218cb8 100644 --- a/lib/pleroma/search.ex +++ b/lib/pleroma/search.ex @@ -10,13 +10,12 @@ defmodule Pleroma.Search do end def search(query, options) do - search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) - + 
search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.search(options[:for_user], query, options) end def healthcheck_endpoints do - search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + search_module = Pleroma.Config.get([Pleroma.Search, :module]) search_module.healthcheck_endpoints end end