diff --git a/changelog.d/meilisearch.add b/changelog.d/meilisearch.add new file mode 100644 index 000000000..4856eea2e --- /dev/null +++ b/changelog.d/meilisearch.add @@ -0,0 +1 @@ +Add meilisearch, make search engines pluggable diff --git a/config/config.exs b/config/config.exs index ad3a98f46..ec7c5fe09 100644 --- a/config/config.exs +++ b/config/config.exs @@ -590,7 +590,9 @@ config :pleroma, Oban, background: 5, remote_fetcher: 2, attachments_cleanup: 1, - mute_expire: 5 + new_users_digest: 1, + mute_expire: 5, + search_indexing: 10 ], plugins: [Oban.Plugins.Pruner], crontab: [ @@ -601,7 +603,8 @@ config :pleroma, Oban, config :pleroma, :workers, retries: [ federator_incoming: 5, - federator_outgoing: 5 + federator_outgoing: 5, + search_indexing: 2 ] config :pleroma, Pleroma.Formatter, @@ -888,11 +891,19 @@ config :pleroma, Pleroma.User.Backup, config :pleroma, ConcurrentLimiter, [ {Pleroma.Web.RichMedia.Helpers, [max_running: 5, max_waiting: 5]}, - {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} + {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]}, + {Pleroma.Search, [max_running: 30, max_waiting: 50]} ] config :pleroma, Pleroma.Web.WebFinger, domain: nil, update_nickname_on_user_fetch: true +config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch + +config :pleroma, Pleroma.Search.Meilisearch, + url: "http://127.0.0.1:7700/", + private_key: nil, + initial_indexing_chunk_size: 100_000 + # Import environment specific config. This must remain at the bottom # of this file so it overrides the configuration defined above. import_config "#{Mix.env()}.exs" diff --git a/config/description.exs b/config/description.exs index d18649ae8..b152981c4 100644 --- a/config/description.exs +++ b/config/description.exs @@ -3466,5 +3466,48 @@ config :pleroma, :config_description, [ ] } ] + }, + %{ + group: :pleroma, + key: Pleroma.Search, + type: :group, + description: "General search settings.", + children: [ + %{ + key: :module, + type: :keyword, + description: "Selected search module.", + suggestion: [Pleroma.Search.DatabaseSearch, Pleroma.Search.Meilisearch] + } + ] + }, + %{ + group: :pleroma, + key: Pleroma.Search.Meilisearch, + type: :group, + description: "Meilisearch settings.", + children: [ + %{ + key: :url, + type: :string, + description: "Meilisearch URL.", + suggestion: ["http://127.0.0.1:7700/"] + }, + %{ + key: :private_key, + type: :string, + description: + "Private key for meilisearch authentication, or `nil` to disable private key authentication.", + suggestion: [nil] + }, + %{ + key: :initial_indexing_chunk_size, + type: :int, + description: + "Amount of posts in a batch when running the initial indexing operation. Should probably not be more than 100000" <> + " since there's a limit on maximum insert size", + suggestion: [100_000] + } + ] } ] diff --git a/config/test.exs b/config/test.exs index 78303eb1d..5e8135a58 100644 --- a/config/test.exs +++ b/config/test.exs @@ -133,10 +133,16 @@ config :pleroma, :side_effects, ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock, logger: Pleroma.LoggerMock +config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch + +config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil + # Reduce recompilation time # https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects config :phoenix, :plug_init_mode, :runtime +config :pleroma, :config_impl, Pleroma.UnstubbedConfigMock + if File.exists?("./config/test.secret.exs") do import_config "test.secret.exs" else diff --git a/docs/configuration/search.md b/docs/configuration/search.md new file mode 100644 index 000000000..f131948a7 --- /dev/null +++ b/docs/configuration/search.md @@ -0,0 +1,123 @@ +# Configuring search + +{! backend/administration/CLI_tasks/general_cli_task_info.include !} + +## Built-in search + +To use built-in search that has no external dependencies, set the search module to `Pleroma.Activity`: + +> config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch + +While it has no external dependencies, it has problems with performance and relevancy. + +## Meilisearch + +Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million +posts while idle and up to 7G while indexing initially). The disk usage for this additional index is also +around 4 gigabytes. Like [RUM](./cheatsheet.md#rum-indexing-for-full-text-search) indexes, it offers considerably +higher performance and ordering by timestamp in a reasonable amount of time. +Additionally, the search results seem to be more accurate. + +Due to high memory usage, it may be best to set it up on a different machine, if running pleroma on a low-resource +computer, and use private key authentication to secure the remote search instance. + +To use [meilisearch](https://www.meilisearch.com/), set the search module to `Pleroma.Search.Meilisearch`: + +> config :pleroma, Pleroma.Search, module: Pleroma.Search.Meilisearch + +You then need to set the address of the meilisearch instance, and optionally the private key for authentication. You might +also want to change the `initial_indexing_chunk_size` to be smaller if you're server is not very powerful, but not higher than `100_000`, +because meilisearch will refuse to process it if it's too big. However, in general you want this to be as big as possible, because meilisearch +indexes faster when it can process many posts in a single batch. + +> config :pleroma, Pleroma.Search.Meilisearch, +> url: "http://127.0.0.1:7700/", +> private_key: "private key", +> initial_indexing_chunk_size: 100_000 + +Information about setting up meilisearch can be found in the +[official documentation](https://docs.meilisearch.com/learn/getting_started/installation.html). +You probably want to start it with `MEILI_NO_ANALYTICS=true` environment variable to disable analytics. +At least version 0.25.0 is required, but you are strongly adviced to use at least 0.26.0, as it introduces +the `--enable-auto-batching` option which drastically improves performance. Without this option, the search +is hardly usable on a somewhat big instance. + +### Private key authentication (optional) + +To set the private key, use the `MEILI_MASTER_KEY` environment variable when starting. After setting the _master key_, +you have to get the _private key_, which is actually used for authentication. + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch show-keys + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch show-keys + ``` + +You will see a "Default Admin API Key", this is the key you actually put into your configuration file. + +### Initial indexing + +After setting up the configuration, you'll want to index all of your already existsing posts. Only public posts are indexed. You'll only +have to do it one time, but it might take a while, depending on the amount of posts your instance has seen. This is also a fairly RAM +consuming process for `meilisearch`, and it will take a lot of RAM when running if you have a lot of posts (seems to be around 5G for ~1.2 +million posts while idle and up to 7G while indexing initially, but your experience may be different). + +The sequence of actions is as follows: + +1. First, change the configuration to use `Pleroma.Search.Meilisearch` as the search backend +2. Restart your instance, at this point it can be used while the search indexing is running, though search won't return anything +3. Start the initial indexing process (as described below with `index`), + and wait until the task says it sent everything from the database to index +4. Wait until everything is actually indexed (by checking with `stats` as described below), + at this point you don't have to do anything, just wait a while. + +To start the initial indexing, run the `index` command: + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch index + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch index + ``` + +This will show you the total amount of posts to index, and then show you the amount of posts indexed currently, until the numbers eventually +become the same. The posts are indexed in big batches and meilisearch will take some time to actually index them, even after you have +inserted all the posts into it. Depending on the amount of posts, this may be as long as several hours. To get information about the status +of indexing and how many posts have actually been indexed, use the `stats` command: + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch stats + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch stats + ``` + +### Clearing the index + +In case you need to clear the index (for example, to re-index from scratch, if that needs to happen for some reason), you can +use the `clear` command: + +=== "OTP" + ```sh + ./bin/pleroma_ctl search.meilisearch clear + ``` + +=== "From Source" + ```sh + mix pleroma.search.meilisearch clear + ``` + +This will clear **all** the posts from the search index. Note, that deleted posts are also removed from index by the instance itself, so +there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information +that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size +depends on the amount of text in posts. diff --git a/lib/mix/tasks/pleroma/search/meilisearch.ex b/lib/mix/tasks/pleroma/search/meilisearch.ex new file mode 100644 index 000000000..8379a0c25 --- /dev/null +++ b/lib/mix/tasks/pleroma/search/meilisearch.ex @@ -0,0 +1,145 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Mix.Tasks.Pleroma.Search.Meilisearch do + require Pleroma.Constants + + import Mix.Pleroma + import Ecto.Query + + import Pleroma.Search.Meilisearch, + only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete: 1] + + def run(["index"]) do + start_pleroma() + Pleroma.HTML.compile_scrubbers() + + meili_version = + ( + {:ok, result} = meili_get("/version") + + result["pkgVersion"] + ) + + # The ranking rule syntax was changed but nothing about that is mentioned in the changelog + if not Version.match?(meili_version, ">= 0.25.0") do + raise "Meilisearch <0.24.0 not supported" + end + + {:ok, _} = + meili_post( + "/indexes/objects/settings/ranking-rules", + [ + "published:desc", + "words", + "exactness", + "proximity", + "typo", + "attribute", + "sort" + ] + ) + + {:ok, _} = + meili_post( + "/indexes/objects/settings/searchable-attributes", + [ + "content" + ] + ) + + IO.puts("Created indices. Starting to insert posts.") + + chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size]) + + Pleroma.Repo.transaction( + fn -> + query = + from(Pleroma.Object, + # Only index public and unlisted posts which are notes and have some text + where: + fragment("data->>'type' = 'Note'") and + (fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or + fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())), + order_by: [desc: fragment("data->'published'")] + ) + + count = query |> Pleroma.Repo.aggregate(:count, :data) + IO.puts("Entries to index: #{count}") + + Pleroma.Repo.stream( + query, + timeout: :infinity + ) + |> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1) + |> Stream.filter(fn o -> not is_nil(o) end) + |> Stream.chunk_every(chunk_size) + |> Stream.transform(0, fn objects, acc -> + new_acc = acc + Enum.count(objects) + + # Reset to the beginning of the line and rewrite it + IO.write("\r") + IO.write("Indexed #{new_acc} entries") + + {[objects], new_acc} + end) + |> Stream.each(fn objects -> + result = + meili_put( + "/indexes/objects/documents", + objects + ) + + with {:ok, res} <- result do + if not Map.has_key?(res, "uid") do + IO.puts("\nFailed to index: #{inspect(result)}") + end + else + e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}") + end + end) + |> Stream.run() + end, + timeout: :infinity + ) + + IO.write("\n") + end + + def run(["clear"]) do + start_pleroma() + + meili_delete("/indexes/objects/documents") + end + + def run(["show-keys", master_key]) do + start_pleroma() + + endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url]) + + {:ok, result} = + Pleroma.HTTP.get( + Path.join(endpoint, "/keys"), + [{"Authorization", "Bearer #{master_key}"}] + ) + + decoded = Jason.decode!(result.body) + + if decoded["results"] do + Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} -> + IO.puts("#{desc}: #{key}") + end) + else + IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}") + end + end + + def run(["stats"]) do + start_pleroma() + + {:ok, result} = meili_get("/indexes/objects/stats") + IO.puts("Number of entries: #{result["numberOfDocuments"]}") + IO.puts("Indexing? #{result["isIndexing"]}") + end +end diff --git a/lib/pleroma/activity.ex b/lib/pleroma/activity.ex index 3556aaf9e..8a512dc57 100644 --- a/lib/pleroma/activity.ex +++ b/lib/pleroma/activity.ex @@ -368,7 +368,7 @@ defmodule Pleroma.Activity do ) end - defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search + defdelegate search(user, query, options \\ []), to: Pleroma.Search.DatabaseSearch def direct_conversation_id(activity, for_user) do alias Pleroma.Conversation.Participation diff --git a/lib/pleroma/application.ex b/lib/pleroma/application.ex index e68a3c57e..7bbc132f1 100644 --- a/lib/pleroma/application.ex +++ b/lib/pleroma/application.ex @@ -322,7 +322,11 @@ defmodule Pleroma.Application do def limiters_setup do config = Config.get(ConcurrentLimiter, []) - [Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy] + [ + Pleroma.Web.RichMedia.Helpers, + Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, + Pleroma.Search + ] |> Enum.each(fn module -> mod_config = Keyword.get(config, module, []) diff --git a/lib/pleroma/config/getting.ex b/lib/pleroma/config/getting.ex index f9b66bba6..ec93fd02a 100644 --- a/lib/pleroma/config/getting.ex +++ b/lib/pleroma/config/getting.ex @@ -5,4 +5,11 @@ defmodule Pleroma.Config.Getting do @callback get(any()) :: any() @callback get(any(), any()) :: any() + + def get(key), do: get(key, nil) + def get(key, default), do: impl().get(key, default) + + def impl do + Application.get_env(:pleroma, :config_impl, Pleroma.Config) + end end diff --git a/lib/pleroma/search.ex b/lib/pleroma/search.ex new file mode 100644 index 000000000..3b266e59b --- /dev/null +++ b/lib/pleroma/search.ex @@ -0,0 +1,17 @@ +defmodule Pleroma.Search do + alias Pleroma.Workers.SearchIndexingWorker + + def add_to_index(%Pleroma.Activity{id: activity_id}) do + SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity_id}) + end + + def remove_from_index(%Pleroma.Object{id: object_id}) do + SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object_id}) + end + + def search(query, options) do + search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity) + + search_module.search(options[:for_user], query, options) + end +end diff --git a/lib/pleroma/activity/search.ex b/lib/pleroma/search/database_search.ex similarity index 88% rename from lib/pleroma/activity/search.ex rename to lib/pleroma/search/database_search.ex index 0b9b24aa4..c6311e0c7 100644 --- a/lib/pleroma/activity/search.ex +++ b/lib/pleroma/search/database_search.ex @@ -1,9 +1,10 @@ # Pleroma: A lightweight social networking server -# Copyright © 2017-2022 Pleroma Authors +# Copyright © 2017-2021 Pleroma Authors # SPDX-License-Identifier: AGPL-3.0-only -defmodule Pleroma.Activity.Search do +defmodule Pleroma.Search.DatabaseSearch do alias Pleroma.Activity + alias Pleroma.Config alias Pleroma.Object.Fetcher alias Pleroma.Pagination alias Pleroma.User @@ -13,8 +14,11 @@ defmodule Pleroma.Activity.Search do import Ecto.Query + @behaviour Pleroma.Search.SearchBackend + + @impl true def search(user, search_query, options \\ []) do - index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin + index_type = if Config.get([:database, :rum_enabled]), do: :rum, else: :gin limit = Enum.min([Keyword.get(options, :limit), 40]) offset = Keyword.get(options, :offset, 0) author = Keyword.get(options, :author) @@ -45,6 +49,12 @@ defmodule Pleroma.Activity.Search do end end + @impl true + def add_to_index(_activity), do: :ok + + @impl true + def remove_from_index(_object), do: :ok + def maybe_restrict_author(query, %User{} = author) do Activity.Queries.by_author(query, author) end @@ -136,8 +146,8 @@ defmodule Pleroma.Activity.Search do ) end - defp maybe_restrict_local(q, user) do - limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) + def maybe_restrict_local(q, user) do + limit = Config.get([:instance, :limit_to_local_content], :unauthenticated) case {limit, user} do {:all, _} -> restrict_local(q) @@ -149,7 +159,7 @@ defmodule Pleroma.Activity.Search do defp restrict_local(q), do: where(q, local: true) - defp maybe_fetch(activities, user, search_query) do + def maybe_fetch(activities, user, search_query) do with true <- Regex.match?(~r/https?:/, search_query), {:ok, object} <- Fetcher.fetch_object_from_id(search_query), %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex new file mode 100644 index 000000000..2bff663e8 --- /dev/null +++ b/lib/pleroma/search/meilisearch.ex @@ -0,0 +1,181 @@ +defmodule Pleroma.Search.Meilisearch do + require Logger + require Pleroma.Constants + + alias Pleroma.Activity + alias Pleroma.Config.Getting, as: Config + + import Pleroma.Search.DatabaseSearch + import Ecto.Query + + @behaviour Pleroma.Search.SearchBackend + + defp meili_headers do + private_key = Config.get([Pleroma.Search.Meilisearch, :private_key]) + + [{"Content-Type", "application/json"}] ++ + if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}] + end + + def meili_get(path) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.get( + Path.join(endpoint, path), + meili_headers() + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_post(path, params) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.post( + Path.join(endpoint, path), + Jason.encode!(params), + meili_headers() + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_put(path, params) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.request( + :put, + Path.join(endpoint, path), + Jason.encode!(params), + meili_headers(), + [] + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_delete(path) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + with {:ok, _} <- + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + [] + ) do + :ok + else + _ -> {:error, "Could not remove from index"} + end + end + + @impl true + def search(user, query, options \\ []) do + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + res = + meili_post( + "/indexes/objects/search", + %{q: query, offset: offset, limit: limit} + ) + + with {:ok, result} <- res do + hits = result["hits"] |> Enum.map(& &1["ap"]) + + try do + hits + |> Activity.create_by_object_ap_id() + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> maybe_fetch(user, query) + |> order_by([object: obj], desc: obj.data["published"]) + |> Pleroma.Repo.all() + rescue + _ -> maybe_fetch([], user, query) + end + end + end + + def object_to_search_data(object) do + # Only index public or unlisted Notes + if not is_nil(object) and object.data["type"] == "Note" and + not is_nil(object.data["content"]) and + (Pleroma.Constants.as_public() in object.data["to"] or + Pleroma.Constants.as_public() in object.data["cc"]) and + object.data["content"] not in ["", "."] do + data = object.data + + content_str = + case data["content"] do + [nil | rest] -> to_string(rest) + str -> str + end + + content = + with {:ok, scrubbed} <- + FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing), + trimmed <- String.trim(scrubbed) do + trimmed + end + + # Make sure we have a non-empty string + if content != "" do + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + + %{ + id: object.id, + content: content, + ap: data["id"], + published: published |> DateTime.to_unix() + } + end + end + end + + @impl true + def add_to_index(activity) do + maybe_search_data = object_to_search_data(activity.object) + + if activity.data["type"] == "Create" and maybe_search_data do + result = + meili_put( + "/indexes/objects/documents", + [maybe_search_data] + ) + + with {:ok, %{"status" => "enqueued"}} <- result do + # Added successfully + :ok + else + _ -> + # There was an error, report it + Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") + {:error, result} + end + else + # The post isn't something we can search, that's ok + :ok + end + end + + @impl true + def remove_from_index(object) do + meili_delete("/indexes/objects/documents/#{object.id}") + end +end diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex new file mode 100644 index 000000000..a42e2f5f6 --- /dev/null +++ b/lib/pleroma/search/search_backend.ex @@ -0,0 +1,24 @@ +defmodule Pleroma.Search.SearchBackend do + @doc """ + Search statuses with a query, restricting to only those the user should have access to. + """ + @callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [ + Pleroma.Activity.t() + ] + + @doc """ + Add the object associated with the activity to the search index. + + The whole activity is passed, to allow filtering on things such as scope. + """ + @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()} + + @doc """ + Remove the object from the index. + + Just the object, as opposed to the whole activity, is passed, since the object + is what contains the actual content and there is no need for fitlering when removing + from index. + """ + @callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()} +end diff --git a/lib/pleroma/web/activity_pub/activity_pub.ex b/lib/pleroma/web/activity_pub/activity_pub.ex index 3979d418e..54a77a228 100644 --- a/lib/pleroma/web/activity_pub/activity_pub.ex +++ b/lib/pleroma/web/activity_pub/activity_pub.ex @@ -140,6 +140,9 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) + # Add local posts to search index + if local, do: Pleroma.Search.add_to_index(activity) + {:ok, activity} else %Activity{} = activity -> diff --git a/lib/pleroma/web/activity_pub/side_effects.ex b/lib/pleroma/web/activity_pub/side_effects.ex index 098c177c7..7a28a7c97 100644 --- a/lib/pleroma/web/activity_pub/side_effects.ex +++ b/lib/pleroma/web/activity_pub/side_effects.ex @@ -197,6 +197,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do # - Increase replies count # - Set up ActivityExpiration # - Set up notifications + # - Index incoming posts for search (if needed) @impl true def handle(%{data: %{"type" => "Create"}} = activity, meta) do with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta), @@ -226,6 +227,8 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) end) + Pleroma.Search.add_to_index(Map.put(activity, :object, object)) + meta = meta |> add_notifications(notifications) @@ -285,6 +288,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do # - Reduce the user note count # - Reduce the reply count # - Stream out the activity + # - Removes posts from search index (if needed) @impl true def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do deleted_object = @@ -323,6 +327,11 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do end if result == :ok do + # Only remove from index when deleting actual objects, not users or anything else + with %Pleroma.Object{} <- deleted_object do + Pleroma.Search.remove_from_index(deleted_object) + end + {:ok, object, meta} else {:error, result} diff --git a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex index 5e6e04734..e4acba226 100644 --- a/lib/pleroma/web/mastodon_api/controllers/search_controller.ex +++ b/lib/pleroma/web/mastodon_api/controllers/search_controller.ex @@ -5,7 +5,6 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do use Pleroma.Web, :controller - alias Pleroma.Activity alias Pleroma.Repo alias Pleroma.User alias Pleroma.Web.ControllerHelper @@ -100,7 +99,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do end defp resource_search(_, "statuses", query, options) do - statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) + statuses = with_fallback(fn -> Pleroma.Search.search(query, options) end) StatusView.render("index.json", activities: statuses, diff --git a/lib/pleroma/workers/search_indexing_worker.ex b/lib/pleroma/workers/search_indexing_worker.ex new file mode 100644 index 000000000..8476a2be5 --- /dev/null +++ b/lib/pleroma/workers/search_indexing_worker.ex @@ -0,0 +1,23 @@ +defmodule Pleroma.Workers.SearchIndexingWorker do + use Pleroma.Workers.WorkerHelper, queue: "search_indexing" + + @impl Oban.Worker + + alias Pleroma.Config.Getting, as: Config + + def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do + activity = Pleroma.Activity.get_by_id_with_object(activity_id) + + search_module = Config.get([Pleroma.Search, :module]) + + search_module.add_to_index(activity) + end + + def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do + object = Pleroma.Object.get_by_id(object_id) + + search_module = Config.get([Pleroma.Search, :module]) + + search_module.remove_from_index(object) + end +end diff --git a/priv/scrubbers/search_indexing.ex b/priv/scrubbers/search_indexing.ex new file mode 100644 index 000000000..02756ab79 --- /dev/null +++ b/priv/scrubbers/search_indexing.ex @@ -0,0 +1,24 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.HTML.Scrubber.SearchIndexing do + @moduledoc """ + An HTML scrubbing policy that scrubs things for searching. + """ + + require FastSanitize.Sanitizer.Meta + alias FastSanitize.Sanitizer.Meta + + # Explicitly remove mentions + def scrub({:a, attrs, children}) do + if(Enum.any?(attrs, fn {att, val} -> att == "class" and String.contains?(val, "mention") end), + do: nil, + # Strip the tag itself, leave only children (text, presumably) + else: children + ) + end + + Meta.strip_comments() + Meta.strip_everything_not_covered() +end diff --git a/test/mix/tasks/pleroma/digest_test.exs b/test/mix/tasks/pleroma/digest_test.exs index d2a8606c7..08482aadb 100644 --- a/test/mix/tasks/pleroma/digest_test.exs +++ b/test/mix/tasks/pleroma/digest_test.exs @@ -23,6 +23,11 @@ defmodule Mix.Tasks.Pleroma.DigestTest do setup do: clear_config([Pleroma.Emails.Mailer, :enabled], true) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "pleroma.digest test" do test "Sends digest to the given user" do user1 = insert(:user) diff --git a/test/mix/tasks/pleroma/user_test.exs b/test/mix/tasks/pleroma/user_test.exs index 4fdf6912b..c9bcf2951 100644 --- a/test/mix/tasks/pleroma/user_test.exs +++ b/test/mix/tasks/pleroma/user_test.exs @@ -20,6 +20,11 @@ defmodule Mix.Tasks.Pleroma.UserTest do import Mock import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Mix.shell(Mix.Shell.Process) diff --git a/test/pleroma/conversation_test.exs b/test/pleroma/conversation_test.exs index 94897e7ea..809c1951a 100644 --- a/test/pleroma/conversation_test.exs +++ b/test/pleroma/conversation_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.ConversationTest do setup_all do: clear_config([:instance, :federating], true) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "it goes through old direct conversations" do user = insert(:user) other_user = insert(:user) diff --git a/test/pleroma/notification_test.exs b/test/pleroma/notification_test.exs index 71af9acb8..4cf14e65b 100644 --- a/test/pleroma/notification_test.exs +++ b/test/pleroma/notification_test.exs @@ -21,6 +21,11 @@ defmodule Pleroma.NotificationTest do alias Pleroma.Web.Push alias Pleroma.Web.Streamer + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "create_notifications" do test "never returns nil" do user = insert(:user) diff --git a/test/pleroma/activity/search_test.exs b/test/pleroma/search/database_search_test.exs similarity index 95% rename from test/pleroma/activity/search_test.exs rename to test/pleroma/search/database_search_test.exs index 3b5fd2c3c..6c47ff425 100644 --- a/test/pleroma/activity/search_test.exs +++ b/test/pleroma/search/database_search_test.exs @@ -2,8 +2,8 @@ # Copyright © 2017-2022 Pleroma Authors # SPDX-License-Identifier: AGPL-3.0-only -defmodule Pleroma.Activity.SearchTest do - alias Pleroma.Activity.Search +defmodule Pleroma.Search.DatabaseSearchTest do + alias Pleroma.Search.DatabaseSearch, as: Search alias Pleroma.Web.CommonAPI import Pleroma.Factory diff --git a/test/pleroma/search/meilisearch_test.exs b/test/pleroma/search/meilisearch_test.exs new file mode 100644 index 000000000..eea454323 --- /dev/null +++ b/test/pleroma/search/meilisearch_test.exs @@ -0,0 +1,160 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Search.MeilisearchTest do + require Pleroma.Constants + + use Pleroma.DataCase, async: true + use Oban.Testing, repo: Pleroma.Repo + + import Pleroma.Factory + import Tesla.Mock + import Mox + + alias Pleroma.Search.Meilisearch + alias Pleroma.UnstubbedConfigMock, as: Config + alias Pleroma.Web.CommonAPI + alias Pleroma.Workers.SearchIndexingWorker + + describe "meilisearch" do + test "indexes a local post on creation" do + user = insert(:user) + + Tesla.Mock.mock(fn + %{ + method: :put, + url: "http://127.0.0.1:7700/indexes/objects/documents", + body: body + } -> + assert match?( + [%{"content" => "guys i just don't wanna leave the swamp"}], + Jason.decode!(body) + ) + + # To make sure that the worker is called + send(self(), "posted_to_meilisearch") + + %{ + "enqueuedAt" => "2023-11-12T12:36:46.927517Z", + "indexUid" => "objects", + "status" => "enqueued", + "taskUid" => 6, + "type" => "documentAdditionOrUpdate" + } + |> json() + end) + + Config + |> expect(:get, 3, fn + [Pleroma.Search, :module], nil -> + Meilisearch + + [Pleroma.Search.Meilisearch, :url], nil -> + "http://127.0.0.1:7700" + + [Pleroma.Search.Meilisearch, :private_key], nil -> + "secret" + end) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: "public" + }) + + args = %{"op" => "add_to_index", "activity" => activity.id} + + assert_enqueued( + worker: SearchIndexingWorker, + args: args + ) + + assert :ok = perform_job(SearchIndexingWorker, args) + assert_received("posted_to_meilisearch") + end + + test "doesn't index posts that are not public" do + user = insert(:user) + + Enum.each(["private", "direct"], fn visibility -> + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: visibility + }) + + args = %{"op" => "add_to_index", "activity" => activity.id} + + Config + |> expect(:get, fn + [Pleroma.Search, :module], nil -> + Meilisearch + end) + + assert_enqueued(worker: SearchIndexingWorker, args: args) + assert :ok = perform_job(SearchIndexingWorker, args) + end) + end + + test "deletes posts from index when deleted locally" do + user = insert(:user) + + Tesla.Mock.mock(fn + %{ + method: :put, + url: "http://127.0.0.1:7700/indexes/objects/documents", + body: body + } -> + assert match?( + [%{"content" => "guys i just don't wanna leave the swamp"}], + Jason.decode!(body) + ) + + %{ + "enqueuedAt" => "2023-11-12T12:36:46.927517Z", + "indexUid" => "objects", + "status" => "enqueued", + "taskUid" => 6, + "type" => "documentAdditionOrUpdate" + } + |> json() + + %{method: :delete, url: "http://127.0.0.1:7700/indexes/objects/documents/" <> id} -> + send(self(), "called_delete") + assert String.length(id) > 1 + json(%{}) + end) + + Config + |> expect(:get, 6, fn + [Pleroma.Search, :module], nil -> + Meilisearch + + [Pleroma.Search.Meilisearch, :url], nil -> + "http://127.0.0.1:7700" + + [Pleroma.Search.Meilisearch, :private_key], nil -> + "secret" + end) + + {:ok, activity} = + CommonAPI.post(user, %{ + status: "guys i just don't wanna leave the swamp", + visibility: "public" + }) + + args = %{"op" => "add_to_index", "activity" => activity.id} + assert_enqueued(worker: SearchIndexingWorker, args: args) + assert :ok = perform_job(SearchIndexingWorker, args) + + {:ok, _} = CommonAPI.delete(activity.id, user) + + delete_args = %{"op" => "remove_from_index", "object" => activity.object.id} + assert_enqueued(worker: SearchIndexingWorker, args: delete_args) + assert :ok = perform_job(SearchIndexingWorker, delete_args) + + assert_received("called_delete") + end + end +end diff --git a/test/pleroma/user_test.exs b/test/pleroma/user_test.exs index 7f60b959a..b9df527a0 100644 --- a/test/pleroma/user_test.exs +++ b/test/pleroma/user_test.exs @@ -19,6 +19,11 @@ defmodule Pleroma.UserTest do import ExUnit.CaptureLog import Swoosh.TestAssertions + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) :ok diff --git a/test/pleroma/web/activity_pub/activity_pub_controller_test.exs b/test/pleroma/web/activity_pub/activity_pub_controller_test.exs index 62eb9b5a3..0dc61c2e5 100644 --- a/test/pleroma/web/activity_pub/activity_pub_controller_test.exs +++ b/test/pleroma/web/activity_pub/activity_pub_controller_test.exs @@ -25,6 +25,11 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubControllerTest do require Pleroma.Constants + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) :ok diff --git a/test/pleroma/web/admin_api/controllers/user_controller_test.exs b/test/pleroma/web/admin_api/controllers/user_controller_test.exs index bb9dcb4aa..8edfda54c 100644 --- a/test/pleroma/web/admin_api/controllers/user_controller_test.exs +++ b/test/pleroma/web/admin_api/controllers/user_controller_test.exs @@ -19,6 +19,11 @@ defmodule Pleroma.Web.AdminAPI.UserControllerTest do alias Pleroma.Web.Endpoint alias Pleroma.Web.MediaProxy + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) diff --git a/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs index 128e60b0a..d8e5f9d39 100644 --- a/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/account_controller_test.exs @@ -18,6 +18,11 @@ defmodule Pleroma.Web.MastodonAPI.AccountControllerTest do import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "account fetching" do test "works by id" do %User{id: user_id} = insert(:user) diff --git a/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs index 1524df98f..350b935d7 100644 --- a/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/notification_controller_test.exs @@ -12,6 +12,11 @@ defmodule Pleroma.Web.MastodonAPI.NotificationControllerTest do import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "does NOT render account/pleroma/relationship by default" do %{user: user, conn: conn} = oauth_access(["read:notifications"]) other_user = insert(:user) diff --git a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs index 0a9240b70..19dee25d7 100644 --- a/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/search_controller_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do import Tesla.Mock import Mock + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup_all do mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) :ok diff --git a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs index de3b52e26..db2688f80 100644 --- a/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs +++ b/test/pleroma/web/mastodon_api/controllers/status_controller_test.exs @@ -27,6 +27,11 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do setup do: clear_config([:mrf, :policies]) setup do: clear_config([:mrf_keyword, :reject]) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + describe "posting statuses" do setup do: oauth_access(["write:statuses"]) diff --git a/test/pleroma/web/mastodon_api/views/notification_view_test.exs b/test/pleroma/web/mastodon_api/views/notification_view_test.exs index ddbe4557f..47425d2a9 100644 --- a/test/pleroma/web/mastodon_api/views/notification_view_test.exs +++ b/test/pleroma/web/mastodon_api/views/notification_view_test.exs @@ -22,6 +22,11 @@ defmodule Pleroma.Web.MastodonAPI.NotificationViewTest do alias Pleroma.Web.PleromaAPI.Chat.MessageReferenceView import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + defp test_notifications_rendering(notifications, user, expected_result) do result = NotificationView.render("index.json", %{notifications: notifications, for: user}) diff --git a/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs b/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs index 21e7d4839..8c2dcc1bb 100644 --- a/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs +++ b/test/pleroma/web/pleroma_api/controllers/emoji_reaction_controller_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.Web.PleromaAPI.EmojiReactionControllerTest do import Pleroma.Factory + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "PUT /api/v1/pleroma/statuses/:id/reactions/:emoji", %{conn: conn} do user = insert(:user) other_user = insert(:user) diff --git a/test/pleroma/workers/cron/digest_emails_worker_test.exs b/test/pleroma/workers/cron/digest_emails_worker_test.exs index 851f4d63a..e0bdf303e 100644 --- a/test/pleroma/workers/cron/digest_emails_worker_test.exs +++ b/test/pleroma/workers/cron/digest_emails_worker_test.exs @@ -13,6 +13,11 @@ defmodule Pleroma.Workers.Cron.DigestEmailsWorkerTest do setup do: clear_config([:email_notifications, :digest]) + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + setup do clear_config([:email_notifications, :digest], %{ active: true, diff --git a/test/pleroma/workers/cron/new_users_digest_worker_test.exs b/test/pleroma/workers/cron/new_users_digest_worker_test.exs index 84914876c..0e4234cc8 100644 --- a/test/pleroma/workers/cron/new_users_digest_worker_test.exs +++ b/test/pleroma/workers/cron/new_users_digest_worker_test.exs @@ -10,6 +10,11 @@ defmodule Pleroma.Workers.Cron.NewUsersDigestWorkerTest do alias Pleroma.Web.CommonAPI alias Pleroma.Workers.Cron.NewUsersDigestWorker + setup do + Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config) + :ok + end + test "it sends new users digest emails" do yesterday = NaiveDateTime.utc_now() |> Timex.shift(days: -1) admin = insert(:user, %{is_admin: true}) diff --git a/test/support/mocks.ex b/test/support/mocks.ex index d167996bd..9693095ba 100644 --- a/test/support/mocks.ex +++ b/test/support/mocks.ex @@ -26,5 +26,6 @@ Mox.defmock(Pleroma.Web.ActivityPub.SideEffectsMock, Mox.defmock(Pleroma.Web.FederatorMock, for: Pleroma.Web.Federator.Publishing) Mox.defmock(Pleroma.ConfigMock, for: Pleroma.Config.Getting) +Mox.defmock(Pleroma.UnstubbedConfigMock, for: Pleroma.Config.Getting) Mox.defmock(Pleroma.LoggerMock, for: Pleroma.Logging)