Merge branch 'meilisearch' into 'develop'

Different search backends, in particular meilisearch as an additional one

See merge request pleroma/pleroma!3529
This commit is contained in:
lain 2023-11-12 13:29:27 +00:00
commit e902c7168d
36 changed files with 881 additions and 15 deletions

View File

@ -0,0 +1 @@
Add meilisearch, make search engines pluggable

View File

@ -590,7 +590,9 @@ config :pleroma, Oban,
background: 5, background: 5,
remote_fetcher: 2, remote_fetcher: 2,
attachments_cleanup: 1, attachments_cleanup: 1,
mute_expire: 5 new_users_digest: 1,
mute_expire: 5,
search_indexing: 10
], ],
plugins: [Oban.Plugins.Pruner], plugins: [Oban.Plugins.Pruner],
crontab: [ crontab: [
@ -601,7 +603,8 @@ config :pleroma, Oban,
config :pleroma, :workers, config :pleroma, :workers,
retries: [ retries: [
federator_incoming: 5, federator_incoming: 5,
federator_outgoing: 5 federator_outgoing: 5,
search_indexing: 2
] ]
config :pleroma, Pleroma.Formatter, config :pleroma, Pleroma.Formatter,
@ -888,11 +891,19 @@ config :pleroma, Pleroma.User.Backup,
config :pleroma, ConcurrentLimiter, [ config :pleroma, ConcurrentLimiter, [
{Pleroma.Web.RichMedia.Helpers, [max_running: 5, max_waiting: 5]}, {Pleroma.Web.RichMedia.Helpers, [max_running: 5, max_waiting: 5]},
{Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]} {Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy, [max_running: 5, max_waiting: 5]},
{Pleroma.Search, [max_running: 30, max_waiting: 50]}
] ]
config :pleroma, Pleroma.Web.WebFinger, domain: nil, update_nickname_on_user_fetch: true config :pleroma, Pleroma.Web.WebFinger, domain: nil, update_nickname_on_user_fetch: true
config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch
config :pleroma, Pleroma.Search.Meilisearch,
url: "http://127.0.0.1:7700/",
private_key: nil,
initial_indexing_chunk_size: 100_000
# Import environment specific config. This must remain at the bottom # Import environment specific config. This must remain at the bottom
# of this file so it overrides the configuration defined above. # of this file so it overrides the configuration defined above.
import_config "#{Mix.env()}.exs" import_config "#{Mix.env()}.exs"

View File

@ -3466,5 +3466,48 @@ config :pleroma, :config_description, [
] ]
} }
] ]
},
%{
group: :pleroma,
key: Pleroma.Search,
type: :group,
description: "General search settings.",
children: [
%{
key: :module,
type: :keyword,
description: "Selected search module.",
suggestion: [Pleroma.Search.DatabaseSearch, Pleroma.Search.Meilisearch]
}
]
},
%{
group: :pleroma,
key: Pleroma.Search.Meilisearch,
type: :group,
description: "Meilisearch settings.",
children: [
%{
key: :url,
type: :string,
description: "Meilisearch URL.",
suggestion: ["http://127.0.0.1:7700/"]
},
%{
key: :private_key,
type: :string,
description:
"Private key for meilisearch authentication, or `nil` to disable private key authentication.",
suggestion: [nil]
},
%{
key: :initial_indexing_chunk_size,
type: :int,
description:
"Amount of posts in a batch when running the initial indexing operation. Should probably not be more than 100000" <>
" since there's a limit on maximum insert size",
suggestion: [100_000]
}
]
} }
] ]

View File

@ -133,10 +133,16 @@ config :pleroma, :side_effects,
ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock, ap_streamer: Pleroma.Web.ActivityPub.ActivityPubMock,
logger: Pleroma.LoggerMock logger: Pleroma.LoggerMock
config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch
config :pleroma, Pleroma.Search.Meilisearch, url: "http://127.0.0.1:7700/", private_key: nil
# Reduce recompilation time # Reduce recompilation time
# https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects # https://dashbit.co/blog/speeding-up-re-compilation-of-elixir-projects
config :phoenix, :plug_init_mode, :runtime config :phoenix, :plug_init_mode, :runtime
config :pleroma, :config_impl, Pleroma.UnstubbedConfigMock
if File.exists?("./config/test.secret.exs") do if File.exists?("./config/test.secret.exs") do
import_config "test.secret.exs" import_config "test.secret.exs"
else else

View File

@ -0,0 +1,123 @@
# Configuring search
{! backend/administration/CLI_tasks/general_cli_task_info.include !}
## Built-in search
To use built-in search that has no external dependencies, set the search module to `Pleroma.Activity`:
> config :pleroma, Pleroma.Search, module: Pleroma.Search.DatabaseSearch
While it has no external dependencies, it has problems with performance and relevancy.
## Meilisearch
Note that it's quite a bit more memory hungry than PostgreSQL (around 4-5G for ~1.2 million
posts while idle and up to 7G while indexing initially). The disk usage for this additional index is also
around 4 gigabytes. Like [RUM](./cheatsheet.md#rum-indexing-for-full-text-search) indexes, it offers considerably
higher performance and ordering by timestamp in a reasonable amount of time.
Additionally, the search results seem to be more accurate.
Due to high memory usage, it may be best to set it up on a different machine, if running pleroma on a low-resource
computer, and use private key authentication to secure the remote search instance.
To use [meilisearch](https://www.meilisearch.com/), set the search module to `Pleroma.Search.Meilisearch`:
> config :pleroma, Pleroma.Search, module: Pleroma.Search.Meilisearch
You then need to set the address of the meilisearch instance, and optionally the private key for authentication. You might
also want to change the `initial_indexing_chunk_size` to be smaller if you're server is not very powerful, but not higher than `100_000`,
because meilisearch will refuse to process it if it's too big. However, in general you want this to be as big as possible, because meilisearch
indexes faster when it can process many posts in a single batch.
> config :pleroma, Pleroma.Search.Meilisearch,
> url: "http://127.0.0.1:7700/",
> private_key: "private key",
> initial_indexing_chunk_size: 100_000
Information about setting up meilisearch can be found in the
[official documentation](https://docs.meilisearch.com/learn/getting_started/installation.html).
You probably want to start it with `MEILI_NO_ANALYTICS=true` environment variable to disable analytics.
At least version 0.25.0 is required, but you are strongly adviced to use at least 0.26.0, as it introduces
the `--enable-auto-batching` option which drastically improves performance. Without this option, the search
is hardly usable on a somewhat big instance.
### Private key authentication (optional)
To set the private key, use the `MEILI_MASTER_KEY` environment variable when starting. After setting the _master key_,
you have to get the _private key_, which is actually used for authentication.
=== "OTP"
```sh
./bin/pleroma_ctl search.meilisearch show-keys <your master key here>
```
=== "From Source"
```sh
mix pleroma.search.meilisearch show-keys <your master key here>
```
You will see a "Default Admin API Key", this is the key you actually put into your configuration file.
### Initial indexing
After setting up the configuration, you'll want to index all of your already existsing posts. Only public posts are indexed. You'll only
have to do it one time, but it might take a while, depending on the amount of posts your instance has seen. This is also a fairly RAM
consuming process for `meilisearch`, and it will take a lot of RAM when running if you have a lot of posts (seems to be around 5G for ~1.2
million posts while idle and up to 7G while indexing initially, but your experience may be different).
The sequence of actions is as follows:
1. First, change the configuration to use `Pleroma.Search.Meilisearch` as the search backend
2. Restart your instance, at this point it can be used while the search indexing is running, though search won't return anything
3. Start the initial indexing process (as described below with `index`),
and wait until the task says it sent everything from the database to index
4. Wait until everything is actually indexed (by checking with `stats` as described below),
at this point you don't have to do anything, just wait a while.
To start the initial indexing, run the `index` command:
=== "OTP"
```sh
./bin/pleroma_ctl search.meilisearch index
```
=== "From Source"
```sh
mix pleroma.search.meilisearch index
```
This will show you the total amount of posts to index, and then show you the amount of posts indexed currently, until the numbers eventually
become the same. The posts are indexed in big batches and meilisearch will take some time to actually index them, even after you have
inserted all the posts into it. Depending on the amount of posts, this may be as long as several hours. To get information about the status
of indexing and how many posts have actually been indexed, use the `stats` command:
=== "OTP"
```sh
./bin/pleroma_ctl search.meilisearch stats
```
=== "From Source"
```sh
mix pleroma.search.meilisearch stats
```
### Clearing the index
In case you need to clear the index (for example, to re-index from scratch, if that needs to happen for some reason), you can
use the `clear` command:
=== "OTP"
```sh
./bin/pleroma_ctl search.meilisearch clear
```
=== "From Source"
```sh
mix pleroma.search.meilisearch clear
```
This will clear **all** the posts from the search index. Note, that deleted posts are also removed from index by the instance itself, so
there is no need to actually clear the whole index, unless you want **all** of it gone. That said, the index does not hold any information
that cannot be re-created from the database, it should also generally be a lot smaller than the size of your database. Still, the size
depends on the amount of text in posts.

View File

@ -0,0 +1,145 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Mix.Tasks.Pleroma.Search.Meilisearch do
require Pleroma.Constants
import Mix.Pleroma
import Ecto.Query
import Pleroma.Search.Meilisearch,
only: [meili_post: 2, meili_put: 2, meili_get: 1, meili_delete: 1]
def run(["index"]) do
start_pleroma()
Pleroma.HTML.compile_scrubbers()
meili_version =
(
{:ok, result} = meili_get("/version")
result["pkgVersion"]
)
# The ranking rule syntax was changed but nothing about that is mentioned in the changelog
if not Version.match?(meili_version, ">= 0.25.0") do
raise "Meilisearch <0.24.0 not supported"
end
{:ok, _} =
meili_post(
"/indexes/objects/settings/ranking-rules",
[
"published:desc",
"words",
"exactness",
"proximity",
"typo",
"attribute",
"sort"
]
)
{:ok, _} =
meili_post(
"/indexes/objects/settings/searchable-attributes",
[
"content"
]
)
IO.puts("Created indices. Starting to insert posts.")
chunk_size = Pleroma.Config.get([Pleroma.Search.Meilisearch, :initial_indexing_chunk_size])
Pleroma.Repo.transaction(
fn ->
query =
from(Pleroma.Object,
# Only index public and unlisted posts which are notes and have some text
where:
fragment("data->>'type' = 'Note'") and
(fragment("data->'to' \\? ?", ^Pleroma.Constants.as_public()) or
fragment("data->'cc' \\? ?", ^Pleroma.Constants.as_public())),
order_by: [desc: fragment("data->'published'")]
)
count = query |> Pleroma.Repo.aggregate(:count, :data)
IO.puts("Entries to index: #{count}")
Pleroma.Repo.stream(
query,
timeout: :infinity
)
|> Stream.map(&Pleroma.Search.Meilisearch.object_to_search_data/1)
|> Stream.filter(fn o -> not is_nil(o) end)
|> Stream.chunk_every(chunk_size)
|> Stream.transform(0, fn objects, acc ->
new_acc = acc + Enum.count(objects)
# Reset to the beginning of the line and rewrite it
IO.write("\r")
IO.write("Indexed #{new_acc} entries")
{[objects], new_acc}
end)
|> Stream.each(fn objects ->
result =
meili_put(
"/indexes/objects/documents",
objects
)
with {:ok, res} <- result do
if not Map.has_key?(res, "uid") do
IO.puts("\nFailed to index: #{inspect(result)}")
end
else
e -> IO.puts("\nFailed to index due to network error: #{inspect(e)}")
end
end)
|> Stream.run()
end,
timeout: :infinity
)
IO.write("\n")
end
def run(["clear"]) do
start_pleroma()
meili_delete("/indexes/objects/documents")
end
def run(["show-keys", master_key]) do
start_pleroma()
endpoint = Pleroma.Config.get([Pleroma.Search.Meilisearch, :url])
{:ok, result} =
Pleroma.HTTP.get(
Path.join(endpoint, "/keys"),
[{"Authorization", "Bearer #{master_key}"}]
)
decoded = Jason.decode!(result.body)
if decoded["results"] do
Enum.each(decoded["results"], fn %{"description" => desc, "key" => key} ->
IO.puts("#{desc}: #{key}")
end)
else
IO.puts("Error fetching the keys, check the master key is correct: #{inspect(decoded)}")
end
end
def run(["stats"]) do
start_pleroma()
{:ok, result} = meili_get("/indexes/objects/stats")
IO.puts("Number of entries: #{result["numberOfDocuments"]}")
IO.puts("Indexing? #{result["isIndexing"]}")
end
end

View File

@ -368,7 +368,7 @@ defmodule Pleroma.Activity do
) )
end end
defdelegate search(user, query, options \\ []), to: Pleroma.Activity.Search defdelegate search(user, query, options \\ []), to: Pleroma.Search.DatabaseSearch
def direct_conversation_id(activity, for_user) do def direct_conversation_id(activity, for_user) do
alias Pleroma.Conversation.Participation alias Pleroma.Conversation.Participation

View File

@ -322,7 +322,11 @@ defmodule Pleroma.Application do
def limiters_setup do def limiters_setup do
config = Config.get(ConcurrentLimiter, []) config = Config.get(ConcurrentLimiter, [])
[Pleroma.Web.RichMedia.Helpers, Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy] [
Pleroma.Web.RichMedia.Helpers,
Pleroma.Web.ActivityPub.MRF.MediaProxyWarmingPolicy,
Pleroma.Search
]
|> Enum.each(fn module -> |> Enum.each(fn module ->
mod_config = Keyword.get(config, module, []) mod_config = Keyword.get(config, module, [])

View File

@ -5,4 +5,11 @@
defmodule Pleroma.Config.Getting do defmodule Pleroma.Config.Getting do
@callback get(any()) :: any() @callback get(any()) :: any()
@callback get(any(), any()) :: any() @callback get(any(), any()) :: any()
def get(key), do: get(key, nil)
def get(key, default), do: impl().get(key, default)
def impl do
Application.get_env(:pleroma, :config_impl, Pleroma.Config)
end
end end

17
lib/pleroma/search.ex Normal file
View File

@ -0,0 +1,17 @@
defmodule Pleroma.Search do
alias Pleroma.Workers.SearchIndexingWorker
def add_to_index(%Pleroma.Activity{id: activity_id}) do
SearchIndexingWorker.enqueue("add_to_index", %{"activity" => activity_id})
end
def remove_from_index(%Pleroma.Object{id: object_id}) do
SearchIndexingWorker.enqueue("remove_from_index", %{"object" => object_id})
end
def search(query, options) do
search_module = Pleroma.Config.get([Pleroma.Search, :module], Pleroma.Activity)
search_module.search(options[:for_user], query, options)
end
end

View File

@ -1,9 +1,10 @@
# Pleroma: A lightweight social networking server # Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/> # Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Activity.Search do defmodule Pleroma.Search.DatabaseSearch do
alias Pleroma.Activity alias Pleroma.Activity
alias Pleroma.Config
alias Pleroma.Object.Fetcher alias Pleroma.Object.Fetcher
alias Pleroma.Pagination alias Pleroma.Pagination
alias Pleroma.User alias Pleroma.User
@ -13,8 +14,11 @@ defmodule Pleroma.Activity.Search do
import Ecto.Query import Ecto.Query
@behaviour Pleroma.Search.SearchBackend
@impl true
def search(user, search_query, options \\ []) do def search(user, search_query, options \\ []) do
index_type = if Pleroma.Config.get([:database, :rum_enabled]), do: :rum, else: :gin index_type = if Config.get([:database, :rum_enabled]), do: :rum, else: :gin
limit = Enum.min([Keyword.get(options, :limit), 40]) limit = Enum.min([Keyword.get(options, :limit), 40])
offset = Keyword.get(options, :offset, 0) offset = Keyword.get(options, :offset, 0)
author = Keyword.get(options, :author) author = Keyword.get(options, :author)
@ -45,6 +49,12 @@ defmodule Pleroma.Activity.Search do
end end
end end
@impl true
def add_to_index(_activity), do: :ok
@impl true
def remove_from_index(_object), do: :ok
def maybe_restrict_author(query, %User{} = author) do def maybe_restrict_author(query, %User{} = author) do
Activity.Queries.by_author(query, author) Activity.Queries.by_author(query, author)
end end
@ -136,8 +146,8 @@ defmodule Pleroma.Activity.Search do
) )
end end
defp maybe_restrict_local(q, user) do def maybe_restrict_local(q, user) do
limit = Pleroma.Config.get([:instance, :limit_to_local_content], :unauthenticated) limit = Config.get([:instance, :limit_to_local_content], :unauthenticated)
case {limit, user} do case {limit, user} do
{:all, _} -> restrict_local(q) {:all, _} -> restrict_local(q)
@ -149,7 +159,7 @@ defmodule Pleroma.Activity.Search do
defp restrict_local(q), do: where(q, local: true) defp restrict_local(q), do: where(q, local: true)
defp maybe_fetch(activities, user, search_query) do def maybe_fetch(activities, user, search_query) do
with true <- Regex.match?(~r/https?:/, search_query), with true <- Regex.match?(~r/https?:/, search_query),
{:ok, object} <- Fetcher.fetch_object_from_id(search_query), {:ok, object} <- Fetcher.fetch_object_from_id(search_query),
%Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]),

View File

@ -0,0 +1,181 @@
defmodule Pleroma.Search.Meilisearch do
require Logger
require Pleroma.Constants
alias Pleroma.Activity
alias Pleroma.Config.Getting, as: Config
import Pleroma.Search.DatabaseSearch
import Ecto.Query
@behaviour Pleroma.Search.SearchBackend
defp meili_headers do
private_key = Config.get([Pleroma.Search.Meilisearch, :private_key])
[{"Content-Type", "application/json"}] ++
if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}]
end
def meili_get(path) do
endpoint = Config.get([Pleroma.Search.Meilisearch, :url])
result =
Pleroma.HTTP.get(
Path.join(endpoint, path),
meili_headers()
)
with {:ok, res} <- result do
{:ok, Jason.decode!(res.body)}
end
end
def meili_post(path, params) do
endpoint = Config.get([Pleroma.Search.Meilisearch, :url])
result =
Pleroma.HTTP.post(
Path.join(endpoint, path),
Jason.encode!(params),
meili_headers()
)
with {:ok, res} <- result do
{:ok, Jason.decode!(res.body)}
end
end
def meili_put(path, params) do
endpoint = Config.get([Pleroma.Search.Meilisearch, :url])
result =
Pleroma.HTTP.request(
:put,
Path.join(endpoint, path),
Jason.encode!(params),
meili_headers(),
[]
)
with {:ok, res} <- result do
{:ok, Jason.decode!(res.body)}
end
end
def meili_delete(path) do
endpoint = Config.get([Pleroma.Search.Meilisearch, :url])
with {:ok, _} <-
Pleroma.HTTP.request(
:delete,
Path.join(endpoint, path),
"",
meili_headers(),
[]
) do
:ok
else
_ -> {:error, "Could not remove from index"}
end
end
@impl true
def search(user, query, options \\ []) do
limit = Enum.min([Keyword.get(options, :limit), 40])
offset = Keyword.get(options, :offset, 0)
author = Keyword.get(options, :author)
res =
meili_post(
"/indexes/objects/search",
%{q: query, offset: offset, limit: limit}
)
with {:ok, result} <- res do
hits = result["hits"] |> Enum.map(& &1["ap"])
try do
hits
|> Activity.create_by_object_ap_id()
|> Activity.with_preloaded_object()
|> Activity.restrict_deactivated_users()
|> maybe_restrict_local(user)
|> maybe_restrict_author(author)
|> maybe_restrict_blocked(user)
|> maybe_fetch(user, query)
|> order_by([object: obj], desc: obj.data["published"])
|> Pleroma.Repo.all()
rescue
_ -> maybe_fetch([], user, query)
end
end
end
def object_to_search_data(object) do
# Only index public or unlisted Notes
if not is_nil(object) and object.data["type"] == "Note" and
not is_nil(object.data["content"]) and
(Pleroma.Constants.as_public() in object.data["to"] or
Pleroma.Constants.as_public() in object.data["cc"]) and
object.data["content"] not in ["", "."] do
data = object.data
content_str =
case data["content"] do
[nil | rest] -> to_string(rest)
str -> str
end
content =
with {:ok, scrubbed} <-
FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
trimmed <- String.trim(scrubbed) do
trimmed
end
# Make sure we have a non-empty string
if content != "" do
{:ok, published, _} = DateTime.from_iso8601(data["published"])
%{
id: object.id,
content: content,
ap: data["id"],
published: published |> DateTime.to_unix()
}
end
end
end
@impl true
def add_to_index(activity) do
maybe_search_data = object_to_search_data(activity.object)
if activity.data["type"] == "Create" and maybe_search_data do
result =
meili_put(
"/indexes/objects/documents",
[maybe_search_data]
)
with {:ok, %{"status" => "enqueued"}} <- result do
# Added successfully
:ok
else
_ ->
# There was an error, report it
Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}")
{:error, result}
end
else
# The post isn't something we can search, that's ok
:ok
end
end
@impl true
def remove_from_index(object) do
meili_delete("/indexes/objects/documents/#{object.id}")
end
end

View File

@ -0,0 +1,24 @@
defmodule Pleroma.Search.SearchBackend do
@doc """
Search statuses with a query, restricting to only those the user should have access to.
"""
@callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [
Pleroma.Activity.t()
]
@doc """
Add the object associated with the activity to the search index.
The whole activity is passed, to allow filtering on things such as scope.
"""
@callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()}
@doc """
Remove the object from the index.
Just the object, as opposed to the whole activity, is passed, since the object
is what contains the actual content and there is no need for fitlering when removing
from index.
"""
@callback remove_from_index(object :: Pleroma.Object.t()) :: {:ok, any()} | {:error, any()}
end

View File

@ -140,6 +140,9 @@ defmodule Pleroma.Web.ActivityPub.ActivityPub do
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
end) end)
# Add local posts to search index
if local, do: Pleroma.Search.add_to_index(activity)
{:ok, activity} {:ok, activity}
else else
%Activity{} = activity -> %Activity{} = activity ->

View File

@ -197,6 +197,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
# - Increase replies count # - Increase replies count
# - Set up ActivityExpiration # - Set up ActivityExpiration
# - Set up notifications # - Set up notifications
# - Index incoming posts for search (if needed)
@impl true @impl true
def handle(%{data: %{"type" => "Create"}} = activity, meta) do def handle(%{data: %{"type" => "Create"}} = activity, meta) do
with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta), with {:ok, object, meta} <- handle_object_creation(meta[:object_data], activity, meta),
@ -226,6 +227,8 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end) Task.start(fn -> Pleroma.Web.RichMedia.Helpers.fetch_data_for_activity(activity) end)
end) end)
Pleroma.Search.add_to_index(Map.put(activity, :object, object))
meta = meta =
meta meta
|> add_notifications(notifications) |> add_notifications(notifications)
@ -285,6 +288,7 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
# - Reduce the user note count # - Reduce the user note count
# - Reduce the reply count # - Reduce the reply count
# - Stream out the activity # - Stream out the activity
# - Removes posts from search index (if needed)
@impl true @impl true
def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do def handle(%{data: %{"type" => "Delete", "object" => deleted_object}} = object, meta) do
deleted_object = deleted_object =
@ -323,6 +327,11 @@ defmodule Pleroma.Web.ActivityPub.SideEffects do
end end
if result == :ok do if result == :ok do
# Only remove from index when deleting actual objects, not users or anything else
with %Pleroma.Object{} <- deleted_object do
Pleroma.Search.remove_from_index(deleted_object)
end
{:ok, object, meta} {:ok, object, meta}
else else
{:error, result} {:error, result}

View File

@ -5,7 +5,6 @@
defmodule Pleroma.Web.MastodonAPI.SearchController do defmodule Pleroma.Web.MastodonAPI.SearchController do
use Pleroma.Web, :controller use Pleroma.Web, :controller
alias Pleroma.Activity
alias Pleroma.Repo alias Pleroma.Repo
alias Pleroma.User alias Pleroma.User
alias Pleroma.Web.ControllerHelper alias Pleroma.Web.ControllerHelper
@ -100,7 +99,7 @@ defmodule Pleroma.Web.MastodonAPI.SearchController do
end end
defp resource_search(_, "statuses", query, options) do defp resource_search(_, "statuses", query, options) do
statuses = with_fallback(fn -> Activity.search(options[:for_user], query, options) end) statuses = with_fallback(fn -> Pleroma.Search.search(query, options) end)
StatusView.render("index.json", StatusView.render("index.json",
activities: statuses, activities: statuses,

View File

@ -0,0 +1,23 @@
defmodule Pleroma.Workers.SearchIndexingWorker do
use Pleroma.Workers.WorkerHelper, queue: "search_indexing"
@impl Oban.Worker
alias Pleroma.Config.Getting, as: Config
def perform(%Job{args: %{"op" => "add_to_index", "activity" => activity_id}}) do
activity = Pleroma.Activity.get_by_id_with_object(activity_id)
search_module = Config.get([Pleroma.Search, :module])
search_module.add_to_index(activity)
end
def perform(%Job{args: %{"op" => "remove_from_index", "object" => object_id}}) do
object = Pleroma.Object.get_by_id(object_id)
search_module = Config.get([Pleroma.Search, :module])
search_module.remove_from_index(object)
end
end

View File

@ -0,0 +1,24 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.HTML.Scrubber.SearchIndexing do
@moduledoc """
An HTML scrubbing policy that scrubs things for searching.
"""
require FastSanitize.Sanitizer.Meta
alias FastSanitize.Sanitizer.Meta
# Explicitly remove mentions
def scrub({:a, attrs, children}) do
if(Enum.any?(attrs, fn {att, val} -> att == "class" and String.contains?(val, "mention") end),
do: nil,
# Strip the tag itself, leave only children (text, presumably)
else: children
)
end
Meta.strip_comments()
Meta.strip_everything_not_covered()
end

View File

@ -23,6 +23,11 @@ defmodule Mix.Tasks.Pleroma.DigestTest do
setup do: clear_config([Pleroma.Emails.Mailer, :enabled], true) setup do: clear_config([Pleroma.Emails.Mailer, :enabled], true)
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
describe "pleroma.digest test" do describe "pleroma.digest test" do
test "Sends digest to the given user" do test "Sends digest to the given user" do
user1 = insert(:user) user1 = insert(:user)

View File

@ -20,6 +20,11 @@ defmodule Mix.Tasks.Pleroma.UserTest do
import Mock import Mock
import Pleroma.Factory import Pleroma.Factory
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
setup_all do setup_all do
Mix.shell(Mix.Shell.Process) Mix.shell(Mix.Shell.Process)

View File

@ -13,6 +13,11 @@ defmodule Pleroma.ConversationTest do
setup_all do: clear_config([:instance, :federating], true) setup_all do: clear_config([:instance, :federating], true)
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
test "it goes through old direct conversations" do test "it goes through old direct conversations" do
user = insert(:user) user = insert(:user)
other_user = insert(:user) other_user = insert(:user)

View File

@ -21,6 +21,11 @@ defmodule Pleroma.NotificationTest do
alias Pleroma.Web.Push alias Pleroma.Web.Push
alias Pleroma.Web.Streamer alias Pleroma.Web.Streamer
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
describe "create_notifications" do describe "create_notifications" do
test "never returns nil" do test "never returns nil" do
user = insert(:user) user = insert(:user)

View File

@ -2,8 +2,8 @@
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/> # Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Activity.SearchTest do defmodule Pleroma.Search.DatabaseSearchTest do
alias Pleroma.Activity.Search alias Pleroma.Search.DatabaseSearch, as: Search
alias Pleroma.Web.CommonAPI alias Pleroma.Web.CommonAPI
import Pleroma.Factory import Pleroma.Factory

View File

@ -0,0 +1,160 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Search.MeilisearchTest do
require Pleroma.Constants
use Pleroma.DataCase, async: true
use Oban.Testing, repo: Pleroma.Repo
import Pleroma.Factory
import Tesla.Mock
import Mox
alias Pleroma.Search.Meilisearch
alias Pleroma.UnstubbedConfigMock, as: Config
alias Pleroma.Web.CommonAPI
alias Pleroma.Workers.SearchIndexingWorker
describe "meilisearch" do
test "indexes a local post on creation" do
user = insert(:user)
Tesla.Mock.mock(fn
%{
method: :put,
url: "http://127.0.0.1:7700/indexes/objects/documents",
body: body
} ->
assert match?(
[%{"content" => "guys i just don&#39;t wanna leave the swamp"}],
Jason.decode!(body)
)
# To make sure that the worker is called
send(self(), "posted_to_meilisearch")
%{
"enqueuedAt" => "2023-11-12T12:36:46.927517Z",
"indexUid" => "objects",
"status" => "enqueued",
"taskUid" => 6,
"type" => "documentAdditionOrUpdate"
}
|> json()
end)
Config
|> expect(:get, 3, fn
[Pleroma.Search, :module], nil ->
Meilisearch
[Pleroma.Search.Meilisearch, :url], nil ->
"http://127.0.0.1:7700"
[Pleroma.Search.Meilisearch, :private_key], nil ->
"secret"
end)
{:ok, activity} =
CommonAPI.post(user, %{
status: "guys i just don't wanna leave the swamp",
visibility: "public"
})
args = %{"op" => "add_to_index", "activity" => activity.id}
assert_enqueued(
worker: SearchIndexingWorker,
args: args
)
assert :ok = perform_job(SearchIndexingWorker, args)
assert_received("posted_to_meilisearch")
end
test "doesn't index posts that are not public" do
user = insert(:user)
Enum.each(["private", "direct"], fn visibility ->
{:ok, activity} =
CommonAPI.post(user, %{
status: "guys i just don't wanna leave the swamp",
visibility: visibility
})
args = %{"op" => "add_to_index", "activity" => activity.id}
Config
|> expect(:get, fn
[Pleroma.Search, :module], nil ->
Meilisearch
end)
assert_enqueued(worker: SearchIndexingWorker, args: args)
assert :ok = perform_job(SearchIndexingWorker, args)
end)
end
test "deletes posts from index when deleted locally" do
user = insert(:user)
Tesla.Mock.mock(fn
%{
method: :put,
url: "http://127.0.0.1:7700/indexes/objects/documents",
body: body
} ->
assert match?(
[%{"content" => "guys i just don&#39;t wanna leave the swamp"}],
Jason.decode!(body)
)
%{
"enqueuedAt" => "2023-11-12T12:36:46.927517Z",
"indexUid" => "objects",
"status" => "enqueued",
"taskUid" => 6,
"type" => "documentAdditionOrUpdate"
}
|> json()
%{method: :delete, url: "http://127.0.0.1:7700/indexes/objects/documents/" <> id} ->
send(self(), "called_delete")
assert String.length(id) > 1
json(%{})
end)
Config
|> expect(:get, 6, fn
[Pleroma.Search, :module], nil ->
Meilisearch
[Pleroma.Search.Meilisearch, :url], nil ->
"http://127.0.0.1:7700"
[Pleroma.Search.Meilisearch, :private_key], nil ->
"secret"
end)
{:ok, activity} =
CommonAPI.post(user, %{
status: "guys i just don't wanna leave the swamp",
visibility: "public"
})
args = %{"op" => "add_to_index", "activity" => activity.id}
assert_enqueued(worker: SearchIndexingWorker, args: args)
assert :ok = perform_job(SearchIndexingWorker, args)
{:ok, _} = CommonAPI.delete(activity.id, user)
delete_args = %{"op" => "remove_from_index", "object" => activity.object.id}
assert_enqueued(worker: SearchIndexingWorker, args: delete_args)
assert :ok = perform_job(SearchIndexingWorker, delete_args)
assert_received("called_delete")
end
end
end

View File

@ -19,6 +19,11 @@ defmodule Pleroma.UserTest do
import ExUnit.CaptureLog import ExUnit.CaptureLog
import Swoosh.TestAssertions import Swoosh.TestAssertions
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
setup_all do setup_all do
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
:ok :ok

View File

@ -25,6 +25,11 @@ defmodule Pleroma.Web.ActivityPub.ActivityPubControllerTest do
require Pleroma.Constants require Pleroma.Constants
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
setup_all do setup_all do
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
:ok :ok

View File

@ -19,6 +19,11 @@ defmodule Pleroma.Web.AdminAPI.UserControllerTest do
alias Pleroma.Web.Endpoint alias Pleroma.Web.Endpoint
alias Pleroma.Web.MediaProxy alias Pleroma.Web.MediaProxy
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
setup_all do setup_all do
Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) Tesla.Mock.mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)

View File

@ -18,6 +18,11 @@ defmodule Pleroma.Web.MastodonAPI.AccountControllerTest do
import Pleroma.Factory import Pleroma.Factory
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
describe "account fetching" do describe "account fetching" do
test "works by id" do test "works by id" do
%User{id: user_id} = insert(:user) %User{id: user_id} = insert(:user)

View File

@ -12,6 +12,11 @@ defmodule Pleroma.Web.MastodonAPI.NotificationControllerTest do
import Pleroma.Factory import Pleroma.Factory
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
test "does NOT render account/pleroma/relationship by default" do test "does NOT render account/pleroma/relationship by default" do
%{user: user, conn: conn} = oauth_access(["read:notifications"]) %{user: user, conn: conn} = oauth_access(["read:notifications"])
other_user = insert(:user) other_user = insert(:user)

View File

@ -13,6 +13,11 @@ defmodule Pleroma.Web.MastodonAPI.SearchControllerTest do
import Tesla.Mock import Tesla.Mock
import Mock import Mock
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
setup_all do setup_all do
mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end) mock_global(fn env -> apply(HttpRequestMock, :request, [env]) end)
:ok :ok

View File

@ -27,6 +27,11 @@ defmodule Pleroma.Web.MastodonAPI.StatusControllerTest do
setup do: clear_config([:mrf, :policies]) setup do: clear_config([:mrf, :policies])
setup do: clear_config([:mrf_keyword, :reject]) setup do: clear_config([:mrf_keyword, :reject])
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
describe "posting statuses" do describe "posting statuses" do
setup do: oauth_access(["write:statuses"]) setup do: oauth_access(["write:statuses"])

View File

@ -22,6 +22,11 @@ defmodule Pleroma.Web.MastodonAPI.NotificationViewTest do
alias Pleroma.Web.PleromaAPI.Chat.MessageReferenceView alias Pleroma.Web.PleromaAPI.Chat.MessageReferenceView
import Pleroma.Factory import Pleroma.Factory
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
defp test_notifications_rendering(notifications, user, expected_result) do defp test_notifications_rendering(notifications, user, expected_result) do
result = NotificationView.render("index.json", %{notifications: notifications, for: user}) result = NotificationView.render("index.json", %{notifications: notifications, for: user})

View File

@ -13,6 +13,11 @@ defmodule Pleroma.Web.PleromaAPI.EmojiReactionControllerTest do
import Pleroma.Factory import Pleroma.Factory
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
test "PUT /api/v1/pleroma/statuses/:id/reactions/:emoji", %{conn: conn} do test "PUT /api/v1/pleroma/statuses/:id/reactions/:emoji", %{conn: conn} do
user = insert(:user) user = insert(:user)
other_user = insert(:user) other_user = insert(:user)

View File

@ -13,6 +13,11 @@ defmodule Pleroma.Workers.Cron.DigestEmailsWorkerTest do
setup do: clear_config([:email_notifications, :digest]) setup do: clear_config([:email_notifications, :digest])
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
setup do setup do
clear_config([:email_notifications, :digest], %{ clear_config([:email_notifications, :digest], %{
active: true, active: true,

View File

@ -10,6 +10,11 @@ defmodule Pleroma.Workers.Cron.NewUsersDigestWorkerTest do
alias Pleroma.Web.CommonAPI alias Pleroma.Web.CommonAPI
alias Pleroma.Workers.Cron.NewUsersDigestWorker alias Pleroma.Workers.Cron.NewUsersDigestWorker
setup do
Mox.stub_with(Pleroma.UnstubbedConfigMock, Pleroma.Config)
:ok
end
test "it sends new users digest emails" do test "it sends new users digest emails" do
yesterday = NaiveDateTime.utc_now() |> Timex.shift(days: -1) yesterday = NaiveDateTime.utc_now() |> Timex.shift(days: -1)
admin = insert(:user, %{is_admin: true}) admin = insert(:user, %{is_admin: true})

View File

@ -26,5 +26,6 @@ Mox.defmock(Pleroma.Web.ActivityPub.SideEffectsMock,
Mox.defmock(Pleroma.Web.FederatorMock, for: Pleroma.Web.Federator.Publishing) Mox.defmock(Pleroma.Web.FederatorMock, for: Pleroma.Web.Federator.Publishing)
Mox.defmock(Pleroma.ConfigMock, for: Pleroma.Config.Getting) Mox.defmock(Pleroma.ConfigMock, for: Pleroma.Config.Getting)
Mox.defmock(Pleroma.UnstubbedConfigMock, for: Pleroma.Config.Getting)
Mox.defmock(Pleroma.LoggerMock, for: Pleroma.Logging) Mox.defmock(Pleroma.LoggerMock, for: Pleroma.Logging)