From e87a32bcd7ee7d6bb5e9b6882a8685b5ee4c180c Mon Sep 17 00:00:00 2001 From: rinpatch Date: Sat, 14 Mar 2020 15:39:58 +0300 Subject: [PATCH] rip out fetch_initial_posts Every time someone tries to use it, it goes mad and tries to scrape the entire fediverse for no visible reason, it's better to just remove it than continue shipping it in it's current state. idea acked by lain and feld on irc Closes #1595 #1422 --- config/config.exs | 4 -- config/description.exs | 19 ------ docs/configuration/cheatsheet.md | 8 --- lib/pleroma/user.ex | 32 +-------- lib/pleroma/web/activity_pub/utils.ex | 39 ----------- lib/pleroma/workers/background_worker.ex | 4 -- ...3607_config_remove_fetch_initial_posts.exs | 10 +++ ...125756_delete_fetch_initial_posts_jobs.exs | 10 +++ test/web/activity_pub/utils_test.exs | 65 ------------------- 9 files changed, 21 insertions(+), 170 deletions(-) create mode 100644 priv/repo/migrations/20200314123607_config_remove_fetch_initial_posts.exs create mode 100644 priv/repo/migrations/20200315125756_delete_fetch_initial_posts_jobs.exs diff --git a/config/config.exs b/config/config.exs index 2cd741213..3357e23e7 100644 --- a/config/config.exs +++ b/config/config.exs @@ -504,10 +504,6 @@ config :pleroma, :workers, federator_outgoing: 5 ] -config :pleroma, :fetch_initial_posts, - enabled: false, - pages: 5 - config :auto_linker, opts: [ extra: true, diff --git a/config/description.exs b/config/description.exs index 9fdcfcd96..c0e403b2e 100644 --- a/config/description.exs +++ b/config/description.exs @@ -2007,25 +2007,6 @@ config :pleroma, :config_description, [ } ] }, - %{ - group: :pleroma, - key: :fetch_initial_posts, - type: :group, - description: "Fetching initial posts settings", - children: [ - %{ - key: :enabled, - type: :boolean, - description: "Fetch posts when a new user is federated with" - }, - %{ - key: :pages, - type: :integer, - description: "The amount of pages to fetch", - suggestions: [5] - } - ] - }, %{ group: :auto_linker, key: :opts, diff --git a/docs/configuration/cheatsheet.md b/docs/configuration/cheatsheet.md index 05fd6ceb1..2629385da 100644 --- a/docs/configuration/cheatsheet.md +++ b/docs/configuration/cheatsheet.md @@ -151,14 +151,6 @@ config :pleroma, :mrf_user_allowlist, * `sign_object_fetches`: Sign object fetches with HTTP signatures * `authorized_fetch_mode`: Require HTTP signatures for AP fetches -### :fetch_initial_posts - -!!! warning - Be careful with this setting, fetching posts may lead to new users being discovered whose posts will then also be fetched. This can lead to serious load on your instance and database. - -* `enabled`: If enabled, when a new user is discovered by your instance, fetch some of their latest posts. -* `pages`: The amount of pages to fetch - ## Pleroma.ScheduledActivity * `daily_user_limit`: the number of scheduled activities a user is allowed to create in a single day (Default: `25`) diff --git a/lib/pleroma/user.ex b/lib/pleroma/user.ex index 7531757f5..db510d957 100644 --- a/lib/pleroma/user.ex +++ b/lib/pleroma/user.ex @@ -839,10 +839,6 @@ defmodule Pleroma.User do _e -> with [_nick, _domain] <- String.split(nickname, "@"), {:ok, user} <- fetch_by_nickname(nickname) do - if Pleroma.Config.get([:fetch_initial_posts, :enabled]) do - fetch_initial_posts(user) - end - {:ok, user} else _e -> {:error, "not found " <> nickname} @@ -850,11 +846,6 @@ defmodule Pleroma.User do end end - @doc "Fetch some posts when the user has just been federated with" - def fetch_initial_posts(user) do - BackgroundWorker.enqueue("fetch_initial_posts", %{"user_id" => user.id}) - end - @spec get_followers_query(User.t(), pos_integer() | nil) :: Ecto.Query.t() def get_followers_query(%User{} = user, nil) do User.Query.build(%{followers: user, deactivated: false}) @@ -1320,16 +1311,6 @@ defmodule Pleroma.User do Repo.delete(user) end - def perform(:fetch_initial_posts, %User{} = user) do - pages = Pleroma.Config.get!([:fetch_initial_posts, :pages]) - - # Insert all the posts in reverse order, so they're in the right order on the timeline - user.source_data["outbox"] - |> Utils.fetch_ordered_collection(pages) - |> Enum.reverse() - |> Enum.each(&Pleroma.Web.Federator.incoming_ap_doc/1) - end - def perform(:deactivate_async, user, status), do: deactivate(user, status) @spec perform(atom(), User.t(), list()) :: list() | {:error, any()} @@ -1458,18 +1439,7 @@ defmodule Pleroma.User do if !is_nil(user) and !needs_update?(user) do {:ok, user} else - # Whether to fetch initial posts for the user (if it's a new user & the fetching is enabled) - should_fetch_initial = is_nil(user) and Pleroma.Config.get([:fetch_initial_posts, :enabled]) - - resp = fetch_by_ap_id(ap_id) - - if should_fetch_initial do - with {:ok, %User{} = user} <- resp do - fetch_initial_posts(user) - end - end - - resp + fetch_by_ap_id(ap_id) end end diff --git a/lib/pleroma/web/activity_pub/utils.ex b/lib/pleroma/web/activity_pub/utils.ex index 2bc958670..15dd2ed45 100644 --- a/lib/pleroma/web/activity_pub/utils.ex +++ b/lib/pleroma/web/activity_pub/utils.ex @@ -784,45 +784,6 @@ defmodule Pleroma.Web.ActivityPub.Utils do defp build_flag_object(_), do: [] - @doc """ - Fetches the OrderedCollection/OrderedCollectionPage from `from`, limiting the amount of pages fetched after - the first one to `pages_left` pages. - If the amount of pages is higher than the collection has, it returns whatever was there. - """ - def fetch_ordered_collection(from, pages_left, acc \\ []) do - with {:ok, response} <- Tesla.get(from), - {:ok, collection} <- Jason.decode(response.body) do - case collection["type"] do - "OrderedCollection" -> - # If we've encountered the OrderedCollection and not the page, - # just call the same function on the page address - fetch_ordered_collection(collection["first"], pages_left) - - "OrderedCollectionPage" -> - if pages_left > 0 do - # There are still more pages - if Map.has_key?(collection, "next") do - # There are still more pages, go deeper saving what we have into the accumulator - fetch_ordered_collection( - collection["next"], - pages_left - 1, - acc ++ collection["orderedItems"] - ) - else - # No more pages left, just return whatever we already have - acc ++ collection["orderedItems"] - end - else - # Got the amount of pages needed, add them all to the accumulator - acc ++ collection["orderedItems"] - end - - _ -> - {:error, "Not an OrderedCollection or OrderedCollectionPage"} - end - end - end - #### Report-related helpers def get_reports(params, page, page_size) do params = diff --git a/lib/pleroma/workers/background_worker.ex b/lib/pleroma/workers/background_worker.ex index 598df6580..0f8ece2c4 100644 --- a/lib/pleroma/workers/background_worker.ex +++ b/lib/pleroma/workers/background_worker.ex @@ -10,10 +10,6 @@ defmodule Pleroma.Workers.BackgroundWorker do use Pleroma.Workers.WorkerHelper, queue: "background" @impl Oban.Worker - def perform(%{"op" => "fetch_initial_posts", "user_id" => user_id}, _job) do - user = User.get_cached_by_id(user_id) - User.perform(:fetch_initial_posts, user) - end def perform(%{"op" => "deactivate_user", "user_id" => user_id, "status" => status}, _job) do user = User.get_cached_by_id(user_id) diff --git a/priv/repo/migrations/20200314123607_config_remove_fetch_initial_posts.exs b/priv/repo/migrations/20200314123607_config_remove_fetch_initial_posts.exs new file mode 100644 index 000000000..392f531e8 --- /dev/null +++ b/priv/repo/migrations/20200314123607_config_remove_fetch_initial_posts.exs @@ -0,0 +1,10 @@ +defmodule Pleroma.Repo.Migrations.ConfigRemoveFetchInitialPosts do + use Ecto.Migration + + def change do + execute( + "delete from config where config.key = ':fetch_initial_posts' and config.group = ':pleroma';", + "" + ) + end +end diff --git a/priv/repo/migrations/20200315125756_delete_fetch_initial_posts_jobs.exs b/priv/repo/migrations/20200315125756_delete_fetch_initial_posts_jobs.exs new file mode 100644 index 000000000..5b8e3ab91 --- /dev/null +++ b/priv/repo/migrations/20200315125756_delete_fetch_initial_posts_jobs.exs @@ -0,0 +1,10 @@ +defmodule Pleroma.Repo.Migrations.DeleteFetchInitialPostsJobs do + use Ecto.Migration + + def change do + execute( + "delete from oban_jobs where worker = 'Pleroma.Workers.BackgroundWorker' and args->>'op' = 'fetch_initial_posts';", + "" + ) + end +end diff --git a/test/web/activity_pub/utils_test.exs b/test/web/activity_pub/utils_test.exs index e5ab54dd4..e913a5148 100644 --- a/test/web/activity_pub/utils_test.exs +++ b/test/web/activity_pub/utils_test.exs @@ -177,71 +177,6 @@ defmodule Pleroma.Web.ActivityPub.UtilsTest do end end - describe "fetch_ordered_collection" do - import Tesla.Mock - - test "fetches the first OrderedCollectionPage when an OrderedCollection is encountered" do - mock(fn - %{method: :get, url: "http://mastodon.com/outbox"} -> - json(%{"type" => "OrderedCollection", "first" => "http://mastodon.com/outbox?page=true"}) - - %{method: :get, url: "http://mastodon.com/outbox?page=true"} -> - json(%{"type" => "OrderedCollectionPage", "orderedItems" => ["ok"]}) - end) - - assert Utils.fetch_ordered_collection("http://mastodon.com/outbox", 1) == ["ok"] - end - - test "fetches several pages in the right order one after another, but only the specified amount" do - mock(fn - %{method: :get, url: "http://example.com/outbox"} -> - json(%{ - "type" => "OrderedCollectionPage", - "orderedItems" => [0], - "next" => "http://example.com/outbox?page=1" - }) - - %{method: :get, url: "http://example.com/outbox?page=1"} -> - json(%{ - "type" => "OrderedCollectionPage", - "orderedItems" => [1], - "next" => "http://example.com/outbox?page=2" - }) - - %{method: :get, url: "http://example.com/outbox?page=2"} -> - json(%{"type" => "OrderedCollectionPage", "orderedItems" => [2]}) - end) - - assert Utils.fetch_ordered_collection("http://example.com/outbox", 0) == [0] - assert Utils.fetch_ordered_collection("http://example.com/outbox", 1) == [0, 1] - end - - test "returns an error if the url doesn't have an OrderedCollection/Page" do - mock(fn - %{method: :get, url: "http://example.com/not-an-outbox"} -> - json(%{"type" => "NotAnOutbox"}) - end) - - assert {:error, _} = Utils.fetch_ordered_collection("http://example.com/not-an-outbox", 1) - end - - test "returns the what was collected if there are less pages than specified" do - mock(fn - %{method: :get, url: "http://example.com/outbox"} -> - json(%{ - "type" => "OrderedCollectionPage", - "orderedItems" => [0], - "next" => "http://example.com/outbox?page=1" - }) - - %{method: :get, url: "http://example.com/outbox?page=1"} -> - json(%{"type" => "OrderedCollectionPage", "orderedItems" => [1]}) - end) - - assert Utils.fetch_ordered_collection("http://example.com/outbox", 5) == [0, 1] - end - end - test "make_json_ld_header/0" do assert Utils.make_json_ld_header() == %{ "@context" => [