Merge branch 'oban/rich-media-hardening' into 'develop'

Harden Rich Media parsing against very slow or malicious URLs

See merge request pleroma/pleroma!4192
This commit is contained in:
feld 2024-07-24 20:58:31 +00:00
commit 700c106680
9 changed files with 61 additions and 26 deletions

View File

@ -0,0 +1 @@
Harden Rich Media parsing against very slow or malicious URLs

View File

@ -448,7 +448,7 @@ config :pleroma, :rich_media,
Pleroma.Web.RichMedia.Parsers.TwitterCard, Pleroma.Web.RichMedia.Parsers.TwitterCard,
Pleroma.Web.RichMedia.Parsers.OEmbed Pleroma.Web.RichMedia.Parsers.OEmbed
], ],
failure_backoff: 60_000, timeout: 5_000,
ttl_setters: [ ttl_setters: [
Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl, Pleroma.Web.RichMedia.Parser.TTL.AwsSignedUrl,
Pleroma.Web.RichMedia.Parser.TTL.Opengraph Pleroma.Web.RichMedia.Parser.TTL.Opengraph
@ -580,6 +580,8 @@ config :pleroma, Pleroma.User,
], ],
email_blacklist: [] email_blacklist: []
# The Pruner :max_age must be longer than Worker :unique
# value or it cannot enforce uniqueness.
config :pleroma, Oban, config :pleroma, Oban,
repo: Pleroma.Repo, repo: Pleroma.Repo,
log: false, log: false,
@ -593,7 +595,7 @@ config :pleroma, Oban,
search_indexing: [limit: 10, paused: true], search_indexing: [limit: 10, paused: true],
slow: 5 slow: 5
], ],
plugins: [Oban.Plugins.Pruner], plugins: [{Oban.Plugins.Pruner, max_age: 900}],
crontab: [ crontab: [
{"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker}, {"0 0 * * 0", Pleroma.Workers.Cron.DigestEmailsWorker},
{"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker} {"0 0 * * *", Pleroma.Workers.Cron.NewUsersDigestWorker}

View File

@ -2101,11 +2101,11 @@ config :pleroma, :config_description, [
] ]
}, },
%{ %{
key: :failure_backoff, key: :timeout,
type: :integer, type: :integer,
description: description:
"Amount of milliseconds after request failure, during which the request will not be retried.", "Amount of milliseconds after which the HTTP request is forcibly terminated.",
suggestions: [60_000] suggestions: [5_000]
} }
] ]
}, },

View File

@ -436,7 +436,7 @@ config :pleroma, Pleroma.Web.MediaProxy.Invalidation.Http,
* `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`. * `ignore_hosts`: list of hosts which will be ignored by the metadata parser. For example `["accounts.google.com", "xss.website"]`, defaults to `[]`.
* `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"]. * `ignore_tld`: list TLDs (top-level domains) which will ignore for parse metadata. default is ["local", "localdomain", "lan"].
* `parsers`: list of Rich Media parsers. * `parsers`: list of Rich Media parsers.
* `failure_backoff`: Amount of milliseconds after request failure, during which the request will not be retried. * `timeout`: Amount of milliseconds after which the HTTP request is forcibly terminated.
## HTTP server ## HTTP server

View File

@ -68,7 +68,9 @@ defmodule Pleroma.HTTP do
adapter = Application.get_env(:tesla, :adapter) adapter = Application.get_env(:tesla, :adapter)
client = Tesla.client(adapter_middlewares(adapter), adapter) extra_middleware = options[:tesla_middleware] || []
client = Tesla.client(adapter_middlewares(adapter, extra_middleware), adapter)
maybe_limit( maybe_limit(
fn -> fn ->
@ -102,20 +104,21 @@ defmodule Pleroma.HTTP do
fun.() fun.()
end end
defp adapter_middlewares(Tesla.Adapter.Gun) do defp adapter_middlewares(Tesla.Adapter.Gun, extra_middleware) do
[Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool] [Tesla.Middleware.FollowRedirects, Pleroma.Tesla.Middleware.ConnectionPool] ++
extra_middleware
end end
defp adapter_middlewares({Tesla.Adapter.Finch, _}) do defp adapter_middlewares({Tesla.Adapter.Finch, _}, extra_middleware) do
[Tesla.Middleware.FollowRedirects] [Tesla.Middleware.FollowRedirects] ++ extra_middleware
end end
defp adapter_middlewares(_) do defp adapter_middlewares(_, extra_middleware) do
if Pleroma.Config.get(:env) == :test do if Pleroma.Config.get(:env) == :test do
# Emulate redirects in test env, which are handled by adapters in other environments # Emulate redirects in test env, which are handled by adapters in other environments
[Tesla.Middleware.FollowRedirects] [Tesla.Middleware.FollowRedirects]
else else
[] extra_middleware
end end
end end
end end

View File

@ -36,13 +36,9 @@ defmodule Pleroma.Web.RichMedia.Backfill do
:ok :ok
{:error, type} = error {:error, type} = error
when type in [:invalid_metadata, :body_too_large, :content_type, :validate] -> when type in [:invalid_metadata, :body_too_large, :content_type, :validate, :get, :head] ->
negative_cache(url_hash) negative_cache(url_hash)
error error
{:error, type} = error
when type in [:get, :head] ->
error
end end
end end
@ -68,5 +64,5 @@ defmodule Pleroma.Web.RichMedia.Backfill do
defp warm_cache(key, val), do: @cachex.put(:rich_media_cache, key, val) defp warm_cache(key, val), do: @cachex.put(:rich_media_cache, key, val)
defp negative_cache(key, ttl \\ :timer.minutes(15)), defp negative_cache(key, ttl \\ :timer.minutes(15)),
do: @cachex.put(:rich_media_cache, key, nil, ttl: ttl) do: @cachex.put(:rich_media_cache, key, :error, ttl: ttl)
end end

View File

@ -69,9 +69,12 @@ defmodule Pleroma.Web.RichMedia.Helpers do
end end
defp http_options do defp http_options do
timeout = Config.get!([:rich_media, :timeout])
[ [
pool: :rich_media, pool: :rich_media,
max_body: Config.get([:rich_media, :max_body], 5_000_000) max_body: Config.get([:rich_media, :max_body], 5_000_000),
tesla_middleware: [{Tesla.Middleware.Timeout, timeout: timeout}]
] ]
end end
end end

View File

@ -3,6 +3,7 @@
# SPDX-License-Identifier: AGPL-3.0-only # SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Workers.RichMediaWorker do defmodule Pleroma.Workers.RichMediaWorker do
alias Pleroma.Config
alias Pleroma.Web.RichMedia.Backfill alias Pleroma.Web.RichMedia.Backfill
alias Pleroma.Web.RichMedia.Card alias Pleroma.Web.RichMedia.Card
@ -19,18 +20,21 @@ defmodule Pleroma.Workers.RichMediaWorker do
:ok :ok
{:error, type} {:error, type}
when type in [:invalid_metadata, :body_too_large, :content_type, :validate] -> when type in [:invalid_metadata, :body_too_large, :content_type, :validate, :get, :head] ->
{:cancel, type} {:cancel, type}
{:error, type}
when type in [:get, :head] ->
{:error, type}
error -> error ->
{:error, error} {:error, error}
end end
end end
# There is timeout value enforced by Tesla.Middleware.Timeout
# which can be found in the RichMedia.Helpers module to allow us to detect
# a slow/infinite data stream and insert a negative cache entry for the URL
# We pad it by 2 seconds to be certain a slow connection is detected and we
# can inject a negative cache entry for the URL
@impl Oban.Worker @impl Oban.Worker
def timeout(_job), do: :timer.seconds(5) def timeout(_job) do
Config.get!([:rich_media, :timeout]) + :timer.seconds(2)
end
end end

View File

@ -0,0 +1,26 @@
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
defmodule Pleroma.Web.RichMedia.BackfillTest do
use Pleroma.DataCase
alias Pleroma.Web.RichMedia.Backfill
alias Pleroma.Web.RichMedia.Card
import Mox
setup_all do: clear_config([:rich_media, :enabled], true)
test "sets a negative cache entry for an error" do
url = "https://bad.example.com/"
url_hash = Card.url_to_hash(url)
Tesla.Mock.mock(fn %{url: ^url} -> :error end)
Pleroma.CachexMock
|> expect(:put, fn :rich_media_cache, ^url_hash, :error, ttl: _ -> {:ok, true} end)
Backfill.run(%{"url" => url})
end
end