aboutsummaryrefslogtreecommitdiff
path: root/lib/pleroma/html.ex
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pleroma/html.ex')
-rw-r--r--lib/pleroma/html.ex93
1 file changed, 93 insertions, 0 deletions
diff --git a/lib/pleroma/html.ex b/lib/pleroma/html.ex
new file mode 100644
index 0000000..5bf735c
--- /dev/null
+++ b/lib/pleroma/html.ex
@@ -0,0 +1,93 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
defmodule Pleroma.HTML do
  @moduledoc """
  HTML scrubbing helpers.

  Filters and strips HTML via FastSanitize scrubbers that are configured
  at runtime, and extracts/caches the first external URL of an object.
  """

  # Scrubbers are compiled on boot so they can be configured in OTP releases
  # @on_load :compile_scrubbers

  # Cache provider is injectable through config (e.g. a mock in tests);
  # defaults to Cachex.
  @cachex Pleroma.Config.get([:cachex, :provider], Cachex)

  @doc """
  Compiles every scrubber module found in `priv/scrubbers`.

  Returns `:ok` on success and raises if compilation fails.
  """
  def compile_scrubbers do
    dir = Path.join(:code.priv_dir(:pleroma), "scrubbers")

    case Pleroma.Utils.compile_dir(dir) do
      {:error, _errors, _warnings} ->
        raise "Compiling scrubbers failed"

      {:ok, _modules, _warnings} ->
        :ok
    end
  end

  # Normalize the configured scrub policy into a list of scrubber modules.
  defp get_scrubbers(scrubber) when is_atom(scrubber), do: [scrubber]
  defp get_scrubbers(scrubbers) when is_list(scrubbers), do: scrubbers
  defp get_scrubbers(_), do: [Pleroma.HTML.Scrubber.Default]

  @doc "Returns the configured scrub policy as a list of scrubber modules."
  def get_scrubbers do
    [:markup, :scrub_policy]
    |> Pleroma.Config.get()
    |> get_scrubbers()
  end

  @doc """
  Filters `html` with the given scrubber(s).

  `nil` falls back to the configured scrubbers; a list applies each
  scrubber in order.
  """
  def filter_tags(html, nil), do: filter_tags(html, get_scrubbers())

  def filter_tags(html, scrubbers) when is_list(scrubbers) do
    Enum.reduce(scrubbers, html, fn scrubber, acc -> filter_tags(acc, scrubber) end)
  end

  def filter_tags(html, scrubber) do
    # Assertive match: a scrub failure is a bug, not normal control flow.
    {:ok, content} = FastSanitize.Sanitizer.scrub(html, scrubber)
    content
  end

  @doc "Filters `html` with the configured scrubbers."
  def filter_tags(html), do: filter_tags(html, nil)

  @doc "Strips all HTML tags from `html`."
  def strip_tags(html), do: filter_tags(html, FastSanitize.Sanitizer.StripTags)

  @doc """
  Scrubs `content` with `scrubbers`, runs it through `callback`, and tags
  the result for cache storage: `{:ignore, content}` when `fake` is
  truthy (do not cache), `{:commit, content}` otherwise.
  """
  def ensure_scrubbed_html(content, scrubbers, fake, callback) do
    content =
      content
      |> filter_tags(scrubbers)
      |> callback.()

    if fake, do: {:ignore, content}, else: {:commit, content}
  end

  @doc """
  Extracts the first external URL from an object's `"content"`, caching
  the result per object id unless the object is marked `"fake"`.

  Returns `{:ok, url_or_nil}`, or `{:error, :no_content}` when the
  object has no binary content.
  """
  def extract_first_external_url_from_object(%{data: %{"content" => content}} = object)
      when is_binary(content) do
    # `unless/else` is soft-deprecated; use a positive `if` instead.
    if object.data["fake"] do
      {:ok, extract_first_external_url(content)}
    else
      key = "URL|#{object.id}"

      @cachex.fetch!(:scrubber_cache, key, fn _key ->
        {:commit, {:ok, extract_first_external_url(content)}}
      end)
    end
  end

  def extract_first_external_url_from_object(_), do: {:error, :no_content}

  @doc """
  Returns the `href` of the first anchor in `content` that is not a
  mention, hashtag, attachment, or `rel="tag"` link, or `nil` when there
  is none.
  """
  def extract_first_external_url(content) do
    content
    |> Floki.parse_fragment!()
    |> Floki.find("a:not(.mention,.hashtag,.attachment,[rel~=\"tag\"])")
    |> Enum.take(1)
    |> Floki.attribute("href")
    |> Enum.at(0)
  end
end