aboutsummaryrefslogtreecommitdiff
path: root/lib/pleroma/web/rich_media/parsers
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pleroma/web/rich_media/parsers')
-rw-r--r--lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex46
-rw-r--r--lib/pleroma/web/rich_media/parsers/o_embed.ex29
-rw-r--r--lib/pleroma/web/rich_media/parsers/ogp.ex10
-rw-r--r--lib/pleroma/web/rich_media/parsers/twitter_card.ex15
4 files changed, 100 insertions, 0 deletions
diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
new file mode 100644
index 0000000..320a5f5
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex
@@ -0,0 +1,46 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do
+ def parse(data, html, prefix, key_name, value_name \\ "content") do
+ html
+ |> get_elements(key_name, prefix)
+ |> Enum.reduce(data, fn el, acc ->
+ attributes = normalize_attributes(el, prefix, key_name, value_name)
+
+ Map.merge(acc, attributes)
+ end)
+ |> maybe_put_title(html)
+ end
+
+ defp get_elements(html, key_name, prefix) do
+ html |> Floki.find("meta[#{key_name}^='#{prefix}:']")
+ end
+
+ defp normalize_attributes(html_node, prefix, key_name, value_name) do
+ {_tag, attributes, _children} = html_node
+
+ data =
+ Map.new(attributes, fn {name, value} ->
+ {name, String.trim_leading(value, "#{prefix}:")}
+ end)
+
+ %{data[key_name] => data[value_name]}
+ end
+
+ defp maybe_put_title(%{"title" => _} = meta, _), do: meta
+
+ defp maybe_put_title(meta, html) when meta != %{} do
+ case get_page_title(html) do
+ "" -> meta
+ title -> Map.put_new(meta, "title", title)
+ end
+ end
+
+ defp maybe_put_title(meta, _), do: meta
+
+ defp get_page_title(html) do
+ Floki.find(html, "html head title") |> List.first() |> Floki.text()
+ end
+end
diff --git a/lib/pleroma/web/rich_media/parsers/o_embed.ex b/lib/pleroma/web/rich_media/parsers/o_embed.ex
new file mode 100644
index 0000000..0f30317
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parsers/o_embed.ex
@@ -0,0 +1,29 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do
+ def parse(html, _data) do
+ with elements = [_ | _] <- get_discovery_data(html),
+ oembed_url when is_binary(oembed_url) <- get_oembed_url(elements),
+ {:ok, oembed_data = %{"html" => html}} <- get_oembed_data(oembed_url) do
+ %{oembed_data | "html" => Pleroma.HTML.filter_tags(html)}
+ else
+ _e -> %{}
+ end
+ end
+
+ defp get_discovery_data(html) do
+ html |> Floki.find("link[type='application/json+oembed']")
+ end
+
+ defp get_oembed_url([{"link", attributes, _children} | _]) do
+ Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end)
+ end
+
+ defp get_oembed_data(url) do
+ with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url) do
+ Jason.decode(json)
+ end
+ end
+end
diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex
new file mode 100644
index 0000000..b7f2b42
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parsers/ogp.ex
@@ -0,0 +1,10 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parsers.OGP do
+ @deprecated "OGP parser is deprecated. Use TwitterCard instead."
+ def parse(_html, _data) do
+ %{}
+ end
+end
diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
new file mode 100644
index 0000000..cc65372
--- /dev/null
+++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex
@@ -0,0 +1,15 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do
+ alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser
+
+ @spec parse(list(), map()) :: map()
+ def parse(html, data) do
+ data
+ |> MetaTagsParser.parse(html, "og", "property")
+ |> MetaTagsParser.parse(html, "twitter", "name")
+ |> MetaTagsParser.parse(html, "twitter", "property")
+ end
+end