From 3a4773c3c2bd0bbef244eb519b07208da9108e49 Mon Sep 17 00:00:00 2001 From: dcc Date: Sat, 2 Sep 2023 00:52:52 -0700 Subject: First --- .../web/rich_media/parsers/meta_tags_parser.ex | 46 ++++++++++++++++++++++ lib/pleroma/web/rich_media/parsers/o_embed.ex | 29 ++++++++++++++ lib/pleroma/web/rich_media/parsers/ogp.ex | 10 +++++ lib/pleroma/web/rich_media/parsers/twitter_card.ex | 15 +++++++ 4 files changed, 100 insertions(+) create mode 100644 lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex create mode 100644 lib/pleroma/web/rich_media/parsers/o_embed.ex create mode 100644 lib/pleroma/web/rich_media/parsers/ogp.ex create mode 100644 lib/pleroma/web/rich_media/parsers/twitter_card.ex (limited to 'lib/pleroma/web/rich_media/parsers') diff --git a/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex new file mode 100644 index 0000000..320a5f5 --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/meta_tags_parser.ex @@ -0,0 +1,46 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.Parsers.MetaTagsParser do + def parse(data, html, prefix, key_name, value_name \\ "content") do + html + |> get_elements(key_name, prefix) + |> Enum.reduce(data, fn el, acc -> + attributes = normalize_attributes(el, prefix, key_name, value_name) + + Map.merge(acc, attributes) + end) + |> maybe_put_title(html) + end + + defp get_elements(html, key_name, prefix) do + html |> Floki.find("meta[#{key_name}^='#{prefix}:']") + end + + defp normalize_attributes(html_node, prefix, key_name, value_name) do + {_tag, attributes, _children} = html_node + + data = + Map.new(attributes, fn {name, value} -> + {name, String.trim_leading(value, "#{prefix}:")} + end) + + %{data[key_name] => data[value_name]} + end + + defp maybe_put_title(%{"title" => _} = meta, _), do: meta + + defp maybe_put_title(meta, html) when meta != %{} do + case get_page_title(html) do + "" -> meta + title -> Map.put_new(meta, "title", title) + end + end + + defp maybe_put_title(meta, _), do: meta + + defp get_page_title(html) do + Floki.find(html, "html head title") |> List.first() |> Floki.text() + end +end diff --git a/lib/pleroma/web/rich_media/parsers/o_embed.ex b/lib/pleroma/web/rich_media/parsers/o_embed.ex new file mode 100644 index 0000000..0f30317 --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/o_embed.ex @@ -0,0 +1,29 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.Parsers.OEmbed do + def parse(html, _data) do + with elements = [_ | _] <- get_discovery_data(html), + oembed_url when is_binary(oembed_url) <- get_oembed_url(elements), + {:ok, oembed_data = %{"html" => html}} <- get_oembed_data(oembed_url) do + %{oembed_data | "html" => Pleroma.HTML.filter_tags(html)} + else + _e -> %{} + end + end + + defp get_discovery_data(html) do + html |> Floki.find("link[type='application/json+oembed']") + end + + defp get_oembed_url([{"link", attributes, _children} | _]) do + Enum.find_value(attributes, fn {k, v} -> if k == "href", do: v end) + end + + defp get_oembed_data(url) do + with {:ok, %Tesla.Env{body: json}} <- Pleroma.Web.RichMedia.Helpers.rich_media_get(url) do + Jason.decode(json) + end + end +end diff --git a/lib/pleroma/web/rich_media/parsers/ogp.ex b/lib/pleroma/web/rich_media/parsers/ogp.ex new file mode 100644 index 0000000..b7f2b42 --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/ogp.ex @@ -0,0 +1,10 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.Parsers.OGP do + @deprecated "OGP parser is deprecated. Use TwitterCard instead." + def parse(_html, _data) do + %{} + end +end diff --git a/lib/pleroma/web/rich_media/parsers/twitter_card.ex b/lib/pleroma/web/rich_media/parsers/twitter_card.ex new file mode 100644 index 0000000..cc65372 --- /dev/null +++ b/lib/pleroma/web/rich_media/parsers/twitter_card.ex @@ -0,0 +1,15 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2022 Pleroma Authors +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Web.RichMedia.Parsers.TwitterCard do + alias Pleroma.Web.RichMedia.Parsers.MetaTagsParser + + @spec parse(list(), map()) :: map() + def parse(html, data) do + data + |> MetaTagsParser.parse(html, "og", "property") + |> MetaTagsParser.parse(html, "twitter", "name") + |> MetaTagsParser.parse(html, "twitter", "property") + end +end -- cgit v1.2.3