diff options
Diffstat (limited to 'lib/pleroma/search')
| -rw-r--r-- | lib/pleroma/search/database_search.ex | 165 | ||||
| -rw-r--r-- | lib/pleroma/search/meilisearch.ex | 181 | ||||
| -rw-r--r-- | lib/pleroma/search/search_backend.ex | 24 |
3 files changed, 370 insertions, 0 deletions
diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex new file mode 100644 index 0000000..31bfc7e --- /dev/null +++ b/lib/pleroma/search/database_search.ex @@ -0,0 +1,165 @@ +# Pleroma: A lightweight social networking server +# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/> +# SPDX-License-Identifier: AGPL-3.0-only + +defmodule Pleroma.Search.DatabaseSearch do + alias Pleroma.Activity + alias Pleroma.Config + alias Pleroma.Object.Fetcher + alias Pleroma.Pagination + alias Pleroma.User + alias Pleroma.Web.ActivityPub.Visibility + + require Pleroma.Constants + + import Ecto.Query + + @behaviour Pleroma.Search.SearchBackend + + @impl true + def search(user, search_query, options \\ []) do + index_type = if Config.get([:database, :rum_enabled]), do: :rum, else: :gin + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + try do + Activity + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> restrict_public(user) + |> query_with(index_type, search_query, :websearch) + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> Pagination.fetch_paginated( + %{"offset" => offset, "limit" => limit, "skip_order" => index_type == :rum}, + :offset + ) + |> maybe_fetch(user, search_query) + rescue + _ -> maybe_fetch([], user, search_query) + end + end + + @impl true + def add_to_index(_activity), do: :ok + + @impl true + def remove_from_index(_object), do: :ok + + def maybe_restrict_author(query, %User{} = author) do + Activity.Queries.by_author(query, author) + end + + def maybe_restrict_author(query, _), do: query + + def maybe_restrict_blocked(query, %User{} = user) do + Activity.Queries.exclude_authors(query, User.blocked_users_ap_ids(user)) + end + + def maybe_restrict_blocked(query, _), do: query + + defp restrict_public(q, user) when not is_nil(user) do + intended_recipients = [ + Pleroma.Constants.as_public(), + Pleroma.Web.ActivityPub.Utils.as_local_public() + ] + + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: fragment("? && ?", ^intended_recipients, a.recipients) + ) + end + + defp restrict_public(q, _user) do + from([a, o] in q, + where: fragment("?->>'type' = 'Create'", a.data), + where: ^Pleroma.Constants.as_public() in a.recipients + ) + end + + defp query_with(q, :gin, search_query, :plain) do + %{rows: [[tsc]]} = + Ecto.Adapters.SQL.query!( + Pleroma.Repo, + "select current_setting('default_text_search_config')::regconfig::oid;" + ) + + from([a, o] in q, + where: + fragment( + "to_tsvector(?::oid::regconfig, ?->>'content') @@ plainto_tsquery(?)", + ^tsc, + o.data, + ^search_query + ) + ) + end + + defp query_with(q, :gin, search_query, :websearch) do + %{rows: [[tsc]]} = + Ecto.Adapters.SQL.query!( + Pleroma.Repo, + "select current_setting('default_text_search_config')::regconfig::oid;" + ) + + from([a, o] in q, + where: + fragment( + "to_tsvector(?::oid::regconfig, ?->>'content') @@ websearch_to_tsquery(?)", + ^tsc, + o.data, + ^search_query + ) + ) + end + + defp query_with(q, :rum, search_query, :plain) do + from([a, o] in q, + where: + fragment( + "? @@ plainto_tsquery(?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + + defp query_with(q, :rum, search_query, :websearch) do + from([a, o] in q, + where: + fragment( + "? @@ websearch_to_tsquery(?)", + o.fts_content, + ^search_query + ), + order_by: [fragment("? <=> now()::date", o.inserted_at)] + ) + end + + def maybe_restrict_local(q, user) do + limit = Config.get([:instance, :limit_to_local_content], :unauthenticated) + + case {limit, user} do + {:all, _} -> restrict_local(q) + {:unauthenticated, %User{}} -> q + {:unauthenticated, _} -> restrict_local(q) + {false, _} -> q + end + end + + defp restrict_local(q), do: where(q, local: true) + + def maybe_fetch(activities, user, search_query) do + with true <- Regex.match?(~r/https?:/, search_query), + {:ok, object} <- Fetcher.fetch_object_from_id(search_query), + %Activity{} = activity <- Activity.get_create_by_object_ap_id(object.data["id"]), + true <- Visibility.visible_for_user?(activity, user) do + [activity | activities] + else + _ -> activities + end + end +end diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex new file mode 100644 index 0000000..2bff663 --- /dev/null +++ b/lib/pleroma/search/meilisearch.ex @@ -0,0 +1,181 @@ +defmodule Pleroma.Search.Meilisearch do + require Logger + require Pleroma.Constants + + alias Pleroma.Activity + alias Pleroma.Config.Getting, as: Config + + import Pleroma.Search.DatabaseSearch + import Ecto.Query + + @behaviour Pleroma.Search.SearchBackend + + defp meili_headers do + private_key = Config.get([Pleroma.Search.Meilisearch, :private_key]) + + [{"Content-Type", "application/json"}] ++ + if is_nil(private_key), do: [], else: [{"Authorization", "Bearer #{private_key}"}] + end + + def meili_get(path) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.get( + Path.join(endpoint, path), + meili_headers() + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_post(path, params) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.post( + Path.join(endpoint, path), + Jason.encode!(params), + meili_headers() + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_put(path, params) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + result = + Pleroma.HTTP.request( + :put, + Path.join(endpoint, path), + Jason.encode!(params), + meili_headers(), + [] + ) + + with {:ok, res} <- result do + {:ok, Jason.decode!(res.body)} + end + end + + def meili_delete(path) do + endpoint = Config.get([Pleroma.Search.Meilisearch, :url]) + + with {:ok, _} <- + Pleroma.HTTP.request( + :delete, + Path.join(endpoint, path), + "", + meili_headers(), + [] + ) do + :ok + else + _ -> {:error, "Could not remove from index"} + end + end + + @impl true + def search(user, query, options \\ []) do + limit = Enum.min([Keyword.get(options, :limit), 40]) + offset = Keyword.get(options, :offset, 0) + author = Keyword.get(options, :author) + + res = + meili_post( + "/indexes/objects/search", + %{q: query, offset: offset, limit: limit} + ) + + with {:ok, result} <- res do + hits = result["hits"] |> Enum.map(& &1["ap"]) + + try do + hits + |> Activity.create_by_object_ap_id() + |> Activity.with_preloaded_object() + |> Activity.restrict_deactivated_users() + |> maybe_restrict_local(user) + |> maybe_restrict_author(author) + |> maybe_restrict_blocked(user) + |> maybe_fetch(user, query) + |> order_by([object: obj], desc: obj.data["published"]) + |> Pleroma.Repo.all() + rescue + _ -> maybe_fetch([], user, query) + end + end + end + + def object_to_search_data(object) do + # Only index public or unlisted Notes + if not is_nil(object) and object.data["type"] == "Note" and + not is_nil(object.data["content"]) and + (Pleroma.Constants.as_public() in object.data["to"] or + Pleroma.Constants.as_public() in object.data["cc"]) and + object.data["content"] not in ["", "."] do + data = object.data + + content_str = + case data["content"] do + [nil | rest] -> to_string(rest) + str -> str + end + + content = + with {:ok, scrubbed} <- + FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing), + trimmed <- String.trim(scrubbed) do + trimmed + end + + # Make sure we have a non-empty string + if content != "" do + {:ok, published, _} = DateTime.from_iso8601(data["published"]) + + %{ + id: object.id, + content: content, + ap: data["id"], + published: published |> DateTime.to_unix() + } + end + end + end + + @impl true + def add_to_index(activity) do + maybe_search_data = object_to_search_data(activity.object) + + if activity.data["type"] == "Create" and maybe_search_data do + result = + meili_put( + "/indexes/objects/documents", + [maybe_search_data] + ) + + with {:ok, %{"status" => "enqueued"}} <- result do + # Added successfully + :ok + else + _ -> + # There was an error, report it + Logger.error("Failed to add activity #{activity.id} to index: #{inspect(result)}") + {:error, result} + end + else + # The post isn't something we can search, that's ok + :ok + end + end + + @impl true + def remove_from_index(object) do + meili_delete("/indexes/objects/documents/#{object.id}") + end +end diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex new file mode 100644 index 0000000..68bc48c --- /dev/null +++ b/lib/pleroma/search/search_backend.ex @@ -0,0 +1,24 @@ +defmodule Pleroma.Search.SearchBackend do + @doc """ + Search statuses with a query, restricting to only those the user should have access to. + """ + @callback search(user :: Pleroma.User.t(), query :: String.t(), options :: [any()]) :: [ + Pleroma.Activity.t() + ] + + @doc """ + Add the object associated with the activity to the search index. + + The whole activity is passed, to allow filtering on things such as scope. + """ + @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()} + + @doc """ + Remove the object from the index. + + Just the object, as opposed to the whole activity, is passed, since the object + is what contains the actual content and there is no need for filtering when removing + from index. + """ + @callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()} +end |
