aboutsummaryrefslogtreecommitdiff
path: root/lib/pleroma/search
diff options
context:
space:
mode:
authordcc <dcc@logografos.com>2024-05-15 00:57:23 -0700
committerdcc <dcc@logografos.com>2024-05-15 00:57:23 -0700
commitb31a934a804aed3f35442ceafe2080b0955e7317 (patch)
tree947b13a0388ecea81e05dd980baa10f7546860b9 /lib/pleroma/search
parentea33a0d3427f8b30b82a6ddbc0ff7429cfaf8d91 (diff)
downloadanni-master.tar.gz
anni-master.tar.bz2
anni-master.zip
total rebaseHEADmaster
Diffstat (limited to 'lib/pleroma/search')
-rw-r--r--lib/pleroma/search/database_search.ex165
-rw-r--r--lib/pleroma/search/meilisearch.ex181
-rw-r--r--lib/pleroma/search/search_backend.ex24
3 files changed, 370 insertions, 0 deletions
diff --git a/lib/pleroma/search/database_search.ex b/lib/pleroma/search/database_search.ex
new file mode 100644
index 0000000..31bfc7e
--- /dev/null
+++ b/lib/pleroma/search/database_search.ex
@@ -0,0 +1,165 @@
+# Pleroma: A lightweight social networking server
+# Copyright © 2017-2021 Pleroma Authors <https://pleroma.social/>
+# SPDX-License-Identifier: AGPL-3.0-only
+
+defmodule Pleroma.Search.DatabaseSearch do
+ alias Pleroma.Activity
+ alias Pleroma.Config
+ alias Pleroma.Object.Fetcher
+ alias Pleroma.Pagination
+ alias Pleroma.User
+ alias Pleroma.Web.ActivityPub.Visibility
+
+ require Pleroma.Constants
+
+ import Ecto.Query
+
+ @behaviour Pleroma.Search.SearchBackend
+
+ # Full-text search over activities using the database's own text-search index.
+ #
+ # Options read from `options`: `:limit` (capped at 40), `:offset` (default 0) and
+ # `:author` (only applied when it is a `%User{}`).
+ @impl true
+ def search(user, search_query, options \\ []) do
+   index_type =
+     if Config.get([:database, :rum_enabled]) do
+       :rum
+     else
+       :gin
+     end
+
+   # A missing :limit is nil, which sorts above every integer in term order,
+   # so the page size falls back to 40 in that case.
+   page_size = Enum.min([Keyword.get(options, :limit), 40])
+   skip = Keyword.get(options, :offset, 0)
+   author = Keyword.get(options, :author)
+
+   pagination_params = %{
+     "offset" => skip,
+     "limit" => page_size,
+     # The :rum query already carries its own order_by.
+     # NOTE(review): presumably this flag stops pagination from adding another - confirm.
+     "skip_order" => index_type == :rum
+   }
+
+   try do
+     Activity
+     |> Activity.with_preloaded_object()
+     |> Activity.restrict_deactivated_users()
+     |> restrict_public(user)
+     |> query_with(index_type, search_query, :websearch)
+     |> maybe_restrict_local(user)
+     |> maybe_restrict_author(author)
+     |> maybe_restrict_blocked(user)
+     |> Pagination.fetch_paginated(pagination_params, :offset)
+     |> maybe_fetch(user, search_query)
+   rescue
+     # Best effort: whatever went wrong above, still try resolving the query as a URL.
+     _ -> maybe_fetch([], user, search_query)
+   end
+ end
+
+ # This backend queries activities through Ecto at search time, so there is
+ # nothing to add to an external index; the callback always succeeds.
+ @impl true
+ def add_to_index(_activity) do
+   :ok
+ end
+
+ # Counterpart of add_to_index/1: no external index exists, so removal is a no-op.
+ @impl true
+ def remove_from_index(_object) do
+   :ok
+ end
+
+ @doc """
+ Applies `Activity.Queries.by_author/2` to `query` when `author` is a `%User{}`.
+
+ Any other value (for example `nil`) leaves the query untouched.
+ """
+ def maybe_restrict_author(query, author) do
+   case author do
+     %User{} -> Activity.Queries.by_author(query, author)
+     _ -> query
+   end
+ end
+
+ @doc """
+ Excludes authors returned by `User.blocked_users_ap_ids/1` when `user` is a `%User{}`.
+
+ Without a user struct the query is returned as-is.
+ """
+ def maybe_restrict_blocked(query, user) do
+   case user do
+     %User{} ->
+       blocked_ap_ids = User.blocked_users_ap_ids(user)
+       Activity.Queries.exclude_authors(query, blocked_ap_ids)
+
+     _ ->
+       query
+   end
+ end
+
+ # Keeps only `Create` activities with a public recipient.
+ #
+ # With a non-nil user the recipients may overlap either `Constants.as_public/0`
+ # or `Utils.as_local_public/0`; without a user only `as_public/0` qualifies.
+ defp restrict_public(q, user) when not is_nil(user) do
+   public_addresses = [
+     Pleroma.Constants.as_public(),
+     Pleroma.Web.ActivityPub.Utils.as_local_public()
+   ]
+
+   q
+   |> where([a, o], fragment("?->>'type' = 'Create'", a.data))
+   # `&&` is the array-overlap operator: at least one address must be a recipient.
+   |> where([a, o], fragment("? && ?", ^public_addresses, a.recipients))
+ end
+
+ defp restrict_public(q, _user) do
+   q
+   |> where([a, o], fragment("?->>'type' = 'Create'", a.data))
+   |> where([a, o], ^Pleroma.Constants.as_public() in a.recipients)
+ end
+
+ # Adds the full-text match against `search_query` to `q`.
+ #
+ # The second argument selects the index flavour: `:gin` builds a tsvector from the
+ # object's `content` on the fly, `:rum` matches the stored `fts_content` column and
+ # also orders by the distance of `inserted_at` from today. The last argument selects
+ # the parser: `:plain` uses `plainto_tsquery`, `:websearch` uses `websearch_to_tsquery`.
+ defp query_with(q, :gin, search_query, :plain) do
+   config_oid = default_text_search_config_oid()
+
+   from([a, o] in q,
+     where:
+       fragment(
+         "to_tsvector(?::oid::regconfig, ?->>'content') @@ plainto_tsquery(?)",
+         ^config_oid,
+         o.data,
+         ^search_query
+       )
+   )
+ end
+
+ defp query_with(q, :gin, search_query, :websearch) do
+   config_oid = default_text_search_config_oid()
+
+   from([a, o] in q,
+     where:
+       fragment(
+         "to_tsvector(?::oid::regconfig, ?->>'content') @@ websearch_to_tsquery(?)",
+         ^config_oid,
+         o.data,
+         ^search_query
+       )
+   )
+ end
+
+ defp query_with(q, :rum, search_query, :plain) do
+   q
+   |> where([a, o], fragment("? @@ plainto_tsquery(?)", o.fts_content, ^search_query))
+   |> order_by([a, o], [fragment("? <=> now()::date", o.inserted_at)])
+ end
+
+ defp query_with(q, :rum, search_query, :websearch) do
+   q
+   |> where([a, o], fragment("? @@ websearch_to_tsquery(?)", o.fts_content, ^search_query))
+   |> order_by([a, o], [fragment("? <=> now()::date", o.inserted_at)])
+ end
+
+ # Asks the database for the OID of its `default_text_search_config` setting.
+ # Runs one SQL statement per call and raises if the statement fails.
+ defp default_text_search_config_oid do
+   %{rows: [[oid]]} =
+     Ecto.Adapters.SQL.query!(
+       Pleroma.Repo,
+       "select current_setting('default_text_search_config')::regconfig::oid;"
+     )
+
+   oid
+ end
+
+ @doc """
+ Restricts `q` to local rows according to `[:instance, :limit_to_local_content]`.
+
+   * `:all` - always restrict
+   * `:unauthenticated` (the default) - restrict unless `user` is a `%User{}`
+   * `false` - never restrict
+
+ Any other setting raises a `CaseClauseError`.
+ """
+ def maybe_restrict_local(q, user) do
+   setting = Config.get([:instance, :limit_to_local_content], :unauthenticated)
+   authenticated? = match?(%User{}, user)
+
+   case setting do
+     :all -> restrict_local(q)
+     :unauthenticated -> if authenticated?, do: q, else: restrict_local(q)
+     false -> q
+   end
+ end
+
+ # Keeps only rows whose `local` field is true.
+ defp restrict_local(q) do
+   where(q, local: true)
+ end
+
+ @doc """
+ Prepends the activity behind `search_query` to `activities` when the query contains
+ `http:` or `https:`.
+
+ The query is passed to `Fetcher.fetch_object_from_id/1`, the Create activity for the
+ fetched object's `id` is looked up, and it is only added when
+ `Visibility.visible_for_user?/2` returns `true` for `user`. If any step does not
+ succeed, `activities` is returned unchanged.
+ """
+ def maybe_fetch(activities, user, search_query) do
+   looks_like_url? = Regex.match?(~r/https?:/, search_query)
+
+   with true <- looks_like_url?,
+        {:ok, object} <- Fetcher.fetch_object_from_id(search_query),
+        %Activity{} = create <- Activity.get_create_by_object_ap_id(object.data["id"]),
+        true <- Visibility.visible_for_user?(create, user) do
+     [create | activities]
+   else
+     _ -> activities
+   end
+ end
+end
diff --git a/lib/pleroma/search/meilisearch.ex b/lib/pleroma/search/meilisearch.ex
new file mode 100644
index 0000000..2bff663
--- /dev/null
+++ b/lib/pleroma/search/meilisearch.ex
@@ -0,0 +1,181 @@
+defmodule Pleroma.Search.Meilisearch do
+ require Logger
+ require Pleroma.Constants
+
+ alias Pleroma.Activity
+ alias Pleroma.Config.Getting, as: Config
+
+ import Pleroma.Search.DatabaseSearch
+ import Ecto.Query
+
+ @behaviour Pleroma.Search.SearchBackend
+
+ # Request headers for every Meilisearch call: always JSON, plus a bearer token
+ # when a `:private_key` is configured.
+ defp meili_headers do
+   content_type = {"Content-Type", "application/json"}
+
+   case Config.get([Pleroma.Search.Meilisearch, :private_key]) do
+     nil -> [content_type]
+     key -> [content_type, {"Authorization", "Bearer #{key}"}]
+   end
+ end
+
+ @doc """
+ Sends a GET for `path` relative to the configured Meilisearch `:url`.
+
+ Returns `{:ok, decoded_body}` when the HTTP call returns `{:ok, response}`; any other
+ HTTP result is passed through untouched. Raises if the body is not valid JSON.
+ """
+ def meili_get(path) do
+   base_url = Config.get([Pleroma.Search.Meilisearch, :url])
+   url = Path.join(base_url, path)
+
+   case Pleroma.HTTP.get(url, meili_headers()) do
+     {:ok, response} -> {:ok, Jason.decode!(response.body)}
+     failure -> failure
+   end
+ end
+
+ @doc """
+ POSTs `params`, JSON-encoded, to `path` relative to the configured Meilisearch `:url`.
+
+ Returns `{:ok, decoded_body}` when the HTTP call returns `{:ok, response}`; any other
+ HTTP result is passed through untouched. Raises if `params` cannot be encoded or the
+ response body is not valid JSON.
+ """
+ def meili_post(path, params) do
+   base_url = Config.get([Pleroma.Search.Meilisearch, :url])
+   url = Path.join(base_url, path)
+   payload = Jason.encode!(params)
+
+   case Pleroma.HTTP.post(url, payload, meili_headers()) do
+     {:ok, response} -> {:ok, Jason.decode!(response.body)}
+     failure -> failure
+   end
+ end
+
+ @doc """
+ PUTs `params`, JSON-encoded, to `path` relative to the configured Meilisearch `:url`.
+
+ Returns `{:ok, decoded_body}` when the HTTP call returns `{:ok, response}`; any other
+ HTTP result is passed through untouched. Raises if `params` cannot be encoded or the
+ response body is not valid JSON.
+ """
+ def meili_put(path, params) do
+   base_url = Config.get([Pleroma.Search.Meilisearch, :url])
+   url = Path.join(base_url, path)
+   payload = Jason.encode!(params)
+
+   case Pleroma.HTTP.request(:put, url, payload, meili_headers(), []) do
+     {:ok, response} -> {:ok, Jason.decode!(response.body)}
+     failure -> failure
+   end
+ end
+
+ @doc """
+ Sends a DELETE for `path` relative to the configured Meilisearch `:url`.
+
+ Returns `:ok` when the HTTP call returns any `{:ok, _}` tuple (the response itself is
+ not inspected) and `{:error, "Could not remove from index"}` otherwise.
+ """
+ def meili_delete(path) do
+   base_url = Config.get([Pleroma.Search.Meilisearch, :url])
+   url = Path.join(base_url, path)
+
+   case Pleroma.HTTP.request(:delete, url, "", meili_headers(), []) do
+     {:ok, _} -> :ok
+     _ -> {:error, "Could not remove from index"}
+   end
+ end
+
+ # Searches the Meilisearch `objects` index and loads the matching Create activities.
+ #
+ # Options read from `options`: `:limit` (capped at 40), `:offset` (default 0) and
+ # `:author` (only applied when it is a `%User{}`).
+ #
+ # Always returns a list of activities:
+ #   * hits are mapped to activities through their `"ap"` field, filtered by the same
+ #     local/author/blocked restrictions as the database backend, and ordered by the
+ #     object's `published` value, newest first;
+ #   * if the Meilisearch request fails or its response has no `"hits"` list, the
+ #     failure is logged and the hit list is treated as empty;
+ #   * if loading the activities raises, the hit list is treated as empty;
+ #   * finally `maybe_fetch/3` may prepend the activity the query points at.
+ @impl true
+ def search(user, query, options \\ []) do
+   limit = Enum.min([Keyword.get(options, :limit), 40])
+   offset = Keyword.get(options, :offset, 0)
+   author = Keyword.get(options, :author)
+
+   res =
+     meili_post(
+       "/indexes/objects/search",
+       %{q: query, offset: offset, limit: limit}
+     )
+
+   activities =
+     with {:ok, %{"hits" => hits}} when is_list(hits) <- res do
+       try do
+         hits
+         |> Enum.map(& &1["ap"])
+         |> Activity.create_by_object_ap_id()
+         |> Activity.with_preloaded_object()
+         |> Activity.restrict_deactivated_users()
+         |> maybe_restrict_local(user)
+         |> maybe_restrict_author(author)
+         |> maybe_restrict_blocked(user)
+         |> order_by([object: obj], desc: obj.data["published"])
+         |> Pleroma.Repo.all()
+       rescue
+         # Best effort: a failing lookup degrades to "no hits" instead of an error.
+         _ -> []
+       end
+     else
+       failure ->
+         Logger.error("Meilisearch search request failed: #{inspect(failure)}")
+         []
+     end
+
+   # maybe_fetch/3 prepends to a list, so it must run on the loaded activities,
+   # not on the still-unexecuted Ecto query.
+   maybe_fetch(activities, user, query)
+ end
+
+ @doc """
+ Builds the Meilisearch document for `object`, or returns `nil` when it should not be
+ indexed.
+
+ An object is indexed only if it is a non-nil `Note` whose `content` is present and not
+ `""` or `"."`, and whose `to` or `cc` contains the public address. The content is
+ scrubbed with `Pleroma.HTML.Scrubber.SearchIndexing` and trimmed; an empty result also
+ yields `nil`.
+
+ The returned map has the keys `:id`, `:content`, `:ap` (the object's `id` field) and
+ `:published` (Unix timestamp). Raises a `MatchError` if `published` is not a valid
+ ISO 8601 datetime.
+ """
+ def object_to_search_data(object) do
+   # Only index public or unlisted Notes
+   if not is_nil(object) and object.data["type"] == "Note" and
+        not is_nil(object.data["content"]) and
+        publicly_addressed?(object.data) and
+        object.data["content"] not in ["", "."] do
+     data = object.data
+
+     content_str =
+       case data["content"] do
+         # NOTE(review): content arriving as a list with a leading nil is flattened to a
+         # string here - presumably a workaround for malformed objects; confirm.
+         [nil | rest] -> to_string(rest)
+         str -> str
+       end
+
+     content =
+       with {:ok, scrubbed} <-
+              FastSanitize.Sanitizer.scrub(content_str, Pleroma.HTML.Scrubber.SearchIndexing),
+            trimmed <- String.trim(scrubbed) do
+         trimmed
+       end
+
+     # Make sure we have a non-empty string
+     if content != "" do
+       {:ok, published, _} = DateTime.from_iso8601(data["published"])
+
+       %{
+         id: object.id,
+         content: content,
+         ap: data["id"],
+         published: published |> DateTime.to_unix()
+       }
+     end
+   end
+ end
+
+ # True when the public address appears in the object's `to` or `cc`.
+ # List.wrap/1 makes a missing field or a single bare address safe to test with `in`.
+ defp publicly_addressed?(data) do
+   public = Pleroma.Constants.as_public()
+
+   public in List.wrap(data["to"]) or public in List.wrap(data["cc"])
+ end
+
+ # Pushes the activity's object into the Meilisearch `objects` index.
+ #
+ # Only `Create` activities whose object yields search data are sent; everything else
+ # is silently accepted with `:ok`. A response other than `{:ok, %{"status" => "enqueued"}}`
+ # is logged and returned as `{:error, response}`.
+ @impl true
+ def add_to_index(activity) do
+   search_data = object_to_search_data(activity.object)
+
+   if activity.data["type"] == "Create" and search_data do
+     case meili_put("/indexes/objects/documents", [search_data]) do
+       {:ok, %{"status" => "enqueued"}} ->
+         # Meilisearch accepted the document
+         :ok
+
+       failure ->
+         # Anything else counts as a failure and is reported to the caller
+         Logger.error("Failed to add activity #{activity.id} to index: #{inspect(failure)}")
+         {:error, failure}
+     end
+   else
+     # Nothing searchable in this activity, which is not an error
+     :ok
+   end
+ end
+
+ # Deletes the document stored under the object's id from the `objects` index.
+ @impl true
+ def remove_from_index(object) do
+   document_path = "/indexes/objects/documents/#{object.id}"
+   meili_delete(document_path)
+ end
+end
diff --git a/lib/pleroma/search/search_backend.ex b/lib/pleroma/search/search_backend.ex
new file mode 100644
index 0000000..68bc48c
--- /dev/null
+++ b/lib/pleroma/search/search_backend.ex
@@ -0,0 +1,24 @@
+defmodule Pleroma.Search.SearchBackend do
+ @moduledoc """
+ Behaviour implemented by status search backends.
+
+ A backend answers search queries and is told when an activity's object should be
+ added to, or an object removed from, whatever index it maintains.
+ """
+
+ @doc """
+ Search statuses with a query, restricting to only those the user should have access to.
+
+ `user` is `nil` when the search is performed without an authenticated user.
+ """
+ @callback search(
+             user :: Pleroma.User.t() | nil,
+             query :: String.t(),
+             options :: [any()]
+           ) :: [Pleroma.Activity.t()]
+
+ @doc """
+ Add the object associated with the activity to the search index.
+
+ The whole activity is passed, to allow filtering on things such as scope.
+ """
+ @callback add_to_index(activity :: Pleroma.Activity.t()) :: :ok | {:error, any()}
+
+ @doc """
+ Remove the object from the index.
+
+ Just the object, as opposed to the whole activity, is passed, since the object
+ is what contains the actual content and there is no need for filtering when removing
+ from index.
+ """
+ @callback remove_from_index(object :: Pleroma.Object.t()) :: :ok | {:error, any()}
+end