dnscms: exclude non-live pages from search results, rank results across page types

This commit is contained in:
2026-05-26 00:41:19 +02:00
parent 09d1078dce
commit 38229c97f0
2 changed files with 63 additions and 19 deletions
+20 -19
View File
@@ -1,10 +1,7 @@
from django.apps import apps as django_apps
from django.templatetags.static import static from django.templatetags.static import static
from django.utils.html import format_html from django.utils.html import format_html
from grapple.registry import registry as grapple_registry from grapple.registry import registry as grapple_registry
from wagtail import hooks from wagtail import hooks
from wagtail.documents import get_document_model
from wagtail.images import get_image_model
from wagtail.models import Page from wagtail.models import Page
from wagtail.search.backends import get_search_backend from wagtail.search.backends import get_search_backend
@@ -15,33 +12,37 @@ def enable_additional_rich_text_features(features):
@hooks.register("register_schema_query") @hooks.register("register_schema_query")
def filter_search_to_live_pages(query_mixins): def override_search_resolver(query_mixins):
""" """
Grapple's default `search` resolver hits every page regardless of publish Override Grapple's `search` resolver. Two fixes vs. the upstream version:
state, exposing drafts on the public API. Prepend a mixin so MRO picks our 1. Restrict pages to live + public so drafts and access-restricted pages
`resolve_search`, which restricts Page subclasses to live + public. don't leak via the public API.
2. Run a single search across all `Page` subclasses (instead of iterating
per-model) so results are ranked by relevance across types rather than
grouped by content type. Specific instances are fetched in a second
bulk query and reordered to match the search ranking.
Documents and images are intentionally not searched. The upstream resolver
includes them, but the frontend search page only renders Page types and
discards everything else, so iterating those indexes is wasted work.
""" """
if not grapple_registry.class_models: if not grapple_registry.class_models:
return return
class SearchLivePublicMixin: class SearchOverrideMixin:
def resolve_search(self, info, **kwargs): def resolve_search(self, info, **kwargs):
query = kwargs.get("query") query = kwargs.get("query")
if not query: if not query:
return None return None
s = get_search_backend() s = get_search_backend()
results = [] ranked = list(s.search(query, Page.objects.live().public()))
models = [get_document_model(), get_image_model()] if not ranked:
for app in grapple_registry.apps: return []
models += django_apps.all_models[app].values() ids = [p.id for p in ranked]
for model in models: specific_map = {p.id: p for p in Page.objects.filter(id__in=ids).specific()}
if issubclass(model, Page): return [specific_map[i] for i in ids if i in specific_map]
results += s.search(query, model.objects.live().public())
else:
results += s.search(query, model)
return results
query_mixins.insert(0, SearchLivePublicMixin) query_mixins.insert(0, SearchOverrideMixin)
@hooks.register("construct_page_action_menu") @hooks.register("construct_page_action_menu")
+43
View File
@@ -86,3 +86,46 @@ def test_search_excludes_draft_event_page(home_page, event_index, graphql_post):
assert response.status_code == 200 assert response.status_code == 200
assert "errors" not in body, body assert "errors" not in body, body
assert "DraftEventSearchToken" not in _titles_for(body, "EventPage") assert "DraftEventSearchToken" not in _titles_for(body, "EventPage")
def test_search_results_not_grouped_by_type(home_page, event_index, graphql_post):
# Two pages of different types matching the query equally, plus a third
# page of one of those types that should rank highest. Under the
# per-model-iteration resolver, all Generic results come before all Event
# results (or vice versa) — type-grouped — so the highest-relevance Event
# ends up after a less-relevant Generic. Cross-type relevance ordering
# should put the strongest match first regardless of type.
weak_generic = GenericPageFactory(
parent=home_page,
title="Klatremus klatremus klatremus",
slug="weak-generic",
)
weak_event = EventPageFactory(
parent=event_index,
title="Klatremus klatremus klatremus",
slug="weak-event",
)
strong_event = EventPageFactory(
parent=event_index,
title="Klatremus klatremus klatremus klatremus klatremus klatremus",
slug="strong-event",
)
_index(weak_generic)
_index(weak_event)
_index(strong_event)
response, body = graphql_post(SEARCH_QUERY, {"query": "klatremus"})
assert response.status_code == 200
assert "errors" not in body, body
order = [
(r["__typename"], r["title"])
for r in body["data"]["results"]
if r["__typename"] in ("GenericPage", "EventPage")
]
assert len(order) == 3, order
# Per-type grouping would put all results of one type consecutively
# before the other type. Cross-type relevance ordering should interleave.
types_seen = [t for t, _ in order]
assert types_seen != ["GenericPage", "EventPage", "EventPage"], order
assert types_seen != ["EventPage", "EventPage", "GenericPage"], order