dnscms: exclude non-live pages from search results, rank results across page types

This commit is contained in:
2026-05-26 00:41:19 +02:00
parent 09d1078dce
commit 38229c97f0
2 changed files with 63 additions and 19 deletions
+20 -19
View File
@@ -1,10 +1,7 @@
from django.apps import apps as django_apps
from django.templatetags.static import static
from django.utils.html import format_html
from grapple.registry import registry as grapple_registry
from wagtail import hooks
from wagtail.documents import get_document_model
from wagtail.images import get_image_model
from wagtail.models import Page
from wagtail.search.backends import get_search_backend
@@ -15,33 +12,37 @@ def enable_additional_rich_text_features(features):
@hooks.register("register_schema_query")
def filter_search_to_live_pages(query_mixins):
def override_search_resolver(query_mixins):
"""
Grapple's default `search` resolver hits every page regardless of publish
state, exposing drafts on the public API. Prepend a mixin so MRO picks our
`resolve_search`, which restricts Page subclasses to live + public.
Override Grapple's `search` resolver. Two fixes vs. the upstream version:
1. Restrict pages to live + public so drafts and access-restricted pages
don't leak via the public API.
2. Run a single search across all `Page` subclasses (instead of iterating
per-model) so results are ranked by relevance across types rather than
grouped by content type. Specific instances are fetched in a second
bulk query and reordered to match the search ranking.
Documents and images are intentionally not searched. The upstream resolver
includes them, but the frontend search page only renders Page types and
discards everything else, so iterating those indexes is wasted work.
"""
if not grapple_registry.class_models:
return
class SearchLivePublicMixin:
class SearchOverrideMixin:
def resolve_search(self, info, **kwargs):
query = kwargs.get("query")
if not query:
return None
s = get_search_backend()
results = []
models = [get_document_model(), get_image_model()]
for app in grapple_registry.apps:
models += django_apps.all_models[app].values()
for model in models:
if issubclass(model, Page):
results += s.search(query, model.objects.live().public())
else:
results += s.search(query, model)
return results
ranked = list(s.search(query, Page.objects.live().public()))
if not ranked:
return []
ids = [p.id for p in ranked]
specific_map = {p.id: p for p in Page.objects.filter(id__in=ids).specific()}
return [specific_map[i] for i in ids if i in specific_map]
query_mixins.insert(0, SearchLivePublicMixin)
query_mixins.insert(0, SearchOverrideMixin)
@hooks.register("construct_page_action_menu")
+43
View File
@@ -86,3 +86,46 @@ def test_search_excludes_draft_event_page(home_page, event_index, graphql_post):
assert response.status_code == 200
assert "errors" not in body, body
assert "DraftEventSearchToken" not in _titles_for(body, "EventPage")
def test_search_results_not_grouped_by_type(home_page, event_index, graphql_post):
# Two pages of different types matching the query equally, plus a third
# page of one of those types that should rank highest. Under the
# per-model-iteration resolver, all Generic results come before all Event
# results (or vice versa) — type-grouped — so the highest-relevance Event
# ends up after a less-relevant Generic. Cross-type relevance ordering
# should put the strongest match first regardless of type.
weak_generic = GenericPageFactory(
parent=home_page,
title="Klatremus klatremus klatremus",
slug="weak-generic",
)
weak_event = EventPageFactory(
parent=event_index,
title="Klatremus klatremus klatremus",
slug="weak-event",
)
strong_event = EventPageFactory(
parent=event_index,
title="Klatremus klatremus klatremus klatremus klatremus klatremus",
slug="strong-event",
)
_index(weak_generic)
_index(weak_event)
_index(strong_event)
response, body = graphql_post(SEARCH_QUERY, {"query": "klatremus"})
assert response.status_code == 200
assert "errors" not in body, body
order = [
(r["__typename"], r["title"])
for r in body["data"]["results"]
if r["__typename"] in ("GenericPage", "EventPage")
]
assert len(order) == 3, order
# Per-type grouping would put all results of one type consecutively
# before the other type. Cross-type relevance ordering should interleave.
types_seen = [t for t, _ in order]
assert types_seen != ["GenericPage", "EventPage", "EventPage"], order
assert types_seen != ["EventPage", "EventPage", "GenericPage"], order