dnscms: exclude non-live pages from search results, rank results across page types

2026-05-26 00:41:19 +02:00
parent 09d1078dce
commit 38229c97f0
2 changed files with 63 additions and 19 deletions
@@ -1,10 +1,7 @@
 from django.apps import apps as django_apps
 from django.templatetags.static import static
 from django.utils.html import format_html
 from grapple.registry import registry as grapple_registry
 from wagtail import hooks
 from wagtail.documents import get_document_model
 from wagtail.images import get_image_model
 from wagtail.models import Page
 from wagtail.search.backends import get_search_backend
@@ -15,33 +12,37 @@ def enable_additional_rich_text_features(features):
@hooks.register("register_schema_query")
-def filter_search_to_live_pages(query_mixins):
+def override_search_resolver(query_mixins):
    """
-    Grapple's default `search` resolver hits every page regardless of publish
+    Override Grapple's `search` resolver. Two fixes vs. the upstream version:
-    state, exposing drafts on the public API. Prepend a mixin so MRO picks our
+    1. Restrict pages to live + public so drafts and access-restricted pages
-    `resolve_search`, which restricts Page subclasses to live + public.
+       don't leak via the public API.
    2. Run a single search across all `Page` subclasses (instead of iterating
       per-model) so results are ranked by relevance across types rather than
       grouped by content type. Specific instances are fetched in a second
       bulk query and reordered to match the search ranking.
    Documents and images are intentionally not searched. The upstream resolver
    includes them, but the frontend search page only renders Page types and
    discards everything else, so iterating those indexes is wasted work.
    """
    if not grapple_registry.class_models:
        return
-    class SearchLivePublicMixin:
+    class SearchOverrideMixin:
        def resolve_search(self, info, **kwargs):
            query = kwargs.get("query")
            if not query:
                return None
            s = get_search_backend()
-            results = []
+            ranked = list(s.search(query, Page.objects.live().public()))
-            models = [get_document_model(), get_image_model()]
+            if not ranked:
-            for app in grapple_registry.apps:
+                return []
-                models += django_apps.all_models[app].values()
+            ids = [p.id for p in ranked]
-            for model in models:
+            specific_map = {p.id: p for p in Page.objects.filter(id__in=ids).specific()}
-                if issubclass(model, Page):
+            return [specific_map[i] for i in ids if i in specific_map]
                    results += s.search(query, model.objects.live().public())
                else:
                    results += s.search(query, model)
            return results
-    query_mixins.insert(0, SearchLivePublicMixin)
+    query_mixins.insert(0, SearchOverrideMixin)
@hooks.register("construct_page_action_menu")
@@ -86,3 +86,46 @@ def test_search_excludes_draft_event_page(home_page, event_index, graphql_post):
    assert response.status_code == 200
    assert "errors" not in body, body
    assert "DraftEventSearchToken" not in _titles_for(body, "EventPage")
 def test_search_results_not_grouped_by_type(home_page, event_index, graphql_post):
    # Two pages of different types matching the query equally, plus a third
    # page of one of those types that should rank highest. Under the
    # per-model-iteration resolver, all Generic results come before all Event
    # results (or vice versa) — type-grouped — so the highest-relevance Event
    # ends up after a less-relevant Generic. Cross-type relevance ordering
    # should put the strongest match first regardless of type.
    weak_generic = GenericPageFactory(
        parent=home_page,
        title="Klatremus klatremus klatremus",
        slug="weak-generic",
    )
    weak_event = EventPageFactory(
        parent=event_index,
        title="Klatremus klatremus klatremus",
        slug="weak-event",
    )
    strong_event = EventPageFactory(
        parent=event_index,
        title="Klatremus klatremus klatremus klatremus klatremus klatremus",
        slug="strong-event",
    )
    _index(weak_generic)
    _index(weak_event)
    _index(strong_event)
    response, body = graphql_post(SEARCH_QUERY, {"query": "klatremus"})
    assert response.status_code == 200
    assert "errors" not in body, body
    order = [
        (r["__typename"], r["title"])
        for r in body["data"]["results"]
        if r["__typename"] in ("GenericPage", "EventPage")
    ]
    assert len(order) == 3, order
    # Per-type grouping would put all results of one type consecutively
    # before the other type. Cross-type relevance ordering should interleave.
    types_seen = [t for t, _ in order]
    assert types_seen != ["GenericPage", "EventPage", "EventPage"], order
    assert types_seen != ["EventPage", "EventPage", "GenericPage"], order