diff --git a/dnscms/dnscms/wagtail_hooks.py b/dnscms/dnscms/wagtail_hooks.py index bef3e4e..e7d3262 100644 --- a/dnscms/dnscms/wagtail_hooks.py +++ b/dnscms/dnscms/wagtail_hooks.py @@ -1,10 +1,7 @@ -from django.apps import apps as django_apps from django.templatetags.static import static from django.utils.html import format_html from grapple.registry import registry as grapple_registry from wagtail import hooks -from wagtail.documents import get_document_model -from wagtail.images import get_image_model from wagtail.models import Page from wagtail.search.backends import get_search_backend @@ -15,33 +12,37 @@ def enable_additional_rich_text_features(features): @hooks.register("register_schema_query") -def filter_search_to_live_pages(query_mixins): +def override_search_resolver(query_mixins): """ - Grapple's default `search` resolver hits every page regardless of publish - state, exposing drafts on the public API. Prepend a mixin so MRO picks our - `resolve_search`, which restricts Page subclasses to live + public. + Override Grapple's `search` resolver. Two fixes vs. the upstream version: + 1. Restrict pages to live + public so drafts and access-restricted pages + don't leak via the public API. + 2. Run a single search across all `Page` subclasses (instead of iterating + per-model) so results are ranked by relevance across types rather than + grouped by content type. Specific instances are fetched in a second + bulk query and reordered to match the search ranking. + + Documents and images are intentionally not searched. The upstream resolver + includes them, but the frontend search page only renders Page types and + discards everything else, so iterating those indexes is wasted work. """ if not grapple_registry.class_models: return - class SearchLivePublicMixin: + class SearchOverrideMixin: def resolve_search(self, info, **kwargs): query = kwargs.get("query") if not query: return None s = get_search_backend() - results = [] - models = [get_document_model(), get_image_model()] - for app in grapple_registry.apps: - models += django_apps.all_models[app].values() - for model in models: - if issubclass(model, Page): - results += s.search(query, model.objects.live().public()) - else: - results += s.search(query, model) - return results + ranked = list(s.search(query, Page.objects.live().public())) + if not ranked: + return [] + ids = [p.id for p in ranked] + specific_map = {p.id: p for p in Page.objects.filter(id__in=ids).specific()} + return [specific_map[i] for i in ids if i in specific_map] - query_mixins.insert(0, SearchLivePublicMixin) + query_mixins.insert(0, SearchOverrideMixin) @hooks.register("construct_page_action_menu") diff --git a/dnscms/tests/test_search.py b/dnscms/tests/test_search.py index 6a183af..936de88 100644 --- a/dnscms/tests/test_search.py +++ b/dnscms/tests/test_search.py @@ -86,3 +86,46 @@ def test_search_excludes_draft_event_page(home_page, event_index, graphql_post): assert response.status_code == 200 assert "errors" not in body, body assert "DraftEventSearchToken" not in _titles_for(body, "EventPage") + + +def test_search_results_not_grouped_by_type(home_page, event_index, graphql_post): + # Two pages of different types matching the query equally, plus a third + # page of one of those types that should rank highest. Under the + # per-model-iteration resolver, all Generic results come before all Event + # results (or vice versa) — type-grouped — so the highest-relevance Event + # ends up after a less-relevant Generic. Cross-type relevance ordering + # should put the strongest match first regardless of type. + weak_generic = GenericPageFactory( + parent=home_page, + title="Klatremus klatremus klatremus", + slug="weak-generic", + ) + weak_event = EventPageFactory( + parent=event_index, + title="Klatremus klatremus klatremus", + slug="weak-event", + ) + strong_event = EventPageFactory( + parent=event_index, + title="Klatremus klatremus klatremus klatremus klatremus klatremus", + slug="strong-event", + ) + _index(weak_generic) + _index(weak_event) + _index(strong_event) + + response, body = graphql_post(SEARCH_QUERY, {"query": "klatremus"}) + + assert response.status_code == 200 + assert "errors" not in body, body + order = [ + (r["__typename"], r["title"]) + for r in body["data"]["results"] + if r["__typename"] in ("GenericPage", "EventPage") + ] + assert len(order) == 3, order + # Per-type grouping would put all results of one type consecutively + # before the other type. Cross-type relevance ordering should interleave. + types_seen = [t for t, _ in order] + assert types_seen != ["GenericPage", "EventPage", "EventPage"], order + assert types_seen != ["EventPage", "EventPage", "GenericPage"], order