dnscms: exclude non-live pages from search results, rank results across page types
This commit is contained in:
@@ -1,10 +1,7 @@
|
|||||||
from django.apps import apps as django_apps
|
|
||||||
from django.templatetags.static import static
|
from django.templatetags.static import static
|
||||||
from django.utils.html import format_html
|
from django.utils.html import format_html
|
||||||
from grapple.registry import registry as grapple_registry
|
from grapple.registry import registry as grapple_registry
|
||||||
from wagtail import hooks
|
from wagtail import hooks
|
||||||
from wagtail.documents import get_document_model
|
|
||||||
from wagtail.images import get_image_model
|
|
||||||
from wagtail.models import Page
|
from wagtail.models import Page
|
||||||
from wagtail.search.backends import get_search_backend
|
from wagtail.search.backends import get_search_backend
|
||||||
|
|
||||||
@@ -15,33 +12,37 @@ def enable_additional_rich_text_features(features):
|
|||||||
|
|
||||||
|
|
||||||
@hooks.register("register_schema_query")
def override_search_resolver(query_mixins):
    """
    Override Grapple's `search` resolver with a Page-only, relevance-ranked one.

    Two fixes compared with the upstream resolver:

    1. Pages are restricted to live + public, so drafts and access-restricted
       pages don't leak via the public API.
    2. A single search runs across all `Page` subclasses (instead of one
       search per model), so results are ranked by relevance across types
       rather than grouped by content type. The specific instances are
       fetched in a second bulk query and reordered to match the ranking.

    Documents and images are intentionally not searched. The upstream resolver
    includes them, but the frontend search page only renders Page types and
    discards everything else, so iterating those indexes is wasted work.

    `query_mixins` is the list of query mixin classes handed to the hook; it
    is modified in place and nothing is returned.
    """
    # No class models registered with Grapple: leave the mixins untouched.
    if not grapple_registry.class_models:
        return

    class SearchOverrideMixin:
        def resolve_search(self, info, **kwargs):
            """Return specific page instances matching `query`, best match first.

            Returns None when no query is given and an empty list when the
            search finds nothing.
            """
            query = kwargs.get("query")
            if not query:
                return None

            # Search the base Page model once so the backend ranks every
            # page type together.
            backend = get_search_backend()
            ranked_ids = [
                page.id
                for page in backend.search(query, Page.objects.live().public())
            ]
            if not ranked_ids:
                return []

            # Load the specific subclasses in bulk, keyed by id, so they can
            # be put back into the order the search produced.
            specific_by_id = {
                page.id: page
                for page in Page.objects.filter(id__in=ranked_ids).specific()
            }
            # Skip ids the second query did not return.
            return [
                specific_by_id[page_id]
                for page_id in ranked_ids
                if page_id in specific_by_id
            ]

    # Prepended so the MRO picks this `resolve_search` over Grapple's default.
    query_mixins.insert(0, SearchOverrideMixin)
|
||||||
|
|
||||||
|
|
||||||
@hooks.register("construct_page_action_menu")
|
@hooks.register("construct_page_action_menu")
|
||||||
|
|||||||
@@ -86,3 +86,46 @@ def test_search_excludes_draft_event_page(home_page, event_index, graphql_post):
|
|||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
assert "errors" not in body, body
|
assert "errors" not in body, body
|
||||||
assert "DraftEventSearchToken" not in _titles_for(body, "EventPage")
|
assert "DraftEventSearchToken" not in _titles_for(body, "EventPage")
|
||||||
|
|
||||||
|
|
||||||
|
def test_search_results_not_grouped_by_type(home_page, event_index, graphql_post):
    """Search results must not come back grouped by page type.

    Two pages of different types match the query equally, and a third page of
    one of those types repeats the term more often so it should rank highest.
    Under the per-model-iteration resolver, all Generic results come before
    all Event results (or vice versa), so the highest-relevance Event ends up
    after a less-relevant Generic. Cross-type relevance ordering should
    interleave the types instead.
    """
    weak_title = "Klatremus klatremus klatremus"
    strong_title = "Klatremus klatremus klatremus klatremus klatremus klatremus"
    pages = [
        GenericPageFactory(parent=home_page, title=weak_title, slug="weak-generic"),
        EventPageFactory(parent=event_index, title=weak_title, slug="weak-event"),
        EventPageFactory(parent=event_index, title=strong_title, slug="strong-event"),
    ]
    for page in pages:
        _index(page)

    response, body = graphql_post(SEARCH_QUERY, {"query": "klatremus"})

    assert response.status_code == 200
    assert "errors" not in body, body

    # Only the two page types created above are of interest here.
    wanted_types = ("GenericPage", "EventPage")
    order = []
    for result in body["data"]["results"]:
        if result["__typename"] in wanted_types:
            order.append((result["__typename"], result["title"]))
    assert len(order) == 3, order

    # Per-type grouping would put all results of one type consecutively
    # before the other type; both grouped layouts are rejected.
    types_seen = [typename for typename, _ in order]
    grouped_layouts = (
        ["GenericPage", "EventPage", "EventPage"],
        ["EventPage", "EventPage", "GenericPage"],
    )
    for grouped in grouped_layouts:
        assert types_seen != grouped, order
|
||||||
|
|||||||
Reference in New Issue
Block a user