dnscms: remove some wordpress import functionality, keep fields around but defer

This commit is contained in:
2026-05-23 01:31:44 +02:00
parent 10763f0b5d
commit 0b0fba174e
9 changed files with 127 additions and 401 deletions
+8 -51
View File
@@ -4,12 +4,16 @@ from grapple.helpers import register_singular_query_field
from grapple.models import GraphQLImage, GraphQLRichText, GraphQLStreamfield, GraphQLString
from wagtail.admin.panels import FieldPanel
from wagtail.fields import RichTextField
from wagtail.models import Page
from wagtail.models import Page, PageManager
from wagtail.search import index
from wagtail_headless_preview.models import HeadlessMixin
from dnscms.fields import CommonStreamField
from dnscms.wordpress.models import WPImportedPageMixin
from dnscms.wordpress.models import DeferWPFieldsManagerMixin, WPImportedPageMixin
class NewsPageManager(DeferWPFieldsManagerMixin, PageManager):
    """Manager that layers DeferWPFieldsManagerMixin on Wagtail's PageManager.

    Assigned as ``NewsPage.objects``. The class adds no behaviour of its own;
    everything comes from its two bases.

    NOTE(review): the mixin lives in ``dnscms.wordpress.models`` and is not
    visible here - presumably it defers loading of the imported WordPress
    fields; confirm against its definition.
    """
@register_singular_query_field("newsIndex")
@@ -39,6 +43,8 @@ class NewsPage(HeadlessMixin, WPImportedPageMixin, Page):
parent_page_types = ["news.NewsIndex"]
show_in_menus = False
objects = NewsPageManager()
excerpt = models.TextField(max_length=512, blank=False)
lead = RichTextField(features=["italic", "link"], blank=True)
body = CommonStreamField
@@ -90,52 +96,3 @@ class NewsPage(HeadlessMixin, WPImportedPageMixin, Page):
class Meta:
    # Human-readable model names, wrapped in the module's ``_`` translation
    # helper so they can be localised.
    verbose_name_plural = _("news articles")
    verbose_name = _("news article")
def import_wordpress_data(self, data):
    """Populate this page from one imported WordPress post record.

    Args:
        data: Mapping describing the post. Every key read below must be
            present; a missing key raises ``KeyError``. ``body`` may be
            falsy (stored as ``""``) and ``wp_post_meta`` may be ``None``.

    The excerpt is taken from ``wp_post_meta["excerpt_encoded"]`` when that
    is set. Otherwise it is generated from the first 25 words of the
    processed content, falling back to ``"[...]"`` when there is no text.
    """
    import html

    max_words = 25

    def generate_excerpt(html_content):
        """Return the plain text of *html_content*, cut to ``max_words`` words."""
        if not html_content:
            # Nothing to parse; the caller falls back to the placeholder.
            return ""

        # Imported lazily: the parser is only needed when a post has no
        # hand-written excerpt.
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html_content, features="lxml")
        # The text of the whole document is used; no tags are filtered out.
        text = soup.get_text().strip()
        # Split on any whitespace so words separated by newlines or tabs
        # (e.g. across paragraphs) are counted individually.
        words = text.split()
        if len(words) <= max_words:
            return text
        return " ".join(words[:max_words]) + " [...]"

    # Wagtail page model fields
    self.title = html.unescape(data["title"])
    self.slug = data["slug"]
    self.first_published_at = data["first_published_at"]
    self.last_published_at = data["last_published_at"]
    self.latest_revision_created_at = data["latest_revision_created_at"]
    self.search_description = data["search_description"]

    # debug fields
    self.wp_post_id = data["wp_post_id"]
    self.wp_post_type = data["wp_post_type"]
    self.wp_link = data["wp_link"]
    self.wp_raw_content = data["wp_raw_content"]
    self.wp_block_json = data["wp_block_json"]
    self.wp_processed_content = data["wp_processed_content"]
    self.wp_normalized_styles = data["wp_normalized_styles"]
    self.wp_post_meta = data["wp_post_meta"]

    # own model fields
    self.body = data["body"] or ""

    # Tolerate posts imported without any meta mapping.
    meta = data["wp_post_meta"] or {}
    written_excerpt = meta.get("excerpt_encoded")
    if written_excerpt:
        self.excerpt = written_excerpt
    else:
        self.excerpt = generate_excerpt(self.wp_processed_content) or "[...]"