add wordpress migration

This commit is contained in:
2024-08-09 01:56:00 +02:00
parent d1be264bc5
commit 1bb0fd252f
36 changed files with 1234 additions and 63 deletions

View File

@ -5,6 +5,7 @@ from wagtail.admin.panels import FieldPanel
from wagtail.fields import RichTextField
from wagtail.models import Page
from wagtail.search import index
from wagtail_wordpress_import.models import WPImportedPageMixin
from dnscms.fields import CommonStreamField
@ -27,7 +28,7 @@ class NewsIndex(Page):
search_fields = []
class NewsPage(Page):
class NewsPage(WPImportedPageMixin, Page):
subpage_types = []
parent_page_types = ["news.NewsIndex"]
show_in_menus = False
@ -74,3 +75,54 @@ class NewsPage(Page):
index.SearchField("lead", boost=1),
index.SearchField("body"),
]
settings_panels = Page.settings_panels + WPImportedPageMixin.wordpress_panels
def import_wordpress_data(self, data):
import html
from bs4 import BeautifulSoup
def generate_excerpt(html_content):
soup = BeautifulSoup(html_content, features="lxml")
VALID_TAGS = ["div", "p"]
for tag in soup.findAll("p"):
if tag.name not in VALID_TAGS:
tag.remove()
text = soup.get_text().strip()
words = text.split(" ")
if len(words) < 26:
return text
return " ".join(words[:25]) + " [...]"
# Wagtail page model fields
self.title = html.unescape(data["title"])
self.slug = data["slug"]
self.first_published_at = data["first_published_at"]
self.last_published_at = data["last_published_at"]
self.latest_revision_created_at = data["latest_revision_created_at"]
self.search_description = data["search_description"]
# debug fields
self.wp_post_id = data["wp_post_id"]
self.wp_post_type = data["wp_post_type"]
self.wp_link = data["wp_link"]
self.wp_raw_content = data["wp_raw_content"]
self.wp_block_json = data["wp_block_json"]
self.wp_processed_content = data["wp_processed_content"]
self.wp_normalized_styles = data["wp_normalized_styles"]
self.wp_post_meta = data["wp_post_meta"]
# own model fields
self.body = data["body"] or ""
meta = data["wp_post_meta"]
written_excerpt = meta.get("excerpt_encoded")
generated_excerpt = ""
if not written_excerpt:
generated_excerpt = generate_excerpt(self.wp_processed_content)
self.excerpt = written_excerpt or generated_excerpt or "[...]"