mirror of
https://github.com/c0mmando/discourse-to-markdown-archiver.git
synced 2025-05-12 21:33:48 +05:30
truncate forum topics over 255 characters
This commit is contained in:
parent
74da2c49e0
commit
60fec0c718
27
archive.py
27
archive.py
@ -80,6 +80,28 @@ def http_get_json(site_url: str, path: str) -> dict:
|
|||||||
log.warning("Unable to decode JSON response from %r", path)
|
log.warning("Unable to decode JSON response from %r", path)
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
# ----- Helper: Truncate Filename -----
|
||||||
|
def truncate_filename(filename: str, max_length: int = 255) -> str:
    """
    Shorten a bare file name to at most ``max_length`` characters.

    The final extension (for example ``.json`` or ``.md``) is preserved and
    only the part before it is cut. Only the *last* extension is treated as
    the suffix: names built from usernames or slugs may contain dots
    (``0000000001-john.doe-some-topic.json``), and treating every
    dot-separated piece as part of a compound extension would either drop
    the real extension or duplicate part of the name.

    Args:
        filename: File name without directory components.
        max_length: Maximum number of characters allowed in the result.

    Returns:
        ``filename`` unchanged when it already fits, otherwise a shortened
        name that ends with the original final extension. If the extension
        alone leaves no room for a stem, the whole name is cut to
        ``max_length`` characters.
    """
    if len(filename) <= max_length:
        return filename

    # NOTE(review): the limit is counted in characters; file systems that
    # limit names in bytes may still reject non-ASCII names -- confirm
    # whether byte-based truncation is needed.
    suffix = Path(filename).suffix

    # Room left for the stem once the extension is accounted for.
    max_stem_length = max_length - len(suffix)
    if max_stem_length <= 0:
        # The extension itself does not fit: fall back to a plain cut.
        return filename[:max_length]

    # Slice the stem from the original string so nothing before the final
    # extension is lost or repeated.
    stem = filename[: len(filename) - len(suffix)]
    return stem[:max_stem_length] + suffix
|
||||||
|
|
||||||
# ----- Data Models -----
|
# ----- Data Models -----
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
@ -101,6 +123,8 @@ class Post:
|
|||||||
"""Save the raw JSON post to disk if not already archived."""
|
"""Save the raw JSON post to disk if not already archived."""
|
||||||
idstr = str(self.id).zfill(10)
|
idstr = str(self.id).zfill(10)
|
||||||
filename = f"{idstr}-{self.raw.get('username', 'anonymous')}-{self.raw.get('topic_slug', 'unknown')}.json"
|
filename = f"{idstr}-{self.raw.get('username', 'anonymous')}-{self.raw.get('topic_slug', 'unknown')}.json"
|
||||||
|
# Truncate file name if necessary.
|
||||||
|
filename = truncate_filename(filename)
|
||||||
folder_name = self.get_created_at().strftime('%Y-%m-%B')
|
folder_name = self.get_created_at().strftime('%Y-%m-%B')
|
||||||
full_path = dir / folder_name / filename
|
full_path = dir / folder_name / filename
|
||||||
|
|
||||||
@ -141,9 +165,12 @@ class Topic:
|
|||||||
"""
|
"""
|
||||||
Save the rendered Markdown topic to disk.
|
Save the rendered Markdown topic to disk.
|
||||||
Filename built from creation date, slug, and id.
|
Filename built from creation date, slug, and id.
|
||||||
|
Truncate the filename if it is too long for the operating system.
|
||||||
"""
|
"""
|
||||||
date_str = str(self.get_created_at().date())
|
date_str = str(self.get_created_at().date())
|
||||||
filename = f"{date_str}-{self.slug}-id{self.id}.md"
|
filename = f"{date_str}-{self.slug}-id{self.id}.md"
|
||||||
|
# Truncate if necessary
|
||||||
|
filename = truncate_filename(filename)
|
||||||
folder_name = self.get_created_at().strftime('%Y-%m-%B')
|
folder_name = self.get_created_at().strftime('%Y-%m-%B')
|
||||||
full_path = dir / folder_name / filename
|
full_path = dir / folder_name / filename
|
||||||
full_path.parent.mkdir(parents=True, exist_ok=True)
|
full_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user