# ----- Helper: Truncate Filename -----
def truncate_filename(filename: str, max_length: int = 255) -> str:
    """
    Shorten a file name to at most ``max_length`` characters, keeping its extension.

    Only the final extension (the text from the last dot onward) is preserved;
    earlier dots are treated as part of the stem. This matters because names
    built from usernames or slugs may contain dots (e.g. ``john.doe``), and
    treating everything after the first dot as "the suffix" would either
    duplicate part of the stem or leave no room for the stem at all.

    A leading dot (hidden file such as ``.profile``) or a trailing dot is not
    considered an extension.

    NOTE(review): the limit is counted in characters. Many filesystems limit
    file names in bytes, so names with non-ASCII characters may still be too
    long after truncation -- confirm whether slugs can be non-ASCII.

    Args:
        filename: Bare file name (no directory components expected).
        max_length: Maximum number of characters allowed in the result.

    Returns:
        ``filename`` unchanged if it already fits; otherwise a shortened name
        of at most ``max_length`` characters. If the extension alone leaves no
        room for any stem, the plain prefix ``filename[:max_length]`` is
        returned instead.

    Raises:
        ValueError: If ``max_length`` is negative.
    """
    if max_length < 0:
        # A negative bound would silently slice from the end of the string.
        raise ValueError(f"max_length must be non-negative, got {max_length}")

    if len(filename) <= max_length:
        return filename

    # Split on the LAST dot only, so stem + extension == filename exactly.
    dot = filename.rfind(".")
    has_extension = 0 < dot < len(filename) - 1
    extension = filename[dot:] if has_extension else ""
    stem = filename[:dot] if has_extension else filename

    # Room left for the stem once the extension is accounted for.
    stem_budget = max_length - len(extension)
    if stem_budget <= 0:
        # The extension alone fills (or exceeds) the limit: fall back to a
        # plain prefix of the whole name.
        return filename[:max_length]

    return stem[:stem_budget] + extension