From 4325ecc2300ae0ffc0cb32a088c76ba5f34f9fa9 Mon Sep 17 00:00:00 2001 From: Bhupesh Varshney Date: Sun, 25 Feb 2024 14:21:15 +0530 Subject: [PATCH] remove reading reddit collection for community threads collection (#16) --- .../workflows/collection-thread-updater.yml | 54 +++++----- community-threads/main.py | 102 +++++++----------- 2 files changed, 68 insertions(+), 88 deletions(-) diff --git a/.github/workflows/collection-thread-updater.yml b/.github/workflows/collection-thread-updater.yml index 4fe5fd8..138eacd 100644 --- a/.github/workflows/collection-thread-updater.yml +++ b/.github/workflows/collection-thread-updater.yml @@ -1,35 +1,37 @@ name : Community Threads Wiki Updater on: - schedule: - - cron: '0 0 * * *' # This cron expression triggers the workflow every day at midnight UTC - workflow_dispatch: + workflow_dispatch: + inputs: + post_url: + description: 'The URL of the Reddit post to add' + required: true permissions: contents: read jobs: - build: - runs-on: ubuntu-latest + build: + runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Update Wiki - env: - REDDIT_CLIENT_ID: ${{ secrets.REDDIT_CLIENT_ID }} - REDDIT_CLIENT_SECRET: ${{ secrets.REDDIT_CLIENT_SECRET }} - REDDIT_PASSWORD: ${{ secrets.REDDIT_PASSWORD }} - REDDIT_USERNAME: ${{ secrets.REDDIT_USERNAME }} - GIST_ID: ${{ secrets.GIST_ID }} - GIST_TOKEN: ${{ secrets.GIST_TOKEN }} - run: | - cd community-threads - python main.py + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + - name: Update Wiki + env: + REDDIT_CLIENT_ID: ${{ secrets.REDDIT_CLIENT_ID }} + REDDIT_CLIENT_SECRET: ${{ secrets.REDDIT_CLIENT_SECRET }} + REDDIT_PASSWORD: ${{ secrets.REDDIT_PASSWORD }} + REDDIT_USERNAME: ${{ secrets.REDDIT_USERNAME }} + GIST_ID: ${{ secrets.GIST_ID }} + GIST_TOKEN: ${{ secrets.GIST_TOKEN }} + run: | + cd community-threads + python main.py ${{ github.event.inputs.post_url }} \ No newline at end of file diff --git a/community-threads/main.py b/community-threads/main.py index f3d4880..c91d952 100644 --- a/community-threads/main.py +++ b/community-threads/main.py @@ -1,5 +1,6 @@ import praw import os +import argparse from datetime import datetime import json from collections import defaultdict @@ -36,13 +37,26 @@ def update_gist(gist_id, filename, content, description=""): ) return response.json() +# farewell, reddit collections +# def get_collection(reddit): +# collection = reddit.subreddit(sub).collections( +# permalink="https://reddit.com/r/developersIndia/collection/958aef35-f9cb-414d-ab33-08bc639e47de" +# ) +# return collection -def get_collection(reddit): - collection = reddit.subreddit(sub).collections( - permalink="https://reddit.com/r/developersIndia/collection/958aef35-f9cb-414d-ab33-08bc639e47de" - ) - return collection - +def get_post_data(reddit, post_url): + submission = reddit.submission(url=post_url) + post = { + "title": submission.title, + "url": submission.url, + "id": submission.id, + "num_comments": submission.num_comments, + "created_at": datetime.utcfromtimestamp( + submission.created_utc + ).isoformat(), + "flair_text": submission.link_flair_text, + } + return post def update_wiki(reddit, wikipage, posts): # Group posts by year @@ -78,6 +92,10 @@ def update_wiki(reddit, wikipage, posts): def main(): + parser = argparse.ArgumentParser(description='Update Community Threads Collection.') + parser.add_argument('post_url', help='The URL of the Reddit post to add.') + args = parser.parse_args() + reddit = praw.Reddit( client_id=client_id, client_secret=client_secret, @@ -86,70 +104,30 @@ def main(): user_agent=f"Automod reader by u/{username}", ) - collection = get_collection(reddit) - saved_collection_posts = json.loads(get_gist_content(gist_id)) saved_collection_ids = [post["id"] for post in saved_collection_posts["posts"]] print(f"Database was last updated on {saved_collection_posts['collection_last_updated']}") - print(f"Collection was last updated on {datetime.utcfromtimestamp(collection.last_update_utc).isoformat()}") - if ( - saved_collection_posts["collection_last_updated"] - != datetime.utcfromtimestamp(collection.last_update_utc).isoformat() - ): - print("Collection was updated, getting new posts data...") + posts = [] + for submission_id in saved_collection_posts["posts"]: + post = { + "title": submission_id["title"], + "url": submission_id["url"], + "id": submission_id["id"], + "num_comments": submission_id["num_comments"], + "created_at": submission_id["created_at"], + "flair_text": submission_id["flair_text"], + } + posts.append(post) - # given 2 lists find non-common elements - db_posts = set(saved_collection_ids) - collection_posts = [] - for submission in collection: - collection_posts.append(submission.id) - collection_posts = set(collection_posts) - - new_posts = list(collection_posts - db_posts) - deleted_posts = list(db_posts - collection_posts) - - print(f"Found {len(new_posts)} new posts!") - print(f"Found {len(deleted_posts)} deleted posts!") - - posts = [] - # load the saved collection posts data - for submission_id in saved_collection_posts["posts"]: - if submission_id["id"] in deleted_posts: - continue - post = { - "title": submission_id["title"], - "url": submission_id["url"], - "id": submission_id["id"], - "num_comments": submission_id["num_comments"], - "created_at": submission_id["created_at"], - "flair_text": submission_id["flair_text"], - } - posts.append(post) - - # get the new posts data - for submission_id in new_posts: - submission = reddit.submission(submission_id) - post = { - "title": submission.title, - "url": submission.url, - "id": submission.id, - "num_comments": submission.num_comments, - "created_at": datetime.utcfromtimestamp( - submission.created_utc - ).isoformat(), - "flair_text": submission.link_flair_text, - } - posts.append(post) - - # sort the posts by created_at + new_post = get_post_data(reddit, args.post_url) + if new_post["id"] not in saved_collection_ids: + posts.append(new_post) posts = sorted(posts, key=lambda k: k["created_at"]) collection_json = { - "collection_last_updated": datetime.utcfromtimestamp( - collection.last_update_utc - ).isoformat(), + "collection_last_updated": datetime.utcnow().isoformat(), "posts": posts, } @@ -157,7 +135,7 @@ def main(): print("Internal database updated successfully!") update_wiki(reddit, "community-threads", posts) else: - print("Wiki is up to date!") + print("Post is already in the collection. No changes were made.") if __name__ == "__main__":