From ba97013c276cf0afe141d4e3863a993061f9f38c Mon Sep 17 00:00:00 2001 From: John Ahlroos Date: Fri, 13 Feb 2026 18:18:14 +0100 Subject: [PATCH] Initial version of app --- .dockerignore | 10 ++ .gitea/workflows/build.yaml | 71 ++++++++++++++ .gitignore | 4 + Dockerfile | 11 +++ app/ai/__init__py | 0 app/ai/prompts.py | 2 + app/main.py | 24 +++++ app/resources/embed.css | 191 ++++++++++++++++++++++++++++++++++++ app/routers/__init__.py | 0 app/routers/embed.py | 97 ++++++++++++++++++ app/routers/fuengirola.py | 123 +++++++++++++++++++++++ app/routers/hackernews.py | 101 +++++++++++++++++++ app/routers/sur.py | 118 ++++++++++++++++++++++ app/routers/taloustaito.py | 113 +++++++++++++++++++++ app/routers/the_local.py | 118 ++++++++++++++++++++++ app/routers/yle_rss_en.py | 156 +++++++++++++++++++++++++++++ app/routers/yle_rss_fi.py | 135 +++++++++++++++++++++++++ app/settings/__init__py | 0 app/settings/defaults.py | 13 +++ compose.yml | 20 ++++ log_config.yml | 34 +++++++ requirements.txt | 15 +++ 22 files changed, 1356 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitea/workflows/build.yaml create mode 100644 .gitignore create mode 100644 Dockerfile create mode 100644 app/ai/__init__py create mode 100644 app/ai/prompts.py create mode 100644 app/main.py create mode 100644 app/resources/embed.css create mode 100644 app/routers/__init__.py create mode 100644 app/routers/embed.py create mode 100644 app/routers/fuengirola.py create mode 100644 app/routers/hackernews.py create mode 100644 app/routers/sur.py create mode 100644 app/routers/taloustaito.py create mode 100644 app/routers/the_local.py create mode 100644 app/routers/yle_rss_en.py create mode 100644 app/routers/yle_rss_fi.py create mode 100644 app/settings/__init__py create mode 100644 app/settings/defaults.py create mode 100644 compose.yml create mode 100644 log_config.yml create mode 100644 requirements.txt diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..78ba8dd --- /dev/null +++ b/.dockerignore @@ -0,0 +1,10 @@ +__pycache__ +.venv +.env +.gitea +.git +.gitignore +.dockerignore +README.md +compose.yml +Dockerfile \ No newline at end of file diff --git a/.gitea/workflows/build.yaml b/.gitea/workflows/build.yaml new file mode 100644 index 0000000..5f9d9c4 --- /dev/null +++ b/.gitea/workflows/build.yaml @@ -0,0 +1,71 @@ +name: Build & Release +on: + push: + tags: + - '[0-9]+.[0-9]+.[0-9]+' + +env: + ENDPOINT: services-3 + STACK: mastodon + IMAGE: com.devsoap/mastobot + TAG: ${{ gitea.ref_name }} + CACHE_NAME: cache-python-dependencies-mastobot + RUNNER_TOOL_CACHE: /toolcache + +jobs: + + build-docker-image: + runs-on: python + container: + image: catthehacker/ubuntu:act-20.04 + steps: + - name: Checkout Docker file + uses: actions/checkout@v4 + with: + ssh-key: ${{ secrets.SSH_DEVSOAP_PRIVATE_KEY }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - name: Login to Devsoap Container Registry + uses: docker/login-action@v2 + with: + registry: ${{ secrets.DOCKER_REGISTRY }} + username: ${{ secrets.DOCKER_REGISTRY_USER }} + password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }} + - name: Build and push Docker image + uses: docker/build-push-action@v4 + with: + context: . + file: ./Dockerfile + push: true + build-args: | + VERSION=${{env.TAG}} + tags: | + ${{secrets.DOCKER_REGISTRY}}/${{env.IMAGE}}:${{env.TAG}} + ${{secrets.DOCKER_REGISTRY}}/${{env.IMAGE}}:latest + + deploy-to-production: + if: ${{ always() && needs.build-docker-image.result == 'success' }} + needs: build-docker-image + runs-on: python + steps: + - name: Checkout infrastructure config + run: | + echo "Cloning repository ${{ env.REPOSITORY_URL }}" + git clone -v --depth=1 ${{ env.REPOSITORY_URL }} infra + env: + REPOSITORY_URL: ${{ env.GIT_REPO_USER }}@${{ env.GIT_REPO_INTERNAL }}:${{ env.DEVSOAP_INFRA_GIT_REPO }} + - name: Setup Git config + working-directory: infra + run: | + git config user.email "code@devsoap.com" + git config user.name "Devsoap Code CI/CD" + - name: Update image version + working-directory: infra + run: | + sed -i -r "s|/$IMAGE:(.*?)|/$IMAGE:$TAG|g" $ENDPOINT/$STACK/docker-compose.yml + git diff -U0 + - name: Push changes + working-directory: infra + run: | + git commit -am "Updated $ENDPOINT/$STACK/$IMAGE to $TAG" + git push origin master \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..72383bb --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.venv +.env +__pycache__ +.vscode \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..cc70a4c --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.12.12-alpine3.23 +WORKDIR /code +COPY ./requirements.txt /code/requirements.txt +RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt +COPY ./app /code/app +COPY ./log_config.yml /code/log_config.yml +ARG version +ENV VERSION=${version} +HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \ + CMD wget --spider --quiet --tries=1 --timeout=5 --server-response http://127.0.0.1:8000/health 2>&1 | grep "200 OK" > /dev/null +CMD ["uvicorn", "main:app", "--app-dir", "app", "--log-config", "log_config.yml", "--host","0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/app/ai/__init__py b/app/ai/__init__py new file mode 100644 index 0000000..e69de29 diff --git a/app/ai/prompts.py b/app/ai/prompts.py new file mode 100644 index 0000000..55be373 --- /dev/null +++ b/app/ai/prompts.py @@ -0,0 +1,2 @@ +TRANSLATE_ML_PROMPT= "Translate the following hashtags and delimit the output with spaces. Convert word to camel case. Do not include the original tag. Prefix each tag with the # character.: %s" +GENERATE_TAGS_PROMPT = "Generate %d hashtags from the following text and delimit them with space: %s" diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..a9c4e6e --- /dev/null +++ b/app/main.py @@ -0,0 +1,24 @@ +from fastapi import Depends, FastAPI +from routers import embed, yle_rss_fi, yle_rss_en, the_local, taloustaito,sur,hackernews,fuengirola + +from settings.defaults import get_settings + +app = FastAPI(title='Mastobot', description='Mastodon Feed Automation Service', version=get_settings().version) + +app.include_router(embed.router, prefix="/embed", tags=["embed"]) + +app.include_router(yle_rss_fi.router, prefix="/rss", tags=["rss"]) +app.include_router(yle_rss_en.router, prefix="/rss", tags=["rss"]) +app.include_router(the_local.router, prefix="/rss", tags=["rss"]) +app.include_router(taloustaito.router, prefix="/rss", tags=["rss"]) +app.include_router(sur.router, prefix="/rss", tags=["rss"]) +app.include_router(hackernews.router, prefix="/rss", tags=["rss"]) +app.include_router(fuengirola.router, prefix="/rss", tags=["rss"]) + +@app.get("/") +def read_root(): + return {"message": "Welcome to Mastobot!"} + +@app.get("/health") +def health_check(): + return {"status": "healthy"} \ No newline at end of file diff --git a/app/resources/embed.css b/app/resources/embed.css new file mode 100644 index 0000000..2ec7538 --- /dev/null +++ b/app/resources/embed.css @@ -0,0 +1,191 @@ +html, +body { + background-color: #282c37; + font-family: 'Roboto', roboto, Arial, sans-serif; + color: #ffffff; + font-weight: lighter; + overflow-x: hidden; + font-size: 80%; + word-break: break-word; +} + +* { + margin: 0; + padding: 0; +} + +a, +a * { + color: #2b90d9; +} + +::-webkit-scrollbar { + display: none; +} + +.meta { + background-color: #39404d; +} + +.header { + display: flex; + background-size: cover; + min-height: 8rem; + color: #ffffff; +} + +.header .header-left, +.header .header-right { + margin: 0; +} + +.header .header-left { + min-width: 8rem; + position: relative; + text-align: center; + background: rgba(40, 44, 55, 0.3); +} + +.header .header-left .avatar { + width: 6rem; + height: 6rem; + position: relative; + top: calc(50% - 3rem); +} + +.header .header-right { + flex-grow: 1; + font-size: 0.9rem; + padding: 0.9rem; + background: rgba(40, 44, 55, 0.85); +} + +.header .header-title { + font-size: 1.3rem; +} + +.item { + padding: 1rem; + border-top: solid 1px #626d80; +} + +.item-content, +.cw, +.title { + font-size: 1.1rem; + font-weight: lighter; +} + +.item-content *, +.cw { + margin: 1rem 0; + line-height: 1.4rem; +} + +.item-title, +.date, +.author-fullname { + color: #9baec8; + font-size: 0.9rem; +} + +.date { + margin: 1rem 0 0 0; + text-decoration: none; + display: block; +} + +.date:hover { + text-decoration: underline; +} + +.item-title { + margin-bottom: 0.7rem; +} + +.author { + display: flex; + margin-bottom: 1rem; +} + +.author-info { + margin: 0 1rem; + display: flex; + flex-direction: column; + justify-content: space-around; +} + +.author-info .author-displayname { + font-size: 1.2rem; + color: #ffffff; + text-decoration: none; + display: block; + font-weight: bolder; +} + +.avatar { + width: 3rem; + height: 3rem; + border: none; + border-radius: 10%; +} + +.avatar.circular { + border-radius: 100%; +} + +.enclosures { + padding: 0.5em 0; + display: flex; + flex-wrap: wrap; + flex-direction: row; + overflow: hidden; +} + +.enclosure { + display: flex; + border: none; + max-height: 12rem; +} + +a.enclosure { + cursor: zoom-in; +} + +.enclosure > * { + flex: 1 1 auto; + width: 100%; + max-height: 100%; + object-fit: cover; +} + +.meta .title { + font-weight: bold; +} + +.hidden { + display: none; +} + +.button { + padding: 0.5rem 1rem; + border: none; + margin: 1rem; + border-radius: 0.5rem; + display: inline-block; + text-decoration: none; + background: #2b90d9; + color: #ffffff; + font-weight: 400; + cursor: pointer; + text-transform: uppercase; + user-select: none; +} + +label.button { + padding: 0.25rem 0.5rem; + margin: 0.4rem; + background: #626d80; + color: #282c37; + font-size: 0.8rem; +} \ No newline at end of file diff --git a/app/routers/__init__.py b/app/routers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/routers/embed.py b/app/routers/embed.py new file mode 100644 index 0000000..b402357 --- /dev/null +++ b/app/routers/embed.py @@ -0,0 +1,97 @@ +import traceback +import json +import requests +import timeago +import boto3 +import logging + +from datetime import datetime +from pathlib import Path +from yattag import Doc, indent +from typing import Annotated +from fastapi import Depends, APIRouter + +from settings.defaults import Settings, get_settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/generate", summary="Embeddable Mastodon Feed") +async def generate_static_page(settings: Annotated[Settings, Depends(get_settings)]): + mastodon_token = settings.feeds['embed']['token'] + s3_bucket = settings.feeds['embed']['s3_bucket'] + s3_filename = settings.feeds['embed']['s3_key'] + mastodon_get_statuses_url=settings.feeds['embed']['url'] + try: + latest_statuses = load_latest_statuses(mastodon_get_statuses_url, mastodon_token,20) + latest_statuses = [status for status in latest_statuses if status['in_reply_to_id'] == None] + latest_statuses = [status for status in latest_statuses if status['in_reply_to_account_id'] == None] + latest_statuses = [status for status in latest_statuses if status['reblog'] == None] + latest_statuses = [status for status in latest_statuses if status['visibility'] == 'public'] + latest_statuses = [status for status in latest_statuses if status['language'] == 'en'] + + html = convertToHTML(latest_statuses) + + uploadToAmazonS3(s3_bucket, s3_filename, html) + + return { + "status": 200, + "body": { + "successful": True + } + } + + except Exception as e: + msg = ''.join(traceback.format_exception_only(e)) + logger.error(msg) + return { + "status": 501, + "body": { + "message": msg, + "successful": False + } + } + +def load_latest_statuses(url, token, limit): + response=requests.get(f'{url}?limit={limit}', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + return json.loads(response.text) + +def convertToHTML(statuses): + css_file = Path(__file__).parent / '../resources/embed.css' + with css_file.open('r') as css: + doc, tag, text = Doc().tagtext() + with tag('html'): + with tag('head'): + doc.stag('meta', charset='UTF-8') + doc.stag('base', charset='_top') + with tag('style'): + doc.asis(css.read()) + with tag('body'): + with tag('div', klass='container'): + for status in statuses: + with tag('div', klass='item'): + with tag('div', klass='author'): + with tag('a', target='_top', klass='avatar', href=status['account']['url']): + doc.stag('img', klass='avatar', src=status['account']['avatar']) + with tag('div', klass='author-info'): + with tag('a', target='_top', klass='author-displayname', href=status['account']['url']): + text(status['account']['display_name']) + with tag('div', klass='author-fullname'): + text(status['account']['username'], '@', 'ahlroos.me') + with tag('div', klass='item-content'): + doc.asis(status['content']) + for attachment in status['media_attachments']: + with tag('a', target='_top', klass='enclosure', href=attachment['url']): + doc.stag('img', src=attachment['preview_url'] or '', alt=attachment['description'] or '', title=attachment['description'] or '') + with tag('a', target='_top', klass='date', href=status['uri']): + tst = datetime.fromisoformat(status['created_at']).replace(tzinfo=None) + text(timeago.format(tst, datetime.now())) + + return indent(doc.getvalue()) + +def uploadToAmazonS3(bucket, key, content): + s3 = boto3.resource('s3') + object = s3.Object(bucket, key) # type: ignore + object.put(Body=content, ACL='public-read', ContentType='text/html') \ No newline at end of file diff --git a/app/routers/fuengirola.py b/app/routers/fuengirola.py new file mode 100644 index 0000000..cb0b816 --- /dev/null +++ b/app/routers/fuengirola.py @@ -0,0 +1,123 @@ +import traceback +import json +import requests +import traceback +import json +import feedparser +import requests +import logging +import re + +from datetime import datetime +from time import mktime +from typing import Annotated +from fastapi import Depends, APIRouter +from bs4 import BeautifulSoup + +from settings.defaults import Settings, get_settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/fuengirola", summary="Fuengirola.fi RSS") +async def update(settings: Annotated[Settings, Depends(get_settings)]): + + feed_url = settings.feeds['fuengirola']['url'] + + mastodon_server = settings.mastodon_server + mastodon_aid = settings.feeds['fuengirola']['account_id'] + mastodon_token = str(settings.feeds['fuengirola']['token']) + mastodon_get_statuses_url=f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses' + mastodon_post_statuses_url=f'{mastodon_server}/api/v1/statuses' + + try: + last_status_timestamp=datetime.fromisoformat(load_last_status(mastodon_get_statuses_url, mastodon_token)['created_at']) + new_entries=load_feed_rss(feed_url, last_status_timestamp) + logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}') + + if (len(new_entries) == 0): + return { + "status": 200, + "body": { + "posted_entries": 0, + "successful": True + } + } + + posted_entries=list(map(lambda x: post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, x), new_entries)) + + return { + "status": 200, + "body": { + "posted_entries": len(posted_entries), + "successful": True + } + } + except Exception as e: + msg = ''.join(traceback.format_exception_only(e)) + logger.error(msg) + return { + "status": 501, + "body": { + "posted_entries": 0, + "message": msg, + "successful": False + } + } + + +def load_last_status(url, token): + response=requests.get(url + '?limit=1', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + return json.loads(response.text)[0] + + +def post_rss_entry_to_mastodon(url, token, entry): + + title = entry.title + link = entry.link + + description = BeautifulSoup(entry.summary, features="html.parser") + phrases = description.get_text().split('.') + phrases = filter(lambda str: ('first appeared' not in str), phrases) + phrases = filter(lambda str: (str != 'fi'), phrases) + description = '.'.join(phrases) + + if 'tags' in entry: + categories = [t.get('term') for t in entry.tags] + categories = map(lambda str: re.sub(r'\s+','', str), categories) + categories = map(lambda str: re.sub(r'[0-9.\–\-()]+','', str), categories) + categories = map(lambda str: str.capitalize(), categories) + categories = [str for str in categories if len(str) >= 3] + if len(categories) > 0: + categories = map(lambda str: str.capitalize(), categories) + categories = map(lambda str: str if str.startswith('#') else f'#{str}', categories) + categories = ' '.join(categories) + message = f"{title}\n\n{description}\n\n{link}\n\n{categories}" + else: + message = f"{title}\n\n{description}\n\n{link}" + else: + message = f"{title}\n\n{description}\n\n{link}" + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-type': 'application/x-www-form-urlencoded', + 'User-Agent': 'Serverless Feed' + } + + params = { + 'status': message, + 'language': 'fi', + 'visibility': 'public' + } + + response = requests.post(url, data=params, headers=headers) + if response.status_code != 200: + print('Failed to post message', response) + return response + + +def load_feed_rss(url, since): + feed=feedparser.parse(url) + return [entry for entry in feed.entries if datetime.fromtimestamp(mktime(entry.published_parsed)) > since.replace(tzinfo=datetime.fromtimestamp(mktime(entry.published_parsed)).tzinfo)] diff --git a/app/routers/hackernews.py b/app/routers/hackernews.py new file mode 100644 index 0000000..bbd432c --- /dev/null +++ b/app/routers/hackernews.py @@ -0,0 +1,101 @@ +import traceback +import json +import requests +import traceback +import json +import feedparser +import requests +import logging + +from datetime import datetime +from time import mktime +from typing import Annotated +from fastapi import Depends, APIRouter + +from settings.defaults import Settings, get_settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/hn", summary="Hacker News RSS") +async def update(settings: Annotated[Settings, Depends(get_settings)]): + + feed_url = settings.feeds['hn']['url'] + + mastodon_server = settings.mastodon_server + mastodon_aid = settings.feeds['hn']['account_id'] + mastodon_token = str(settings.feeds['hn']['token']) + mastodon_get_statuses_url=f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses' + mastodon_post_statuses_url=f'{mastodon_server}/api/v1/statuses' + + try: + last_status_timestamp=datetime.fromisoformat(load_last_status(mastodon_get_statuses_url, mastodon_token)['created_at']) + new_entries=load_feed_rss(feed_url, last_status_timestamp) + logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}') + + if (len(new_entries) == 0): + return { + "status": 200, + "body": { + "posted_entries": 0, + "successful": True + } + } + + posted_entries=list(map(lambda x: post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, x), new_entries)) + + return { + "status": 200, + "body": { + "posted_entries": len(posted_entries), + "successful": True + } + } + + except Exception: + return { + "status": 501, + "body": { + "posted_entries": 0, + "message": ''.join(traceback.format_exception_only()), + "successful": False + } + } + + +def load_last_status(url, token): + response=requests.get(url + '?limit=1', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + return json.loads(response.text)[0] + + +def post_rss_entry_to_mastodon(url, token, entry): + + title = entry.title + link = entry.link + message = f"{title}\n\n{link}" + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-type': 'application/x-www-form-urlencoded', + 'User-Agent': 'Serverless Feed' + } + + params = { + 'status': message, + 'language': 'en', + 'visibility': 'public' + } + + logger.info('posting to', url) + + response = requests.post(url, data=params, headers=headers) + if response.status_code != 200: + logger.error('Failed to post message', response) + return response + + +def load_feed_rss(url, since): + feed=feedparser.parse(url) + return [entry for entry in feed.entries if datetime.fromtimestamp(mktime(entry.published_parsed)) > since.replace(tzinfo=datetime.fromtimestamp(mktime(entry.published_parsed)).tzinfo)] diff --git a/app/routers/sur.py b/app/routers/sur.py new file mode 100644 index 0000000..baad1d4 --- /dev/null +++ b/app/routers/sur.py @@ -0,0 +1,118 @@ +import traceback +import json +import requests +import traceback +import json +import feedparser +import requests +import logging + +from datetime import datetime +from time import mktime +from typing import Annotated +from fastapi import Depends, APIRouter +from bs4 import BeautifulSoup + +from settings.defaults import Settings, get_settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/sur", summary="Sur RSS") +async def update(settings: Annotated[Settings, Depends(get_settings)]): + + feed_url = settings.feeds['sur']['url'] + + mastodon_server = settings.mastodon_server + mastodon_aid = settings.feeds['sur']['account_id'] + mastodon_token = str(settings.feeds['sur']['token']) + mastodon_get_statuses_url=f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses' + mastodon_post_statuses_url=f'{mastodon_server}/api/v1/statuses' + + try: + last_status_timestamp=datetime.fromisoformat(load_last_status(mastodon_get_statuses_url, mastodon_token)['created_at']) + new_entries=load_feed_rss(feed_url, last_status_timestamp) + logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}') + + if (len(new_entries) == 0): + return { + "status": 200, + "body": { + "posted_entries": 0, + "successful": True + } + } + + posted_entries=list(map(lambda x: post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, x), new_entries)) + + return { + "status": 200, + "body": { + "posted_entries": len(posted_entries), + "successful": True + } + } + except Exception as e: + msg = ''.join(traceback.format_exception_only(e)) + logger.error(msg) + return { + "status": 501, + "body": { + "posted_entries": 0, + "message": msg, + "successful": False + } + } + + +def load_last_status(url, token): + response=requests.get(url + '?limit=1', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + return json.loads(response.text)[0] + + +def post_rss_entry_to_mastodon(url, token, entry): + + title = entry.title + link = entry.link + description = BeautifulSoup(entry.summary, features="html.parser") + + if 'tags' in entry: + categories = [t.get('term') for t in entry.tags] + categories = map(lambda str: re.sub(r'\s+','', str), categories) + categories = map(lambda str: re.sub(r'[0-9.\–\-()]+','', str), categories) + categories = map(lambda str: str.capitalize(), categories) + + categories = [str for str in categories if len(str) >= 3] + if len(categories) > 0: + categories = map(lambda str: str.capitalize(), categories) + categories = map(lambda str: str if str.startswith('#') else f'#{str}', categories) + categories = ' '.join(categories) + message = f"{title}\n\n{description.get_text()}\n\n{link}\n\n{categories}" + else: + message = f"{title}\n\n{description.get_text()}\n\n{link}" + else: + message = f"{title}\n\n{description.get_text()}\n\n{link}" + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-type': 'application/x-www-form-urlencoded', + 'User-Agent': 'Serverless Feed' + } + + params = { + 'status': message, + 'language': 'en', + 'visibility': 'public' + } + + response = requests.post(url, data=params, headers=headers) + if response.status_code != 200: + logger.error('Failed to post message', response) + return response + + +def load_feed_rss(url, since): + feed=feedparser.parse(url) + return [entry for entry in feed.entries if datetime.fromtimestamp(mktime(entry.published_parsed)) > since.replace(tzinfo=datetime.fromtimestamp(mktime(entry.published_parsed)).tzinfo)] diff --git a/app/routers/taloustaito.py b/app/routers/taloustaito.py new file mode 100644 index 0000000..c27cc46 --- /dev/null +++ b/app/routers/taloustaito.py @@ -0,0 +1,113 @@ +import traceback +import json +import requests +import traceback +import json +import feedparser +import requests +import logging + +from datetime import datetime +from time import mktime +from typing import Annotated +from fastapi import Depends, APIRouter +from bs4 import BeautifulSoup + +from settings.defaults import Settings, get_settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/taloustaito", summary="Taloustaito RSS") +async def update(settings: Annotated[Settings, Depends(get_settings)]): + + feed_url = settings.feeds['taloustaito']['url'] + + mastodon_server = settings.mastodon_server + mastodon_aid = settings.feeds['taloustaito']['account_id'] + mastodon_token = str(settings.feeds['taloustaito']['token']) + mastodon_get_statuses_url=f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses' + mastodon_post_statuses_url=f'{mastodon_server}/api/v1/statuses' + + try: + last_status = load_last_status(mastodon_get_statuses_url, mastodon_token) + if last_status: + last_status_timestamp=datetime.fromisoformat(last_status['created_at']) + new_entries=load_feed_rss(feed_url, last_status_timestamp) + logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}') + else: + new_entries=load_feed_rss(feed_url, None) + new_entries.sort(key=lambda e: e['published_parsed'], reverse=True) + new_entries = new_entries[:3] + logger.info(f'Found {len(new_entries)} entries') + + if (len(new_entries) == 0): + return { + "status": 200, + "body": { + "posted_entries": 0, + "successful": True + } + } + + posted_entries=list(map(lambda x: post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, x), new_entries)) + + return { + "status": 200, + "body": { + "posted_entries": len(posted_entries), + "successful": True + } + } + except Exception as e: + msg = ''.join(traceback.format_exception_only(e)) + logger.error(msg) + return { + "status": 501, + "body": { + "posted_entries": 0, + "message": msg, + "successful": False + } + } + + +def load_last_status(url, token): + response=requests.get(url + '?limit=1', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + + document = json.loads(response.text) + if len(document) > 0: + return document[0] + else: + return None + +def post_rss_entry_to_mastodon(url, token, entry): + + title = entry.title + description = BeautifulSoup(entry.summary, features="html.parser").get_text() + link = entry.link + message = f"{title}\n\n{description}\n\n{link}" + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-type': 'application/x-www-form-urlencoded', + 'User-Agent': 'Serverless Feed' + } + + params = { + 'status': message, + 'language': 'fi', + 'visibility': 'public' + } + + response = requests.post(url, data=params, headers=headers) + if response.status_code != 200: + logger.error('Failed to post message', response) + return response + + +def load_feed_rss(url, since): + feed=feedparser.parse(url) + return [entry for entry in feed.entries if since == None or datetime.fromtimestamp(mktime(entry.published_parsed)) > since.replace(tzinfo=datetime.fromtimestamp(mktime(entry.published_parsed)).tzinfo)] diff --git a/app/routers/the_local.py b/app/routers/the_local.py new file mode 100644 index 0000000..a89bd96 --- /dev/null +++ b/app/routers/the_local.py @@ -0,0 +1,118 @@ +import traceback +import json +import requests +import traceback +import json +import re +import feedparser +import requests +import logging + +from datetime import datetime +from time import mktime +from typing import Annotated +from fastapi import Depends, APIRouter + +from settings.defaults import Settings, get_settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/the_local", summary="The Local RSS") +async def update(settings: Annotated[Settings, Depends(get_settings)]): + + feed_url = settings.feeds['the_local']['url'] + + mastodon_server = settings.mastodon_server + mastodon_aid = settings.feeds['the_local']['account_id'] + mastodon_token = str(settings.feeds['the_local']['token']) + mastodon_get_statuses_url=f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses' + mastodon_post_statuses_url=f'{mastodon_server}/api/v1/statuses' + + try: + last_status_timestamp=datetime.fromisoformat(load_last_status(mastodon_get_statuses_url, mastodon_token)['created_at']) + new_entries=load_feed_rss(feed_url, last_status_timestamp) + logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}') + + if (len(new_entries) == 0): + return { + "status": 200, + "body": { + "posted_entries": 0, + "successful": True + } + } + + posted_entries=list(map(lambda x: post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, x), new_entries)) + + return { + "status": 200, + "body": { + "posted_entries": len(posted_entries), + "successful": True + } + } + + except Exception: + return { + "status": 501, + "body": { + "posted_entries": 0, + "message": ''.join(traceback.format_exception_only()), + "successful": False + } + } + +def load_last_status(url, token): + response=requests.get(url + '?limit=1', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + return json.loads(response.text)[0] + + +def post_rss_entry_to_mastodon(url, token, entry): + + title = entry.title + description = entry.summary + link = entry.link + + if 'tags' in entry: + categories = [t.get('term') for t in entry.tags] + categories = map(lambda str: re.sub(r'\s+','', str), categories) + categories = map(lambda str: re.sub(r'[0-9.\–\-()]+','', str), categories) + categories = map(lambda str: str.capitalize(), categories) + + categories = [str for str in categories if len(str) >= 3] + if len(categories) > 0: + categories = map(lambda str: str.capitalize(), categories) + categories = map(lambda str: str if str.startswith('#') else f'#{str}', categories) + categories = ' '.join(categories) + message = f"{title}\n\n{description}\n\n{link}\n\n{categories}" + else: + message = f"{title}\n\n{description}\n\n{link}" + else: + message = f"{title}\n\n{description}\n\n{link}" + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-type': 'application/x-www-form-urlencoded', + 'User-Agent': 'Serverless Feed' + } + + params = { + 'status': message, + 'language': 'en', + 'visibility': 'public' + } + + logger.info('posting to', url) + + response = requests.post(url, data=params, headers=headers) + if response.status_code != 200: + logger.error('Failed to post message', response) + return response + + +def load_feed_rss(url, since): + feed=feedparser.parse(url) + return [entry for entry in feed.entries if datetime.fromtimestamp(mktime(entry.published_parsed)) > since.replace(tzinfo=datetime.fromtimestamp(mktime(entry.published_parsed)).tzinfo)] diff --git a/app/routers/yle_rss_en.py b/app/routers/yle_rss_en.py new file mode 100644 index 0000000..cd9933c --- /dev/null +++ b/app/routers/yle_rss_en.py @@ -0,0 +1,156 @@ +import traceback +import json +import requests +import traceback +import json +import re +import feedparser +import requests +import logging + +from datetime import datetime +from time import mktime +from typing import Annotated +from fastapi import Depends, APIRouter +from openai import OpenAI + +from settings.defaults import Settings, get_settings +from ai.prompts import GENERATE_TAGS_PROMPT, TRANSLATE_ML_PROMPT + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/yle_en", summary="Yle.fi News RSS") +async def update(settings: Annotated[Settings, Depends(get_settings)]): + + feed_url = settings.feeds['yle_en']['url'] + + mastodon_server = settings.mastodon_server + mastodon_aid = settings.feeds['yle_en']['account_id'] + mastodon_token = str(settings.feeds['yle_en']['token']) + mastodon_get_statuses_url=f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses' + mastodon_post_statuses_url=f'{mastodon_server}/api/v1/statuses' + + try: + last_status_timestamp=datetime.fromisoformat(load_last_status(mastodon_get_statuses_url, mastodon_token)['created_at']) + new_entries=load_feed_rss(feed_url, last_status_timestamp) + logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}') + + if (len(new_entries) == 0): + return { + "status": 200, + "body": { + "posted_entries": 0, + "successful": True + } + } + + posted_entries=list(map(lambda x: post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, x, settings), new_entries)) + + return { + "status": 200, + "body": { + "posted_entries": len(posted_entries), + "successful": True + } + } + except Exception as e: + msg = ''.join(traceback.format_exception_only(e)) + logger.error(msg) + return { + "status": 501, + "body": { + "posted_entries": 0, + "message": msg, + "successful": False + } + } + +def load_last_status(url, token): + response=requests.get(url + '?limit=1', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + return json.loads(response.text)[0] + + +def post_rss_entry_to_mastodon(url:str, token:str, entry, settings:Settings): + + title = entry.title + description = entry.summary + + linkEnd = entry.link.find('?') + if linkEnd > -1: + link = entry.link[0:linkEnd] + else: + link = entry.link + + + if 'tags' in entry: + categories = [t.get('term') for t in entry.tags] + categories = sanitize_tags(categories) + categories = [str for str in categories if len(str) >= 3] + if len(categories) > 3: + tags = translate_tags(categories, settings) + message = f"{title}\n\n{description}\n\n{link}\n\n{tags}" + else: + tags = generate_tags(description, 5, settings) + message = f"{title}\n\n{description}\n\n{link}\n\n{tags}" + else: + tags = generate_tags(description, 5, settings) + message = f"{title}\n\n{description}\n\n{link}\n\n{tags}" + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-type': 'application/x-www-form-urlencoded', + 'User-Agent': 'Serverless Feed' + } + + params = { + 'status': message, + 'language': 'en', + 'visibility': 'public' + } + + response = requests.post(url, data=params, headers=headers) + if response.status_code != 200: + print('Failed to post message', response) + return response + + +def load_feed_rss(url, since): + feed=feedparser.parse(url) + return [entry for entry in feed.entries if datetime.fromtimestamp(mktime(entry.published_parsed)) > since.replace(tzinfo=datetime.fromtimestamp(mktime(entry.published_parsed)).tzinfo)] + + +def generate_tags(text:str, num_tags:int, settings:Settings): + try: + client = OpenAI(api_key=settings.openai_api_key) + result = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": GENERATE_TAGS_PROMPT % (num_tags, text) }] + ) + return result.choices[0].message.content + except Exception as e: + logger.error('Failed to generate tags', e) + return '' + +def translate_tags(tags:list, settings:Settings): + try: + client = OpenAI(api_key=settings.openai_api_key) + result = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": TRANSLATE_ML_PROMPT % (" ".join(tags)) }] + ) + return result.choices[0].message.content + except Exception as e: + logger.error('Failed to translate tags', e) + return '' + +def sanitize_tags(categories): + categories = [part for item in categories for part in item.split('=')] + categories = map(lambda str: re.sub(r'\s+','', str), categories) + categories = map(lambda str: re.sub(r'[0-9.\–\-()<>{}#]+','', str), categories) + categories = map(lambda str: re.sub('&','And', str), categories) + categories = map(lambda str: str.capitalize(), categories) + categories = map(lambda str: f'#{str}', categories) + return categories diff --git a/app/routers/yle_rss_fi.py b/app/routers/yle_rss_fi.py new file mode 100644 index 0000000..3372612 --- /dev/null +++ b/app/routers/yle_rss_fi.py @@ -0,0 +1,135 @@ +import traceback +import json +import requests +import traceback +import json +import re +import feedparser +import requests +import logging + +from datetime import datetime +from time import mktime +from typing import Annotated +from fastapi import Depends, APIRouter + +from settings.defaults import Settings, get_settings + +router = APIRouter() +logger = logging.getLogger(__name__) + +@router.get("/yle_fi", summary="Yle.fi Uutiset RSS") +async def update(settings: Annotated[Settings, Depends(get_settings)]): + + feed_url = settings.feeds['yle_fi']['url'] + + mastodon_server = settings.mastodon_server + mastodon_aid = settings.feeds['yle_fi']['account_id'] + mastodon_token = settings.feeds['yle_fi']['token'] + mastodon_get_statuses_url=f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses' + mastodon_post_statuses_url=f'{mastodon_server}/api/v1/statuses' + + try: + last_status_timestamp=datetime.fromisoformat(load_last_status(mastodon_get_statuses_url, mastodon_token)['created_at']) + new_entries=load_feed_rss(feed_url, last_status_timestamp) + logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}') + + if (len(new_entries) == 0): + return { + "status": 200, + "body": { + "posted_entries": 0, + "successful": True + } + } + + posted_entries=list(map(lambda x: post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, x), new_entries)) + + return { + "status": 200, + "body": { + "posted_entries": len(posted_entries), + "successful": True + } + } + except Exception as e: + msg = ''.join(traceback.format_exception_only(e)) + logger.error(msg) + return { + "status": 501, + "body": { + "posted_entries": 0, + "message": msg, + "successful": False + } + } + +def split(arr, char): + return [tag for subtags in (map(lambda str: str.split(char), arr)) for tag in subtags] + +def capitalize(arr, char): + result = map(lambda str: str.split(char), arr) + result = map(lambda subtag: map(lambda str: str.capitalize(), subtag), result) + result = map(lambda subtag: ''.join(subtag), result) + return result + +def load_last_status(url, token): + response=requests.get(url + '?limit=1', headers={ 'Authorization' : f'Bearer {token}' }) + if response.status_code != 200: + raise Exception('Failed to contact Mastodon', response.text) + return json.loads(response.text)[0] + +def post_rss_entry_to_mastodon(url, token, entry): + + title = entry.title + description = entry.summary + link = entry.link + + linkEnd = entry.link.find('?') + if linkEnd > -1: + link = entry.link[0:linkEnd] + else: + link = entry.link + + if 'tags' in entry: + categories = [t.get('term') for t in entry.tags] + categories = split(categories, ',') + categories = capitalize(categories, ' ') + categories = capitalize(categories, '–') + categories = capitalize(categories, '-') + categories = capitalize(categories, '/') + categories = capitalize(categories, '\\') + categories = map(lambda str: re.sub(r'\s+','', str), categories) + categories = map(lambda str: re.sub(r'[0-9.()]+','', str), categories) + categories = map(lambda str: re.sub('&','Ja', str), categories) + + categories = [str for str in categories if len(str) >= 3] + if len(categories) > 0: + categories = map(lambda str: str if str.startswith('#') else f'#{str}', categories) + categories = ' '.join(categories) + message = f"{title}\n\n{description}\n\n{link}\n\n{categories}" + else: + message = f"{title}\n\n{description}\n\n{link}" + else: + message = f"{title}\n\n{description}\n\n{link}" + + headers = { + 'Authorization': f'Bearer {token}', + 'Content-type': 'application/x-www-form-urlencoded', + 'User-Agent': 'Serverless Feed' + } + + params = { + 'status': message, + 'language': 'fi', + 'visibility': 'public' + } + + response = requests.post(url, data=params, headers=headers) + if response.status_code != 200: + logger.error('Failed to post message', response) + return response + +def load_feed_rss(url, since): + feed=feedparser.parse(url) + return [entry for entry in feed.entries if datetime.fromtimestamp(mktime(entry.published_parsed)) > since.replace(tzinfo=datetime.fromtimestamp(mktime(entry.published_parsed)).tzinfo)] diff --git a/app/settings/__init__py b/app/settings/__init__py new file mode 100644 index 0000000..e69de29 diff --git a/app/settings/defaults.py b/app/settings/defaults.py new file mode 100644 index 0000000..6115767 --- /dev/null +++ b/app/settings/defaults.py @@ -0,0 +1,13 @@ +from pydantic_settings import BaseSettings, SettingsConfigDict +from functools import lru_cache + +class Settings(BaseSettings): + mastodon_server:str + openai_api_key:str + feeds: dict[str, dict[str,object]] + model_config = SettingsConfigDict(env_file=".env", env_nested_delimiter='__', arbitrary_types_allowed=True) + version:str + +@lru_cache +def get_settings(): + return Settings() # type: ignore diff --git a/compose.yml b/compose.yml new file mode 100644 index 0000000..7658172 --- /dev/null +++ b/compose.yml @@ -0,0 +1,20 @@ +services: + mastobot: + build: + context: . + args: + - version=local + container_name: mastobot + env_file: ./.env + environment: + - UVICORN_RELOAD=True + ports: + - 8000:8000 + develop: + watch: + - action: sync + path: ./app + target: /code/app + - action: rebuild + path: ./requirements.txt + diff --git a/log_config.yml b/log_config.yml new file mode 100644 index 0000000..570ac1b --- /dev/null +++ b/log_config.yml @@ -0,0 +1,34 @@ +version: 1 +disable_existing_loggers: False +formatters: + default: + # "()": uvicorn.logging.DefaultFormatter + format: '%(asctime)s %(levelname)s %(message)s' + access: + # "()": uvicorn.logging.AccessFormatter + format: '%(asctime)s %(levelname)s %(message)s' +handlers: + default: + formatter: default + class: logging.StreamHandler + stream: ext://sys.stderr + access: + formatter: access + class: logging.StreamHandler + stream: ext://sys.stdout +loggers: + uvicorn.error: + level: INFO + handlers: + - default + propagate: no + uvicorn.access: + level: INFO + handlers: + - access + propagate: no +root: + level: INFO + handlers: + - default + propagate: no \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..ca6cfa6 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +fastapi[standard]==0.113.0 +pydantic==2.8.0 +pydantic-settings==2.12.0 +requests==2.32.3 +boto3==1.36.21 +yattag==1.16.1 +timeago==1.0.16 +certifi==2024.12.14 +charset-normalizer==3.4.1 +feedparser==6.0.11 +idna==3.10 +sgmllib3k==1.0.0 +urllib3==2.3.0 +openai==1.60.2 +beautifulsoup4==4.12.3 \ No newline at end of file