Initial version of app
Some checks failed
Build & Release / build-docker-image (push) Failing after 4m15s
Build & Release / deploy-to-production (push) Has been skipped

This commit is contained in:
2026-02-13 18:18:14 +01:00
commit ba97013c27
22 changed files with 1356 additions and 0 deletions

0
app/ai/__init__py Normal file
View File

2
app/ai/prompts.py Normal file
View File

@@ -0,0 +1,2 @@
# Prompt templates for the OpenAI chat calls. Callers fill them in with the
# printf-style ``%`` operator.

# Expects one ``%s``: the space-separated hashtags to translate.
TRANSLATE_ML_PROMPT = (
    "Translate the following hashtags and delimit the output with spaces. "
    "Convert word to camel case. "
    "Do not include the original tag. "
    "Prefix each tag with the # character.: %s"
)

# Expects ``%d`` (number of hashtags wanted) followed by ``%s`` (source text).
GENERATE_TAGS_PROMPT = (
    "Generate %d hashtags from the following text "
    "and delimit them with space: %s"
)

24
app/main.py Normal file
View File

@@ -0,0 +1,24 @@
from fastapi import Depends, FastAPI
from routers import embed, yle_rss_fi, yle_rss_en, the_local, taloustaito,sur,hackernews,fuengirola
from settings.defaults import get_settings
# Application object; the advertised version comes from the cached settings.
app = FastAPI(
    title='Mastobot',
    description='Mastodon Feed Automation Service',
    version=get_settings().version,
)

# The embed generator gets its own prefix and tag.
app.include_router(embed.router, prefix="/embed", tags=["embed"])

# Every feed module shares the /rss prefix; registration order is unchanged.
for _feed_module in (
    yle_rss_fi,
    yle_rss_en,
    the_local,
    taloustaito,
    sur,
    hackernews,
    fuengirola,
):
    app.include_router(_feed_module.router, prefix="/rss", tags=["rss"])
@app.get("/")
def read_root():
    """Return the fixed welcome payload served at the service root."""
    greeting = "Welcome to Mastobot!"
    return {"message": greeting}
@app.get("/health")
def health_check():
    """Return a constant "healthy" status payload."""
    return dict(status="healthy")

191
app/resources/embed.css Normal file
View File

@@ -0,0 +1,191 @@
/* Stylesheet for the embeddable Mastodon feed page.
   It is read and inlined into a <style> element by convertToHTML in
   app/routers/embed.py. */

/* Page-wide colours and typography */
html,
body {
background-color: #282c37;
font-family: 'Roboto', roboto, Arial, sans-serif;
color: #ffffff;
font-weight: lighter;
overflow-x: hidden;
font-size: 80%;
word-break: break-word;
}
/* Reset default spacing on every element */
* {
margin: 0;
padding: 0;
}
/* Links and anything nested inside them */
a,
a * {
color: #2b90d9;
}
/* Hide scrollbars (WebKit-prefixed pseudo-element) */
::-webkit-scrollbar {
display: none;
}
.meta {
background-color: #39404d;
}
/* Header banner: a left column holding an avatar and a growing right column */
.header {
display: flex;
background-size: cover;
min-height: 8rem;
color: #ffffff;
}
.header .header-left,
.header .header-right {
margin: 0;
}
.header .header-left {
min-width: 8rem;
position: relative;
text-align: center;
background: rgba(40, 44, 55, 0.3);
}
/* Vertically centre the 6rem avatar: offset by half its own height */
.header .header-left .avatar {
width: 6rem;
height: 6rem;
position: relative;
top: calc(50% - 3rem);
}
.header .header-right {
flex-grow: 1;
font-size: 0.9rem;
padding: 0.9rem;
background: rgba(40, 44, 55, 0.85);
}
.header .header-title {
font-size: 1.3rem;
}
/* One feed entry, separated from the previous one by a top border */
.item {
padding: 1rem;
border-top: solid 1px #626d80;
}
.item-content,
.cw,
.title {
font-size: 1.1rem;
font-weight: lighter;
}
.item-content *,
.cw {
margin: 1rem 0;
line-height: 1.4rem;
}
/* Secondary, muted text */
.item-title,
.date,
.author-fullname {
color: #9baec8;
font-size: 0.9rem;
}
.date {
margin: 1rem 0 0 0;
text-decoration: none;
display: block;
}
.date:hover {
text-decoration: underline;
}
.item-title {
margin-bottom: 0.7rem;
}
/* Author row: avatar next to a column with display name and handle */
.author {
display: flex;
margin-bottom: 1rem;
}
.author-info {
margin: 0 1rem;
display: flex;
flex-direction: column;
justify-content: space-around;
}
.author-info .author-displayname {
font-size: 1.2rem;
color: #ffffff;
text-decoration: none;
display: block;
font-weight: bolder;
}
/* Default avatar size; .circular makes it fully round */
.avatar {
width: 3rem;
height: 3rem;
border: none;
border-radius: 10%;
}
.avatar.circular {
border-radius: 100%;
}
/* Media attachments */
.enclosures {
padding: 0.5em 0;
display: flex;
flex-wrap: wrap;
flex-direction: row;
overflow: hidden;
}
.enclosure {
display: flex;
border: none;
max-height: 12rem;
}
a.enclosure {
cursor: zoom-in;
}
/* Direct children fill the enclosure and are cropped rather than distorted */
.enclosure > * {
flex: 1 1 auto;
width: 100%;
max-height: 100%;
object-fit: cover;
}
.meta .title {
font-weight: bold;
}
/* Utility class */
.hidden {
display: none;
}
/* Buttons; label.button is a smaller, muted variant */
.button {
padding: 0.5rem 1rem;
border: none;
margin: 1rem;
border-radius: 0.5rem;
display: inline-block;
text-decoration: none;
background: #2b90d9;
color: #ffffff;
font-weight: 400;
cursor: pointer;
text-transform: uppercase;
user-select: none;
}
label.button {
padding: 0.25rem 0.5rem;
margin: 0.4rem;
background: #626d80;
color: #282c37;
font-size: 0.8rem;
}

0
app/routers/__init__.py Normal file
View File

97
app/routers/embed.py Normal file
View File

@@ -0,0 +1,97 @@
import traceback
import json
import requests
import timeago
import boto3
import logging
from datetime import datetime
from pathlib import Path
from yattag import Doc, indent
from typing import Annotated
from fastapi import Depends, APIRouter
from settings.defaults import Settings, get_settings
# Router carrying this module's endpoint; app/main.py mounts it under "/embed".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/generate", summary="Embeddable Mastodon Feed")
async def generate_static_page(settings: Annotated[Settings, Depends(get_settings)]):
    """Render the latest public English statuses to HTML and upload the page to S3.

    Reads ``token``, ``s3_bucket``, ``s3_key`` and ``url`` from the ``embed``
    entry of ``settings.feeds``, fetches up to 20 statuses, keeps only
    original posts (no replies, no boosts) that are public and English,
    renders them with ``convertToHTML`` and stores the result with
    ``uploadToAmazonS3``.

    Returns:
        A dict with ``status`` and ``body``. Failures inside the pipeline are
        logged and reported in the payload (``status`` 501) instead of being
        raised.
    """
    embed_config = settings.feeds['embed']
    mastodon_token = embed_config['token']
    s3_bucket = embed_config['s3_bucket']
    s3_filename = embed_config['s3_key']
    mastodon_get_statuses_url = embed_config['url']
    try:
        # One pass instead of five; ``is None`` is the identity test for a
        # JSON null and cannot be fooled by a custom ``__eq__``.
        latest_statuses = [
            status
            for status in load_latest_statuses(mastodon_get_statuses_url, mastodon_token, 20)
            if status['in_reply_to_id'] is None
            and status['in_reply_to_account_id'] is None
            and status['reblog'] is None
            and status['visibility'] == 'public'
            and status['language'] == 'en'
        ]
        html = convertToHTML(latest_statuses)
        uploadToAmazonS3(s3_bucket, s3_filename, html)
        return {
            "status": 200,
            "body": {
                "successful": True
            }
        }
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {
                "message": msg,
                "successful": False
            }
        }
def load_latest_statuses(url, token, limit):
    """Fetch up to *limit* statuses from the Mastodon endpoint *url*.

    The request is authorised with *token* as a bearer token. Returns the
    decoded JSON body; raises ``Exception`` (carrying the response text) when
    the response status is not 200.
    """
    auth_headers = {'Authorization': f'Bearer {token}'}
    reply = requests.get(f'{url}?limit={limit}', headers=auth_headers)
    if reply.status_code == 200:
        return json.loads(reply.text)
    raise Exception('Failed to contact Mastodon', reply.text)
# Render the given statuses as a standalone HTML page with the stylesheet
# inlined, and return the page as an indented string (yattag's indent()).
# Each status is expected to provide 'account' (url, avatar, display_name,
# username), 'content', 'media_attachments', 'uri' and 'created_at'.
def convertToHTML(statuses):
# The stylesheet lives in the sibling resources directory.
css_file = Path(__file__).parent / '../resources/embed.css'
with css_file.open('r') as css:
doc, tag, text = Doc().tagtext()
with tag('html'):
with tag('head'):
doc.stag('meta', charset='UTF-8')
# NOTE(review): "charset" is not a <base> attribute and "_top" is a link
# target value; presumably target='_top' was intended. Confirm.
doc.stag('base', charset='_top')
with tag('style'):
# The CSS is written verbatim (asis), without HTML escaping.
doc.asis(css.read())
with tag('body'):
with tag('div', klass='container'):
for status in statuses:
with tag('div', klass='item'):
with tag('div', klass='author'):
with tag('a', target='_top', klass='avatar', href=status['account']['url']):
doc.stag('img', klass='avatar', src=status['account']['avatar'])
with tag('div', klass='author-info'):
with tag('a', target='_top', klass='author-displayname', href=status['account']['url']):
text(status['account']['display_name'])
with tag('div', klass='author-fullname'):
# NOTE(review): the instance domain is hard-coded here.
text(status['account']['username'], '@', 'ahlroos.me')
with tag('div', klass='item-content'):
# Status HTML is inserted without escaping; this assumes the Mastodon API
# returns already-sanitised markup. TODO confirm.
doc.asis(status['content'])
for attachment in status['media_attachments']:
with tag('a', target='_top', klass='enclosure', href=attachment['url']):
# Missing preview URL or description falls back to an empty string.
doc.stag('img', src=attachment['preview_url'] or '', alt=attachment['description'] or '', title=attachment['description'] or '')
with tag('a', target='_top', klass='date', href=status['uri']):
# NOTE(review): the timestamp's tzinfo is dropped and it is compared with the
# local-time datetime.now(); the relative time looks off by the server's UTC
# offset unless the server runs in UTC. Confirm.
tst = datetime.fromisoformat(status['created_at']).replace(tzinfo=None)
text(timeago.format(tst, datetime.now()))
return indent(doc.getvalue())
def uploadToAmazonS3(bucket, key, content):
    """Store *content* at *bucket*/*key* as a public-read ``text/html`` object."""
    target = boto3.resource('s3').Object(bucket, key)  # type: ignore
    target.put(Body=content, ACL='public-read', ContentType='text/html')

123
app/routers/fuengirola.py Normal file
View File

@@ -0,0 +1,123 @@
import traceback
import json
import requests
import traceback
import json
import feedparser
import requests
import logging
import re
from datetime import datetime
from time import mktime
from typing import Annotated
from fastapi import Depends, APIRouter
from bs4 import BeautifulSoup
from settings.defaults import Settings, get_settings
# Router carrying this module's endpoint; app/main.py mounts it under "/rss".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/fuengirola", summary="Fuengirola.fi RSS")
async def update(settings: Annotated[Settings, Depends(get_settings)]):
    """Post Fuengirola.fi feed entries newer than the account's latest status.

    Reads the ``fuengirola`` feed configuration and the Mastodon server from
    *settings*. The outcome is reported in the returned dict (``status`` and
    ``body``); exceptions are logged and reported with status 501 rather than
    raised.
    """
    feed_config = settings.feeds['fuengirola']
    feed_url = feed_config['url']
    server = settings.mastodon_server
    account_id = feed_config['account_id']
    access_token = str(feed_config['token'])
    statuses_endpoint = f'{server}/api/v1/accounts/{account_id}/statuses'
    post_endpoint = f'{server}/api/v1/statuses'
    try:
        newest_status = load_last_status(statuses_endpoint, access_token)
        last_status_timestamp = datetime.fromisoformat(newest_status['created_at'])
        new_entries = load_feed_rss(feed_url, last_status_timestamp)
        logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}')
        posted_count = 0
        if len(new_entries) != 0:
            # Every attempted post is counted, whatever its response status.
            responses = [
                post_rss_entry_to_mastodon(post_endpoint, access_token, item)
                for item in new_entries
            ]
            posted_count = len(responses)
        return {
            "status": 200,
            "body": {"posted_entries": posted_count, "successful": True},
        }
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {"posted_entries": 0, "message": msg, "successful": False},
        }
def load_last_status(url, token):
    """Return the first status from the account statuses endpoint *url*.

    Requests a single status (``?limit=1``) using *token* as bearer token.
    Raises ``Exception`` with the response text when the status code is not
    200.
    """
    auth_headers = {'Authorization': f'Bearer {token}'}
    reply = requests.get(url + '?limit=1', headers=auth_headers)
    if reply.status_code == 200:
        statuses = json.loads(reply.text)
        return statuses[0]
    raise Exception('Failed to contact Mastodon', reply.text)
def post_rss_entry_to_mastodon(url, token, entry):
    """Post one feed *entry* as a public Finnish-language status.

    The status text is the entry title, its summary reduced to plain text,
    the link and, when the entry carries usable tags, a line of hashtags.

    Args:
        url: Mastodon status-creation endpoint.
        token: Bearer token used for the request.
        entry: Feed entry exposing ``title``, ``link``, ``summary`` and
            optionally ``tags``.

    Returns:
        The ``requests`` response. A non-200 response is logged, not raised.
    """
    summary_text = BeautifulSoup(entry.summary, features="html.parser").get_text()
    # Drop sentences containing "first appeared" and any fragment that is
    # exactly "fi" after splitting the text on '.'.
    sentences = [
        sentence
        for sentence in summary_text.split('.')
        if 'first appeared' not in sentence and sentence != 'fi'
    ]
    description = '.'.join(sentences)
    message = f"{entry.title}\n\n{description}\n\n{entry.link}"

    if 'tags' in entry:
        hashtags = []
        for tag in entry.tags:
            term = re.sub(r'\s+', '', tag.get('term') or '')
            # The original class r'[0-9.\\-()]' parsed as the range "\"-"(",
            # which is invalid and raised re.error for every tagged entry.
            # Backslash and hyphen are now both escaped, literal members.
            term = re.sub(r'[0-9.\\\-()]+', '', term).capitalize()
            if len(term) >= 3:
                hashtags.append(term if term.startswith('#') else f'#{term}')
        if hashtags:
            message = f"{message}\n\n{' '.join(hashtags)}"

    headers = {
        'Authorization': f'Bearer {token}',
        'Content-type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Serverless Feed'
    }
    params = {
        'status': message,
        'language': 'fi',
        'visibility': 'public'
    }
    response = requests.post(url, data=params, headers=headers)
    if response.status_code != 200:
        # Use the module logger (as the sibling routers do) instead of print.
        logger.error('Failed to post message: %s %s', response.status_code, response.text)
    return response
def load_feed_rss(url, since):
    """Return the entries of the feed at *url* published after *since*.

    feedparser documents ``published_parsed`` as a UTC time tuple, so it is
    converted with ``calendar.timegm`` into a timezone-aware UTC datetime.
    The previous ``time.mktime`` round trip treated the tuple as local time,
    which can skew the comparison by the DST offset when the server's zone
    observes daylight saving time.

    Args:
        url: Feed URL handed to ``feedparser.parse``.
        since: Datetime of the last posted status. A naive value is taken to
            be UTC.
    """
    import calendar
    from datetime import timezone

    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)
    feed = feedparser.parse(url)
    return [
        entry
        for entry in feed.entries
        if datetime.fromtimestamp(calendar.timegm(entry.published_parsed), tz=timezone.utc) > since
    ]

101
app/routers/hackernews.py Normal file
View File

@@ -0,0 +1,101 @@
import traceback
import json
import requests
import traceback
import json
import feedparser
import requests
import logging
from datetime import datetime
from time import mktime
from typing import Annotated
from fastapi import Depends, APIRouter
from settings.defaults import Settings, get_settings
# Router carrying this module's endpoint; app/main.py mounts it under "/rss".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/hn", summary="Hacker News RSS")
async def update(settings: Annotated[Settings, Depends(get_settings)]):
    """Post Hacker News feed entries newer than the account's latest status.

    Reads the ``hn`` feed configuration and the Mastodon server from
    *settings*. The outcome is reported in the returned dict (``status`` and
    ``body``); exceptions are logged and reported with status 501 rather than
    raised.
    """
    feed_url = settings.feeds['hn']['url']
    mastodon_server = settings.mastodon_server
    mastodon_aid = settings.feeds['hn']['account_id']
    mastodon_token = str(settings.feeds['hn']['token'])
    mastodon_get_statuses_url = f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses'
    mastodon_post_statuses_url = f'{mastodon_server}/api/v1/statuses'
    try:
        last_status = load_last_status(mastodon_get_statuses_url, mastodon_token)
        last_status_timestamp = datetime.fromisoformat(last_status['created_at'])
        new_entries = load_feed_rss(feed_url, last_status_timestamp)
        logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}')
        if not new_entries:
            return {
                "status": 200,
                "body": {
                    "posted_entries": 0,
                    "successful": True
                }
            }
        posted_entries = [
            post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, entry)
            for entry in new_entries
        ]
        return {
            "status": 200,
            "body": {
                "posted_entries": len(posted_entries),
                "successful": True
            }
        }
    except Exception as e:
        # format_exception_only() must be given the exception. Calling it with
        # no arguments raised TypeError inside this handler, so the 501
        # payload was never produced and the real error was hidden.
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {
                "posted_entries": 0,
                "message": msg,
                "successful": False
            }
        }
def load_last_status(url, token):
    """Return the first status listed by the account statuses endpoint *url*.

    Asks for one status (``?limit=1``) with *token* as bearer token and raises
    ``Exception`` carrying the response text on any status code other than
    200.
    """
    reply = requests.get(
        url + '?limit=1',
        headers={'Authorization': f'Bearer {token}'},
    )
    if reply.status_code != 200:
        raise Exception('Failed to contact Mastodon', reply.text)
    first, *_ = json.loads(reply.text)[:1] or [json.loads(reply.text)[0]]
    return first
def post_rss_entry_to_mastodon(url, token, entry):
    """Post one feed *entry* (title and link) as a public English status.

    Args:
        url: Mastodon status-creation endpoint.
        token: Bearer token used for the request.
        entry: Feed entry exposing ``title`` and ``link``.

    Returns:
        The ``requests`` response. A non-200 response is logged, not raised.
    """
    message = f"{entry.title}\n\n{entry.link}"
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Serverless Feed'
    }
    params = {
        'status': message,
        'language': 'en',
        'visibility': 'public'
    }
    # Logging arguments need a %s placeholder. Without one the record fails
    # to format and the handler prints a "Logging error" traceback instead.
    logger.info('posting to %s', url)
    response = requests.post(url, data=params, headers=headers)
    if response.status_code != 200:
        logger.error('Failed to post message: %s %s', response.status_code, response.text)
    return response
def load_feed_rss(url, since):
    """Return the entries of the feed at *url* published after *since*.

    feedparser documents ``published_parsed`` as a UTC time tuple, so it is
    converted with ``calendar.timegm`` into a timezone-aware UTC datetime.
    The previous ``time.mktime`` round trip treated the tuple as local time,
    which can skew the comparison by the DST offset when the server's zone
    observes daylight saving time.

    Args:
        url: Feed URL handed to ``feedparser.parse``.
        since: Datetime of the last posted status. A naive value is taken to
            be UTC.
    """
    import calendar
    from datetime import timezone

    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)
    feed = feedparser.parse(url)
    return [
        entry
        for entry in feed.entries
        if datetime.fromtimestamp(calendar.timegm(entry.published_parsed), tz=timezone.utc) > since
    ]

118
app/routers/sur.py Normal file
View File

@@ -0,0 +1,118 @@
import traceback
import json
import requests
import traceback
import json
import feedparser
import requests
import logging
from datetime import datetime
from time import mktime
from typing import Annotated
from fastapi import Depends, APIRouter
from bs4 import BeautifulSoup
from settings.defaults import Settings, get_settings
# Router carrying this module's endpoint; app/main.py mounts it under "/rss".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/sur", summary="Sur RSS")
async def update(settings: Annotated[Settings, Depends(get_settings)]):
    """Post Sur feed entries newer than the account's latest status.

    Reads the ``sur`` feed configuration and the Mastodon server from
    *settings*. The outcome is reported in the returned dict (``status`` and
    ``body``); exceptions are logged and reported with status 501 rather than
    raised.
    """
    feed_config = settings.feeds['sur']
    feed_url = feed_config['url']
    server = settings.mastodon_server
    account_id = feed_config['account_id']
    access_token = str(feed_config['token'])
    statuses_endpoint = f'{server}/api/v1/accounts/{account_id}/statuses'
    post_endpoint = f'{server}/api/v1/statuses'
    try:
        newest_status = load_last_status(statuses_endpoint, access_token)
        last_status_timestamp = datetime.fromisoformat(newest_status['created_at'])
        new_entries = load_feed_rss(feed_url, last_status_timestamp)
        logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}')
        posted_count = 0
        if len(new_entries) != 0:
            # Every attempted post is counted, whatever its response status.
            responses = [
                post_rss_entry_to_mastodon(post_endpoint, access_token, item)
                for item in new_entries
            ]
            posted_count = len(responses)
        return {
            "status": 200,
            "body": {"posted_entries": posted_count, "successful": True},
        }
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {"posted_entries": 0, "message": msg, "successful": False},
        }
def load_last_status(url, token):
    """Return the first status from the account statuses endpoint *url*.

    Requests a single status (``?limit=1``) using *token* as bearer token.
    Raises ``Exception`` with the response text when the status code is not
    200.
    """
    auth_headers = {'Authorization': f'Bearer {token}'}
    reply = requests.get(url + '?limit=1', headers=auth_headers)
    if reply.status_code == 200:
        statuses = json.loads(reply.text)
        return statuses[0]
    raise Exception('Failed to contact Mastodon', reply.text)
def post_rss_entry_to_mastodon(url, token, entry):
    """Post one feed *entry* as a public English status.

    The status text is the entry title, its summary reduced to plain text,
    the link and, when the entry carries usable tags, a line of hashtags.

    Args:
        url: Mastodon status-creation endpoint.
        token: Bearer token used for the request.
        entry: Feed entry exposing ``title``, ``link``, ``summary`` and
            optionally ``tags``.

    Returns:
        The ``requests`` response. A non-200 response is logged, not raised.
    """
    # This module never imports ``re`` at file level, so the tag clean-up
    # raised NameError for every tagged entry; import it locally.
    import re

    description = BeautifulSoup(entry.summary, features="html.parser").get_text()
    message = f"{entry.title}\n\n{description}\n\n{entry.link}"

    if 'tags' in entry:
        hashtags = []
        for tag in entry.tags:
            term = re.sub(r'\s+', '', tag.get('term') or '')
            # The original class r'[0-9.\\-()]' parsed as the range "\"-"(",
            # which is invalid and raises re.error. Backslash and hyphen are
            # now both escaped, literal members.
            term = re.sub(r'[0-9.\\\-()]+', '', term).capitalize()
            if len(term) >= 3:
                hashtags.append(term if term.startswith('#') else f'#{term}')
        if hashtags:
            message = f"{message}\n\n{' '.join(hashtags)}"

    headers = {
        'Authorization': f'Bearer {token}',
        'Content-type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Serverless Feed'
    }
    params = {
        'status': message,
        'language': 'en',
        'visibility': 'public'
    }
    response = requests.post(url, data=params, headers=headers)
    if response.status_code != 200:
        # A %s placeholder is required for logging arguments to be rendered.
        logger.error('Failed to post message: %s %s', response.status_code, response.text)
    return response
def load_feed_rss(url, since):
    """Return the entries of the feed at *url* published after *since*.

    feedparser documents ``published_parsed`` as a UTC time tuple, so it is
    converted with ``calendar.timegm`` into a timezone-aware UTC datetime.
    The previous ``time.mktime`` round trip treated the tuple as local time,
    which can skew the comparison by the DST offset when the server's zone
    observes daylight saving time.

    Args:
        url: Feed URL handed to ``feedparser.parse``.
        since: Datetime of the last posted status. A naive value is taken to
            be UTC.
    """
    import calendar
    from datetime import timezone

    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)
    feed = feedparser.parse(url)
    return [
        entry
        for entry in feed.entries
        if datetime.fromtimestamp(calendar.timegm(entry.published_parsed), tz=timezone.utc) > since
    ]

113
app/routers/taloustaito.py Normal file
View File

@@ -0,0 +1,113 @@
import traceback
import json
import requests
import traceback
import json
import feedparser
import requests
import logging
from datetime import datetime
from time import mktime
from typing import Annotated
from fastapi import Depends, APIRouter
from bs4 import BeautifulSoup
from settings.defaults import Settings, get_settings
# Router carrying this module's endpoint; app/main.py mounts it under "/rss".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/taloustaito", summary="Taloustaito RSS")
async def update(settings: Annotated[Settings, Depends(get_settings)]):
    """Post Taloustaito feed entries newer than the account's latest status.

    When the account has no status yet, the three most recently published
    feed entries are posted instead. The outcome is reported in the returned
    dict (``status`` and ``body``); exceptions are logged and reported with
    status 501 rather than raised.
    """
    feed_config = settings.feeds['taloustaito']
    feed_url = feed_config['url']
    server = settings.mastodon_server
    account_id = feed_config['account_id']
    access_token = str(feed_config['token'])
    statuses_endpoint = f'{server}/api/v1/accounts/{account_id}/statuses'
    post_endpoint = f'{server}/api/v1/statuses'
    try:
        last_status = load_last_status(statuses_endpoint, access_token)
        if not last_status:
            # Nothing posted yet: seed the account with the newest three.
            new_entries = load_feed_rss(feed_url, None)
            new_entries.sort(key=lambda e: e['published_parsed'], reverse=True)
            new_entries = new_entries[:3]
            logger.info(f'Found {len(new_entries)} entries')
        else:
            last_status_timestamp = datetime.fromisoformat(last_status['created_at'])
            new_entries = load_feed_rss(feed_url, last_status_timestamp)
            logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}')
        posted_count = 0
        if len(new_entries) != 0:
            # Every attempted post is counted, whatever its response status.
            responses = [
                post_rss_entry_to_mastodon(post_endpoint, access_token, item)
                for item in new_entries
            ]
            posted_count = len(responses)
        return {
            "status": 200,
            "body": {"posted_entries": posted_count, "successful": True},
        }
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {"posted_entries": 0, "message": msg, "successful": False},
        }
def load_last_status(url, token):
    """Return the account's first listed status, or ``None`` if it has none.

    Requests a single status (``?limit=1``) using *token* as bearer token.
    Raises ``Exception`` with the response text when the status code is not
    200.
    """
    auth_headers = {'Authorization': f'Bearer {token}'}
    reply = requests.get(url + '?limit=1', headers=auth_headers)
    if reply.status_code != 200:
        raise Exception('Failed to contact Mastodon', reply.text)
    statuses = json.loads(reply.text)
    return statuses[0] if len(statuses) > 0 else None
def post_rss_entry_to_mastodon(url, token, entry):
    """Post one feed *entry* as a public Finnish-language status.

    The status text is the entry title, its summary reduced to plain text,
    and the link.

    Args:
        url: Mastodon status-creation endpoint.
        token: Bearer token used for the request.
        entry: Feed entry exposing ``title``, ``summary`` and ``link``.

    Returns:
        The ``requests`` response. A non-200 response is logged, not raised.
    """
    description = BeautifulSoup(entry.summary, features="html.parser").get_text()
    message = f"{entry.title}\n\n{description}\n\n{entry.link}"
    headers = {
        'Authorization': f'Bearer {token}',
        'Content-type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Serverless Feed'
    }
    params = {
        'status': message,
        'language': 'fi',
        'visibility': 'public'
    }
    response = requests.post(url, data=params, headers=headers)
    if response.status_code != 200:
        # Logging arguments need a %s placeholder. Without one the record
        # fails to format and the failure details are lost.
        logger.error('Failed to post message: %s %s', response.status_code, response.text)
    return response
def load_feed_rss(url, since):
    """Return the feed entries at *url*, optionally only those after *since*.

    feedparser documents ``published_parsed`` as a UTC time tuple, so it is
    converted with ``calendar.timegm`` into a timezone-aware UTC datetime.
    The previous ``time.mktime`` round trip treated the tuple as local time,
    which can skew the comparison by the DST offset when the server's zone
    observes daylight saving time.

    Args:
        url: Feed URL handed to ``feedparser.parse``.
        since: Datetime of the last posted status, or ``None`` to return
            every entry. A naive value is taken to be UTC.
    """
    import calendar
    from datetime import timezone

    feed = feedparser.parse(url)
    if since is None:
        # No cut-off: hand back a fresh list the caller may sort in place.
        return list(feed.entries)
    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)
    return [
        entry
        for entry in feed.entries
        if datetime.fromtimestamp(calendar.timegm(entry.published_parsed), tz=timezone.utc) > since
    ]

118
app/routers/the_local.py Normal file
View File

@@ -0,0 +1,118 @@
import traceback
import json
import requests
import traceback
import json
import re
import feedparser
import requests
import logging
from datetime import datetime
from time import mktime
from typing import Annotated
from fastapi import Depends, APIRouter
from settings.defaults import Settings, get_settings
# Router carrying this module's endpoint; app/main.py mounts it under "/rss".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/the_local", summary="The Local RSS")
async def update(settings: Annotated[Settings, Depends(get_settings)]):
    """Post The Local feed entries newer than the account's latest status.

    Reads the ``the_local`` feed configuration and the Mastodon server from
    *settings*. The outcome is reported in the returned dict (``status`` and
    ``body``); exceptions are logged and reported with status 501 rather than
    raised.
    """
    feed_url = settings.feeds['the_local']['url']
    mastodon_server = settings.mastodon_server
    mastodon_aid = settings.feeds['the_local']['account_id']
    mastodon_token = str(settings.feeds['the_local']['token'])
    mastodon_get_statuses_url = f'{mastodon_server}/api/v1/accounts/{mastodon_aid}/statuses'
    mastodon_post_statuses_url = f'{mastodon_server}/api/v1/statuses'
    try:
        last_status = load_last_status(mastodon_get_statuses_url, mastodon_token)
        last_status_timestamp = datetime.fromisoformat(last_status['created_at'])
        new_entries = load_feed_rss(feed_url, last_status_timestamp)
        logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}')
        if not new_entries:
            return {
                "status": 200,
                "body": {
                    "posted_entries": 0,
                    "successful": True
                }
            }
        posted_entries = [
            post_rss_entry_to_mastodon(mastodon_post_statuses_url, mastodon_token, entry)
            for entry in new_entries
        ]
        return {
            "status": 200,
            "body": {
                "posted_entries": len(posted_entries),
                "successful": True
            }
        }
    except Exception as e:
        # format_exception_only() must be given the exception. Calling it with
        # no arguments raised TypeError inside this handler, so the 501
        # payload was never produced and the real error was hidden.
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {
                "posted_entries": 0,
                "message": msg,
                "successful": False
            }
        }
def load_last_status(url, token):
    """Return the first status from the account statuses endpoint *url*.

    Requests a single status (``?limit=1``) using *token* as bearer token.
    Raises ``Exception`` with the response text when the status code is not
    200.
    """
    auth_headers = {'Authorization': f'Bearer {token}'}
    reply = requests.get(url + '?limit=1', headers=auth_headers)
    if reply.status_code == 200:
        statuses = json.loads(reply.text)
        return statuses[0]
    raise Exception('Failed to contact Mastodon', reply.text)
def post_rss_entry_to_mastodon(url, token, entry):
    """Post one feed *entry* as a public English status.

    The status text is the entry title, summary and link and, when the entry
    carries usable tags, a line of hashtags.

    Args:
        url: Mastodon status-creation endpoint.
        token: Bearer token used for the request.
        entry: Feed entry exposing ``title``, ``summary``, ``link`` and
            optionally ``tags``.

    Returns:
        The ``requests`` response. A non-200 response is logged, not raised.
    """
    message = f"{entry.title}\n\n{entry.summary}\n\n{entry.link}"

    if 'tags' in entry:
        hashtags = []
        for tag in entry.tags:
            term = re.sub(r'\s+', '', tag.get('term') or '')
            # The original class r'[0-9.\\-()]' parsed as the range "\"-"(",
            # which is invalid and raised re.error for every tagged entry.
            # Backslash and hyphen are now both escaped, literal members.
            term = re.sub(r'[0-9.\\\-()]+', '', term).capitalize()
            if len(term) >= 3:
                hashtags.append(term if term.startswith('#') else f'#{term}')
        if hashtags:
            message = f"{message}\n\n{' '.join(hashtags)}"

    headers = {
        'Authorization': f'Bearer {token}',
        'Content-type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Serverless Feed'
    }
    params = {
        'status': message,
        'language': 'en',
        'visibility': 'public'
    }
    # Logging arguments need a %s placeholder. Without one the record fails
    # to format and the handler prints a "Logging error" traceback instead.
    logger.info('posting to %s', url)
    response = requests.post(url, data=params, headers=headers)
    if response.status_code != 200:
        logger.error('Failed to post message: %s %s', response.status_code, response.text)
    return response
def load_feed_rss(url, since):
    """Return the entries of the feed at *url* published after *since*.

    feedparser documents ``published_parsed`` as a UTC time tuple, so it is
    converted with ``calendar.timegm`` into a timezone-aware UTC datetime.
    The previous ``time.mktime`` round trip treated the tuple as local time,
    which can skew the comparison by the DST offset when the server's zone
    observes daylight saving time.

    Args:
        url: Feed URL handed to ``feedparser.parse``.
        since: Datetime of the last posted status. A naive value is taken to
            be UTC.
    """
    import calendar
    from datetime import timezone

    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)
    feed = feedparser.parse(url)
    return [
        entry
        for entry in feed.entries
        if datetime.fromtimestamp(calendar.timegm(entry.published_parsed), tz=timezone.utc) > since
    ]

156
app/routers/yle_rss_en.py Normal file
View File

@@ -0,0 +1,156 @@
import traceback
import json
import requests
import traceback
import json
import re
import feedparser
import requests
import logging
from datetime import datetime
from time import mktime
from typing import Annotated
from fastapi import Depends, APIRouter
from openai import OpenAI
from settings.defaults import Settings, get_settings
from ai.prompts import GENERATE_TAGS_PROMPT, TRANSLATE_ML_PROMPT
# Router carrying this module's endpoint; app/main.py mounts it under "/rss".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/yle_en", summary="Yle.fi News RSS")
async def update(settings: Annotated[Settings, Depends(get_settings)]):
    """Post Yle News feed entries newer than the account's latest status.

    Reads the ``yle_en`` feed configuration and the Mastodon server from
    *settings*; *settings* is also handed to the posting helper. The outcome
    is reported in the returned dict (``status`` and ``body``); exceptions are
    logged and reported with status 501 rather than raised.
    """
    feed_config = settings.feeds['yle_en']
    feed_url = feed_config['url']
    server = settings.mastodon_server
    account_id = feed_config['account_id']
    access_token = str(feed_config['token'])
    statuses_endpoint = f'{server}/api/v1/accounts/{account_id}/statuses'
    post_endpoint = f'{server}/api/v1/statuses'
    try:
        newest_status = load_last_status(statuses_endpoint, access_token)
        last_status_timestamp = datetime.fromisoformat(newest_status['created_at'])
        new_entries = load_feed_rss(feed_url, last_status_timestamp)
        logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}')
        posted_count = 0
        if len(new_entries) != 0:
            # Every attempted post is counted, whatever its response status.
            responses = [
                post_rss_entry_to_mastodon(post_endpoint, access_token, item, settings)
                for item in new_entries
            ]
            posted_count = len(responses)
        return {
            "status": 200,
            "body": {"posted_entries": posted_count, "successful": True},
        }
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {"posted_entries": 0, "message": msg, "successful": False},
        }
def load_last_status(url, token):
    """Return the first status from the account statuses endpoint *url*.

    Requests a single status (``?limit=1``) using *token* as bearer token.
    Raises ``Exception`` with the response text when the status code is not
    200.
    """
    auth_headers = {'Authorization': f'Bearer {token}'}
    reply = requests.get(url + '?limit=1', headers=auth_headers)
    if reply.status_code == 200:
        statuses = json.loads(reply.text)
        return statuses[0]
    raise Exception('Failed to contact Mastodon', reply.text)
def post_rss_entry_to_mastodon(url:str, token:str, entry, settings:Settings):
    """Post one feed *entry* as a public English status with hashtags.

    The link is stripped of its query string. If the entry has more than
    three usable tags they are translated with ``translate_tags``; otherwise
    five hashtags are generated from the summary with ``generate_tags``.

    Args:
        url: Mastodon status-creation endpoint.
        token: Bearer token used for the request.
        entry: Feed entry exposing ``title``, ``summary``, ``link`` and
            optionally ``tags``.
        settings: Application settings, passed through to the tag helpers.

    Returns:
        The ``requests`` response. A non-200 response is logged, not raised.
    """
    title = entry.title
    description = entry.summary
    # Keep only the part of the link before any query string.
    link = entry.link.split('?', 1)[0]

    categories = []
    if 'tags' in entry:
        categories = [
            tag
            for tag in sanitize_tags([t.get('term') for t in entry.tags])
            if len(tag) >= 3
        ]
    if len(categories) > 3:
        tags = translate_tags(categories, settings)
    else:
        tags = generate_tags(description, 5, settings)
    message = f"{title}\n\n{description}\n\n{link}\n\n{tags}"

    headers = {
        'Authorization': f'Bearer {token}',
        'Content-type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Serverless Feed'
    }
    params = {
        'status': message,
        'language': 'en',
        'visibility': 'public'
    }
    response = requests.post(url, data=params, headers=headers)
    if response.status_code != 200:
        # Report through the module logger (as the sibling routers do)
        # instead of print, so failures reach the configured log output.
        logger.error('Failed to post message: %s %s', response.status_code, response.text)
    return response
def load_feed_rss(url, since):
    """Return the entries of the feed at *url* published after *since*.

    feedparser documents ``published_parsed`` as a UTC time tuple, so it is
    converted with ``calendar.timegm`` into a timezone-aware UTC datetime.
    The previous ``time.mktime`` round trip treated the tuple as local time,
    which can skew the comparison by the DST offset when the server's zone
    observes daylight saving time.

    Args:
        url: Feed URL handed to ``feedparser.parse``.
        since: Datetime of the last posted status. A naive value is taken to
            be UTC.
    """
    import calendar
    from datetime import timezone

    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)
    feed = feedparser.parse(url)
    return [
        entry
        for entry in feed.entries
        if datetime.fromtimestamp(calendar.timegm(entry.published_parsed), tz=timezone.utc) > since
    ]
def generate_tags(text:str, num_tags:int, settings:Settings):
    """Ask the OpenAI chat API for *num_tags* hashtags describing *text*.

    Uses ``settings.openai_api_key`` and the ``GENERATE_TAGS_PROMPT``
    template. This is best-effort: any failure is logged and an empty string
    is returned.
    """
    try:
        client = OpenAI(api_key=settings.openai_api_key)
        result = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": GENERATE_TAGS_PROMPT % (num_tags, text) }]
        )
        return result.choices[0].message.content
    except Exception as e:
        # The exception needs a %s placeholder; passing it as a bare extra
        # argument made the log record fail to format and lost the error.
        logger.error('Failed to generate tags: %s', e)
        return ''
def translate_tags(tags:list, settings:Settings):
    """Ask the OpenAI chat API to translate the hashtags in *tags*.

    The tags are joined with spaces and inserted into the
    ``TRANSLATE_ML_PROMPT`` template; ``settings.openai_api_key`` is used for
    the client. This is best-effort: any failure is logged and an empty
    string is returned.
    """
    try:
        client = OpenAI(api_key=settings.openai_api_key)
        result = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": TRANSLATE_ML_PROMPT % (" ".join(tags)) }]
        )
        return result.choices[0].message.content
    except Exception as e:
        # The exception needs a %s placeholder; passing it as a bare extra
        # argument made the log record fail to format and lost the error.
        logger.error('Failed to translate tags: %s', e)
        return ''
def sanitize_tags(categories):
    """Turn raw category terms into ``#Hashtag`` strings.

    Each term is split on ``=``; every piece has whitespace removed, then
    digits and the characters ``. \\ - ( ) < > { } #`` stripped, ``&``
    replaced by ``And``, and is capitalised and prefixed with ``#``.

    Args:
        categories: Iterable of category strings.

    Returns:
        A list of hashtags, one per ``=``-separated piece, in input order.
        Pieces that end up empty still yield a bare ``#``; callers filter by
        length.
    """
    # The original class r'[0-9.\\-()<>{}#]' parsed as the range "\"-"(",
    # which is invalid and raised re.error as soon as the result was
    # consumed. Backslash and hyphen are now both escaped, literal members.
    strip_pattern = re.compile(r'[0-9.\\\-()<>{}#]+')
    hashtags = []
    for item in categories:
        for part in item.split('='):
            cleaned = re.sub(r'\s+', '', part)
            cleaned = strip_pattern.sub('', cleaned)
            cleaned = cleaned.replace('&', 'And').capitalize()
            hashtags.append(f'#{cleaned}')
    return hashtags

135
app/routers/yle_rss_fi.py Normal file
View File

@@ -0,0 +1,135 @@
import traceback
import json
import requests
import traceback
import json
import re
import feedparser
import requests
import logging
from datetime import datetime
from time import mktime
from typing import Annotated
from fastapi import Depends, APIRouter
from settings.defaults import Settings, get_settings
# Router carrying this module's endpoint; app/main.py mounts it under "/rss".
router = APIRouter()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
@router.get("/yle_fi", summary="Yle.fi Uutiset RSS")
async def update(settings: Annotated[Settings, Depends(get_settings)]):
    """Post Yle Uutiset feed entries newer than the account's latest status.

    Reads the ``yle_fi`` feed configuration and the Mastodon server from
    *settings*. The outcome is reported in the returned dict (``status`` and
    ``body``); exceptions are logged and reported with status 501 rather than
    raised.
    """
    feed_config = settings.feeds['yle_fi']
    feed_url = feed_config['url']
    server = settings.mastodon_server
    account_id = feed_config['account_id']
    # Unlike the sibling routers, the token is used as configured (no str()).
    access_token = feed_config['token']
    statuses_endpoint = f'{server}/api/v1/accounts/{account_id}/statuses'
    post_endpoint = f'{server}/api/v1/statuses'
    try:
        newest_status = load_last_status(statuses_endpoint, access_token)
        last_status_timestamp = datetime.fromisoformat(newest_status['created_at'])
        new_entries = load_feed_rss(feed_url, last_status_timestamp)
        logger.info(f'Found {len(new_entries)} new entries since {last_status_timestamp}')
        posted_count = 0
        if len(new_entries) != 0:
            # Every attempted post is counted, whatever its response status.
            responses = [
                post_rss_entry_to_mastodon(post_endpoint, access_token, item)
                for item in new_entries
            ]
            posted_count = len(responses)
        return {
            "status": 200,
            "body": {"posted_entries": posted_count, "successful": True},
        }
    except Exception as e:
        msg = ''.join(traceback.format_exception_only(e))
        logger.error(msg)
        return {
            "status": 501,
            "body": {"posted_entries": 0, "message": msg, "successful": False},
        }
def split(arr, char):
    """Split every string in *arr* on *char* and return all pieces as one flat list."""
    pieces = []
    for item in arr:
        pieces.extend(item.split(char))
    return pieces
def capitalize(arr, char):
    """Lazily camel-case each string in *arr* around the separator *char*.

    Every string is split on *char*, each piece is passed through
    ``str.capitalize`` (first letter upper, the rest lower) and the pieces are
    joined back without the separator. The result is a one-shot iterator.
    """
    return (
        ''.join(piece.capitalize() for piece in item.split(char))
        for item in arr
    )
def load_last_status(url, token):
    """Return the first status from the account statuses endpoint *url*.

    Requests a single status (``?limit=1``) using *token* as bearer token.
    Raises ``Exception`` with the response text when the status code is not
    200.
    """
    auth_headers = {'Authorization': f'Bearer {token}'}
    reply = requests.get(url + '?limit=1', headers=auth_headers)
    if reply.status_code == 200:
        statuses = json.loads(reply.text)
        return statuses[0]
    raise Exception('Failed to contact Mastodon', reply.text)
def post_rss_entry_to_mastodon(url, token, entry):
    """Post one feed *entry* as a public Finnish-language status.

    The status text is the entry title, summary, the link without its query
    string and, when the entry carries usable tags, a line of CamelCase
    hashtags.

    Tag terms are split on commas; each piece is camel-cased across
    whitespace, ``-``, ``/`` and ``\\`` in a single pass, then stripped of
    digits, dots and parentheses, and ``&`` becomes ``Ja``. Pieces shorter
    than three characters are dropped.

    The earlier chain of per-separator passes had two defects. One pass used
    an empty separator, and ``str.split('')`` raises ValueError, so every
    tagged entry failed. Each later pass also re-lowercased the camel-casing
    produced by the previous one.
    NOTE(review): if the empty separator stood for a character that was lost
    in transit (e.g. an invisible one), add it to the split pattern below.

    Args:
        url: Mastodon status-creation endpoint.
        token: Bearer token used for the request.
        entry: Feed entry exposing ``title``, ``summary``, ``link`` and
            optionally ``tags``.

    Returns:
        The ``requests`` response. A non-200 response is logged, not raised.
    """
    # Keep only the part of the link before any query string.
    link = entry.link.split('?', 1)[0]
    message = f"{entry.title}\n\n{entry.summary}\n\n{link}"

    if 'tags' in entry:
        hashtags = []
        for tag in entry.tags:
            for raw in (tag.get('term') or '').split(','):
                words = re.split(r'[\s\-/\\]+', raw)
                name = ''.join(word.capitalize() for word in words)
                name = re.sub(r'[0-9.()]+', '', name)
                name = name.replace('&', 'Ja')
                if len(name) >= 3:
                    hashtags.append(name if name.startswith('#') else f'#{name}')
        if hashtags:
            message = f"{message}\n\n{' '.join(hashtags)}"

    headers = {
        'Authorization': f'Bearer {token}',
        'Content-type': 'application/x-www-form-urlencoded',
        'User-Agent': 'Serverless Feed'
    }
    params = {
        'status': message,
        'language': 'fi',
        'visibility': 'public'
    }
    response = requests.post(url, data=params, headers=headers)
    if response.status_code != 200:
        # A %s placeholder is required for logging arguments to be rendered.
        logger.error('Failed to post message: %s %s', response.status_code, response.text)
    return response
def load_feed_rss(url, since):
    """Return the entries of the feed at *url* published after *since*.

    feedparser documents ``published_parsed`` as a UTC time tuple, so it is
    converted with ``calendar.timegm`` into a timezone-aware UTC datetime.
    The previous ``time.mktime`` round trip treated the tuple as local time,
    which can skew the comparison by the DST offset when the server's zone
    observes daylight saving time.

    Args:
        url: Feed URL handed to ``feedparser.parse``.
        since: Datetime of the last posted status. A naive value is taken to
            be UTC.
    """
    import calendar
    from datetime import timezone

    if since.tzinfo is None:
        since = since.replace(tzinfo=timezone.utc)
    feed = feedparser.parse(url)
    return [
        entry
        for entry in feed.entries
        if datetime.fromtimestamp(calendar.timegm(entry.published_parsed), tz=timezone.utc) > since
    ]

0
app/settings/__init__py Normal file
View File

13
app/settings/defaults.py Normal file
View File

@@ -0,0 +1,13 @@
from pydantic_settings import BaseSettings, SettingsConfigDict
from functools import lru_cache
# Application settings declared as a pydantic-settings model.
class Settings(BaseSettings):
# Base URL of the Mastodon server; the routers append "/api/v1/..." to it.
mastodon_server:str
# API key handed to the OpenAI client by the Yle English router.
openai_api_key:str
# Per-feed configuration keyed by feed name (e.g. 'embed', 'hn', 'yle_fi').
# The routers read 'url', 'account_id' and 'token' from each feed entry;
# the 'embed' entry is read for 'url', 'token', 's3_bucket' and 's3_key'.
feeds: dict[str, dict[str,object]]
# Configured with env_file=".env" and "__" as the nested-key delimiter.
model_config = SettingsConfigDict(env_file=".env", env_nested_delimiter='__', arbitrary_types_allowed=True)
# Version string shown as the FastAPI application version (see app/main.py).
version:str
@lru_cache
def get_settings():
    """Build the ``Settings`` object on first use and return the cached one afterwards."""
    settings = Settings()  # type: ignore
    return settings