Pinhole – blob

You can use Git to clone the repository via the web URL. Download snapshot (zip)
Prevent unintended crash if sending AP payload fails for non-network reasons
[Pinhole] / pinhole.py
1 #!/usr/bin/env python3
2 # -*- coding: utf-8 -*-
4 # SPDX-License-Identifier: AGPL-3.0-or-later
5 # ================================================================================================
6 # Pinhole
7 # <https://fietkau.software/pinhole>
8 #
9 # Copyright (C) Julian Fietkau
10 #
11 # This program is free software: you can redistribute it and/or modify it under the terms of the
12 # GNU Affero General Public License as published by the Free Software Foundation, either version 3
13 # of the License, or (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
16 # without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
17 # the GNU Affero General Public License for more details.
18 #
19 # You should have received a copy of the GNU Affero General Public License along with this
20 # program. If not, see <https://www.gnu.org/licenses/>.
21 # ================================================================================================
# Configuration. Both handles are written with a leading '@', which the code
# strips with [1:] wherever the bare handle is needed.

# Bluesky handle of the account whose posts are mirrored.
BLUESKY_PROFILE = '@example.bsky.social'
# ActivityPub account (@user@host) that is allowed to follow the mirror and
# that receives the mirrored posts.
ACTIVITYPUB_RECIPIENT = '@you@example.com'
27 import base64
28 import email.utils
29 import glob
30 import html
31 import json
32 import logging
33 import os
34 import requests
35 import socket
36 import sys
37 import urllib.parse
39 from Crypto.Hash import SHA256
40 from Crypto.PublicKey import RSA
41 from Crypto.Signature import pkcs1_15
43 from flask import Flask, abort, g, make_response, request, send_file
# All state files (data.json, *.pem, avatar.*, images/) are addressed with
# relative paths, so run from the directory that contains this script.
# abspath() guards against __file__ being a bare relative filename, in which
# case dirname() would be '' and os.chdir('') would raise.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

logging.basicConfig(level=logging.INFO)
app = Flask(__name__)
# Second name for the same Flask app object.
application = app

if not os.access('.', os.W_OK):
    print('FATAL ERROR: Working directory must be writable by this script.', file=sys.stderr)
@app.route('/')
def actor():
    """Serve the ActivityPub actor document for the mirrored Bluesky profile.

    Requests that fail check_request_validity() get a plain-text status page
    instead. Valid requests get an 'application/activity+json' document that
    describes the mirror account, including its public key and, when an
    avatar file exists in the working directory, an icon.
    """
    ensure_data()
    if not check_request_validity(request):
        response = make_response('Pinhole is running.\n\nIf you are the operator, try searching this URL from your ActivityPub-compatible social platform.', 200)
        response.mimetype = 'text/plain'
        return response
    ensure_keys()
    url_root = g.data['app']['urlRoot']
    handle = BLUESKY_PROFILE[1:]
    username = handle.replace('.bsky.social', '')
    display_name = g.data['sender']['displayName']
    result = {
        '@context': [
            'https://www.w3.org/ns/activitystreams',
            'https://w3id.org/security/v1',
        ],
        'id': url_root,
        'inbox': url_root + 'inbox',
        'outbox': url_root + 'outbox',
        'type': 'Service',
        'name': display_name if display_name is not None else username,
        'preferredUsername': username,
        'attachment': [
            {
                'type': 'PropertyValue',
                'name': 'Original Bluesky profile',
                'value': '<a href="https://bsky.app/profile/' + handle + '" rel="me nofollow noopener noreferrer" target="_blank"><span class="invisible">https://</span><span class="">bsky.app/profile/' + handle + '</span></a>',
            },
        ],
        'url': 'https://bsky.app/profile/' + handle,
        'manuallyApprovesFollowers': True,
        'discoverable': False,
        'indexable': False,
        'publicKey': {
            # The key id must match the keyId that send() puts into the
            # Signature header; 'owner' points back at this actor. The
            # original used the key 'id' twice, which silently overwrote the
            # key id with the actor URL and never emitted 'owner'.
            'id': url_root + '#main-key',
            'owner': url_root,
            'publicKeyPem': g.public_key.exportKey(format='PEM').decode('ascii'),
        },
    }
    if g.data['sender']['description'] is not None:
        result['summary'] = html.escape(g.data['sender']['description']).replace('\n', '<br>') + '<br><br>__________________<br>'
    else:
        result['summary'] = ''
    result['summary'] += 'This is an automated mirror account showing posts retrieved from Bluesky. The author will not see your replies nor any other interactions. Unless you are the operator of this mirror bot, your follow request will be ignored.'
    avatars = glob.glob('avatar.*')
    if len(avatars) > 0:
        # The file extension doubles as the image subtype.
        file_type = os.path.splitext(avatars[0])[1][1:]
        result['icon'] = {
            'type': 'Image',
            'mediaType': 'image/' + file_type,
            'url': url_root + 'avatar',
        }
    response = make_response(result)
    response.headers['Content-Type'] = 'application/activity+json'
    return response
@app.route('/inbox', methods=['POST'])
def inbox():
    """Handle activities POSTed to the mirror's inbox.

    Only two activities have an effect, and only when their actor is the
    configured recipient: a Follow of this actor is recorded and answered
    with an Accept, and an Undo of the recorded Follow clears it again.

    Responds 404 for requests that fail check_request_validity(), 401 when
    receive() rejects the message, 400 for payloads without the required
    fields, and 202 otherwise.
    """
    ensure_data()
    if not check_request_validity(request):
        abort(404)
    payload = receive(request)
    if payload is None:
        # Incoming message is malformed somehow
        abort(401)
    if not isinstance(payload, dict) or 'type' not in payload:
        return make_response('', 400)
    recipient = g.data['recipient']
    from_recipient = payload.get('actor') is not None and payload.get('actor') == recipient['url']
    if payload['type'] == 'Follow' and payload.get('object') == g.data['app']['urlRoot']:
        if from_recipient:
            if 'id' not in payload:
                # Without an id there is nothing to accept or to undo later.
                return make_response('', 400)
            response_payload = {
                '@context': 'https://www.w3.org/ns/activitystreams',
                'id': g.data['app']['urlRoot'] + os.urandom(16).hex(),
                'type': 'Accept',
                'actor': g.data['app']['urlRoot'],
                'object': payload['id'],
            }
            recipient['followingID'] = payload['id']
            persist_data()
            try:
                send(response_payload)
            except requests.RequestException:
                # The follow is already recorded; a failed Accept delivery
                # should not turn into an internal server error.
                app.logger.exception('Failed to deliver Accept for follow request.')
    if payload['type'] == 'Undo' and 'object' in payload:
        undone = payload['object']
        # The undone activity may be embedded as an object or referenced by
        # its id as a plain string.
        undone_id = undone.get('id') if isinstance(undone, dict) else undone
        if undone_id == recipient['followingID'] and from_recipient:
            recipient['followingID'] = None
            persist_data()
    return make_response('', 202)
@app.route('/.well-known/webfinger')
def webfinger():
    """Answer WebFinger lookups for the mirror account.

    Responds 404 unless the request passes check_request_validity() and the
    'resource' query parameter names this account on the requested host.
    Otherwise returns a JRD document whose 'self' link is the actor URL.
    """
    if not check_request_validity(request):
        abort(404)
    ensure_data()
    expected_subject = 'acct:' + BLUESKY_PROFILE[1:].replace('.bsky.social', '') + '@' + request.host
    if request.args.get('resource') != expected_subject:
        abort(404)
    self_link = {
        'rel': 'self',
        'type': 'application/activity+json',
        'href': g.data['app']['urlRoot'],
    }
    response = make_response({
        'subject': expected_subject,
        'links': [self_link],
    })
    response.headers['Content-Type'] = 'application/jrd+json'

    return response
@app.route('/avatar')
def avatar():
    """Serve the stored avatar image, or 404 if the request is not valid or
    no avatar.* file exists in the working directory."""
    if not check_request_validity(request):
        abort(404)
    candidates = glob.glob('avatar.*')
    if len(candidates) == 0:
        abort(404)
    image_path = candidates[0]
    # The file extension (without the dot) is used as the image subtype.
    extension = os.path.splitext(image_path)[1][1:]
    return send_file(image_path, mimetype='image/' + extension)
@app.route('/attachment/<attachment_id>')
def attachment(attachment_id):
    """Serve a stored post image by its attachment id.

    Looks for 'images/<attachment_id>.<ext>' and returns the first match with
    an 'image/<ext>' MIME type. Responds 404 when the request fails
    check_request_validity() or no such file exists.
    """
    if not check_request_validity(request):
        abort(404)
    # attachment_id comes straight from the URL; escape it so that glob
    # metacharacters ('*', '?', '[') are matched literally instead of letting
    # the caller select arbitrary files in images/.
    attachments = glob.glob('images/' + glob.escape(attachment_id) + '.*')
    if len(attachments) == 0:
        abort(404)
    attachment = attachments[0]
    file_type = os.path.splitext(attachment)[1][1:]
    return send_file(attachment, mimetype='image/' + file_type)
def persist_data():
    """Write the request-scoped state in g.data back to data.json."""
    ensure_data()
    with open('data.json', mode='w', encoding='utf-8') as handle:
        json.dump(g.data, handle)
def ensure_data():
    """Make sure g.data holds the contents of data.json.

    If data.json does not exist yet, update() is run first to create it.
    Does nothing when g.data is already set.
    """
    if hasattr(g, 'data'):
        return
    if not os.path.isfile('data.json'):
        # update() builds the initial state and writes it to data.json, but
        # it works on a local dict and does not populate g.data itself.
        update()
    # Load unconditionally so that g.data is also set on the very first run;
    # previously the update() branch left g.data unset and every caller
    # failed on its next g.data[...] access.
    with open('data.json', 'r', encoding='utf-8') as fp:
        g.data = json.load(fp)
def ensure_keys():
    """Make sure g.private_key and g.public_key are available.

    The key pair is read from private.pem / public.pem when both files exist.
    Otherwise a new 2048-bit RSA pair is generated and written to those files.
    """
    already_loaded = hasattr(g, 'private_key') and hasattr(g, 'public_key')
    if already_loaded:
        return

    if os.path.isfile('public.pem') and os.path.isfile('private.pem'):
        with open('private.pem', 'rb') as key_file:
            g.private_key = RSA.import_key(key_file.read())
        with open('public.pem', 'rb') as key_file:
            g.public_key = RSA.import_key(key_file.read())
        return

    # At least one of the two files is missing: start over with a fresh pair.
    g.private_key = RSA.generate(2048)
    g.public_key = g.private_key.public_key()
    with open('private.pem', 'wb') as key_file:
        key_file.write(g.private_key.export_key('PEM'))
    with open('public.pem', 'wb') as key_file:
        key_file.write(g.public_key.export_key('PEM'))
def check_request_validity(request):
    """Return True if the request comes from localhost or from one of the
    stored recipient IP addresses.

    As a side effect, the first request seen while no URL root is stored
    records request.url_root as the app's URL root and persists it.
    """
    ensure_data()
    app_settings = g.data['app']
    if app_settings['urlRoot'] is None:
        app_settings['urlRoot'] = request.url_root
        persist_data()
    if request.remote_addr in ['127.0.0.1', '::1']:
        return True
    return hasattr(g, 'data') and request.remote_addr in g.data['recipient']['ip']
def send(data, timeout=30):
    """Sign an activity and POST it to the recipient's inbox.

    Parameters:
        data: JSON-serialisable activity to deliver.
        timeout: seconds to wait for the recipient server before giving up.

    Returns:
        A (response_text, status_code) tuple, or None when the activity could
        not be sent (unknown recipient, signature self-check failure, or a
        network error). Failures are logged.
    """
    ensure_data()
    ensure_keys()
    recipient = g.data['recipient']
    if recipient['url'] is None:
        app.logger.error('Failed to send data because recipient account URL is unknown.')
        return
    if recipient['inbox'] is None:
        app.logger.error('Failed to send data because recipient inbox is unknown.')
        return

    # The signed host and request target must describe the request that is
    # actually made, i.e. the inbox URL (which need not share its host with
    # the actor URL), including any query string.
    inbox_url = urllib.parse.urlparse(recipient['inbox'])
    host = inbox_url.netloc
    request_target = inbox_url.path or '/'
    if inbox_url.query:
        request_target += '?' + inbox_url.query

    # Hat tip to @CartyBoston@mastodon.roundpond.net for
    # https://socialhub.activitypub.rocks/t/python-mastodon-server-post-with-http-signature/2757
    date = email.utils.formatdate(usegmt=True)
    # Encode once so the digest is computed over exactly the bytes sent.
    payload = json.dumps(data).encode('utf-8')
    payload_hash_base64 = base64.b64encode(SHA256.new(payload).digest()).decode()

    to_be_signed_string = '\n'.join([
        '(request-target): post ' + request_target,
        'host: ' + host,
        'date: ' + date,
        'digest: SHA-256=' + payload_hash_base64,
    ])
    to_be_signed_string_hash = SHA256.new(to_be_signed_string.encode('utf-8'))
    signature = pkcs1_15.new(g.private_key).sign(to_be_signed_string_hash)

    # Self-check: catches a private.pem / public.pem pair that does not match.
    try:
        pkcs1_15.new(g.public_key).verify(to_be_signed_string_hash, signature)
    except ValueError:
        app.logger.error('Signature did not match during send.')
        return

    signature_base64 = base64.b64encode(signature).decode()
    signature_header = 'keyId="' + g.data['app']['urlRoot'] + '#main-key", algorithm="rsa-sha256", headers="(request-target) host date digest", signature="' + signature_base64 + '"'
    headers = {
        'host': host,
        'date': date,
        'digest': 'SHA-256=' + payload_hash_base64,
        'content-type': 'application/activity+json',
        'signature': signature_header,
    }
    try:
        r = requests.post(recipient['inbox'], data=payload, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Callers already treat None as "not sent".
        app.logger.exception('Failed to send data because the request to the recipient inbox failed.')
        return
    return (r.text, r.status_code)
def receive(request, timeout=30):
    """Verify the HTTP signature of an incoming inbox request.

    Parameters:
        request: the Flask request to verify.
        timeout: seconds to wait when fetching the sender's public key.

    Returns:
        The parsed JSON body when the signature was made by the configured
        recipient's key and verifies, otherwise None. Rejections are logged.
    """
    ensure_data()
    ensure_keys()

    raw_signature = request.headers.get('Signature')
    if raw_signature is None:
        app.logger.warning('Failed to receive data because the Signature header is missing.')
        return
    # Parse 'name="value",name="value",...'; values may themselves contain
    # '=' (base64 padding), so only split on the first one.
    signature_header = {}
    for pair in raw_signature.split(','):
        name, _, value = pair.strip().partition('=')
        if value.startswith('"') and value.endswith('"'):
            value = value[1:-1]
        signature_header[name] = value
    for required in ('keyId', 'headers', 'signature'):
        if required not in signature_header:
            app.logger.warning('Failed to receive data because the Signature header is incomplete.')
            return

    key_id = signature_header['keyId']
    if g.data['recipient']['url'] is None:
        app.logger.warning('Failed to receive data because recipient URL has not been stored.')
        return
    if key_id.split('#')[0] != g.data['recipient']['url']:
        app.logger.warning('Received data from an unauthorized actor.')
        return

    # NOTE(review): the body digest is only checked when a Digest header is
    # present, and it is not required to be among the signed headers.
    if 'digest' in request.headers:
        algorithm, _, claimed_digest = request.headers['digest'].partition('=')
        if algorithm != 'SHA-256':
            app.logger.error('Failed to receive data because digest used an unsupported hash algorithm.')
            return
        payload_hash_base64 = base64.b64encode(SHA256.new(request.get_data()).digest()).decode()
        if claimed_digest != payload_hash_base64:
            app.logger.error('Failed to receive data because digest does not match payload.')
            return

    headers = signature_header['headers']
    try:
        # binascii.Error (invalid base64) is a ValueError subclass.
        signature = base64.b64decode(signature_header['signature'])
    except ValueError:
        app.logger.error('Failed to receive data because the signature is not valid base64.')
        return
    try:
        actor = requests.get(key_id, headers={'accept': 'application/activity+json'}, timeout=timeout).json()
        public_key = RSA.import_key(actor['publicKey']['publicKeyPem'])
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        app.logger.exception('Failed to receive data because the sender public key could not be retrieved.')
        return

    # Rebuild the signed string from the headers the sender listed.
    comparison_string = []
    for signed_header_name in headers.split(' '):
        if signed_header_name == '(request-target)':
            # This function is only used for the /inbox route.
            comparison_string.append('(request-target): post /inbox')
        elif signed_header_name in request.headers:
            comparison_string.append(signed_header_name + ': ' + request.headers[signed_header_name])
        else:
            app.logger.error('Failed to receive data because a signed header is missing from the request.')
            return
    comparison_string = '\n'.join(comparison_string)
    comparison_string_hash = SHA256.new(comparison_string.encode('utf-8'))
    try:
        pkcs1_15.new(public_key).verify(comparison_string_hash, signature)
    except ValueError:
        app.logger.error('Signature did not match during receive.')
        return
    return request.get_json()
def update():
    """Refresh the stored state and forward new Bluesky posts.

    Steps, in order:
      1. Load data.json, or start from an empty state if it does not exist.
      2. Resolve the recipient's actor URL (via WebFinger), inbox and IP
         addresses.
      3. Resolve the sender's DID and refresh display name, description and
         avatar from the Bluesky profile record.
      4. Fetch the 10 most recent post records, download their images and
         collect the posts that are newer than the last run.
      5. If the recipient is following, send each collected post with send().
      6. Write data.json back if anything changed.

    Returns None. Network and JSON errors from the lookups are not caught.
    """
    logger = logging.getLogger('pinhole')
    if os.path.isfile('data.json'):
        with open('data.json', 'r', encoding='utf-8') as fp:
            data = json.load(fp)
        modified = False
    else:
        data = {
            'app': {
                'urlRoot': None,
            },
            'sender': {
                'did': None,
                'displayName': None,
                'description': None,
                'avatarLink': None,
                'avatarType': None,
            },
            'lastPosted': None,
            'sentPosts': None,
            'recipient': {
                'account': ACTIVITYPUB_RECIPIENT,
                'url': None,
                'inbox': None,
                'followingID': None,
                'ip': [],
            },
        }
        modified = True

    # A changed recipient invalidates what was looked up for the old one.
    if data['recipient']['account'] != ACTIVITYPUB_RECIPIENT:
        data['recipient']['account'] = ACTIVITYPUB_RECIPIENT
        data['recipient']['url'] = None
        data['recipient']['inbox'] = None
        modified = True
    recipient_host = data['recipient']['account'].split('@')[-1].lower()
    if data['recipient']['url'] is None:
        webfinger_url = 'https://' + recipient_host + '/.well-known/webfinger?resource=acct:' + data['recipient']['account'][1:]
        r = requests.get(webfinger_url)
        if r.status_code == 200:
            lookup = r.json()
            if 'links' in lookup:
                for link in lookup['links']:
                    if link.get('rel') == 'self' and 'href' in link:
                        data['recipient']['url'] = link['href']
                modified = True
    if data['recipient']['inbox'] is None and data['recipient']['url'] is not None:
        r = requests.get(data['recipient']['url'], headers={'accept': 'application/activity+json'})
        if r.status_code == 200:
            lookup = r.json()
            if 'inbox' in lookup:
                data['recipient']['inbox'] = lookup['inbox']
                modified = True
    # Sorted so that an unchanged address set compares equal to the stored
    # list instead of depending on set iteration order.
    newRecipientIPs = sorted({info[4][0] for info in socket.getaddrinfo(recipient_host, None)})
    if data['recipient']['ip'] != newRecipientIPs:
        data['recipient']['ip'] = newRecipientIPs
        modified = True

    if data['sender']['did'] is None:
        r = requests.get('https://api.bsky.app/xrpc/com.atproto.identity.resolveHandle?handle=' + BLUESKY_PROFILE[1:])
        data['sender']['did'] = r.json()['did']
        modified = True
    r = requests.get('https://bsky.social/xrpc/com.atproto.repo.listRecords?repo=' + data['sender']['did'] + '&collection=app.bsky.actor.profile&limit=1')
    result = r.json()
    # Tolerate a missing profile record and missing optional fields: absent
    # values are stored as None, which actor() already handles.
    records = result.get('records') or []
    profile = records[0].get('value', {}) if records else {}
    for prop in ['displayName', 'description']:
        if data['sender'][prop] != profile.get(prop):
            data['sender'][prop] = profile.get(prop)
            modified = True
    try:
        if data['sender']['avatarLink'] != profile['avatar']['ref']['$link']:
            data['sender']['avatarLink'] = profile['avatar']['ref']['$link']
            data['sender']['avatarType'] = profile['avatar']['mimeType']
            modified = True
            file_type = data['sender']['avatarType'].split('/')[1]
            r_avatar = requests.get('https://cdn.bsky.app/img/avatar/plain/' + data['sender']['did'] + '/' + data['sender']['avatarLink'] + '@' + file_type)
            if r_avatar.status_code == 200:
                avatar_path = 'avatar.' + file_type
                with open(avatar_path, 'wb') as fp:
                    fp.write(r_avatar.content)
                # Drop avatars left over from a previous image type; the
                # serving code simply picks the first avatar.* it finds.
                for stale in glob.glob('avatar.*'):
                    if stale != avatar_path:
                        os.remove(stale)
    except KeyError:
        # I assume this means the user has no avatar
        if data['sender']['avatarLink'] is not None or data['sender']['avatarType'] is not None:
            data['sender']['avatarLink'] = None
            data['sender']['avatarType'] = None
            modified = True
            for avatar in glob.glob('avatar.*'):
                os.remove(avatar)

    r = requests.get('https://bsky.social/xrpc/com.atproto.repo.listRecords?repo=' + data['sender']['did'] + '&collection=app.bsky.feed.post&limit=10')
    result = r.json()
    # Defined before the try block so the code below can rely on it even when
    # parsing fails early.
    posts = []
    try:
        newLastPosted = data['lastPosted']
        for source_post in result['records']:
            if newLastPosted is None:
                newLastPosted = source_post['value']['createdAt']
            else:
                newLastPosted = max(source_post['value']['createdAt'], newLastPosted)
            if data['lastPosted'] is None or source_post['value']['createdAt'] > data['lastPosted']:
                # A post without language tags must not abort the whole batch.
                langs = source_post['value'].get('langs') or [None]
                post = {
                    'id': source_post['uri'].split('/')[-1],
                    'content': source_post['value']['text'],
                    'lang': langs[0],
                    'date': source_post['value']['createdAt'],
                }
                if data['sentPosts'] is not None and post['id'] in data['sentPosts']:
                    continue
                if 'reply' in source_post['value']:
                    reply_to = source_post['value']['reply']['parent']['uri'].split('/')[-1]
                    valid_parents = list(map(lambda p: p['id'], posts))
                    if data['sentPosts'] is not None:
                        valid_parents.extend(data['sentPosts'])
                    if reply_to not in valid_parents:
                        # Only bridge self-replies, not replies to other people (see comment further down).
                        continue
                    post['reply-to'] = reply_to
                if 'embed' in source_post['value'] and source_post['value']['embed']['$type'] == 'app.bsky.embed.images':
                    post['attachments'] = []
                    for image in source_post['value']['embed']['images']:
                        post['attachments'].append({
                            'type': 'image',
                            'link': image['image']['ref']['$link'],
                            'imageType': image['image']['mimeType'],
                        })
                        if 'alt' in image:
                            post['attachments'][-1]['alt'] = image['alt']
                        file_type = image['image']['mimeType'].split('/')[1]
                        r_image = requests.get('https://cdn.bsky.app/img/feed_fullsize/plain/' + data['sender']['did'] + '/' + image['image']['ref']['$link'] + '@' + file_type)
                        if r_image.status_code == 200:
                            if not os.path.isdir('images'):
                                os.makedirs('images')
                            with open(os.path.join('images', image['image']['ref']['$link'] + '.' + file_type), 'wb') as fp:
                                fp.write(r_image.content)
                posts.append(post)
        if data['lastPosted'] != newLastPosted:
            data['lastPosted'] = newLastPosted
            modified = True
    except KeyError as e:
        # Something about the JSON is different than expected. Posts collected
        # so far are still processed below.
        logger.warning('Unexpected post data from Bluesky, missing key %s.', e)

    # Process the collected posts in reverse of the order they were fetched.
    posts.reverse()
    current_posts = []
    for post in posts:
        if data['recipient']['followingID'] is not None and data['sentPosts'] is not None:
            post_payload = {
                '@context': 'https://www.w3.org/ns/activitystreams',
                'type': 'Create',
                'actor': data['app']['urlRoot'],
                'object': {
                    'id': data['app']['urlRoot'] + post['id'],
                    'type': 'Note',
                    # Bluesky text is plain text while Note content is HTML,
                    # so escape it the same way the actor summary is escaped.
                    'content': html.escape(post['content']).replace('\n', '<br>'),
                    'published': post['date'],
                    'attributedTo': data['app']['urlRoot'],
                    'to': data['recipient']['url'],
                },
                'published': email.utils.formatdate(usegmt=True),
                'to': data['recipient']['url'],
            }
            if 'reply-to' in post:
                post_payload['object']['inReplyTo'] = data['app']['urlRoot'] + post['reply-to']
            if 'attachments' in post:
                post_payload['object']['attachment'] = []
                for attachment in post['attachments']:
                    attach = {
                        'url': data['app']['urlRoot'] + 'attachment/' + attachment['link'],
                    }
                    # 'alt' is only stored when the source image had one.
                    if 'alt' in attachment:
                        attach['summary'] = attachment['alt']
                    post_payload['object']['attachment'].append(attach)
            post_response = send(post_payload)
            if post_response is None:
                logger.error('Failed to post ID ' + post['id'] + '.')
            elif post_response[1] >= 400:
                logger.error('Failed to post ID ' + post['id'] + ': got HTTP error ' + str(post_response[1]) + '.')
                logger.error(post_response[0])
        # Recorded even when nothing was sent, so the post is not retried.
        current_posts.append(post['id'])
    if data['sentPosts'] is None:
        data['sentPosts'] = []
        modified = True
    if len(current_posts) > 0:
        # If a Bluesky post is a reply, we get the "in reply to" post ID from the API. To decide whether to bridge
        # the post, we want to know if the Bluesky user is replying to themselves in a thread or to someone else.
        # To know that, we keep a record of the most recent 100 post IDs we've sent. This way, threads don't get
        # broken unless the author is replying to a very old post of their own.
        data['sentPosts'].extend(current_posts)
        data['sentPosts'] = data['sentPosts'][-100:]
        modified = True
    if modified:
        with open('data.json', 'w', encoding='utf-8') as fp:
            json.dump(data, fp)
def version():
    """Return the Pinhole version number printed in the help text."""
    release = 1
    return release
# Command-line entry point:
#   --update          run one update() pass and exit
#   --help / --about  print the banner and exit
#   (anything else)   print the banner and start the Flask development server
if __name__ == '__main__':
    if len(sys.argv) >= 2 and '--update' in sys.argv:
        # Run update() inside an application context.
        with app.app_context():
            update()
    else:
        help_text = """Pinhole v%d
<https://fietkau.software/pinhole>

Open this file in a text editor to change the configuration. Current settings:
BLUESKY_PROFILE = %s
ACTIVITYPUB_RECIPIENT = %s

""" % (version(), BLUESKY_PROFILE, ACTIVITYPUB_RECIPIENT)
        print(help_text)
        if '--help' not in sys.argv and '--about' not in sys.argv:
            app.run(port=app.config.get('PORT', 5000))