From de6be9d6ad2d5d15a403904c9edcadca995b0450 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Thu, 26 May 2022 21:12:36 -0400 Subject: [PATCH 01/12] Use ActivityPub outbox for Mastodon (et al.) feed closes #2754 --- bridges/MastodonBridge.php | 125 +++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 39 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 549647ea23b..74ab48d9d58 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -1,11 +1,16 @@ array( 'name' => 'Without replies', 'type' => 'checkbox', - 'title' => 'Only return initial toots' + 'title' => 'Only return initial statuses' ), 'noboost' => array( 'name' => 'Without boosts', @@ -27,6 +32,10 @@ class MastodonBridge extends FeedExpander { ) )); + const AP_HEADER = array( + 'Accept: application/activity+json' + ); + public function getName(){ switch($this->queriedContext) { case 'By username': @@ -35,38 +44,6 @@ public function getName(){ } } - protected function parseItem($newItem){ - $item = parent::parseItem($newItem); - - $content = str_get_html($item['content']); - $title = str_get_html($item['title']); - - $item['title'] = $content->plaintext; - - if(strlen($item['title']) > 75) { - $item['title'] = substr($item['title'], 0, strpos(wordwrap($item['title'], 75), "\n")) . '...'; - } - - if(strpos($title, 'shared a status by') !== false) { - if($this->getInput('noboost')) { - return null; - } - - preg_match('/shared a status by (\S{0,})/', $title, $matches); - $item['title'] = 'Boost ' . $matches[1] . ' ' . 
$item['title']; - $item['author'] = $matches[1]; - } else { - $item['author'] = $this->getInput('canusername'); - } - - // Check if it's a initial toot or a response - if($this->getInput('norep') && preg_match('/^@.+/', trim($content->plaintext))) { - return null; - } - - return $item; - } - private function getInstance(){ preg_match('/^@[a-zA-Z0-9_]+@(.+)/', $this->getInput('canusername'), $matches); return $matches[1]; @@ -78,13 +55,83 @@ private function getUsername(){ } public function getURI(){ - if($this->getInput('canusername')) - return 'https://' . $this->getInstance() . '/@' . $this->getUsername() . '.rss'; + if($this->getInput('canusername')) { + // We parse webfinger to make sure the URL is correct. This is mostly because + // MissKey uses user ID instead of the username in the endpoint, and also to + // be compatible with future ActivityPub implementations. + $resource = 'acct:' . $this->getUsername() . '@' . $this->getInstance(); + $webfingerUrl = 'https://' . $this->getInstance() . '/.well-known/webfinger?resource=' . $resource; + $webfingerHeader = array( + 'Content-Type: application/jrd+json' + ); + $webfinger = json_decode(getContents($webfingerUrl, $webfingerHeader), true); + if ($webfinger['subject'] == $resource) { + foreach ($webfinger['links'] as $link) { + if ($link['type'] == 'application/activity+json') + return $link['href'] . 
'/outbox?page=true'; + } + } + } return parent::getURI(); } public function collectData(){ - return $this->collectExpandableDatas($this->getURI()); + $url = $this->getURI(); + $content = json_decode(getContents($url, self::AP_HEADER), true); + if ($content['id'] == $url) { + foreach ($content['orderedItems'] as $status) { + $this->items[] = $this->parseItem($status); + } + } + else returnServerError('Unexpected response from server.'); + } + + protected function parseItem($content) { + $item = array(); + switch ($content['type']) { + case 'Announce': // boost + if ($this->getInput('noboost')) return null; + // We fetch the boosted content. + try { + $rtContent = json_decode(getContents($content['object'], self::AP_HEADER), true); + // We fetch the author, since we cannot always assume the format of the URL. + $user = json_decode(getContents($rtContent['attributedTo'], self::AP_HEADER), true); + preg_match('/http(|s):\/\/([a-z0-9-\.]{0,})\//', $rtContent['attributedTo'], $matches); + $rtUser = '@' . $user['preferredUsername'] . '@' . $matches[2]; + $item['author'] = $rtUser; + $item['title'] = 'Shared a status by ' . $rtUser . ': '; + $item = $this->parseObject($rtContent, $item); + } catch (Throwable $th) { + return null; + } + break; + case 'Create': + if ($this->getInput('norep') && $content['object']['inReplyTo']) return null; + $item['author'] = $this->getInput('canusername'); + $item['title'] = ''; + $item = $this->parseObject($content['object'], $item); + } + $item['timestamp'] = $content['published']; + $item['uid'] = $content['id']; + return $item; + } + + protected function parseObject($object, $item) { + $item['content'] = $object['content']; + if (strlen(strip_tags($object['content'])) > 75) { + $item['title'] = $item['title'] . substr(strip_tags($object['content']), 0, strpos(wordwrap(strip_tags($object['content']), 75), "\n")) . '...'; + } + else $item['title'] = $item['title'] . 
strip_tags($object['content']); + $item['uri'] = $object['id']; + foreach ($object['attachment'] as $attachment) { + // Only process REMOTE pictures (prevent xss) + if (preg_match('/^image\//', $attachment['mediaType'], $match) && preg_match('/^http(s|):\/\//', $attachment['url'], $match)) { + $item['content'] = $item['content'] . '
'; + } + } + return $item; } } From 40eb1e75a8692940de1545351b98e24966b239e8 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Thu, 26 May 2022 21:14:58 -0400 Subject: [PATCH 02/12] Better description for Mastodon bridge I mean I could rename it to ActivityPub bridge if the maintainer so pleases --- bridges/MastodonBridge.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 74ab48d9d58..535a88028f1 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -8,9 +8,9 @@ class MastodonBridge extends FeedExpander { // use the official feed: https://pixelfed.instance/users/username.atom (Posts only) const MAINTAINER = 'Austin Huang'; - const NAME = 'ActivityPub (Mastodon, Pleroma, Misskey...) Bridge'; + const NAME = 'Mastodon Bridge'; const CACHE_TIMEOUT = 900; // 15mn - const DESCRIPTION = 'Returns recent statuses for an ActivityPub-compatible account.'; + const DESCRIPTION = 'Returns recent statuses for a Mastodon account. May support other ActivityPub-compatible accounts.'; const URI = 'https://mastodon.social'; const PARAMETERS = array(array( From 1f0cd8903ac49fe8b1bec2b70a1c47b58809c7ee Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Thu, 26 May 2022 21:22:02 -0400 Subject: [PATCH 03/12] [Mastodon] Please the lint --- bridges/MastodonBridge.php | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 535a88028f1..03a4755a053 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -10,7 +10,7 @@ class MastodonBridge extends FeedExpander { const MAINTAINER = 'Austin Huang'; const NAME = 'Mastodon Bridge'; const CACHE_TIMEOUT = 900; // 15mn - const DESCRIPTION = 'Returns recent statuses for a Mastodon account. May support other ActivityPub-compatible accounts.'; + const DESCRIPTION = 'Returns recent statuses. 
May support other ActivityPub-compatible accounts.'; const URI = 'https://mastodon.social'; const PARAMETERS = array(array( @@ -83,8 +83,7 @@ public function collectData(){ foreach ($content['orderedItems'] as $status) { $this->items[] = $this->parseItem($status); } - } - else returnServerError('Unexpected response from server.'); + } else returnServerError('Unexpected response from server.'); } protected function parseItem($content) { @@ -120,13 +119,14 @@ protected function parseItem($content) { protected function parseObject($object, $item) { $item['content'] = $object['content']; if (strlen(strip_tags($object['content'])) > 75) { - $item['title'] = $item['title'] . substr(strip_tags($object['content']), 0, strpos(wordwrap(strip_tags($object['content']), 75), "\n")) . '...'; - } - else $item['title'] = $item['title'] . strip_tags($object['content']); + $item['title'] = $item['title'] . + substr(strip_tags($object['content']), 0, strpos(wordwrap(strip_tags($object['content']), 75), "\n")) . '...'; + } else $item['title'] = $item['title'] . strip_tags($object['content']); $item['uri'] = $object['id']; foreach ($object['attachment'] as $attachment) { // Only process REMOTE pictures (prevent xss) - if (preg_match('/^image\//', $attachment['mediaType'], $match) && preg_match('/^http(s|):\/\//', $attachment['url'], $match)) { + if (preg_match('/^image\//', $attachment['mediaType'], $match) && + preg_match('/^http(s|):\/\//', $attachment['url'], $match)) { $item['content'] = $item['content'] . '
'; From a1a432d7cf94a929a7061899e1189aa61833b4bf Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Thu, 26 May 2022 21:48:46 -0400 Subject: [PATCH 04/12] [Mastodon] address feedback --- bridges/MastodonBridge.php | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 03a4755a053..4beefa15d93 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -22,13 +22,13 @@ class MastodonBridge extends FeedExpander { 'norep' => array( 'name' => 'Without replies', 'type' => 'checkbox', - 'title' => 'Only return initial statuses' + 'title' => 'Only return statuses that are not replies, as determined by relations (not mentions).' ), 'noboost' => array( 'name' => 'Without boosts', 'required' => false, 'type' => 'checkbox', - 'title' => 'Hide boosts' + 'title' => 'Hide boosts. Note that RSS-Bridge will fetch the original status from other federated instances.' ) )); @@ -102,7 +102,9 @@ protected function parseItem($content) { $item['title'] = 'Shared a status by ' . $rtUser . ': '; $item = $this->parseObject($rtContent, $item); } catch (Throwable $th) { - return null; + $item['title'] = 'Shared an unreachable status: ' . $content['object']; + $item['content'] = $content['object']; + $item['uri'] = $content['object']; } break; case 'Create': @@ -125,7 +127,7 @@ protected function parseObject($object, $item) { $item['uri'] = $object['id']; foreach ($object['attachment'] as $attachment) { // Only process REMOTE pictures (prevent xss) - if (preg_match('/^image\//', $attachment['mediaType'], $match) && + if ($attachment['mediaType'] && preg_match('/^image\//', $attachment['mediaType'], $match) && preg_match('/^http(s|):\/\//', $attachment['url'], $match)) { $item['content'] = $item['content'] . '
Date: Fri, 27 May 2022 14:22:40 -0400 Subject: [PATCH 05/12] [Mastodon] fix link, address spelling case bug --- bridges/MastodonBridge.php | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 4beefa15d93..382f2009f57 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -65,11 +65,9 @@ public function getURI(){ 'Content-Type: application/jrd+json' ); $webfinger = json_decode(getContents($webfingerUrl, $webfingerHeader), true); - if ($webfinger['subject'] == $resource) { - foreach ($webfinger['links'] as $link) { - if ($link['type'] == 'application/activity+json') - return $link['href'] . '/outbox?page=true'; - } + foreach ($webfinger['links'] as $link) { + if ($link['type'] == 'application/activity+json') + return $link['href']; } } @@ -77,7 +75,7 @@ public function getURI(){ } public function collectData(){ - $url = $this->getURI(); + $url = $this->getURI() . '/outbox?page=true'; $content = json_decode(getContents($url, self::AP_HEADER), true); if ($content['id'] == $url) { foreach ($content['orderedItems'] as $status) { From 6c239a5473d2b9898c7e328eb162a0abd5d1c2cf Mon Sep 17 00:00:00 2001 From: Dag Date: Sat, 4 Jun 2022 21:53:01 +0200 Subject: [PATCH 06/12] refactor --- bridges/MastodonBridge.php | 57 ++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 382f2009f57..3d8c6564d7c 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -1,6 +1,6 @@ queriedContext) { - case 'By username': - return $this->getInput('canusername'); - default: return parent::getName(); + case 'By username': + return $this->getInput('canusername'); + default: return parent::getName(); } } - private function getInstance(){ + private function getInstance() { preg_match('/^@[a-zA-Z0-9_]+@(.+)/', $this->getInput('canusername'), $matches); return 
$matches[1]; } - private function getUsername(){ + private function getUsername() { preg_match('/^@([a-zA-Z_0-9_]+)@.+/', $this->getInput('canusername'), $matches); return $matches[1]; } @@ -66,29 +66,34 @@ public function getURI(){ ); $webfinger = json_decode(getContents($webfingerUrl, $webfingerHeader), true); foreach ($webfinger['links'] as $link) { - if ($link['type'] == 'application/activity+json') + if ($link['type'] === 'application/activity+json') { return $link['href']; + } } } return parent::getURI(); } - public function collectData(){ + public function collectData() { $url = $this->getURI() . '/outbox?page=true'; $content = json_decode(getContents($url, self::AP_HEADER), true); - if ($content['id'] == $url) { + if ($content['id'] === $url) { foreach ($content['orderedItems'] as $status) { $this->items[] = $this->parseItem($status); } - } else returnServerError('Unexpected response from server.'); + } else { + throw new \Exception('Unexpected response from server.'); + } } protected function parseItem($content) { $item = array(); switch ($content['type']) { case 'Announce': // boost - if ($this->getInput('noboost')) return null; + if ($this->getInput('noboost')) { + return null; + } // We fetch the boosted content. try { $rtContent = json_decode(getContents($content['object'], self::AP_HEADER), true); @@ -106,7 +111,9 @@ protected function parseItem($content) { } break; case 'Create': - if ($this->getInput('norep') && $content['object']['inReplyTo']) return null; + if ($this->getInput('norep') && $content['object']['inReplyTo']) { + return null; + } $item['author'] = $this->getInput('canusername'); $item['title'] = ''; $item = $this->parseObject($content['object'], $item); @@ -118,18 +125,26 @@ protected function parseItem($content) { protected function parseObject($object, $item) { $item['content'] = $object['content']; - if (strlen(strip_tags($object['content'])) > 75) { - $item['title'] = $item['title'] . 
- substr(strip_tags($object['content']), 0, strpos(wordwrap(strip_tags($object['content']), 75), "\n")) . '...'; - } else $item['title'] = $item['title'] . strip_tags($object['content']); + $strippedContent = strip_tags($object['content']); + + if (mb_strlen($strippedContent) > 75) { + $contentSubstring = mb_substr($strippedContent, 0, mb_strpos(wordwrap($strippedContent, 75), "\n")); + $item['title'] .= $contentSubstring . '...'; + } else { + $item['title'] .= $strippedContent; + } $item['uri'] = $object['id']; foreach ($object['attachment'] as $attachment) { // Only process REMOTE pictures (prevent xss) - if ($attachment['mediaType'] && preg_match('/^image\//', $attachment['mediaType'], $match) && - preg_match('/^http(s|):\/\//', $attachment['url'], $match)) { + if ($attachment['mediaType'] + && preg_match('/^image\//', $attachment['mediaType'], $match) + && preg_match('/^http(s|):\/\//', $attachment['url'], $match) + ) { $item['content'] = $item['content'] . '
'; + if ($attachment['name']) { + $item['content'] .= sprintf('alt="%s" ', $attachment['name']); + } + $item['content'] .= sprintf('src="%s" />', $attachment['url']); } } return $item; From 6d9b531a98560417e3c220ebc5c5dceebe0ef174 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sat, 18 Jun 2022 21:13:36 -0400 Subject: [PATCH 07/12] [Mastodon] add username cache, fix try-catch, rename --- bridges/MastodonBridge.php | 42 ++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 3d8c6564d7c..37f511b21ec 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -2,15 +2,16 @@ class MastodonBridge extends BridgeAbstract { // This script attempts to imitiate the behaviour of a read-only ActivityPub server - // to read the outbox. + // to read the outbox. This does not support instances that require HTTP signatures + // for ActivityPub endpoints. // Note: Most PixelFed instances have ActivityPub outbox disabled, - // use the official feed: https://pixelfed.instance/users/username.atom (Posts only) + // so use the official feed: https://pixelfed.instance/users/username.atom (Posts only) const MAINTAINER = 'Austin Huang'; - const NAME = 'Mastodon Bridge'; + const NAME = 'ActivityPub Bridge'; const CACHE_TIMEOUT = 900; // 15mn - const DESCRIPTION = 'Returns recent statuses. May support other ActivityPub-compatible accounts.'; + const DESCRIPTION = 'Returns recent statuses. 
Supports ActivityPub-compatible platforms, including Mastodon, Pleroma and Misskey.'; const URI = 'https://mastodon.social'; const PARAMETERS = array(array( @@ -80,14 +81,15 @@ public function collectData() { $content = json_decode(getContents($url, self::AP_HEADER), true); if ($content['id'] === $url) { foreach ($content['orderedItems'] as $status) { - $this->items[] = $this->parseItem($status); + $users = array(); + $this->items[] = $this->parseItem($status, $users); } } else { throw new \Exception('Unexpected response from server.'); } } - protected function parseItem($content) { + protected function parseItem($content, &$users) { $item = array(); switch ($content['type']) { case 'Announce': // boost @@ -97,21 +99,31 @@ protected function parseItem($content) { // We fetch the boosted content. try { $rtContent = json_decode(getContents($content['object'], self::AP_HEADER), true); - // We fetch the author, since we cannot always assume the format of the URL. - $user = json_decode(getContents($rtContent['attributedTo'], self::AP_HEADER), true); - preg_match('/http(|s):\/\/([a-z0-9-\.]{0,})\//', $rtContent['attributedTo'], $matches); - $rtUser = '@' . $user['preferredUsername'] . '@' . $matches[2]; - $item['author'] = $rtUser; - $item['title'] = 'Shared a status by ' . $rtUser . ': '; - $item = $this->parseObject($rtContent, $item); - } catch (Throwable $th) { + if ($rtContent['attributedTo'] && isset($users[$rtContent['attributedTo']])) { + $item['author'] = $users[$rtContent['attributedTo']]; + $item['title'] = 'Shared a status by ' . $item['author'] . ': '; + $item = $this->parseObject($rtContent, $item); + } else { + // We fetch the author, since we cannot always assume the format of the URL. 
+ $user = json_decode(getContents($rtContent['attributedTo'], self::AP_HEADER), true); + preg_match('/http(|s):\/\/([a-z0-9-\.]{0,})\//', $rtContent['attributedTo'], $matches); + // We assume that the server name as indicated by the path is the actual server name, + // since using webfinger to delegate domains is not officially supported, and it only + // seems to work in one way. + $rtUser = '@' . $user['preferredUsername'] . '@' . $matches[2]; + $users[$rtContent['attributedTo']] = $rtUser; + $item['author'] = $rtUser; + $item['title'] = 'Shared a status by ' . $rtUser . ': '; + $item = $this->parseObject($rtContent, $item); + } + } catch (UnexpectedResponseException $th) { $item['title'] = 'Shared an unreachable status: ' . $content['object']; $item['content'] = $content['object']; $item['uri'] = $content['object']; } break; case 'Create': - if ($this->getInput('norep') && $content['object']['inReplyTo']) { + if ($this->getInput('norep') && isset($content['object']['inReplyTo'])) { return null; } $item['author'] = $this->getInput('canusername'); From 1832da46474fb58333f0341094286881ba385f2d Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sat, 18 Jun 2022 21:15:33 -0400 Subject: [PATCH 08/12] [Mastodon] shorten description to satisfy the lint --- bridges/MastodonBridge.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 37f511b21ec..72ef4a4d097 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -11,7 +11,7 @@ class MastodonBridge extends BridgeAbstract { const MAINTAINER = 'Austin Huang'; const NAME = 'ActivityPub Bridge'; const CACHE_TIMEOUT = 900; // 15mn - const DESCRIPTION = 'Returns recent statuses. Supports ActivityPub-compatible platforms, including Mastodon, Pleroma and Misskey.'; + const DESCRIPTION = 'Returns recent statuses. 
Supports Mastodon, Pleroma and Misskey, among others.'; const URI = 'https://mastodon.social'; const PARAMETERS = array(array( From c0ae90e43a5ca1e56d6ab20fbd2d996a4e6ce9d7 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sun, 19 Jun 2022 17:10:35 -0400 Subject: [PATCH 09/12] [Mastodon] address feedback --- bridges/MastodonBridge.php | 57 +++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 72ef4a4d097..5e745db5727 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -2,8 +2,7 @@ class MastodonBridge extends BridgeAbstract { // This script attempts to imitiate the behaviour of a read-only ActivityPub server - // to read the outbox. This does not support instances that require HTTP signatures - // for ActivityPub endpoints. + // to read the outbox. // Note: Most PixelFed instances have ActivityPub outbox disabled, // so use the official feed: https://pixelfed.instance/users/username.atom (Posts only) @@ -14,6 +13,19 @@ class MastodonBridge extends BridgeAbstract { const DESCRIPTION = 'Returns recent statuses. Supports Mastodon, Pleroma and Misskey, among others.'; const URI = 'https://mastodon.social'; + // Some Mastodon instances use Secure Mode which requires all requests to be signed. + // You do not need this for most instances, but if you want to support every known + // instance, then you should configure them. 
+ // See also https://docs.joinmastodon.org/spec/security/#http + const CONFIGURATION = array( + 'private_key' => array( + 'required' => false, + ), + 'key_id' => array( + 'required' => false, + ), + ); + const PARAMETERS = array(array( 'canusername' => array( 'name' => 'Canonical username', @@ -38,11 +50,10 @@ class MastodonBridge extends BridgeAbstract { ); public function getName() { - switch($this->queriedContext) { - case 'By username': - return $this->getInput('canusername'); - default: return parent::getName(); + if($this->getInput('canusername')) { + return $this->getInput('canusername'); } + return parent::getName(); } private function getInstance() { @@ -58,8 +69,8 @@ private function getUsername() { public function getURI(){ if($this->getInput('canusername')) { // We parse webfinger to make sure the URL is correct. This is mostly because - // MissKey uses user ID instead of the username in the endpoint, and also to - // be compatible with future ActivityPub implementations. + // MissKey uses user ID instead of the username in the endpoint, domain delegations, + // and also to be compatible with future ActivityPub implementations. $resource = 'acct:' . $this->getUsername() . '@' . $this->getInstance(); $webfingerUrl = 'https://' . $this->getInstance() . '/.well-known/webfinger?resource=' . 
$resource; $webfingerHeader = array( @@ -81,15 +92,14 @@ public function collectData() { $content = json_decode(getContents($url, self::AP_HEADER), true); if ($content['id'] === $url) { foreach ($content['orderedItems'] as $status) { - $users = array(); - $this->items[] = $this->parseItem($status, $users); + $this->items[] = $this->parseItem($status); } } else { throw new \Exception('Unexpected response from server.'); } } - protected function parseItem($content, &$users) { + protected function parseItem($content) { $item = array(); switch ($content['type']) { case 'Announce': // boost @@ -98,31 +108,28 @@ protected function parseItem($content, &$users) { } // We fetch the boosted content. try { - $rtContent = json_decode(getContents($content['object'], self::AP_HEADER), true); - if ($rtContent['attributedTo'] && isset($users[$rtContent['attributedTo']])) { - $item['author'] = $users[$rtContent['attributedTo']]; - $item['title'] = 'Shared a status by ' . $item['author'] . ': '; - $item = $this->parseObject($rtContent, $item); - } else { + $rtContent = $this->fetchAP($content['object']); + $rtUser = $this->loadCacheValue($rtContent['attributedTo'], 86400); + if (!isset($rtUser)) { // We fetch the author, since we cannot always assume the format of the URL. $user = json_decode(getContents($rtContent['attributedTo'], self::AP_HEADER), true); - preg_match('/http(|s):\/\/([a-z0-9-\.]{0,})\//', $rtContent['attributedTo'], $matches); + preg_match('/https?:\/\/([a-z0-9-\.]{0,})\//', $rtContent['attributedTo'], $matches); // We assume that the server name as indicated by the path is the actual server name, // since using webfinger to delegate domains is not officially supported, and it only // seems to work in one way. - $rtUser = '@' . $user['preferredUsername'] . '@' . $matches[2]; - $users[$rtContent['attributedTo']] = $rtUser; - $item['author'] = $rtUser; - $item['title'] = 'Shared a status by ' . $rtUser . 
': '; - $item = $this->parseObject($rtContent, $item); + $rtUser = '@' . $user['preferredUsername'] . '@' . $matches[1]; + $this->saveCacheValue($rtContent['attributedTo'], $rtUser); } + $item['author'] = $rtUser; + $item['title'] = 'Shared a status by ' . $rtUser . ': '; + $item = $this->parseObject($rtContent, $item); } catch (UnexpectedResponseException $th) { $item['title'] = 'Shared an unreachable status: ' . $content['object']; $item['content'] = $content['object']; $item['uri'] = $content['object']; } break; - case 'Create': + case 'Create': // posts if ($this->getInput('norep') && isset($content['object']['inReplyTo'])) { return null; } @@ -137,7 +144,7 @@ protected function parseItem($content, &$users) { protected function parseObject($object, $item) { $item['content'] = $object['content']; - $strippedContent = strip_tags($object['content']); + $strippedContent = strip_tags(str_replace('
', ' ', $object['content'])); if (mb_strlen($strippedContent) > 75) { $contentSubstring = mb_substr($strippedContent, 0, mb_strpos(wordwrap($strippedContent, 75), "\n")); From f973ecebb4d604aa3a36450bd1d2eda5452a28e4 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Sun, 19 Jun 2022 17:10:45 -0400 Subject: [PATCH 10/12] [Mastodon] support Secure Mode instances --- bridges/MastodonBridge.php | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 5e745db5727..c7865fea763 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -45,10 +45,6 @@ class MastodonBridge extends BridgeAbstract { ) )); - const AP_HEADER = array( - 'Accept: application/activity+json' - ); - public function getName() { if($this->getInput('canusername')) { return $this->getInput('canusername'); @@ -89,7 +85,7 @@ public function getURI(){ public function collectData() { $url = $this->getURI() . '/outbox?page=true'; - $content = json_decode(getContents($url, self::AP_HEADER), true); + $content = $this->fetchAP($url); if ($content['id'] === $url) { foreach ($content['orderedItems'] as $status) { $this->items[] = $this->parseItem($status); @@ -112,7 +108,7 @@ protected function parseItem($content) { $rtUser = $this->loadCacheValue($rtContent['attributedTo'], 86400); if (!isset($rtUser)) { // We fetch the author, since we cannot always assume the format of the URL. 
- $user = json_decode(getContents($rtContent['attributedTo'], self::AP_HEADER), true); + $user = $this->fetchAP($rtContent['attributedTo']); preg_match('/https?:\/\/([a-z0-9-\.]{0,})\//', $rtContent['attributedTo'], $matches); // We assume that the server name as indicated by the path is the actual server name, // since using webfinger to delegate domains is not officially supported, and it only @@ -168,4 +164,31 @@ protected function parseObject($object, $item) { } return $item; } + + protected function fetchAP($url) { + $d = new DateTime(); + $d->setTimezone(new DateTimeZone('GMT')); + $date = $d->format('D, d M Y H:i:s e'); + preg_match('/https?:\/\/([a-z0-9-\.]{0,})(\/[^?#]+)/', $url, $matches); + $headers = array( + 'Accept: application/activity+json', + 'Host: ' . $matches[1], + 'Date: ' . $date + ); + $privateKey = $this->getOption('private_key'); + $keyId = $this->getOption('key_id'); + if ($privateKey && $keyId) { + $pkey = openssl_pkey_get_private('file://' . $privateKey); + $toSign = '(request-target): get ' . $matches[2] . "\nhost: " . $matches[1] . "\ndate: " . $date; + $result = openssl_sign($toSign, $signature, $pkey, 'RSA-SHA256'); + if ($result) { + Debug::log($toSign); + $sig = 'Signature: keyId="' . $keyId . '",headers="(request-target) host date",signature="' . + base64_encode($signature) . 
'"'; + Debug::log($sig); + array_push($headers, $sig); + } + } + return json_decode(getContents($url, $headers), true); + } } From 06f18b4f560afc8cb5d53a8cbd883e1f913bc577 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Mon, 20 Jun 2022 15:16:55 -0400 Subject: [PATCH 11/12] [Mastodon] add config documentation --- bridges/MastodonBridge.php | 4 +- .../ActivityPub_(Mastodon).md | 57 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 docs/10_Bridge_Specific/ActivityPub_(Mastodon).md diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index c7865fea763..bbbc558701c 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -10,7 +10,9 @@ class MastodonBridge extends BridgeAbstract { const MAINTAINER = 'Austin Huang'; const NAME = 'ActivityPub Bridge'; const CACHE_TIMEOUT = 900; // 15mn - const DESCRIPTION = 'Returns recent statuses. Supports Mastodon, Pleroma and Misskey, among others.'; + const DESCRIPTION = 'Returns recent statuses. Supports Mastodon, Pleroma and Misskey, among others. Access to + instances that have Authorized Fetch enabled requires + configuration.'; const URI = 'https://mastodon.social'; // Some Mastodon instances use Secure Mode which requires all requests to be signed. diff --git a/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md b/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md new file mode 100644 index 00000000000..d5162e7d1ba --- /dev/null +++ b/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md @@ -0,0 +1,57 @@ +# MastodonBridge (aka. ActivityPub Bridge) + +Certain ActivityPub implementations, such as [Mastodon](https://docs.joinmastodon.org/spec/security/#http) and [Pleroma](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#activitypub), allow instances to require requests to ActivityPub endpoints to be signed. 
RSS-Bridge can handle the HTTP signature header if a private key is provided, while the ActivityPub instance must be able to know the corresponding public key. + +You do **not** need to configure this if their usage is limited to accessing ActivityPub instances that do not have such requirements. + +## Configuration + +[This article](https://blog.joinmastodon.org/2018/06/how-to-implement-a-basic-activitypub-server/) is referenced. + +1. Select a domain. It may, but does not need to, be the one RSS-Bridge is on. For all subsequent steps, replace `DOMAIN` with this domain. +2. Run the following commands on your machine: +```bash +$ openssl genrsa -out private.pem 2048 +$ openssl rsa -in private.pem -outform PEM -pubout -out public.pem +``` +3. Place `private.pem` in an appropriate location and note down its absolute path. +4. Serve the following page at `https://DOMAIN/.well-known/webfinger`: +```json +{ + "subject": "acct:DOMAIN@DOMAIN", + "aliases": ["https://DOMAIN/actor"], + "links": [{ + "rel": "self", + "type": "application/activity+json", + "href": "https://DOMAIN/actor" + }] +} +``` +5. Serve the following page at `https://DOMAIN/actor`, replacing the value of `publicKeyPem` with the contents of the `public.pem` file in step 2: +```json +{ + "@context": [ + "https://www.w3.org/ns/activitystreams", + "https://w3id.org/security/v1" + ], + "id": "https://DOMAIN/actor", + "type": "Application", + "inbox": "https://DOMAIN/actor/inbox", + "preferredUsername": "DOMAIN", + "publicKey": { + "id": "https://DOMAIN/actor#main-key", + "owner": "https://DOMAIN/actor", + "publicKeyPem": "-----BEGIN PUBLIC KEY-----\n...\n-----END PUBLIC KEY-----\n" + } +} +``` +6. 
Add the following configuration in `config.ini.php` in your RSS-Bridge folder, replacing the path with the one from step 3: +```ini +[MastodonBridge] +private_key = "/absolute/path/to/your/private.pem" +key_id = "https://DOMAIN/actor#main-key" +``` + +## Considerations + +Any ActivityPub instance your users requested content from will be able to identify requests from your RSS-Bridge instance by the domain you specified in the configuration. This also means that an ActivityPub instance may choose to block this domain should they judge your instance's usage excessive. Therefore, public instance operators may need to monitor for abuse and communicate with ActivityPub instance admins when necessary. \ No newline at end of file From 44962cae64fd69581dc10d5b06824b5e2abadec3 Mon Sep 17 00:00:00 2001 From: Austin Huang Date: Mon, 20 Jun 2022 17:37:48 -0400 Subject: [PATCH 12/12] [Mastodon] update docs --- docs/10_Bridge_Specific/ActivityPub_(Mastodon).md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md b/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md index d5162e7d1ba..cdf0a5d99e8 100644 --- a/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md +++ b/docs/10_Bridge_Specific/ActivityPub_(Mastodon).md @@ -2,7 +2,7 @@ Certain ActivityPub implementations, such as [Mastodon](https://docs.joinmastodon.org/spec/security/#http) and [Pleroma](https://docs-develop.pleroma.social/backend/configuration/cheatsheet/#activitypub), allow instances to require requests to ActivityPub endpoints to be signed. RSS-Bridge can handle the HTTP signature header if a private key is provided, while the ActivityPub instance must be able to know the corresponding public key. -You do **not** need to configure this if their usage is limited to accessing ActivityPub instances that do not have such requirements. 
+You do **not** need to configure this if the usage on your RSS-Bridge instance is limited to accessing ActivityPub instances that do not have such requirements. While the majority of ActivityPub instances don't have them at the time of writing, the situation may change in the future. ## Configuration @@ -27,7 +27,7 @@ $ openssl rsa -in private.pem -outform PEM -pubout -out public.pem }] } ``` -5. Serve the following page at `https://DOMAIN/actor`, replacing the value of `publicKeyPem` with the contents of the `public.pem` file in step 2: +5. Serve the following page at `https://DOMAIN/actor`, replacing the value of `publicKeyPem` with the contents of the `public.pem` file in step 2, with all line breaks replaced by `\n`: ```json { "@context": [ @@ -54,4 +54,4 @@ key_id = "https://DOMAIN/actor#main-key" ## Considerations -Any ActivityPub instance your users requested content from will be able to identify requests from your RSS-Bridge instance by the domain you specified in the configuration. This also means that an ActivityPub instance may choose to block this domain should they judge your instance's usage excessive. Therefore, public instance operators may need to monitor for abuse and communicate with ActivityPub instance admins when necessary. \ No newline at end of file +Any ActivityPub instance your users request content from will be able to identify requests from your RSS-Bridge instance by the domain you specified in the configuration. This also means that an ActivityPub instance may choose to block this domain should they judge your instance's usage excessive. Therefore, public instance operators should monitor for abuse and be prepared to communicate with ActivityPub instance admins when necessary. You may also leave contact information as the `summary` value in the actor JSON (step 5).