Commit 6b3e1708 authored by jaywink

Merge branch 'images-to-matrix' into 'master'

Support images to Matrix

See merge request !163
parents 4566b252 2fe45a35
Pipeline #5391 passed with stage
in 1 minute and 49 seconds
import logging
import re
import uuid
from typing import Dict, List
......@@ -26,16 +25,11 @@ class AttachImagesMixin(RawContentMixin):
Attach any embedded images from raw_content.
"""
super().pre_send()
if self._media_type != "text/markdown":
return
regex = r"!\[([\w ]*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)"
matches = re.finditer(regex, self.raw_content, re.MULTILINE | re.IGNORECASE)
for match in matches:
groups = match.groups()
for image in self.embedded_images:
self._children.append(
ActivitypubImage(
url=groups[1],
name=groups[0] or "",
url=image[0],
name=image[1],
inline=True,
)
)
......
import json
import logging
import mimetypes
import os
from typing import Dict, List
from urllib.parse import quote
from uuid import uuid4
import requests
from federation.entities.base import Post, Profile
from federation.entities.matrix.enums import EventType
from federation.entities.mixins import BaseEntity
from federation.entities.utils import get_base_attributes
from federation.utils.matrix import get_matrix_configuration, appservice_auth_header
from federation.utils.network import fetch_document
from federation.utils.network import fetch_document, fetch_file
logger = logging.getLogger("federation")
......@@ -41,6 +45,11 @@ class MatrixEntityMixin(BaseEntity):
config = get_matrix_configuration()
return f"{config['homeserver_base_url']}/_matrix/client/r0"
# noinspection PyMethodMayBeStatic
def get_endpoint_media(self) -> str:
    """Return the base URL of the homeserver's r0 media API."""
    homeserver_base_url = get_matrix_configuration()["homeserver_base_url"]
    return f"{homeserver_base_url}/_matrix/media/r0"
def get_profile_room_id(self):
# TODO: we should cache these.
doc, status, error = fetch_document(
......@@ -89,9 +98,62 @@ class MatrixRoomMessage(Post, MatrixEntityMixin):
def pre_send(self):
    """
    Do various pre-send things.

    Runs the parent class ``pre_send`` first, then looks up the profile room ID and
    uploads any embedded images.
    """
    super().pre_send()
    # Get profile room ID
    self.get_profile_room_id()
    # Upload embedded images and replace the HTTP urls in the message with MXC urls so clients show the images
    self.upload_embedded_images()
def upload_embedded_images(self):
    """
    Upload embedded images.

    For every ``(url, name)`` pair in ``self.embedded_images`` the image is fetched to a local
    temporary file, posted to the media upload endpoint and, when the response carries a
    ``content_uri``, the HTTP url in ``self.raw_content`` is replaced with it so that Matrix
    clients will show the images.

    Download, upload and response parsing failures are logged and the image is skipped.
    """
    for url, name in self.embedded_images:
        headers = appservice_auth_header()
        content_type, _encoding = mimetypes.guess_type(url)
        if not content_type:
            # The url extension is not always a known type (the image regex accepts any
            # suffix that merely ends in jpg/gif/png/jpeg), so fall back to a generic type
            # instead of sending a ``None`` header value.
            content_type = "application/octet-stream"
        headers["Content-Type"] = content_type
        # Random name if none
        if not name:
            # guess_extension returns None for types it does not know
            extension = mimetypes.guess_extension(content_type, strict=False) or ""
            name = f"{uuid4()}{extension}"
        # Need to fetch it locally first
        # noinspection PyBroadException
        try:
            image_file = fetch_file(url=url, timeout=60)
        except Exception as ex:
            logger.warning("MatrixRoomMessage.pre_send | Failed to retrieve image %s to be uploaded: %s",
                           url, ex)
            continue
        # Then upload. Everything touching the temporary file lives inside the try block so
        # that the finally clause always removes it.
        # noinspection PyBroadException
        try:
            headers["Content-Length"] = str(os.stat(image_file).st_size)
            with open(image_file, "rb") as f:
                response = requests.post(
                    f"{self.get_endpoint_media()}/upload?filename={quote(name)}&user_id={self.mxid}",
                    data=f.read(),
                    headers=headers,
                    timeout=60,
                )
            response.raise_for_status()
        except Exception as ex:
            logger.warning("MatrixRoomMessage.pre_send | Failed to upload image %s: %s",
                           url, ex)
            continue
        finally:
            os.unlink(image_file)
        # Replace in raw content
        # noinspection PyBroadException
        try:
            payload = response.json()
            logger.debug("MatrixRoomMessage.pre_send | Got response %s", payload)
            self.raw_content = self.raw_content.replace(url, payload["content_uri"])
        except Exception as ex:
            logger.error("MatrixRoomMessage.pre_send | Failed to find content_uri from the image upload "
                         "response: %s", ex)
class MatrixProfile(Profile, MatrixEntityMixin):
......
......@@ -2,7 +2,7 @@ import datetime
import importlib
import re
import warnings
from typing import List, Set, Union, Dict
from typing import List, Set, Union, Dict, Tuple
from commonmark import commonmark
......@@ -202,6 +202,24 @@ class RawContentMixin(BaseEntity):
super().__init__(*args, **kwargs)
self._required += ["raw_content"]
@property
def embedded_images(self) -> List[Tuple[str, str]]:
    """
    Collect the images embedded in raw_content.

    Only markdown content is inspected; any other media type, or a missing raw_content,
    gives an empty list.

    Each entry is a (url, filename) tuple, where filename is the text between the square
    brackets of the markdown image (empty string when there is none).
    """
    if self._media_type != "text/markdown" or self.raw_content is None:
        return []
    pattern = r"!\[([\w ]*)\]\((https?://[\w\d\-\./]+\.[\w]*((?<=jpg)|(?<=gif)|(?<=png)|(?<=jpeg)))\)"
    return [
        (found.group(2), found.group(1) or "")
        for found in re.finditer(pattern, self.raw_content, re.MULTILINE | re.IGNORECASE)
    ]
@property
def rendered_content(self) -> str:
"""Returns the rendered version of raw_content, or just raw_content."""
......
......@@ -3,8 +3,9 @@ import datetime
import logging
import re
import socket
from typing import Optional
from typing import Optional, Dict
from urllib.parse import quote
from uuid import uuid4
import requests
from requests.exceptions import RequestException, HTTPError, SSLError
......@@ -107,6 +108,22 @@ def fetch_host_ip(host: str) -> str:
return ip
def fetch_file(url: str, timeout: int = 30, extra_headers: Optional[Dict] = None) -> str:
    """
    Download a file with a temporary name and return the name.

    The file is created in the platform's temporary directory. It is not removed here;
    the caller owns the returned path.

    :param url: URL to download.
    :param timeout: Timeout passed to ``requests.get``.
    :param extra_headers: Optional headers merged over the default ``user-agent`` header.
    :returns: Path of the downloaded file.
    :raises requests.exceptions.HTTPError: Raised by ``raise_for_status`` for an error response.
    """
    import os
    import tempfile

    headers = {'user-agent': USER_AGENT}
    if extra_headers:
        headers.update(extra_headers)
    response = requests.get(url, timeout=timeout, headers=headers, stream=True)
    try:
        response.raise_for_status()
        # mkstemp picks the temporary directory for us and creates the file atomically
        fd, name = tempfile.mkstemp()
        try:
            with os.fdopen(fd, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        except BaseException:
            # Do not leave a partially written file behind when the download fails
            os.unlink(name)
            raise
    finally:
        # A streamed response keeps its connection until it is closed
        response.close()
    return name
def parse_http_date(date):
"""
Parse a date format as specified by HTTP RFC7231 section 7.1.1.1.
......@@ -185,6 +202,7 @@ def send_document(url, data, timeout=10, method="post", *args, **kwargs):
response = request_func(url, *args, **kwargs)
logger.debug("send_document: response status code %s", response.status_code)
return response.status_code, None
# TODO support rate limit 429 code
except RequestException as ex:
logger.debug("send_document: exception %s", ex)
return None, ex
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment