feat(events): add plain-text body representation from HTML
Co-Authored-By: Sorunome <mail@sorunome.de>
This commit is contained in:
parent
f0dd8ca061
commit
a1b95c0915
|
|
@ -27,6 +27,7 @@ import 'utils/matrix_localizations.dart';
|
||||||
import 'utils/receipt.dart';
|
import 'utils/receipt.dart';
|
||||||
import 'utils/event_localizations.dart';
|
import 'utils/event_localizations.dart';
|
||||||
import 'utils/crypto/encrypted_file.dart';
|
import 'utils/crypto/encrypted_file.dart';
|
||||||
|
import 'utils/html_to_text.dart';
|
||||||
|
|
||||||
abstract class RelationshipTypes {
|
abstract class RelationshipTypes {
|
||||||
static const String reply = 'm.in_reply_to';
|
static const String reply = 'm.in_reply_to';
|
||||||
|
|
@ -286,6 +287,12 @@ class Event extends MatrixEvent {
|
||||||
return '$type';
|
return '$type';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A plain-text representation of this event, with things like spoilers
/// stripped. Useful for plain-text notifications.
///
/// When the content's `format` is `org.matrix.custom.html`, the formatted
/// text is passed through [HtmlToText.convert]; otherwise [body] is returned
/// as-is.
String get plaintextBody {
  if (content['format'] == 'org.matrix.custom.html') {
    return HtmlToText.convert(formattedText);
  }
  return body;
}
|
||||||
|
|
||||||
/// Returns a list of [Receipt] instances for this event.
|
/// Returns a list of [Receipt] instances for this event.
|
||||||
List<Receipt> get receipts {
|
List<Receipt> get receipts {
|
||||||
if (!(room.roomAccountData.containsKey('m.receipt'))) return [];
|
if (!(room.roomAccountData.containsKey('m.receipt'))) return [];
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,243 @@
|
||||||
|
/*
|
||||||
|
* Famedly Matrix SDK
|
||||||
|
* Copyright (C) 2021 Famedly GmbH
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import 'package:html/parser.dart';
|
||||||
|
import 'package:html/dom.dart';
|
||||||
|
import 'package:html_unescape/html_unescape.dart';
|
||||||
|
|
||||||
|
/// Converts an HTML fragment into a pseudo-markdown plain-text string.
///
/// Only [convert] is meant to be called from outside; the remaining static
/// members walk the parsed DOM recursively and render one kind of node each.
class HtmlToText {
  /// Convert an HTML string to a pseudo-markdown plain text representation,
  /// with `data-mx-spoiler` spans redacted.
  ///
  /// Trailing whitespace is removed from the result.
  static String convert(String html) {
    final opts = _ConvertOpts();
    final reply = _walkNode(opts, parseFragment(html));
    return reply.replaceAll(RegExp(r'\s*$', multiLine: false), '');
  }

  /// Renders the inside of a `<pre>` element; the caller adds the surrounding
  /// code fences.
  ///
  /// If the inner HTML starts with a `<code ...>` tag, that tag and a trailing
  /// `</code>` are removed, and a `language-xyz` token found in the tag's
  /// attributes is put in front of the text. The text is HTML-unescaped and
  /// always starts and ends with a newline.
  static String _parsePreContent(_ConvertOpts opts, Element node) {
    final html = node.innerHtml;
    final match =
        RegExp(r'^<code([^>]*)>', multiLine: false, caseSensitive: false)
            .firstMatch(html);
    if (match == null) {
      return _wrapInNewlines(HtmlUnescape().convert(html));
    }
    // remove the <code> opening tag and the </code> closing tag
    final code = html.substring(match.end).replaceAll(
        RegExp(r'</code>$', multiLine: false, caseSensitive: false), '');
    final text = _wrapInNewlines(HtmlUnescape().convert(code));
    final language =
        RegExp(r'language-(\w+)', multiLine: false, caseSensitive: false)
            .firstMatch(match.group(1));
    return language == null ? text : '${language.group(1)}$text';
  }

  /// Returns [text] with a leading and a trailing newline added if missing.
  ///
  /// Uses `startsWith` / `endsWith` instead of indexing so that an empty
  /// string (e.g. from `<pre></pre>`) becomes a single newline rather than
  /// throwing a [RangeError].
  static String _wrapInNewlines(String text) {
    final prefixed = text.startsWith('\n') ? text : '\n$text';
    return prefixed.endsWith('\n') ? prefixed : '$prefixed\n';
  }

  /// Renders a `<blockquote>`: every line of the rendered children is
  /// prefixed with `> ` and the result ends with a newline.
  static String _parseBlockquoteContent(_ConvertOpts opts, Element node) {
    final msg = _walkChildNodes(opts, node);
    return msg.split('\n').map((s) => '> $s').join('\n') + '\n';
  }

  /// Renders a `<span>`.
  ///
  /// A span carrying a `data-mx-spoiler` attribute is redacted: its rendered
  /// content is replaced by one block character per character, preceded by
  /// the attribute value in parentheses when that value is not empty. Any
  /// other span is rendered as its children.
  static String _parseSpanContent(_ConvertOpts opts, Element node) {
    final content = _walkChildNodes(opts, node);
    final reason = node.attributes['data-mx-spoiler'];
    if (reason is! String) {
      return content;
    }
    final spoiler = '█' * content.length;
    return reason == '' ? spoiler : '($reason) $spoiler';
  }

  /// Renders a `<ul>` as one bullet line per `<li>` child.
  ///
  /// The bullet character is picked from [_listBulletPoints] by nesting depth
  /// (cycling), and continuation lines of an entry are indented.
  static String _parseUlContent(_ConvertOpts opts, Element node) {
    // Children are rendered one level deeper than the list itself.
    opts.listDepth++;
    final entries = _listChildNodes(opts, node, {'li'});
    opts.listDepth--;
    final bulletPoint =
        _listBulletPoints[opts.listDepth % _listBulletPoints.length];
    final indent = ' ' * opts.listDepth;

    return entries
        .map((s) => '$indent$bulletPoint ${s.replaceAll('\n', '\n$indent ')}')
        .join('\n');
  }

  /// Renders an `<ol>` as one numbered line per `<li>` child.
  ///
  /// Numbering begins at the value of the `start` attribute when it consists
  /// only of decimal digits, and at 1 otherwise. The first entry gets exactly
  /// that number, so `<ol start="5">` yields `5.`, `6.`, ...
  static String _parseOlContent(_ConvertOpts opts, Element node) {
    // Children are rendered one level deeper than the list itself.
    opts.listDepth++;
    final entries = _listChildNodes(opts, node, {'li'});
    opts.listDepth--;

    var first = 1;
    final start = node.attributes['start'];
    if (start is String &&
        RegExp(r'^[0-9]+$', multiLine: false).hasMatch(start)) {
      // tryParse guards against digit strings too large for an int.
      first = int.tryParse(start) ?? 1;
    }

    final indent = ' ' * opts.listDepth;
    final lines = <String>[];
    for (var i = 0; i < entries.length; i++) {
      final body = entries[i].replaceAll('\n', '\n$indent ');
      lines.add('$indent${first + i}. $body');
    }
    return lines.join('\n');
  }

  /// Bullet characters for unordered lists, indexed by nesting depth.
  static const _listBulletPoints = <String>['●', '○', '■', '‣'];

  /// Renders each child of [node] separately and returns the pieces.
  ///
  /// When [types] is given and not empty, text nodes and elements whose
  /// lower-cased tag name is not in [types] are skipped.
  static List<String> _listChildNodes(_ConvertOpts opts, Element node,
      [Iterable<String> types]) {
    final filtering = types != null && types.isNotEmpty;
    final replies = <String>[];
    for (final child in node.nodes) {
      if (filtering &&
          ((child is Text) ||
              ((child is Element) &&
                  !types.contains(child.localName.toLowerCase())))) {
        continue;
      }
      replies.add(_walkNode(opts, child));
    }
    return replies;
  }

  /// Tags that must begin on a fresh line in the output.
  static const _blockTags = <String>{
    'blockquote',
    'ul',
    'ol',
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'pre',
  };

  /// Renders all children of [node] and concatenates the results.
  ///
  /// Two `<p>` elements in a row (text nodes in between are ignored for this
  /// purpose) are separated by a blank line. A block-level tag gets a newline
  /// in front of it when the text so far does not already end with one.
  static String _walkChildNodes(_ConvertOpts opts, Node node) {
    var reply = '';
    var lastTag = '';
    for (final child in node.nodes) {
      final thisTag = child is Element ? child.localName.toLowerCase() : '';
      if (thisTag == 'p' && lastTag == 'p') {
        reply += '\n\n';
      } else if (_blockTags.contains(thisTag) &&
          reply.isNotEmpty &&
          reply[reply.length - 1] != '\n') {
        reply += '\n';
      }
      reply += _walkNode(opts, child);
      // Only elements update lastTag, so text between two tags is skipped.
      if (thisTag.isNotEmpty) {
        lastTag = thisTag;
      }
    }
    return reply;
  }

  /// Renders a single DOM node.
  ///
  /// Text nodes yield their text (a node that is exactly `\n` yields nothing),
  /// known elements are rendered according to their tag, and everything else
  /// is rendered as the concatenation of its children.
  static String _walkNode(_ConvertOpts opts, Node node) {
    if (node is Text) {
      // ignore \n between single nodes
      return node.text == '\n' ? '' : node.text;
    } else if (node is Element) {
      final tag = node.localName.toLowerCase();
      switch (tag) {
        case 'em':
        case 'i':
          return '*${_walkChildNodes(opts, node)}*';
        case 'strong':
        case 'b':
          return '**${_walkChildNodes(opts, node)}**';
        case 'u':
        case 'ins':
          return '__${_walkChildNodes(opts, node)}__';
        case 'del':
        case 'strike':
        case 's':
          return '~~${_walkChildNodes(opts, node)}~~';
        case 'code':
          return '`${node.text}`';
        case 'pre':
          return '```${_parsePreContent(opts, node)}```\n';
        case 'a':
          final href = (node.attributes['href'] ?? '').toLowerCase();
          final content = _walkChildNodes(opts, node);
          // Links with these prefixes are shown without the link marker.
          if (href.startsWith('https://matrix.to/#/') ||
              href.startsWith('matrix:')) {
            return content;
          }
          return '🔗$content';
        case 'img':
          return node.attributes['alt'] ??
              node.attributes['title'] ??
              node.attributes['src'] ??
              '';
        case 'br':
          return '\n';
        case 'blockquote':
          return _parseBlockquoteContent(opts, node);
        case 'ul':
          return _parseUlContent(opts, node);
        case 'ol':
          return _parseOlContent(opts, node);
        case 'mx-reply':
          return '';
        case 'hr':
          return '\n----------\n';
        case 'h1':
        case 'h2':
        case 'h3':
        case 'h4':
        case 'h5':
        case 'h6':
          // The digit in the tag name is the number of '#' marks.
          final mark = '#' * int.parse(tag[1]);
          return '$mark ${_walkChildNodes(opts, node)}\n';
        case 'span':
          return _parseSpanContent(opts, node);
        default:
          return _walkChildNodes(opts, node);
      }
    } else {
      return _walkChildNodes(opts, node);
    }
  }
}
|
||||||
|
|
||||||
|
/// Mutable state shared across one [HtmlToText.convert] run.
class _ConvertOpts {
|
||||||
|
/// Current list nesting level: incremented while the children of a `<ul>` or
/// `<ol>` are rendered and decremented afterwards; used for indentation and
/// bullet selection.
int listDepth = 0;
|
||||||
|
}
|
||||||
|
|
@ -21,6 +21,7 @@ dependencies:
|
||||||
ffi: ^1.0.0
|
ffi: ^1.0.0
|
||||||
js: ^0.6.3
|
js: ^0.6.3
|
||||||
slugify: ^2.0.0
|
slugify: ^2.0.0
|
||||||
|
html: ^0.15.0
|
||||||
|
|
||||||
dev_dependencies:
|
dev_dependencies:
|
||||||
pedantic: ^1.11.0
|
pedantic: ^1.11.0
|
||||||
|
|
|
||||||
|
|
@ -945,6 +945,20 @@ void main() {
|
||||||
expect(
|
expect(
|
||||||
event.aggregatedEvents(timeline, RelationshipTypes.edit), <Event>{});
|
event.aggregatedEvents(timeline, RelationshipTypes.edit), <Event>{});
|
||||||
});
|
});
|
||||||
|
// An event whose content declares the custom HTML format must expose the
// converted formatted_body, not the raw body, through plaintextBody.
test('plaintextBody', () {
  final content = {
    'body': 'blah',
    'msgtype': 'm.text',
    'format': 'org.matrix.custom.html',
    'formatted_body': '<b>blah</b>',
  };
  final event = Event.fromJson({
    'type': EventTypes.Message,
    'content': content,
    'event_id': '\$source',
    'sender': '@alice:example.org',
  }, null);
  expect(event.plaintextBody, equals('**blah**'));
});
|
||||||
test('getDisplayEvent', () {
|
test('getDisplayEvent', () {
|
||||||
var event = Event.fromJson({
|
var event = Event.fromJson({
|
||||||
'type': EventTypes.Message,
|
'type': EventTypes.Message,
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,98 @@
|
||||||
|
/*
|
||||||
|
* Famedly Matrix SDK
|
||||||
|
* Copyright (C) 2021 Famedly GmbH
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU Affero General Public License as
|
||||||
|
* published by the Free Software Foundation, either version 3 of the
|
||||||
|
* License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU Affero General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Affero General Public License
|
||||||
|
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import 'package:matrix/src/utils/html_to_text.dart';
|
||||||
|
import 'package:test/test.dart';
|
||||||
|
|
||||||
|
/// Table-driven test for [HtmlToText.convert].
void main() {
  group('htmlToText', () {
    test('converts HTML to pseudo-markdown plain text', () {
      // Each key is an HTML input; its value is the expected plain text.
      final testMap = <String, String>{
        '': '',
        'hello world\nthis is a test': 'hello world\nthis is a test',
        '<em>That\'s</em> not a test, <strong>this</strong> is a test':
            '*That\'s* not a test, **this** is a test',
        'Visit <del><a href="http://example.com">our website</a></del> (outdated)':
            'Visit ~~🔗our website~~ (outdated)',
        '(cw spiders) <span data-mx-spoiler>spiders are pretty cool</span>':
            '(cw spiders) ███████████████████████',
        '<span data-mx-spoiler="cw spiders">spiders are pretty cool</span>':
            '(cw spiders) ███████████████████████',
        '<img src="test.gif" alt="a test case" />': 'a test case',
        'List of cute animals:\n<ul>\n<li>Kittens</li>\n<li>Puppies</li>\n<li>Snakes<br/>(I think they\'re cute!)</li>\n</ul>\n(This list is incomplete, you can help by adding to it!)':
            'List of cute animals:\n● Kittens\n● Puppies\n● Snakes\n (I think they\'re cute!)\n(This list is incomplete, you can help by adding to it!)',
        '<em>fox</em>': '*fox*',
        '<i>fox</i>': '*fox*',
        '<strong>fox</i>': '**fox**',
        '<b>fox</b>': '**fox**',
        '<u>fox</u>': '__fox__',
        '<ins>fox</ins>': '__fox__',
        '<del>fox</del>': '~~fox~~',
        '<strike>fox</strike>': '~~fox~~',
        '<s>fox</s>': '~~fox~~',
        '<code>>fox</code>': '`>fox`',
        '<pre>meep</pre>': '```\nmeep\n```',
        '<pre>meep\n</pre>': '```\nmeep\n```',
        '<pre><code class="language-floof">meep</code></pre>':
            '```floof\nmeep\n```',
        'before<pre>code</pre>after': 'before\n```\ncode\n```\nafter',
        '<p>before</p><pre>code</pre><p>after</p>':
            'before\n```\ncode\n```\nafter',
        '<p>fox</p>': 'fox',
        '<p>fox</p><p>floof</p>': 'fox\n\nfloof',
        '<a href="https://example.org">website</a>': '🔗website',
        '<a href="https://matrix.to/#/@user:example.org">fox</a>': 'fox',
        '<a href="matrix:u/user:example.org">fox</a>': 'fox',
        '<img alt=":wave:" src="mxc://fox">': ':wave:',
        'fox<br>floof': 'fox\nfloof',
        '<blockquote>fox</blockquote>floof': '> fox\nfloof',
        '<blockquote><p>fox</p></blockquote>floof': '> fox\nfloof',
        '<blockquote><p>fox</p></blockquote><p>floof</p>': '> fox\nfloof',
        'a<blockquote>fox</blockquote>floof': 'a\n> fox\nfloof',
        '<blockquote><blockquote>fox</blockquote>floof</blockquote>fluff':
            '> > fox\n> floof\nfluff',
        '<ul><li>hey<ul><li>a</li><li>b</li></ul></li><li>foxies</li></ul>':
            '● hey\n ○ a\n ○ b\n● foxies',
        '<ol><li>a</li><li>b</li></ol>': '1. a\n2. b',
        '<ol><li>a<ol><li>aa</li><li>bb</li></ol></li><li>b</li></ol>':
            '1. a\n 1. aa\n 2. bb\n2. b',
        '<ol><li>a<ul><li>aa</li><li>bb</li></ul></li><li>b</li></ol>':
            '1. a\n ○ aa\n ○ bb\n2. b',
        '<ul><li>a<ol><li>aa</li><li>bb</li></ol></li><li>b</li></ul>':
            '● a\n 1. aa\n 2. bb\n● b',
        '<mx-reply>bunny</mx-reply>fox': 'fox',
        'fox<hr>floof': 'fox\n----------\nfloof',
        '<p>fox</p><hr><p>floof</p>': 'fox\n----------\nfloof',
        '<h1>fox</h1>floof': '# fox\nfloof',
        '<h1>fox</h1><p>floof</p>': '# fox\nfloof',
        'floof<h1>fox</h1>': 'floof\n# fox',
        '<p>floof</p><h1>fox</h1>': 'floof\n# fox',
        '<h2>fox</h2>': '## fox',
        '<h3>fox</h3>': '### fox',
        '<h4>fox</h4>': '#### fox',
        '<h5>fox</h5>': '##### fox',
        '<h6>fox</h6>': '###### fox',
        '<span>fox</span>': 'fox',
        '<p>fox</p>\n<p>floof</p>': 'fox\n\nfloof',
      };
      for (final entry in testMap.entries) {
        // The reason names the failing input, since all cases share one test.
        expect(
          HtmlToText.convert(entry.key),
          entry.value,
          reason: 'unexpected conversion of HTML input: ${entry.key}',
        );
      }
    });
  });
}
|
||||||
Loading…
Reference in New Issue