feat(events): add plain-text body representation from HTML
Co-Authored-By: Sorunome <mail@sorunome.de>
This commit is contained in:
parent
f0dd8ca061
commit
a1b95c0915
|
|
@ -27,6 +27,7 @@ import 'utils/matrix_localizations.dart';
|
|||
import 'utils/receipt.dart';
|
||||
import 'utils/event_localizations.dart';
|
||||
import 'utils/crypto/encrypted_file.dart';
|
||||
import 'utils/html_to_text.dart';
|
||||
|
||||
abstract class RelationshipTypes {
|
||||
static const String reply = 'm.in_reply_to';
|
||||
|
|
@ -286,6 +287,12 @@ class Event extends MatrixEvent {
|
|||
return '$type';
|
||||
}
|
||||
|
||||
/// A plain-text representation of this event, e.g. for plain-text
/// notifications.
///
/// When the content's `format` is `org.matrix.custom.html`, the formatted text
/// is passed through [HtmlToText.convert] (which strips things like spoilers
/// and the like); otherwise [body] is returned unchanged.
String get plaintextBody {
  final isHtml = content['format'] == 'org.matrix.custom.html';
  if (isHtml) {
    return HtmlToText.convert(formattedText);
  }
  return body;
}
|
||||
|
||||
/// Returns a list of [Receipt] instances for this event.
|
||||
List<Receipt> get receipts {
|
||||
if (!(room.roomAccountData.containsKey('m.receipt'))) return [];
|
||||
|
|
|
|||
|
|
@ -0,0 +1,243 @@
|
|||
/*
|
||||
* Famedly Matrix SDK
|
||||
* Copyright (C) 2021 Famedly GmbH
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import 'package:html/parser.dart';
|
||||
import 'package:html/dom.dart';
|
||||
import 'package:html_unescape/html_unescape.dart';
|
||||
|
||||
class HtmlToText {
|
||||
/// Renders [html] as a pseudo-markdown plain text string.
///
/// The input is parsed as an HTML fragment and walked node by node, with
/// `data-mx-spoiler` spans redacted. Trailing whitespace is removed from the
/// result.
static String convert(String html) {
  final fragment = parseFragment(html);
  final rendered = _walkNode(_ConvertOpts(), fragment);
  return rendered.replaceAll(RegExp(r'\s*$', multiLine: false), '');
}
|
||||
|
||||
/// Extracts the text of a `<pre>` element for placement inside a code fence.
///
/// Works on the element's inner HTML. If it begins with a `<code ...>` tag,
/// that tag and a trailing `</code>` are removed, and a `language-xyz` token
/// found in the tag's attributes is prepended to the result so that the fence
/// opens with the language name. HTML entities are unescaped and the text is
/// padded so that it both starts and ends with a newline.
///
/// Empty content (`<pre></pre>` or `<pre><code></code></pre>`) yields a single
/// newline instead of throwing a [RangeError] from indexing an empty string.
///
/// [opts] is accepted for signature parity with the other parsers and is not
/// read here.
static String _parsePreContent(_ConvertOpts opts, Element node) {
  final html = node.innerHtml;
  final codeTag =
      RegExp(r'^<code([^>]*)>', multiLine: false, caseSensitive: false)
          .firstMatch(html);
  if (codeTag == null) {
    return _padWithNewlines(HtmlUnescape().convert(html));
  }
  // Drop the opening <code ...> tag and the closing </code> tag.
  final inner = html.substring(codeTag.end).replaceAll(
      RegExp(r'</code>$', multiLine: false, caseSensitive: false), '');
  final text = _padWithNewlines(HtmlUnescape().convert(inner));
  // group(1) holds the attribute text of the <code> tag.
  final language =
      RegExp(r'language-(\w+)', multiLine: false, caseSensitive: false)
          .firstMatch(codeTag.group(1));
  if (language == null) {
    return text;
  }
  return '${language.group(1)}$text';
}

/// Ensures [text] begins and ends with a newline; an empty string becomes
/// a single newline.
static String _padWithNewlines(String text) {
  var padded = text.startsWith('\n') ? text : '\n$text';
  if (!padded.endsWith('\n')) {
    padded += '\n';
  }
  return padded;
}
|
||||
|
||||
/// Renders a `<blockquote>`: each line of the converted children is prefixed
/// with `> `, and the whole quote is terminated with a newline.
static String _parseBlockquoteContent(_ConvertOpts opts, Element node) {
  final quoted = <String>[];
  for (final line in _walkChildNodes(opts, node).split('\n')) {
    quoted.add('> $line');
  }
  return '${quoted.join('\n')}\n';
}
|
||||
|
||||
/// Renders a `<span>`.
///
/// A span carrying a `data-mx-spoiler` attribute is redacted: its converted
/// content is replaced by one `█` per character, preceded by `(reason) ` when
/// the attribute value is non-empty. Any other span renders as its children.
static String _parseSpanContent(_ConvertOpts opts, Element node) {
  final inner = _walkChildNodes(opts, node);
  final reason = node.attributes['data-mx-spoiler'];
  if (reason is! String) {
    return inner;
  }
  final redacted = '█' * inner.length;
  return reason == '' ? redacted : '($reason) $redacted';
}
|
||||
|
||||
/// Renders a `<ul>` as one bullet line per `<li>` child.
///
/// The bullet glyph is chosen from [_listBulletPoints] by the current nesting
/// depth, and continuation lines inside an item are indented to sit under it.
static String _parseUlContent(_ConvertOpts opts, Element node) {
  // Children are rendered one level deeper than this list itself.
  opts.listDepth++;
  final items = _listChildNodes(opts, node, {'li'});
  opts.listDepth--;

  // NOTE(review): the indent literals are copied exactly as they appear in
  // this view; confirm their width against the checked-in file.
  final indent = ' ' * opts.listDepth;
  final bullet = _listBulletPoints[opts.listDepth % _listBulletPoints.length];
  final lines = <String>[];
  for (final item in items) {
    final body = item.replaceAll('\n', '\n$indent ');
    lines.add('$indent$bullet $body');
  }
  return lines.join('\n');
}
|
||||
|
||||
/// Renders an `<ol>` as one numbered line per `<li>` child.
///
/// Numbering begins at the value of the `start` attribute when it consists
/// solely of decimal digits, and at 1 otherwise. The first item carries the
/// `start` value itself (previously it was rendered as `start + 1`).
/// Continuation lines inside an item are indented to sit under it.
static String _parseOlContent(_ConvertOpts opts, Element node) {
  // Children are rendered one level deeper than this list itself.
  opts.listDepth++;
  final items = _listChildNodes(opts, node, {'li'});
  opts.listDepth--;

  // Only plain digits are accepted; tryParse additionally protects against
  // digit strings that are too large to be represented as an int.
  final startAttr = node.attributes['start'];
  var first = 1;
  if (startAttr is String &&
      RegExp(r'^[0-9]+$', multiLine: false).hasMatch(startAttr)) {
    first = int.tryParse(startAttr) ?? 1;
  }

  // NOTE(review): the indent literals are copied exactly as they appear in
  // this view; confirm their width against the checked-in file.
  final indent = ' ' * opts.listDepth;
  final lines = <String>[];
  for (var i = 0; i < items.length; i++) {
    final body = items[i].replaceAll('\n', '\n$indent ');
    lines.add('$indent${first + i}. $body');
  }
  return lines.join('\n');
}
|
||||
|
||||
/// Bullet glyphs for unordered lists, selected by nesting depth modulo the
/// number of glyphs.
static const List<String> _listBulletPoints = ['●', '○', '■', '‣'];
|
||||
|
||||
/// Converts each child of [node] separately and returns the results in order.
///
/// When [types] is given and non-empty, text nodes and elements whose
/// lower-cased local name is not in [types] are skipped; all other children
/// are converted with [_walkNode].
static List<String> _listChildNodes(_ConvertOpts opts, Element node,
    [Iterable<String> types]) {
  final filtering = types != null && types.isNotEmpty;
  final rendered = <String>[];
  for (final child in node.nodes) {
    if (filtering) {
      if (child is Text) {
        continue;
      }
      if (child is Element &&
          !types.contains(child.localName.toLowerCase())) {
        continue;
      }
    }
    rendered.add(_walkNode(opts, child));
  }
  return rendered;
}
|
||||
|
||||
/// Tags whose output must begin on a fresh line: [_walkChildNodes] inserts a
/// newline before them when the text so far does not already end with one.
static const Set<String> _blockTags = {
  'blockquote',
  'ul',
  'ol',
  'h1',
  'h2',
  'h3',
  'h4',
  'h5',
  'h6',
  'pre',
};
|
||||
|
||||
/// Converts all children of [node] and concatenates the results.
///
/// Two directly consecutive `<p>` elements (ignoring non-element nodes in
/// between) are separated by a blank line, and a tag listed in [_blockTags] is
/// pushed onto a new line when the accumulated text does not end with one.
static String _walkChildNodes(_ConvertOpts opts, Node node) {
  var out = '';
  // Name of the most recent element child; text nodes do not reset it.
  var previousTag = '';
  for (final child in node.nodes) {
    final tag = child is Element ? child.localName.toLowerCase() : '';
    final consecutiveParagraphs = tag == 'p' && previousTag == 'p';
    if (consecutiveParagraphs) {
      out += '\n\n';
    } else if (_blockTags.contains(tag) &&
        out.isNotEmpty &&
        !out.endsWith('\n')) {
      out += '\n';
    }
    out += _walkNode(opts, child);
    if (tag.isNotEmpty) {
      previousTag = tag;
    }
  }
  return out;
}
|
||||
|
||||
/// Converts a single [node] to its plain-text form.
///
/// Text nodes are returned verbatim, except that a text node consisting of
/// exactly one newline is dropped. Elements are dispatched on their lower-cased
/// tag name: inline formatting tags wrap their children in markdown-like
/// markers, structural tags are delegated to the dedicated `_parse*` helpers,
/// `<mx-reply>` is dropped entirely, and unknown tags render as their
/// children. Any other node type renders as its children.
static String _walkNode(_ConvertOpts opts, Node node) {
  if (node is Text) {
    // ignore \n between single nodes
    if (node.text == '\n') {
      return '';
    }
    return node.text;
  }
  if (node is Element) {
    final name = node.localName.toLowerCase();

    // Inline tags that simply wrap their children in a symmetric marker.
    const inlineMarkers = <String, String>{
      'em': '*',
      'i': '*',
      'strong': '**',
      'b': '**',
      'u': '__',
      'ins': '__',
      'del': '~~',
      'strike': '~~',
      's': '~~',
    };
    final marker = inlineMarkers[name];
    if (marker != null) {
      return '$marker${_walkChildNodes(opts, node)}$marker';
    }

    switch (name) {
      case 'code':
        return '`${node.text}`';
      case 'pre':
        return '```${_parsePreContent(opts, node)}```\n';
      case 'a':
        final target = node.attributes['href'] ?? '';
        final label = _walkChildNodes(opts, node);
        final lowered = target.toLowerCase();
        // Matrix permalinks / matrix: URIs render as their label only.
        if (lowered.startsWith('https://matrix.to/#/') ||
            lowered.startsWith('matrix:')) {
          return label;
        }
        return '🔗$label';
      case 'img':
        return node.attributes['alt'] ??
            node.attributes['title'] ??
            node.attributes['src'] ??
            '';
      case 'br':
        return '\n';
      case 'blockquote':
        return _parseBlockquoteContent(opts, node);
      case 'ul':
        return _parseUlContent(opts, node);
      case 'ol':
        return _parseOlContent(opts, node);
      case 'mx-reply':
        return '';
      case 'hr':
        return '\n----------\n';
      case 'h1':
      case 'h2':
      case 'h3':
      case 'h4':
      case 'h5':
      case 'h6':
        // The digit in the tag name is the heading level.
        final level = int.parse(name[1]);
        return '${'#' * level} ${_walkChildNodes(opts, node)}\n';
      case 'span':
        return _parseSpanContent(opts, node);
      default:
        return _walkChildNodes(opts, node);
    }
  }
  return _walkChildNodes(opts, node);
}
|
||||
}
|
||||
|
||||
/// Mutable state passed along through a single conversion.
class _ConvertOpts {
  /// Current list nesting level; raised while the items of a `<ul>` or `<ol>`
  /// are being converted and lowered again afterwards.
  var listDepth = 0;
}
|
||||
|
|
@ -21,6 +21,7 @@ dependencies:
|
|||
ffi: ^1.0.0
|
||||
js: ^0.6.3
|
||||
slugify: ^2.0.0
|
||||
html: ^0.15.0
|
||||
|
||||
dev_dependencies:
|
||||
pedantic: ^1.11.0
|
||||
|
|
|
|||
|
|
@ -945,6 +945,20 @@ void main() {
|
|||
expect(
|
||||
event.aggregatedEvents(timeline, RelationshipTypes.edit), <Event>{});
|
||||
});
|
||||
test('plaintextBody', () {
  // HTML-formatted message: the plain-text body must be derived from
  // `formatted_body`, not from `body`.
  final content = {
    'body': 'blah',
    'msgtype': 'm.text',
    'format': 'org.matrix.custom.html',
    'formatted_body': '<b>blah</b>',
  };
  final event = Event.fromJson({
    'type': EventTypes.Message,
    'content': content,
    'event_id': '\$source',
    'sender': '@alice:example.org',
  }, null);
  expect(event.plaintextBody, equals('**blah**'));
});
|
||||
test('getDisplayEvent', () {
|
||||
var event = Event.fromJson({
|
||||
'type': EventTypes.Message,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Famedly Matrix SDK
|
||||
* Copyright (C) 2021 Famedly GmbH
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as
|
||||
* published by the Free Software Foundation, either version 3 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU Affero General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Affero General Public License
|
||||
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
import 'package:matrix/src/utils/html_to_text.dart';
|
||||
import 'package:test/test.dart';
|
||||
|
||||
/// Table-driven tests for [HtmlToText.convert].
void main() {
  group('htmlToText', () {
    test('converts HTML fragments to plain text', () {
      // Maps each HTML input to the exact plain text it must convert to.
      final testMap = <String, String>{
        '': '',
        'hello world\nthis is a test': 'hello world\nthis is a test',
        '<em>That\'s</em> not a test, <strong>this</strong> is a test':
            '*That\'s* not a test, **this** is a test',
        'Visit <del><a href="http://example.com">our website</a></del> (outdated)':
            'Visit ~~🔗our website~~ (outdated)',
        '(cw spiders) <span data-mx-spoiler>spiders are pretty cool</span>':
            '(cw spiders) ███████████████████████',
        '<span data-mx-spoiler="cw spiders">spiders are pretty cool</span>':
            '(cw spiders) ███████████████████████',
        '<img src="test.gif" alt="a test case" />': 'a test case',
        'List of cute animals:\n<ul>\n<li>Kittens</li>\n<li>Puppies</li>\n<li>Snakes<br/>(I think they\'re cute!)</li>\n</ul>\n(This list is incomplete, you can help by adding to it!)':
            'List of cute animals:\n● Kittens\n● Puppies\n● Snakes\n (I think they\'re cute!)\n(This list is incomplete, you can help by adding to it!)',
        '<em>fox</em>': '*fox*',
        '<i>fox</i>': '*fox*',
        '<strong>fox</i>': '**fox**',
        '<b>fox</b>': '**fox**',
        '<u>fox</u>': '__fox__',
        '<ins>fox</ins>': '__fox__',
        '<del>fox</del>': '~~fox~~',
        '<strike>fox</strike>': '~~fox~~',
        '<s>fox</s>': '~~fox~~',
        '<code>>fox</code>': '`>fox`',
        '<pre>meep</pre>': '```\nmeep\n```',
        '<pre>meep\n</pre>': '```\nmeep\n```',
        '<pre><code class="language-floof">meep</code></pre>':
            '```floof\nmeep\n```',
        'before<pre>code</pre>after': 'before\n```\ncode\n```\nafter',
        '<p>before</p><pre>code</pre><p>after</p>':
            'before\n```\ncode\n```\nafter',
        '<p>fox</p>': 'fox',
        '<p>fox</p><p>floof</p>': 'fox\n\nfloof',
        '<a href="https://example.org">website</a>': '🔗website',
        '<a href="https://matrix.to/#/@user:example.org">fox</a>': 'fox',
        '<a href="matrix:u/user:example.org">fox</a>': 'fox',
        '<img alt=":wave:" src="mxc://fox">': ':wave:',
        'fox<br>floof': 'fox\nfloof',
        '<blockquote>fox</blockquote>floof': '> fox\nfloof',
        '<blockquote><p>fox</p></blockquote>floof': '> fox\nfloof',
        '<blockquote><p>fox</p></blockquote><p>floof</p>': '> fox\nfloof',
        'a<blockquote>fox</blockquote>floof': 'a\n> fox\nfloof',
        '<blockquote><blockquote>fox</blockquote>floof</blockquote>fluff':
            '> > fox\n> floof\nfluff',
        '<ul><li>hey<ul><li>a</li><li>b</li></ul></li><li>foxies</li></ul>':
            '● hey\n ○ a\n ○ b\n● foxies',
        '<ol><li>a</li><li>b</li></ol>': '1. a\n2. b',
        '<ol><li>a<ol><li>aa</li><li>bb</li></ol></li><li>b</li></ol>':
            '1. a\n 1. aa\n 2. bb\n2. b',
        '<ol><li>a<ul><li>aa</li><li>bb</li></ul></li><li>b</li></ol>':
            '1. a\n ○ aa\n ○ bb\n2. b',
        '<ul><li>a<ol><li>aa</li><li>bb</li></ol></li><li>b</li></ul>':
            '● a\n 1. aa\n 2. bb\n● b',
        '<mx-reply>bunny</mx-reply>fox': 'fox',
        'fox<hr>floof': 'fox\n----------\nfloof',
        '<p>fox</p><hr><p>floof</p>': 'fox\n----------\nfloof',
        '<h1>fox</h1>floof': '# fox\nfloof',
        '<h1>fox</h1><p>floof</p>': '# fox\nfloof',
        'floof<h1>fox</h1>': 'floof\n# fox',
        '<p>floof</p><h1>fox</h1>': 'floof\n# fox',
        '<h2>fox</h2>': '## fox',
        '<h3>fox</h3>': '### fox',
        '<h4>fox</h4>': '#### fox',
        '<h5>fox</h5>': '##### fox',
        '<h6>fox</h6>': '###### fox',
        '<span>fox</span>': 'fox',
        '<p>fox</p>\n<p>floof</p>': 'fox\n\nfloof',
      };
      for (final entry in testMap.entries) {
        // Name the input in the failure message so the broken case can be
        // identified without re-running the table by hand.
        expect(HtmlToText.convert(entry.key), entry.value,
            reason: 'input: ${entry.key}');
      }
    });
  });
}
|
||||
Loading…
Reference in New Issue