Browse Source

Fix `<a>` being rendered without `href` when the link scheme is unsupported (#13040)

- Disallow links with relative paths
- Disallow iframes with non-http protocols and relative paths

Close #13037
master
Eugen Rochko 1 week ago
parent
commit
b1349342d2
No account linked to committer's email address
2 changed files with 55 additions and 6 deletions
  1. 39
    6
      app/lib/sanitize_config.rb
  2. 16
    0
      spec/lib/sanitize_config_spec.rb

+ 39
- 6
app/lib/sanitize_config.rb View File

@@ -2,7 +2,23 @@
2 2
 
3 3
 class Sanitize
4 4
   module Config
5
-    HTTP_PROTOCOLS ||= ['http', 'https', 'dat', 'dweb', 'ipfs', 'ipns', 'ssb', 'gopher', 'xmpp', 'magnet', :relative].freeze
5
+    HTTP_PROTOCOLS = %w(
6
+      http
7
+      https
8
+    ).freeze
9
+
10
+    LINK_PROTOCOLS = %w(
11
+      http
12
+      https
13
+      dat
14
+      dweb
15
+      ipfs
16
+      ipns
17
+      ssb
18
+      gopher
19
+      xmpp
20
+      magnet
21
+    ).freeze
6 22
 
7 23
     CLASS_WHITELIST_TRANSFORMER = lambda do |env|
8 24
       node = env[:node]
@@ -19,19 +35,37 @@ class Sanitize
19 35
       node['class'] = class_list.join(' ')
20 36
     end
21 37
 
38
+    UNSUPPORTED_HREF_TRANSFORMER = lambda do |env|
39
+      return unless env[:node_name] == 'a'
40
+
41
+      current_node = env[:node]
42
+
43
+      scheme = begin
44
+        if current_node['href'] =~ Sanitize::REGEX_PROTOCOL
45
+          Regexp.last_match(1).downcase
46
+        else
47
+          :relative
48
+        end
49
+      end
50
+
51
+      current_node.replace(current_node.text) unless LINK_PROTOCOLS.include?(scheme)
52
+    end
53
+
22 54
     UNSUPPORTED_ELEMENTS_TRANSFORMER = lambda do |env|
23 55
       return unless %w(h1 h2 h3 h4 h5 h6 blockquote pre ul ol li).include?(env[:node_name])
24 56
 
57
+      current_node = env[:node]
58
+
25 59
       case env[:node_name]
26 60
       when 'li'
27
-        env[:node].traverse do |node|
61
+        current_node.traverse do |node|
28 62
           next unless %w(p ul ol li).include?(node.name)
29 63
 
30 64
           node.add_next_sibling('<br>') if node.next_sibling
31 65
           node.replace(node.children) unless node.text?
32 66
         end
33 67
       else
34
-        env[:node].name = 'p'
68
+        current_node.name = 'p'
35 69
       end
36 70
     end
37 71
 
@@ -50,13 +84,12 @@ class Sanitize
50 84
         },
51 85
       },
52 86
 
53
-      protocols: {
54
-        'a' => { 'href' => HTTP_PROTOCOLS },
55
-      },
87
+      protocols: {},
56 88
 
57 89
       transformers: [
58 90
         CLASS_WHITELIST_TRANSFORMER,
59 91
         UNSUPPORTED_ELEMENTS_TRANSFORMER,
92
+        UNSUPPORTED_HREF_TRANSFORMER,
60 93
       ]
61 94
     )
62 95
 

+ 16
- 0
spec/lib/sanitize_config_spec.rb View File

@@ -26,5 +26,21 @@ describe Sanitize::Config do
26 26
     it 'keep links in lists' do
27 27
       expect(Sanitize.fragment('<p>Check out:</p><ul><li><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a></li><li>Bar</li></ul>', subject)).to eq '<p>Check out:</p><p><a href="https://joinmastodon.org" rel="nofollow noopener noreferrer" target="_blank">joinmastodon.org</a><br>Bar</p>'
28 28
     end
29
+
30
+    it 'removes a without href' do
31
+      expect(Sanitize.fragment('<a>Test</a>', subject)).to eq 'Test'
32
+    end
33
+
34
+    it 'removes a without href and only keeps text content' do
35
+      expect(Sanitize.fragment('<a><span class="invisible">foo&amp;</span><span>Test</span></a>', subject)).to eq 'foo&amp;Test'
36
+    end
37
+
38
+    it 'removes a with unsupported scheme in href' do
39
+      expect(Sanitize.fragment('<a href="foo://bar">Test</a>', subject)).to eq 'Test'
40
+    end
41
+
42
+    it 'keeps a with href' do
43
+      expect(Sanitize.fragment('<a href="http://example.com">Test</a>', subject)).to eq '<a href="http://example.com" rel="nofollow noopener noreferrer" target="_blank">Test</a>'
44
+    end
29 45
   end
30 46
 end

Loading…
Cancel
Save