First
[anni] / test / pleroma / html_test.exs
# Pleroma: A lightweight social networking server
# Copyright © 2017-2022 Pleroma Authors <https://pleroma.social/>
# SPDX-License-Identifier: AGPL-3.0-only
4
defmodule Pleroma.HTMLTest do
  # Tests for Pleroma.HTML: tag stripping, scrubber-based filtering
  # (TwitterText and Default scrubbers), and extraction of the first
  # external URL from a posted object.

  alias Pleroma.HTML
  alias Pleroma.Object
  alias Pleroma.Web.CommonAPI
  use Pleroma.DataCase, async: true

  import Pleroma.Factory

  # Mixed markup fixture: bold text, a paragraph, line breaks, links with an
  # allowed and a not-allowed "rel" value, a plain image, an "emoji" image,
  # and a <script> element.
  @html_sample """
    <b>this is in bold</b>
    <p>this is a paragraph</p>
    this is a linebreak<br />
    this is a link with allowed "rel" attribute: <a href="http://example.com/" rel="tag">example.com</a>
    this is a link with not allowed "rel" attribute: <a href="http://example.com/" rel="tag noallowed">example.com</a>
    this is an image: <img src="http://example.com/image.jpg"><br />
    this is an inline emoji: <img class="emoji" src="http://example.com/image.jpg"><br />
    <script>alert('hacked')</script>
  """

  # Image carrying an inline "onerror" event handler attribute.
  @html_onerror_sample """
  <img src="http://example.com/image.jpg" onerror="alert('hacked')">
  """

  # Image carrying a class other than "emoji".
  @html_stillimage_sample """
  <img class="still-image" src="http://example.com/image.jpg">
  """

  # Span carrying a class that is not a microformats class.
  @html_span_class_sample """
  <span class="animate-spin">hi</span>
  """

  # Mention markup using only microformats classes.
  @html_span_microformats_sample """
  <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
  """

  # Mention markup whose link mixes microformats classes with an extra class.
  @html_span_invalid_microformats_sample """
  <span class="h-card"><a class="u-url mention animate-spin">@<span>foo</span></a></span>
  """

  describe "StripTags scrubber" do
    test "works as expected" do
      # All tags are removed; quotes in the remaining text come back as entities.
      expected = """
        this is in bold
        this is a paragraph
        this is a linebreak
        this is a link with allowed &quot;rel&quot; attribute: example.com
        this is a link with not allowed &quot;rel&quot; attribute: example.com
        this is an image: 
        this is an inline emoji: 
        alert(&#39;hacked&#39;)
      """

      assert expected == HTML.strip_tags(@html_sample)
    end

    test "does not allow attribute-based XSS" do
      # The sample is a single <img> line, so only the trailing newline remains.
      expected = "\n"

      assert expected == HTML.strip_tags(@html_onerror_sample)
    end
  end

  describe "TwitterText scrubber" do
    test "normalizes HTML as expected" do
      # Compared with the Default scrubber expectation, <b> is dropped here.
      expected = """
        this is in bold
        <p>this is a paragraph</p>
        this is a linebreak<br/>
        this is a link with allowed &quot;rel&quot; attribute: <a href="http://example.com/" rel="tag">example.com</a>
        this is a link with not allowed &quot;rel&quot; attribute: <a href="http://example.com/">example.com</a>
        this is an image: <img src="http://example.com/image.jpg"/><br/>
        this is an inline emoji: <img class="emoji" src="http://example.com/image.jpg"/><br/>
        alert(&#39;hacked&#39;)
      """

      assert expected == HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.TwitterText)
    end

    test "does not allow attribute-based XSS" do
      # The "onerror" attribute must be gone while the image itself is kept.
      expected = """
      <img src="http://example.com/image.jpg"/>
      """

      assert expected == HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.TwitterText)
    end

    test "does not allow spans with invalid classes" do
      expected = """
      <span>hi</span>
      """

      assert expected ==
               HTML.filter_tags(@html_span_class_sample, Pleroma.HTML.Scrubber.TwitterText)
    end

    test "does not allow images with invalid classes" do
      expected = """
      <img src="http://example.com/image.jpg"/>
      """

      assert expected ==
               HTML.filter_tags(@html_stillimage_sample, Pleroma.HTML.Scrubber.TwitterText)
    end

    test "does allow microformats" do
      # Markup with only microformats classes passes through unchanged.
      expected = """
      <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
      """

      assert expected ==
               HTML.filter_tags(@html_span_microformats_sample, Pleroma.HTML.Scrubber.TwitterText)
    end

    test "filters invalid microformats markup" do
      # The whole class attribute on the link is dropped, not just the extra class.
      expected = """
      <span class="h-card"><a>@<span>foo</span></a></span>
      """

      assert expected ==
               HTML.filter_tags(
                 @html_span_invalid_microformats_sample,
                 Pleroma.HTML.Scrubber.TwitterText
               )
    end
  end

  describe "default scrubber" do
    test "normalizes HTML as expected" do
      # Unlike the TwitterText expectation, <b> is preserved here.
      expected = """
        <b>this is in bold</b>
        <p>this is a paragraph</p>
        this is a linebreak<br/>
        this is a link with allowed &quot;rel&quot; attribute: <a href="http://example.com/" rel="tag">example.com</a>
        this is a link with not allowed &quot;rel&quot; attribute: <a href="http://example.com/">example.com</a>
        this is an image: <img src="http://example.com/image.jpg"/><br/>
        this is an inline emoji: <img class="emoji" src="http://example.com/image.jpg"/><br/>
        alert(&#39;hacked&#39;)
      """

      assert expected == HTML.filter_tags(@html_sample, Pleroma.HTML.Scrubber.Default)
    end

    test "does not allow attribute-based XSS" do
      expected = """
      <img src="http://example.com/image.jpg"/>
      """

      assert expected == HTML.filter_tags(@html_onerror_sample, Pleroma.HTML.Scrubber.Default)
    end

    test "does not allow spans with invalid classes" do
      expected = """
      <span>hi</span>
      """

      assert expected == HTML.filter_tags(@html_span_class_sample, Pleroma.HTML.Scrubber.Default)
    end

    test "does not allow images with invalid classes" do
      expected = """
      <img src="http://example.com/image.jpg"/>
      """

      # Use the Default scrubber, like every other test in this describe block;
      # calling TwitterText here would only repeat the TwitterText test above.
      # NOTE(review): the expectation assumes Default strips non-"emoji" image
      # classes the same way TwitterText does - confirm against the scrubber.
      assert expected == HTML.filter_tags(@html_stillimage_sample, Pleroma.HTML.Scrubber.Default)
    end

    test "does allow microformats" do
      expected = """
      <span class="h-card"><a class="u-url mention">@<span>foo</span></a></span>
      """

      assert expected ==
               HTML.filter_tags(@html_span_microformats_sample, Pleroma.HTML.Scrubber.Default)
    end

    test "filters invalid microformats markup" do
      expected = """
      <span class="h-card"><a>@<span>foo</span></a></span>
      """

      assert expected ==
               HTML.filter_tags(
                 @html_span_invalid_microformats_sample,
                 Pleroma.HTML.Scrubber.Default
               )
    end
  end

  describe "extract_first_external_url_from_object" do
    test "extracts the url" do
      user = insert(:user)

      {:ok, activity} =
        CommonAPI.post(user, %{
          status:
            "I think I just found the best github repo https://github.com/komeiji-satori/Dress"
        })

      object = Object.normalize(activity, fetch: false)
      {:ok, url} = HTML.extract_first_external_url_from_object(object)
      assert url == "https://github.com/komeiji-satori/Dress"
    end

    test "skips mentions" do
      user = insert(:user)
      other_user = insert(:user)

      # The mention comes first in the status, ahead of the external link.
      {:ok, activity} =
        CommonAPI.post(user, %{
          status:
            "@#{other_user.nickname} install misskey! https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"
        })

      object = Object.normalize(activity, fetch: false)
      {:ok, url} = HTML.extract_first_external_url_from_object(object)

      assert url == "https://github.com/syuilo/misskey/blob/develop/docs/setup.en.md"

      refute url == other_user.ap_id
    end

    test "skips hashtags" do
      user = insert(:user)

      {:ok, activity} =
        CommonAPI.post(user, %{
          status: "#cofe https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
        })

      object = Object.normalize(activity, fetch: false)
      {:ok, url} = HTML.extract_first_external_url_from_object(object)

      assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
    end

    test "skips microformats hashtags" do
      user = insert(:user)

      # Posted as text/html so the hashtag arrives as a ready-made rel="tag" link.
      {:ok, activity} =
        CommonAPI.post(user, %{
          status:
            "<a href=\"https://pleroma.gov/tags/cofe\" rel=\"tag\">#cofe</a> https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140",
          content_type: "text/html"
        })

      object = Object.normalize(activity, fetch: false)
      {:ok, url} = HTML.extract_first_external_url_from_object(object)

      assert url == "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=72255140"
    end

    test "does not crash when there is an HTML entity in a link" do
      user = insert(:user)

      # The URL is wrapped in literal double quotes and contains "&".
      {:ok, activity} = CommonAPI.post(user, %{status: "\"http://cofe.com/?boomer=ok&foo=bar\""})

      object = Object.normalize(activity, fetch: false)

      assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
    end

    test "skips attachment links" do
      user = insert(:user)

      # The only link in the status carries class="attachment".
      {:ok, activity} =
        CommonAPI.post(user, %{
          status:
            "<a href=\"https://pleroma.gov/media/d24caa3a498e21e0298377a9ca0149a4f4f8b767178aacf837542282e2d94fb1.png?name=image.png\" class=\"attachment\">image.png</a>"
        })

      object = Object.normalize(activity, fetch: false)

      assert {:ok, nil} = HTML.extract_first_external_url_from_object(object)
    end
  end
end