1 package org.sentrysoftware.maven.skin;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 import java.text.Normalizer;
24 import java.text.Normalizer.Form;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collections;
28 import java.util.HashMap;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.regex.Pattern;
32
33 import org.apache.velocity.tools.ToolContext;
34 import org.apache.velocity.tools.config.DefaultKey;
35 import org.apache.velocity.tools.generic.SafeConfig;
36 import org.apache.velocity.tools.generic.ValueParser;
37 import org.jsoup.Jsoup;
38 import org.jsoup.internal.StringUtil;
39 import org.jsoup.nodes.Document;
40 import org.jsoup.nodes.Element;
41 import org.jsoup.parser.Tag;
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56 @DefaultKey("htmlTool")
57 public class HtmlTool extends SafeConfig {
58
59
60 private static final List<String> HEADINGS = Collections.unmodifiableList(
61 Arrays.asList("h1", "h2", "h3", "h4", "h5", "h6"));
62
63
64 private String outputEncoding = "UTF-8";
65
66
67
68
/**
 * Creates a new tool instance; the output encoding keeps its default value
 * until {@code configure} is invoked.
 */
public HtmlTool() {
    super();
}
72
73
74
75
76
77
78 @Override
79 protected void configure(final ValueParser values) {
80
81
82 Object velocityContext = values.get("velocityContext");
83
84 if (!(velocityContext instanceof ToolContext)) {
85 return;
86 }
87
88 ToolContext ctxt = (ToolContext) velocityContext;
89
90
91 Object outputEncodingObj = ctxt.get("outputEncoding");
92 if (outputEncodingObj instanceof String) {
93 this.outputEncoding = (String) outputEncodingObj;
94 }
95 }
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112 public Element setAttr(final Element body, final String selector, final String attributeKey, final String value) {
113
114 List<Element> elements = body.select(selector);
115
116 for (Element element : elements) {
117 element.attr(attributeKey, value);
118 }
119
120 return body;
121
122 }
123
124
125
126
127
128
129
130 public Element parseContent(final String content) {
131 Document doc = Jsoup.parseBodyFragment(content);
132 doc.outputSettings().charset(outputEncoding);
133 return doc.body();
134 }
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150 public List<String> getAttr(final Element body, final String selector, final String attributeKey) {
151
152 List<Element> elements = body.select(selector);
153 List<String> attrs = new ArrayList<String>();
154
155 for (Element element : elements) {
156 String attrValue = element.attr(attributeKey);
157 attrs.add(attrValue);
158 }
159
160 return attrs;
161 }
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178 public Element addClass(final Element body, final String selector, final List<String> classNames, final int amount) {
179
180 List<Element> elements = body.select(selector);
181 if (amount >= 0) {
182
183 elements = elements.subList(0, Math.min(amount, elements.size()));
184 }
185
186 for (Element element : elements) {
187 for (String className : classNames) {
188 element.addClass(className);
189 }
190 }
191
192 return body;
193
194 }
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209 public Element addClass(final Element body, final String selector, final List<String> classNames) {
210 return addClass(body, selector, classNames, -1);
211 }
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226 public Element addClass(final Element body, final String selector, final String className) {
227 return addClass(body, selector, Collections.singletonList(className));
228 }
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245 public Element wrap(final Element body, final String selector, final String wrapHtml, final int amount) {
246
247 List<Element> elements = body.select(selector);
248 if (amount >= 0) {
249
250 elements = elements.subList(0, Math.min(amount, elements.size()));
251 }
252
253 for (Element element : elements) {
254 element.wrap(wrapHtml);
255 }
256
257 return body;
258
259 }
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275 public Element append(final Element body, final String selector, final String appendHtml, final int amount) {
276
277 List<Element> elements = body.select(selector);
278 if (amount >= 0) {
279
280 elements = elements.subList(0, Math.min(amount, elements.size()));
281 }
282
283 for (Element element : elements) {
284 element.append(appendHtml);
285 }
286
287 return body;
288
289 }
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306 public Element prepend(final Element body, final String selector, final String prependHtml, final int amount) {
307
308 List<Element> elements = body.select(selector);
309 if (amount >= 0) {
310
311 elements = elements.subList(0, Math.min(amount, elements.size()));
312 }
313
314 for (Element element : elements) {
315 element.prepend(prependHtml);
316 }
317
318 return body;
319
320 }
321
322
323
324
325
326
327
328
329
330
331
332
333
334 public Element remove(final Element body, final String selector) {
335
336 List<Element> elements = body.select(selector);
337
338 for (Element element : elements) {
339 element.remove();
340 }
341
342 return body;
343
344 }
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360 public Element replace(final Element body, final String selector, final String replaceHtml, final int amount) {
361
362 List<Element> elements = body.select(selector);
363 if (amount >= 0) {
364
365 elements = elements.subList(0, Math.min(amount, elements.size()));
366 }
367
368 for (Element element : elements) {
369 element.before(replaceHtml).remove();
370 }
371
372 return body;
373
374 }
375
376
377
378
379
380
381
382
383
384
385
386
387
388 public List<String> text(final Element body, final String selector) {
389
390 List<Element> elements = body.select(selector);
391 List<String> texts = new ArrayList<String>();
392
393 for (Element element : elements) {
394 texts.add(element.text());
395 }
396
397 return texts;
398 }
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419 public Element headingAnchorToId(final Element body) {
420
421
422 List<String> headNoIds = concat(HEADINGS, ":not([id])", true);
423
424
425 String nameA = "a[name]:not([href])";
426
427
428 List<Element> headingsInnerA = body.select(StringUtil.join(
429 concat(headNoIds, ":has(" + nameA + ")", true), ", "));
430
431 for (Element heading : headingsInnerA) {
432 List<Element> anchors = heading.select(nameA);
433
434 if (!anchors.isEmpty()) {
435 anchorToId(heading, anchors.get(0));
436 }
437 }
438
439
440 List<Element> headingsPreA = body.select(StringUtil.join(
441 concat(headNoIds, nameA + " + ", false), ", "));
442
443 for (Element heading : headingsPreA) {
444 Element anchor = heading.previousElementSibling();
445 if (anchor != null) {
446 anchorToId(heading, anchor);
447 }
448 }
449
450
451
452
453 List<Element> anchorsPreH = body.select(StringUtil.join(
454 concat(headNoIds, " + " + nameA, true), ", "));
455
456 for (Element anchor : anchorsPreH) {
457 Element heading = anchor.previousElementSibling();
458 if (heading != null) {
459 anchorToId(heading, anchor);
460 }
461 }
462
463 return body;
464 }
465
466
467
468
469
470
471
472 private static void anchorToId(final Element heading, final Element anchor) {
473
474 if ("a".equals(anchor.tagName()) && heading.id().isEmpty()) {
475 String aName = anchor.attr("name");
476 if (!aName.isEmpty()) {
477
478 heading.attr("id", aName);
479
480
481 anchor.remove();
482 }
483 }
484 }
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501 public static List<String> concat(final List<String> elements, final String text, final boolean append) {
502 List<String> concats = new ArrayList<String>();
503
504 for (String element : elements) {
505 concats.add(append ? element + text : text + element);
506 }
507
508 return concats;
509 }
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527 public Element ensureHeadingIds(final Element body) {
528
529
530 Map<String, Integer> ids = new HashMap<String, Integer>();
531 List<Element> idElems = body.select("*[id]");
532 for (Element idElem : idElems) {
533 ids.put(idElem.id(), 0);
534 }
535
536
537 List<Element> headingsNoId = body.select("h1:not([id]), h2:not([id]), h3:not([id]), h4:not([id]), h5:not([id]), h6:not([id])");
538
539 for (Element heading : headingsNoId) {
540
541
542 String headingText = heading.text();
543
544
545 String headingSlug = slug(headingText);
546 if (headingSlug.length() > 50) {
547 headingSlug = headingSlug.substring(0, 50);
548 }
549
550
551 int slugNumber = ids.merge(headingSlug, 1, (oldValue, newValue) -> oldValue + 1);
552
553
554 if (slugNumber > 1) {
555 headingSlug = headingSlug + "_" + slugNumber;
556 }
557 heading.attr("id", headingSlug);
558 }
559
560 return body;
561
562 }
563
564
565
566
567
568
569
570
571
572
573
574
575
576 public Element fixIds(final Element body) {
577
578
579 List<Element> idElems = body.select("*[id]");
580 for (Element idElem : idElems) {
581
582 String id = idElem.id();
583 String newId = slug(id);
584 if (!id.equals(newId)) {
585 idElem.attr("id", newId);
586 }
587 }
588
589
590 List<Element> aElems = body.select("a[href^=#]");
591 for (Element aElem : aElems) {
592
593 String href = aElem.attr("href");
594 String newHref = "#" + slug(href.substring(1));
595 if (!href.equals(newHref)) {
596 aElem.attr("href", newHref);
597 }
598 }
599
600
601 return body;
602 }
603
604
605
606
607
608
609
610
611
612
613
614 public Element fixTableHeads(final Element body) {
615
616
617 List<Element> tableHeadRows = body.select("table > tbody > tr:has(th)");
618
619 for (Element row : tableHeadRows) {
620
621
622 Element table = row.parent().parent();
623
624
625 row.remove();
626
627
628 Element thead = new Element(Tag.valueOf("thead"), "");
629 thead.appendChild(row);
630
631 table.prependChild(thead);
632 }
633
634 return body;
635 }
636
637
638
639
640
641 private static final Pattern NONLATIN = Pattern.compile("[^\\w-]");
642
643
644
645
646 private static final Pattern WORD_SEPARATORS = Pattern.compile("[\\s_'()\\[\\]{}/\\|+=*,;:\\.]+");
647
648
649
650
651 private static final Pattern LEADING_TRAILING_DASHES = Pattern.compile("^-+|-+$");
652
653
654
655
656
657
658
659
660 public static String slug(final String input) {
661 String normalized = Normalizer.normalize(input, Form.NFD);
662 String nowhitespace = WORD_SEPARATORS.matcher(normalized).replaceAll("-");
663 String noSpecialChars = NONLATIN.matcher(nowhitespace).replaceAll("");
664 return LEADING_TRAILING_DASHES.matcher(noSpecialChars).replaceAll("").toLowerCase();
665 }
666
667
668
669
670
671
672
673
674
675 public Element fixProtocolRelativeUrls(final Element body) {
676
677
678
679 List<Element> aElems = body.select("*[href^=//]");
680
681
682 if (aElems.isEmpty()) {
683 return body;
684 }
685
686 for (Element aElem : aElems) {
687
688
689 String href = aElem.attr("href");
690 aElem.attr("href", "https:" + href);
691
692 }
693
694
695 return body;
696 }
697
698
699 }