View Javadoc
1   package org.sentrysoftware.maven.skin;
2   
3   /*-
4    * ╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲
5    * Sentry Maven Skin Tools
6    * ჻჻჻჻჻჻
7    * Copyright 2017 - 2023 Sentry Software
8    * ჻჻჻჻჻჻
9    * Licensed under the Apache License, Version 2.0 (the "License");
10   * you may not use this file except in compliance with the License.
11   * You may obtain a copy of the License at
12   *
13   *      http://www.apache.org/licenses/LICENSE-2.0
14   *
15   * Unless required by applicable law or agreed to in writing, software
16   * distributed under the License is distributed on an "AS IS" BASIS,
17   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18   * See the License for the specific language governing permissions and
19   * limitations under the License.
20   * ╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱╲╱
21   */
22  
23  import java.awt.Image;
24  import java.awt.image.BufferedImage;
25  import java.io.File;
26  import java.io.IOException;
27  import java.nio.file.Path;
28  import java.nio.file.Paths;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.regex.Pattern;
32  import java.util.stream.Collectors;
33  
34  import javax.imageio.IIOImage;
35  import javax.imageio.ImageIO;
36  import javax.imageio.ImageWriteParam;
37  import javax.imageio.ImageWriter;
38  import javax.imageio.spi.IIORegistry;
39  import javax.imageio.stream.FileImageOutputStream;
40  
41  import org.apache.velocity.tools.config.DefaultKey;
42  import org.jsoup.nodes.Element;
43  
44  import com.luciad.imageio.webp.WebPImageReaderSpi;
45  import com.luciad.imageio.webp.WebPImageWriterSpi;
46  import com.luciad.imageio.webp.WebPWriteParam;
47  
48  /**
49   * Set of tools for handling images
50   *
51   */
52  @DefaultKey("imageTool")
53  public class ImageTool {
54  
55  	static {
56  		// First, register the WEBP IOImage Writer and Reader
57  		// (Note: this should be done automatically by IOImage, but it doesn't work
58  		// with Plexus and Maven because of their specific ClassLoader)
59  		IIORegistry iioRegistry = IIORegistry.getDefaultInstance();
60  		iioRegistry.registerServiceProvider(new WebPImageWriterSpi());
61  		iioRegistry.registerServiceProvider(new WebPImageReaderSpi());
62  
63  	}
64  
	/**
	 * Pattern that matches the beginning of an absolute URL, i.e. an optional
	 * scheme immediately followed by <code>//</code> (case-insensitive), like:
	 * <ul>
	 * <li>http://google.com
	 * <li>ftp://cia.gov
	 * <li>//sentrysoftware.com
	 * </ul>
	 */
	private static final Pattern ABSOLUTE_URL_PATTERN = Pattern.compile("^(?:[a-z]+:)?//", Pattern.CASE_INSENSITIVE);
73  
	/**
	 * Creates a new instance.
	 * <p>
	 * The class declares no instance fields, so there is nothing to initialize
	 * here (the WEBP plugins are registered by the static initializer).
	 */
	public ImageTool() {
		/* Do nothing */
	}
80  
81  	/**
82  	 * Returns whether specified path is absolute or not.
83  	 * <ul>
84  	 * <li>http://google.com => absolute
85  	 * <li>ftp://cia.gov => absolute
86  	 * <li>//sentrysoftware.com => absolute
87  	 * <li>path/file => relative
88  	 *
89  	 * @param path Path to test
90  	 * @return whether specified path is absolute or not
91  	 */
92  	protected static boolean isAbsoluteUrl(final String path) {
93  		return ABSOLUTE_URL_PATTERN.matcher(path).find();
94  	}
95  
96  	/**
97  	 * Check the image links in the document and make sure they refer to a file that
98  	 * actually exists.
99  	 *
100 	 * @param body         the HTML content
101 	 * @param basedir         Actual root directory of the site on the file system
102 	 * @param currentDocument Logical path of the document being parsed (e.g.
103 	 *                        "index.html", or "subdir/subpage.html")
104 	 * @return the updated HTML content
105 	 * @throws IOException when an image cannot be read or converted
106 	 */
107 	public Element checkImageLinks(
108 			final Element body,
109 			final String basedir,
110 			final String currentDocument
111 	) throws IOException {
112 
113 		// Initialization
114 		List<String> errorList = new ArrayList<String>();
115 
116 		// basedir path
117 		Path basedirPath = Paths.get(basedir).toAbsolutePath();
118 
119 		// First, calculate the real path of the current document
120 		Path documentPath = Paths.get(basedir, currentDocument);
121 
122 		Path parentPath = documentPath.getParent();
123 		if (parentPath == null) {
124 			throw new IOException("Couldn't get the parent path of " + currentDocument);
125 		}
126 
127 		// Select all images
128 		List<Element> elements = body.select("img");
129 
130 		// For each image
131 		for (Element element : elements) {
132 
133 			// Get the SRC attribute (the path)
134 			String imageSrc = element.attr("src");
135 			if (imageSrc.isEmpty()) {
136 				continue;
137 			}
138 
139 			// Skip absolute URLs
140 			if (isAbsoluteUrl(imageSrc)) {
141 				continue;
142 			}
143 
144 			// Calculate the path to the actual picture file
145 			Path sourcePath = documentPath.resolveSibling(imageSrc);
146 			File sourceFile = sourcePath.toFile();
147 
148 			// Skip external URLs
149 			if (!sourcePath.toAbsolutePath().startsWith(basedirPath)) {
150 				continue;
151 			}
152 
153 			// Recalculate the relative link and see whether the original matches
154 			// the recalculated one. If not, it means there is a problem in the case.
155 			Path recalculatedPath = parentPath.toRealPath().relativize(sourcePath.toRealPath());
156 			String sourcePathSlashString = sourcePath.toString().replace('\\', '/');
157 			String recalculatedPathSlashString = recalculatedPath.toString().replace('\\', '/');
158 			if (!recalculatedPathSlashString.endsWith(sourcePathSlashString) && !sourcePathSlashString.endsWith(recalculatedPathSlashString)) {
159 				errorList.add("Referenced image " + imageSrc + " in " + currentDocument + " doesn't match case of actual file " + recalculatedPath);
160 			}
161 
162 			// Sanity check
163 			if (!sourceFile.isFile()) {
164 				errorList.add("Referenced image " + imageSrc + " in " + currentDocument + " doesn't exist");
165 			}
166 
167 		}
168 
169 		// Some errors, show them all
170 		if (!errorList.isEmpty()) {
171 			throw new IOException(errorList.stream().collect(Collectors.joining("\n")));
172 		}
173 
174 		return body;
175 
176 	}
177 
178 
179 	/**
180 	 * Returns the extension of the file
181 	 * <p>
182 	 * @param file File
183 	 * @return the extension of the file
184 	 */
185 	protected static String getExtension(final File file) {
186 		String name = file.getName();
187 		int dotIndex = name.lastIndexOf('.');
188 		if (dotIndex > -1) {
189 			return name.substring(dotIndex + 1);
190 		}
191 		return "";
192 	}
193 
194 	/**
195 	 * Returns the name of the file without its extension
196 	 * <p>
197 	 * @param file File
198 	 * @return the name of the file without its extension
199 	 */
200 	protected static String getNameWithoutExtension(final File file) {
201 		String name = file.getName();
202 		int dotIndex = name.lastIndexOf('.');
203 		if (dotIndex > -1) {
204 			return name.substring(0, dotIndex);
205 		}
206 		return name;
207 	}
208 
209 	/**
210 	 * Create a thumbnail image file from the specified image file.
211 	 * <p>
212 	 * @param sourceFile File instance of the source image
213 	 * @param thumbnailMark Suffix to be appended to the source file name to build the thumbnail file
214 	 * @param maxWidth Maximum width of the thumbnail, 0 if no maximum width
215 	 * @param maxHeight Maximum height of the thumbnail, 0 if no maximum height
216 	 * @return File instance of the thumbail image
217 	 * @throws IOException when cannot read the source image, or write the thumbnail file
218 	 */
219 	protected static File createThumbnail(
220 			final File sourceFile,
221 			final String thumbnailMark,
222 			final int maxWidth,
223 			final int maxHeight
224 	) throws IOException {
225 
226 		// Sanity check
227 		if (!sourceFile.isFile()) {
228 			throw new IOException(sourceFile.getAbsolutePath() + " does not exist");
229 		}
230 
231 		// Destination
232 		File destination = new File(sourceFile.getParent(), getNameWithoutExtension(sourceFile) + thumbnailMark + ".jpg");
233 
234 		// Do we need to do anything? (if destination is newer than source, we skip)
235 		if (Helper.getLastModifiedTime(sourceFile) < Helper.getLastModifiedTime(destination)) {
236 			return destination;
237 		}
238 
239 		// Read the specified image
240 		BufferedImage sourceImage = ImageIO.read(sourceFile);
241 		String imageType = getExtension(sourceFile).toLowerCase();
242 
243 		// Calculate the dimensions of the resulting thumbnail
244 		int targetWidth = sourceImage.getWidth();
245 		int targetHeight = sourceImage.getHeight();
246 
247 		if (maxWidth > 0 && targetWidth > maxWidth) {
248 			targetHeight = targetHeight * maxWidth / targetWidth;
249 			targetWidth = maxWidth;
250 		}
251 		if (maxHeight > 0 && targetHeight > maxHeight) {
252 			targetWidth = targetWidth * maxHeight / targetHeight;
253 			targetHeight = maxHeight;
254 		}
255 
256 		// Rescale
257 		Image resultingImage = sourceImage.getScaledInstance(targetWidth, targetHeight, Image.SCALE_SMOOTH);
258 		BufferedImage outputImage = new BufferedImage(targetWidth, targetHeight, BufferedImage.TYPE_INT_RGB);
259 		outputImage.getGraphics().drawImage(resultingImage, 0, 0, null);
260 
261 		// Write the thumbnail file
262 		ImageIO.write(outputImage, imageType, destination);
263 
264 		return destination;
265 
266 	}
267 
268 	/**
269 	 * Saves the specified image file as a WEBP image.
270 	 * <p>
271 	 * @param sourceFile image file to convert to WEBP
272 	 * @return a File instance of the converted image, or null if the file was already a WEBP
273 	 * @throws IOException when cannot read the image file
274 	 */
275 	protected static File saveImageFileAsWebp(final File sourceFile) throws IOException {
276 
277 		// Sanity check
278 		if (!sourceFile.isFile()) {
279 			throw new IOException(sourceFile.getAbsolutePath() + " does not exist");
280 		}
281 
282 		// Output file
283 		String webpImagePath = getNameWithoutExtension(sourceFile) + ".webp";
284 		File webpFile = new File(sourceFile.getParent(), webpImagePath);
285 
286 		// Do we need to do anything? (if destination is newer than source, we skip)
287 		if (Helper.getLastModifiedTime(sourceFile) < Helper.getLastModifiedTime(webpFile)) {
288 			return webpFile;
289 		}
290 
291 		// Read the specified image
292 		BufferedImage sourceImage = ImageIO.read(sourceFile);
293 		if (sourceImage == null) {
294 			return null;
295 		}
296 
297 		// Image type (skip if webp)
298 		String imageType = getExtension(sourceFile).toLowerCase();
299 		if ("webp".equals(imageType)) {
300 			return null;
301 		}
302 
303 		// Obtain a WebP ImageWriter instance
304 		ImageWriter writer = ImageIO.getImageWritersBySuffix("webp").next();
305 
306 		// Configure encoding parameters: LOSSY for jpeg and jpg, LOSSLESS otherwise
307 		WebPWriteParam writeParam = new WebPWriteParam(writer.getLocale());
308 		writeParam.setCompressionMode(ImageWriteParam.MODE_EXPLICIT);
309 		if ("jpeg".equals(imageType) || "jpg".equals(imageType)) {
310 			writeParam.setCompressionType(writeParam.getCompressionTypes()[WebPWriteParam.LOSSY_COMPRESSION]);
311 		} else {
312 			writeParam.setCompressionType(writeParam.getCompressionTypes()[WebPWriteParam.LOSSLESS_COMPRESSION]);
313 		}
314 
315 		// Configure the output on the ImageWriter
316 		writer.setOutput(new FileImageOutputStream(webpFile));
317 
318 		// Write the WEBP image
319 		writer.write(null, new IIOImage(sourceImage, null, null), writeParam);
320 
321 		// Return the file
322 		return webpFile;
323 
324 	}
325 
326 	/**
327 	 * Upgrades all images in the specified HTML document to WEBP.
328 	 *
329 	 * @param body         the HTML content
330 	 * @param selector        CSS selector to select all images to upgrade
331 	 *                        ("img.screenshot" will process all &lt;IMG
332 	 *                        class="screenshot"&gt; elements)
333 	 * @param basedir         Actual root directory of the site on the file system
334 	 * @param currentDocument Logical path of the document being parsed (e.g.
335 	 *                        "index.html", or "subdir/subpage.html")
336 	 * @return the updated HTML content
337 	 * @throws IOException when an image cannot be read or converted
338 	 */
339 	public Element convertImagesToWebp(
340 			final Element body,
341 			final String selector,
342 			final String basedir,
343 			final String currentDocument
344 	) throws IOException {
345 
346 		// basedir path
347 		Path basedirPath = Paths.get(basedir).toAbsolutePath();
348 
349 		// First, calculate the real path of the current document
350 		Path documentPath = Paths.get(basedir, currentDocument);
351 
352 		Path parentPath = documentPath.getParent();
353 		if (parentPath == null) {
354 			throw new IOException("Couldn't get parent path of " + currentDocument);
355 		}
356 
357 		// Select all images
358 		List<Element> elements = body.select(selector);
359 
360 		// For each image
361 		for (Element element : elements) {
362 
363 			// Get the SRC attribute (the path)
364 			String imageSrc = element.attr("src");
365 			if (imageSrc.isEmpty()) {
366 				continue;
367 			}
368 
369 			// Skip absolute URLs
370 			if (isAbsoluteUrl(imageSrc)) {
371 				continue;
372 			}
373 
374 			// Calculate the path to the actual picture file
375 			Path sourcePath = documentPath.resolveSibling(imageSrc);
376 			File sourceFile = sourcePath.toFile();
377 
378 			// Skip external URLs
379 			if (!sourcePath.toAbsolutePath().startsWith(basedirPath)) {
380 				continue;
381 			}
382 
383 			// Sanity check
384 			if (!sourceFile.isFile()) {
385 				throw new IOException(sourceFile.getAbsolutePath() + " (referenced as " + imageSrc + ") does not exist");
386 			}
387 
388 			// Save as webp
389 			File webpFile = saveImageFileAsWebp(sourceFile);
390 			if (webpFile == null) {
391 				continue;
392 			}
393 
394 			// Calculate the src path of the webp image
395 			String webpSrc = parentPath.relativize(webpFile.toPath()).toString().replace('\\', '/');
396 
397 			// Now wrap the IMG element with <picture> and <source srcset="...webp">
398 			element
399 				.wrap("<picture>")
400 				.parent()
401 				.prependElement("source")
402 				.attr("srcset", webpSrc)
403 				.attr("type", "image/webp");
404 
405 		}
406 
407 		return body;
408 
409 	}
410 
411 
412 	/**
413 	 * Explicitly states the width and height of each image in the specified document.
414 	 *
415 	 * @param body         the HTML content
416 	 * @param selector        CSS selector to select all images to upgrade
417 	 *                        ("img.screenshot" will process all &lt;IMG
418 	 *                        class="screenshot"&gt; elements)
419 	 * @param basedir         Actual root directory of the site on the file system
420 	 * @param currentDocument Logical path of the document being parsed (e.g.
421 	 *                        "index.html", or "subdir/subpage.html")
422 	 * @return the updated HTML content
423 	 * @throws IOException when an image cannot be read or converted
424 	 */
425 	public Element explicitImageSize(
426 			final Element body,
427 			final String selector,
428 			final String basedir,
429 			final String currentDocument
430 	) throws IOException {
431 
432 		// basedir path
433 		Path basedirPath = Paths.get(basedir).toAbsolutePath();
434 
435 		// First, calculate the real path of the current document
436 		Path documentPath = Paths.get(basedir, currentDocument);
437 
438 		// Select all images
439 		List<Element> elements = body.select(selector);
440 
441 		// For each image
442 		for (Element element : elements) {
443 
444 			// Get the SRC attribute (the path)
445 			String imageSrc = element.attr("src");
446 			if (imageSrc.isEmpty()) {
447 				continue;
448 			}
449 
450 			// Skip absolute URLs
451 			if (isAbsoluteUrl(imageSrc)) {
452 				continue;
453 			}
454 
455 			// If size and height are already specified, skip
456 			if (element.attr("style").matches("(^|\\b)(width:|height:)")
457 					|| !element.attr("height").isEmpty()
458 					|| !element.attr("width").isEmpty()) {
459 				continue;
460 			}
461 
462 			// Calculate the path to the actual picture file
463 			Path sourcePath = documentPath.resolveSibling(imageSrc);
464 			File sourceFile = sourcePath.toFile();
465 
466 			// Skip external URLs
467 			if (!sourcePath.toAbsolutePath().startsWith(basedirPath)) {
468 				continue;
469 			}
470 
471 			// Sanity check
472 			if (!sourceFile.isFile()) {
473 				throw new IOException(sourceFile.getAbsolutePath() + " (referenced as " + imageSrc + ") does not exist");
474 			}
475 
476 			// Read the image
477 			BufferedImage sourceImage = ImageIO.read(sourceFile);
478 			if (sourceImage == null) {
479 				continue;
480 			}
481 
482 			// Now set the width and height attributes (and CSS)
483 			element
484 				.attr("width", String.valueOf(sourceImage.getWidth()))
485 				.attr("height", String.valueOf(sourceImage.getHeight()))
486 				.attr("style", String.format(
487 						"width: %dpx; height: %dpx;%s",
488 						sourceImage.getWidth(),
489 						sourceImage.getHeight(),
490 						element.attr("style")
491 				));
492 
493 		}
494 
495 		return body;
496 
497 	}
498 
499 	/**
500 	 * For all images in the document, create the corresponding thumbnail, and wrap
501 	 * the picture elements with the specified template.
502 	 * <p>
503 	 * The specified template may reference the below "macros":
504 	 * <ul>
505 	 * <li><code>%imgWidth%</code>: the original image width
506 	 * <li><code>%imgHeight%</code>: the original image height
507 	 * <li><code>%imgAlt%</code>: the image alternate text (it's description)
508 	 * <li><code>%thumbWidth%</code>: the thumbnail image width
509 	 * <li><code>%thumbHeight%</code>: the thumbnail image height
510 	 * <li><code>%thumbSrc%</code>: the thumbnail image source path
511 	 * </ul>
512 	 *
513 	 * @param body         the HTML content
514 	 * @param selector        CSS selector to select all images to upgrade
515 	 *                        ("img.screenshot" will process all &lt;IMG
516 	 *                        class="screenshot"&gt; elements)
517 	 * @param basedir         Actual root directory of the site on the file system
518 	 * @param currentDocument Logical path of the document being parsed (e.g.
519 	 *                        "index.html", or "subdir/subpage.html")
520 	 * @param maxWidth        Maximum width for the thumbnail (or 0 for no maximum)
521 	 * @param maxHeight       Maximum height for the thumbnail (or 0 for no maximum)
522 	 * @param wrapTemplate    HTML code wrap the image element with. This will be
523 	 *                        typically used to create the thumbnail element. The HTML
524 	 *                        can reference macros.
525 	 * @return the updated HTML content
526 	 * @throws IOException when an image cannot be read or converted
527 	 */
528 	public Element convertImagesToThumbnails(
529 			final Element body,
530 			final String selector,
531 			final String basedir,
532 			final String currentDocument,
533 			final int maxWidth,
534 			final int maxHeight,
535 			final String wrapTemplate
536 	) throws IOException {
537 
538 		// basedir path
539 		Path basedirPath = Paths.get(basedir).toAbsolutePath();
540 
541 		// First, calculate the real path of the current document
542 		Path documentPath = Paths.get(basedir, currentDocument);
543 
544 		Path parentPath = documentPath.getParent();
545 		if (parentPath == null) {
546 			throw new IOException("Couldn't get parent path of " + currentDocument);
547 		}
548 
549 		// Select all images
550 		List<Element> elements = body.select(selector);
551 
552 		// For each image
553 		for (Element element : elements) {
554 
555 			// Get the SRC attribute (the path)
556 			String imageSrc = element.attr("src");
557 			if (imageSrc.isEmpty()) {
558 				continue;
559 			}
560 
561 			// Skip absolute URLs
562 			if (isAbsoluteUrl(imageSrc)) {
563 				continue;
564 			}
565 
566 			// Get the ALT attribute (the description)
567 			String imageAlt = element.attr("alt");
568 
569 			// Calculate the path to the actual picture file
570 			Path sourcePath = documentPath.resolveSibling(imageSrc);
571 			File sourceFile = sourcePath.toFile();
572 
573 			// Skip external URLs
574 			if (!sourcePath.toAbsolutePath().startsWith(basedirPath)) {
575 				continue;
576 			}
577 
578 			// Sanity check
579 			if (!sourceFile.isFile()) {
580 				throw new IOException(sourceFile.getAbsolutePath() + " (referenced as " + imageSrc + ") does not exist");
581 			}
582 
583 			// Image size
584 			BufferedImage sourceImage = ImageIO.read(sourceFile);
585 			int sourceWidth = sourceImage.getWidth();
586 			int sourceHeight = sourceImage.getHeight();
587 
588 			// Create the thumbnail
589 			File thumbnailFile = createThumbnail(sourceFile, "-thumbnail", maxWidth, maxHeight);
590 
591 			// Read the thumbnail and get its size
592 			BufferedImage thumbnailImage = ImageIO.read(thumbnailFile);
593 			int thumbnailWidth = thumbnailImage.getWidth();
594 			int thumbnailHeight = thumbnailImage.getHeight();
595 
596 			// Calculate the src path of the webp image
597 			String thumbnailSrc = parentPath.relativize(thumbnailFile.toPath()).toString().replace('\\', '/');
598 
599 			// Replace macros in the wrap template
600 			String wrapHtml = wrapTemplate
601 				.replaceAll("%imgWidth%", String.valueOf(sourceWidth))
602 				.replaceAll("%imgHeight%", String.valueOf(sourceHeight))
603 				.replaceAll("%thumbWidth%", String.valueOf(thumbnailWidth))
604 				.replaceAll("%thumbHeight%", String.valueOf(thumbnailHeight))
605 				.replaceAll("%thumbSrc%", thumbnailSrc)
606 				.replaceAll("%imgAlt%", imageAlt);
607 
608 			// Now wrap the IMG element with template
609 			// If the IMG element is inside a PICTURE element, wrap the PICTURE element
610 			Element pictureElement = element;
611 			if ("PICTURE".equalsIgnoreCase(element.parent().tagName())) {
612 				pictureElement = element.parent();
613 			}
614 			pictureElement.wrap(wrapHtml);
615 
616 		}
617 
618 		return body;
619 
620 	}
621 
622 
623 }