diff --git a/README.md b/README.md
index 822ec49..0396b9f 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,8 @@ This library provides a set of classes that can be used to solve some common "pr
 * Automatically discovering and downloading new versions of the eForms SDK.
 * Maintaining and instantiating at runtime the correct application component versions for different major versions of the SDK.
 * Basic parsing and processing of XPath expressions.
+* Parsing eForms notice XML documents and extracting metadata (SDK version, subtype, languages).
+* Secure XML document building with XXE prevention (OWASP guidelines).
 
 ## Using the eForms Core Library
 
diff --git a/src/main/java/eu/europa/ted/eforms/NoticeDocument.java b/src/main/java/eu/europa/ted/eforms/NoticeDocument.java
new file mode 100644
index 0000000..c534856
--- /dev/null
+++ b/src/main/java/eu/europa/ted/eforms/NoticeDocument.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2022 European Union
+ *
+ * Licensed under the EUPL, Version 1.2 or – as soon they will be approved by the European
+ * Commission – subsequent versions of the EUPL (the "Licence"); You may not use this work except in
+ * compliance with the Licence. You may obtain a copy of the Licence at:
+ * https://joinup.ec.europa.eu/software/page/eupl
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the Licence
+ * is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the Licence for the specific language governing permissions and limitations under
+ * the Licence.
+ */
+package eu.europa.ted.eforms;
+
+import java.io.ByteArrayInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathExpressionException;
+import javax.xml.xpath.XPathFactory;
+import javax.xml.xpath.XPathNodes;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.Validate;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+import eu.europa.ted.util.SafeDocumentBuilder;
+
+/**
+ * A class representing a Notice document with accessor methods for its XML contents and metadata.
+ */
+public class NoticeDocument {
+
+  private static final String TAG_PRIMARY_LANGUAGE = "cbc:NoticeLanguageCode";
+  private static final String TAG_SDK_VERSION = "cbc:CustomizationID";
+  private static final String TAG_SUBTYPE_CODE = "cbc:SubTypeCode";
+  private static final String XPATH_ADDITIONAL_LANGUAGE =
+      "/*/AdditionalNoticeLanguage/ID/text()";
+  private static final String SDK_VERSION_PREFIX = "eforms-sdk-";
+
+  private final Element root;
+  private final String xmlContents;
+
+  /**
+   * Creates a notice document from an XML file, which is read as UTF-8.
+   *
+   * @param noticeXmlPath The path of the notice XML file
+   * @throws ParserConfigurationException If the XML parser cannot be configured
+   * @throws SAXException If the XML cannot be parsed
+   * @throws IOException If the path is not a regular file or cannot be read
+   */
+  public NoticeDocument(final Path noticeXmlPath)
+      throws ParserConfigurationException, SAXException, IOException {
+    Validate.notNull(noticeXmlPath, "Undefined Notice XML file path");
+
+    if (!Files.isRegularFile(noticeXmlPath)) {
+      throw new FileNotFoundException(noticeXmlPath.toString());
+    }
+
+    this.xmlContents = Files.readString(noticeXmlPath, StandardCharsets.UTF_8);
+    this.root = parseXmlRoot(this.xmlContents);
+  }
+
+  /**
+   * Creates a notice document from a stream, whose bytes are decoded as UTF-8.
+   *
+   * @param noticeXmlInput The stream providing the notice XML
+   * @throws ParserConfigurationException If the XML parser cannot be configured
+   * @throws SAXException If the XML cannot be parsed
+   * @throws IOException If the stream cannot be read
+   */
+  public NoticeDocument(final InputStream noticeXmlInput)
+      throws ParserConfigurationException, SAXException, IOException {
+    Validate.notNull(noticeXmlInput, "Undefined Notice XML input");
+
+    this.xmlContents = new String(noticeXmlInput.readAllBytes(), StandardCharsets.UTF_8);
+    this.root = parseXmlRoot(this.xmlContents);
+  }
+
+  /**
+   * Creates a notice document from XML held in a string.
+   *
+   * @param noticeXmlContents The notice XML; must not be blank
+   * @throws ParserConfigurationException If the XML parser cannot be configured
+   * @throws SAXException If the XML cannot be parsed
+   * @throws IOException If the XML cannot be read
+   */
+  public NoticeDocument(final String noticeXmlContents)
+      throws ParserConfigurationException, SAXException, IOException {
+    Validate.notBlank(noticeXmlContents, "Invalid Notice XML contents");
+
+    this.xmlContents = noticeXmlContents;
+    this.root = parseXmlRoot(this.xmlContents);
+  }
+
+  /**
+   * Parses the given XML and returns its root element.
+   */
+  private static Element parseXmlRoot(final String xmlContents)
+      throws ParserConfigurationException, SAXException, IOException {
+    try (InputStream input =
+        new ByteArrayInputStream(xmlContents.getBytes(StandardCharsets.UTF_8))) {
+      final Element root =
+          SafeDocumentBuilder.buildSafeDocumentBuilderAllowDoctype().parse(input)
+              .getDocumentElement();
+      Validate.notNull(root, "No XML root found");
+      return root;
+    }
+  }
+
+  /**
+   * Gets the notice sub type from the notice XML.
+   *
+   * @return The notice sub type as found in the notice XML
+   * @throws RuntimeException If no non-blank sub type code is found
+   */
+  public String getNoticeSubType() {
+    return Optional.ofNullable(this.root.getElementsByTagName(TAG_SUBTYPE_CODE))
+        .map((final NodeList subTypeCodes) -> {
+          // When several elements match, the last one in document order wins.
+          Optional<String> result = Optional.empty();
+          for (int i = 0; i < subTypeCodes.getLength(); i++) {
+            result = Optional.ofNullable(subTypeCodes.item(i))
+                .filter((final Node node) -> node.getAttributes() != null)
+                .map(Node::getTextContent)
+                .map(StringUtils::strip);
+          }
+          return result.orElse(null);
+        })
+        .filter(StringUtils::isNotBlank)
+        .orElseThrow(() -> new RuntimeException("SubTypeCode not found in notice XML"));
+  }
+
+  /**
+   * Gets the eForms SDK version from the notice XML.
+   *
+   * @return The eForms SDK version as found in the notice XML, without the "eforms-sdk-" prefix
+   * @throws RuntimeException If the version is blank or not found exactly once
+   */
+  public String getEformsSdkVersion() {
+    return Optional.ofNullable(this.root.getElementsByTagName(TAG_SDK_VERSION))
+        .filter((final NodeList nodes) -> nodes.getLength() == 1)
+        .map((final NodeList nodes) -> Optional.ofNullable(nodes.item(0))
+            .map(Node::getTextContent)
+            .map(StringUtils::strip)
+            .map((final String str) -> str.startsWith(SDK_VERSION_PREFIX)
+                ? str.substring(SDK_VERSION_PREFIX.length()) : str)
+            .orElse(null))
+        .filter(StringUtils::isNotBlank)
+        .orElseThrow(() -> new RuntimeException("eForms SDK version not found in notice XML"));
+  }
+
+  /**
+   * Gets the primary language from the notice XML.
+   *
+   * @return The primary language, or {@code null} if the notice does not declare one
+   */
+  public String getPrimaryLanguage() {
+    return Optional
+        .ofNullable(this.root.getElementsByTagName(TAG_PRIMARY_LANGUAGE))
+        .map((final NodeList nodes) -> nodes.item(0))
+        .map(Node::getTextContent)
+        .orElse(null);
+  }
+
+  /**
+   * Gets the list of other languages from the notice XML.
+   *
+   * @return A list of other languages; empty if there are none
+   * @throws XPathExpressionException If an error occurs evaluating the XPath expression
+   */
+  public List<String> getOtherLanguages() throws XPathExpressionException {
+    // XPath objects are neither thread-safe nor reentrant, so a new one is created for each call
+    // rather than sharing a single static instance between threads.
+    final XPath xpath = XPathFactory.newInstance().newXPath();
+    final XPathNodes nodes = xpath.evaluateExpression(XPATH_ADDITIONAL_LANGUAGE,
+        this.root.getOwnerDocument(), XPathNodes.class);
+
+    final List<String> languages = new ArrayList<>();
+    if (nodes != null) {
+      for (final Node node : nodes) {
+        final String language = node.getTextContent();
+        if (StringUtils.isNotBlank(language)) {
+          languages.add(language);
+        }
+      }
+    }
+    return languages;
+  }
+
+  /**
+   * Gets the notice XML contents.
+   *
+   * @return The notice XML
+   */
+  public String getXmlContents() {
+    return this.xmlContents;
+  }
+}
diff --git a/src/main/java/eu/europa/ted/util/SafeDocumentBuilder.java b/src/main/java/eu/europa/ted/util/SafeDocumentBuilder.java
new file mode 100644
index 0000000..b70d385
--- /dev/null
+++ b/src/main/java/eu/europa/ted/util/SafeDocumentBuilder.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2022 European Union
+ *
+ * Licensed under the EUPL, Version 1.2 or – as soon they will be approved by the European
+ * Commission – subsequent versions of the EUPL (the "Licence"); You may not use this work except in
+ * compliance with the Licence. You may obtain a copy of the Licence at:
+ * https://joinup.ec.europa.eu/software/page/eupl
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the Licence
+ * is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the Licence for the specific language governing permissions and limitations under
+ * the Licence.
+ */
+package eu.europa.ted.util;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Utility class for the creation of {@link DocumentBuilder} instances for XML parsing, using XXE
+ * prevention techniques as recommended by OWASP.
+ *
+ * @see <a href=
+ *      "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html">OWASP
+ *      XXE Prevention Cheat Sheet</a>
+ */
+public class SafeDocumentBuilder {
+
+  private static final Logger logger = LoggerFactory.getLogger(SafeDocumentBuilder.class);
+
+  private SafeDocumentBuilder() {
+    throw new AssertionError("Utility class.");
+  }
+
+  /**
+   * Creates a {@link DocumentBuilder} using XXE prevention techniques. Allows DOCTYPE declarations.
+   *
+   * @return A {@link DocumentBuilder} instance
+   * @throws ParserConfigurationException when the builder is configured with a feature that is
+   *         unsupported by the XML processor
+   */
+  public static DocumentBuilder buildSafeDocumentBuilderAllowDoctype()
+      throws ParserConfigurationException {
+    return buildSafeDocumentBuilder(false);
+  }
+
+  /**
+   * Creates a {@link DocumentBuilder} using XXE prevention techniques. Raises a fatal error when a
+   * DOCTYPE declaration is found.
+   *
+   * @return A {@link DocumentBuilder} instance
+   * @throws ParserConfigurationException when the builder is configured with a feature that is
+   *         unsupported by the XML processor
+   */
+  public static DocumentBuilder buildSafeDocumentBuilderStrict()
+      throws ParserConfigurationException {
+    return buildSafeDocumentBuilder(true);
+  }
+
+  /**
+   * Creates a {@link DocumentBuilder} with external entities, external DTDs and XInclude disabled.
+   *
+   * @param disallowDoctypeDecl Whether a DOCTYPE declaration makes parsing fail
+   * @return A {@link DocumentBuilder} instance
+   * @throws ParserConfigurationException when a feature is unsupported by the XML processor
+   */
+  private static DocumentBuilder buildSafeDocumentBuilder(final boolean disallowDoctypeDecl)
+      throws ParserConfigurationException {
+    final DocumentBuilderFactory dbf = DocumentBuilderFactory.newDefaultInstance();
+    // Tracks the feature being set, so that a failure is reported against the right one.
+    String feature = null;
+    try {
+      feature = "http://apache.org/xml/features/disallow-doctype-decl";
+      dbf.setFeature(feature, disallowDoctypeDecl);
+
+      feature = "http://xml.org/sax/features/external-general-entities";
+      dbf.setFeature(feature, false);
+
+      feature = "http://xml.org/sax/features/external-parameter-entities";
+      dbf.setFeature(feature, false);
+
+      feature = "http://apache.org/xml/features/nonvalidating/load-external-dtd";
+      dbf.setFeature(feature, false);
+
+      dbf.setXIncludeAware(false);
+      dbf.setExpandEntityReferences(false);
+      dbf.setValidating(false);
+
+      feature = XMLConstants.FEATURE_SECURE_PROCESSING;
+      dbf.setFeature(feature, true);
+
+      dbf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+
+      feature = "http://apache.org/xml/features/nonvalidating/load-dtd-grammar";
+      dbf.setFeature(feature, false);
+
+      return dbf.newDocumentBuilder();
+    } catch (final ParserConfigurationException e) {
+      logger.info("Error: The feature '{}' is probably not supported by your XML processor.",
+          feature);
+      logger.debug("ParserConfigurationException was thrown:", e);
+      throw e;
+    }
+  }
+}