001/* 002 * Shredzone Commons 003 * 004 * Copyright (C) 2014 Richard "Shred" Körber 005 * http://commons.shredzone.org 006 * 007 * This program is free software: you can redistribute it and/or modify 008 * it under the terms of the GNU Library General Public License as 009 * published by the Free Software Foundation, either version 3 of the 010 * License, or (at your option) any later version. 011 * 012 * This program is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 * GNU General Public License for more details. 016 * 017 * You should have received a copy of the GNU Library General Public License 018 * along with this program. If not, see <http://www.gnu.org/licenses/>. 019 */ 020package org.shredzone.commons.xml; 021 022import static java.util.stream.Collectors.*; 023 024import java.io.IOException; 025import java.io.InputStream; 026import java.io.Reader; 027import java.io.StringReader; 028import java.util.Collections; 029import java.util.Map; 030import java.util.Optional; 031import java.util.concurrent.atomic.AtomicReference; 032import java.util.function.Function; 033import java.util.stream.IntStream; 034import java.util.stream.Stream; 035 036import javax.annotation.Nonnull; 037import javax.annotation.ParametersAreNonnullByDefault; 038import javax.annotation.WillClose; 039import javax.annotation.concurrent.Immutable; 040import javax.annotation.concurrent.ThreadSafe; 041import javax.xml.parsers.DocumentBuilder; 042import javax.xml.parsers.DocumentBuilderFactory; 043import javax.xml.parsers.ParserConfigurationException; 044import javax.xml.xpath.XPathConstants; 045import javax.xml.xpath.XPathExpression; 046import javax.xml.xpath.XPathExpressionException; 047import javax.xml.xpath.XPathFactory; 048 049import org.w3c.dom.Element; 050import org.w3c.dom.NamedNodeMap; 051import org.w3c.dom.Node; 052import org.w3c.dom.NodeList; 053import org.w3c.dom.Text; 054import org.xml.sax.InputSource; 055import org.xml.sax.SAXException; 056 057/** 058 * Helps to easily read content from XML sources. 059 * <p> 060 * A main goal of {@link XQuery} is to keep XML reading as simple as possible. For this 061 * reason, sophisticated XML features like validation or namespaces are not supported. 062 * <p> 063 * Performance was not a goal as well. If you need to parse large documents, you better 064 * use the old-fashioned Java ways. 065 * 066 * @author Richard "Shred" Körber 067 */ 068@ParametersAreNonnullByDefault 069@Immutable 070@ThreadSafe 071public class XQuery { 072 073 private final Node node; 074 private final XPathFactory xpf = XPathFactory.newInstance(); 075 private final AtomicReference<Optional<XQuery>> parent = new AtomicReference<>(); 076 private final AtomicReference<Map<String, String>> attrMap = new AtomicReference<>(); 077 078 /** 079 * Private constructor for a {@link Node} element. 080 */ 081 private XQuery(Node node) { 082 this.node = node; 083 } 084 085 /** 086 * Parses an XML source and returns an {@link XQuery} object representing the root of 087 * the document. 088 * 089 * @param in 090 * {@link InputSource} of the XML document 091 * @return {@link XQuery} representing the root of the parsed document 092 * @throws IOException 093 * if the XML source could not be read or parsed for any reason 094 */ 095 public static @Nonnull XQuery parse(@WillClose InputSource in) throws IOException { 096 try { 097 DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder(); 098 return new XQuery(db.parse(in)); 099 } catch (ParserConfigurationException|SAXException ex) { 100 throw new IOException("Could not parse XML", ex); 101 } 102 } 103 104 /** 105 * Parses an XML source and returns an {@link XQuery} object representing the root of 106 * the document. 107 * 108 * @param in 109 * {@link InputStream} of the XML document 110 * @return {@link XQuery} representing the root of the parsed document 111 * @throws IOException 112 * if the XML source could not be read or parsed for any reason 113 */ 114 public static @Nonnull XQuery parse(@WillClose InputStream in) throws IOException { 115 return parse(new InputSource(in)); 116 } 117 118 /** 119 * Parses an XML source and returns an {@link XQuery} object representing the root of 120 * the document. 121 * 122 * @param r 123 * {@link Reader} providing the XML document 124 * @return {@link XQuery} representing the root of the parsed document 125 * @throws IOException 126 * if the XML source could not be read or parsed for any reason 127 */ 128 public static @Nonnull XQuery parse(@WillClose Reader r) throws IOException { 129 return parse(new InputSource(r)); 130 } 131 132 /** 133 * Parses an XML source and returns an {@link XQuery} object representing the root of 134 * the document. 135 * 136 * @param xml 137 * String containing the XML document 138 * @return {@link XQuery} representing the root of the parsed document 139 * @throws IOException 140 * if the XML source could not be read or parsed for any reason 141 */ 142 public static @Nonnull XQuery parse(String xml) throws IOException { 143 return parse(new StringReader(xml)); 144 } 145 146 /** 147 * Streams all children of this element. Children elements are represented by 148 * {@link XQuery} objects as well. 149 * 150 * @return {@link Stream} of children 151 */ 152 public @Nonnull Stream<XQuery> stream() { 153 return new NodeListSpliterator(node.getChildNodes()).stream() 154 .filter(it -> it instanceof Element) 155 .map(XQuery::new); 156 } 157 158 /** 159 * Returns the next sibling of this element. 160 * 161 * @return Next sibling element 162 * @since 1.1 163 */ 164 public @Nonnull Optional<XQuery> nextSibling() { 165 return findElement(Node::getNextSibling); 166 } 167 168 /** 169 * Returns the previous sibling of this element. 170 * 171 * @return Previous sibling element 172 * @since 1.1 173 */ 174 public @Nonnull Optional<XQuery> previousSibling() { 175 return findElement(Node::getPreviousSibling); 176 } 177 178 /** 179 * Selects elements based on the XPath expression that is applied to the tree 180 * represented by this {@link XQuery}. 181 * 182 * @param xpath 183 * XPath expression 184 * @return Stream of selected nodes as {@link XQuery} object 185 */ 186 public @Nonnull Stream<XQuery> select(String xpath) { 187 return new NodeListSpliterator(evaluate(xpath)).stream().map(XQuery::new); 188 } 189 190 /** 191 * Gets a single element based on the XPath expression that is applied to the tree 192 * represented by this {@link XQuery}. Exactly one element is expected to match the 193 * XPath expression, otherwise an exception is thrown. 194 * 195 * @param xpath 196 * XPath expression 197 * @return Selected node 198 * @since 1.1 199 */ 200 public @Nonnull XQuery get(String xpath) { 201 NodeList nl = evaluate(xpath); 202 if (nl.getLength() == 1) { 203 return new XQuery(nl.item(0)); 204 } else if (nl.getLength() == 0) { 205 throw new IllegalArgumentException("XPath '" + xpath 206 + "' does not match any elements"); 207 } else { 208 throw new IllegalArgumentException("XPath '" + xpath + "' matches " 209 + nl.getLength() + " elements"); 210 } 211 } 212 213 /** 214 * Checks if there is at least one element matching the XPath expression. 215 * 216 * @param xpath 217 * XPath expression 218 * @return {@code true} if there is at least one element, {@code false} if there is 219 * none. 220 * @since 1.1 221 */ 222 public boolean exists(String xpath) { 223 return select(xpath).findAny().isPresent(); 224 } 225 226 /** 227 * Selects values based on the XPath expression that is applied to the tree 228 * represented by this {@link XQuery}. 229 * 230 * @param xpath 231 * XPath expression 232 * @return Stream of strings containing the node values 233 */ 234 public @Nonnull Stream<String> value(String xpath) { 235 return select(xpath).map(XQuery::text); 236 } 237 238 /** 239 * Selects values based on the XPath expression that is applied to the tree 240 * represented by this {@link XQuery}. In contrast to {@link #value(String)}, this 241 * method reads the element texts recursively, using {@link #allText()}. 242 * 243 * @param xpath 244 * XPath expression 245 * @return Stream of strings containing the node values 246 */ 247 public @Nonnull Stream<String> allValue(String xpath) { 248 return select(xpath).map(XQuery::allText); 249 } 250 251 /** 252 * Returns the text selected by the XPath expression. 253 * 254 * @param xpath 255 * XPath expression 256 * @return Text selected by the expression 257 */ 258 public @Nonnull String text(String xpath) { 259 return value(xpath).collect(joining()); 260 } 261 262 /** 263 * Returns the tag name of this node. 264 * 265 * @return this {@link XQuery} node's tag name. 266 */ 267 public @Nonnull String name() { 268 return node.getNodeName(); 269 } 270 271 /** 272 * Returns the text content of this node. 273 * 274 * @return this {@link XQuery} node's text content, non recursively. 275 */ 276 public @Nonnull String text() { 277 return new NodeListSpliterator(node.getChildNodes()).stream() 278 .filter(it -> it instanceof Text) 279 .map(it -> ((Text) it).getNodeValue()) 280 .collect(joining()); 281 } 282 283 /** 284 * Returns the text content of the entire tree that is spawned by this node. 285 * 286 * @return this {@link XQuery} node's text content, recursively. 287 */ 288 public @Nonnull String allText() { 289 return node.getTextContent(); 290 } 291 292 /** 293 * Returns a map of attributes. 294 * 295 * @return a map of this node's attributes. 296 */ 297 public @Nonnull Map<String, String> attr() { 298 synchronized (this) { 299 if (attrMap.get() == null) { 300 attrMap.set( 301 Optional.ofNullable(node.getAttributes()) 302 .map(XQuery::attributesToMap) 303 .map(Collections::unmodifiableMap) 304 .orElseGet(Collections::emptyMap) 305 ); 306 } 307 } 308 return attrMap.get(); 309 } 310 311 /** 312 * Returns the parent node of this node, as {@link XQuery} object. A root node 313 * returns an empty optional instead. 314 * 315 * @return parent node 316 */ 317 public @Nonnull Optional<XQuery> parent() { 318 synchronized (this) { 319 if (parent.get() == null) { 320 parent.set(Optional.ofNullable(node.getParentNode()).map(XQuery::new)); 321 } 322 } 323 return parent.get(); 324 } 325 326 /** 327 * Checks if this is a root node. 328 * 329 * @return {@code true} if this is a root node, {@code false} if there's a parent. 330 * @since 1.1 331 */ 332 public boolean isRoot() { 333 return node.getParentNode() == null; 334 } 335 336 /** 337 * Returns the root node of this node, as {@link XQuery} object. A root node returns 338 * itself. 339 * 340 * @return root node 341 * @since 1.1 342 */ 343 public @Nonnull XQuery root() { 344 if (isRoot()) { 345 return this; 346 } else { 347 return new XQuery(node.getOwnerDocument()); 348 } 349 } 350 351 /** 352 * Evaluates the XPath expression and returns a list of nodes. 353 * 354 * @param xpath 355 * XPath expression 356 * @return {@link NodeList} matching the expression 357 * @throws IllegalArgumentException 358 * if the XPath expression was invalid 359 */ 360 private @Nonnull NodeList evaluate(String xpath) { 361 try { 362 XPathExpression expr = xpf.newXPath().compile(xpath); 363 return (NodeList) expr.evaluate(node, XPathConstants.NODESET); 364 } catch (XPathExpressionException ex) { 365 throw new IllegalArgumentException("Invalid XPath '" + xpath + "'", ex); 366 } 367 } 368 369 /** 370 * Finds an Element node by applying the iterator function until another Element was 371 * found. 372 * 373 * @param iterator 374 * Iterator to apply 375 * @return node that was found 376 */ 377 private @Nonnull Optional<XQuery> findElement(Function<Node, Node> iterator) { 378 Node it = node; 379 do { 380 it = iterator.apply(it); 381 } while (it != null && !(it instanceof Element)); 382 return Optional.ofNullable(it).map(XQuery::new); 383 } 384 385 /** 386 * Converts a {@link NamedNodeMap} to a standard {@link Map} of attributes. 387 * 388 * @param nnm {@link NamedNodeMap} to convert 389 * @return {@link Map} of attributes and their values 390 */ 391 private static @Nonnull Map<String, String> attributesToMap(NamedNodeMap nnm) { 392 return IntStream.range(0, nnm.getLength()) 393 .mapToObj(nnm::item) 394 .collect(toMap(Node::getNodeName, Node::getNodeValue)); 395 } 396 397}