/*
 * Shredzone Commons
003 *
004 * Copyright (C) 2014 Richard "Shred" Körber
005 *   http://commons.shredzone.org
006 *
007 * This program is free software: you can redistribute it and/or modify
008 * it under the terms of the GNU Library General Public License as
009 * published by the Free Software Foundation, either version 3 of the
010 * License, or (at your option) any later version.
011 *
012 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
016 *
017 * You should have received a copy of the GNU Library General Public License
018 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
019 */
020package org.shredzone.commons.xml;
022import static java.util.stream.Collectors.*;
024import java.io.IOException;
025import java.io.InputStream;
026import java.io.Reader;
027import java.io.StringReader;
028import java.util.Collections;
029import java.util.Map;
030import java.util.Optional;
031import java.util.concurrent.atomic.AtomicReference;
032import java.util.function.Function;
033import java.util.stream.IntStream;
034import java.util.stream.Stream;
036import javax.annotation.Nonnull;
037import javax.annotation.ParametersAreNonnullByDefault;
038import javax.annotation.WillClose;
039import javax.annotation.concurrent.Immutable;
040import javax.annotation.concurrent.ThreadSafe;
041import javax.xml.parsers.DocumentBuilder;
042import javax.xml.parsers.DocumentBuilderFactory;
043import javax.xml.parsers.ParserConfigurationException;
044import javax.xml.xpath.XPathConstants;
045import javax.xml.xpath.XPathExpression;
046import javax.xml.xpath.XPathExpressionException;
047import javax.xml.xpath.XPathFactory;
049import org.w3c.dom.Element;
050import org.w3c.dom.NamedNodeMap;
051import org.w3c.dom.Node;
052import org.w3c.dom.NodeList;
053import org.w3c.dom.Text;
054import org.xml.sax.InputSource;
055import org.xml.sax.SAXException;
/**
 * Helps to easily read content from XML sources.
 * <p>
 * A main goal of {@link XQuery} is to keep XML reading as simple as possible. For this
 * reason, sophisticated XML features like validation or namespaces are not supported.
 * <p>
 * Performance was not a goal either. If you need to parse large documents, you are
 * better off using the old-fashioned Java ways.
 *
 * @author Richard "Shred" Körber
 */
071public class XQuery {
073    private final Node node; // DOM node wrapped by this XQuery
074    private final XPathFactory xpf = XPathFactory.newInstance(); // supplies the XPath instances used by evaluate()
075    private final AtomicReference<Optional<XQuery>> parent = new AtomicReference<>(); // holds null until parent() fills it on first use
076    private final AtomicReference<Map<String, String>> attrMap = new AtomicReference<>(); // holds null until attr() fills it on first use
078    /**
079     * Private constructor for a {@link Node} element.
080     */
081    private XQuery(Node node) {
082        this.node = node;
083    }
085    /**
086     * Parses an XML source and returns an {@link XQuery} object representing the root of
087     * the document.
088     *
089     * @param in
090     *            {@link InputSource} of the XML document
091     * @return {@link XQuery} representing the root of the parsed document
092     * @throws IOException
093     *             if the XML source could not be read or parsed for any reason
094     */
095    public static @Nonnull XQuery parse(@WillClose InputSource in) throws IOException {
096        try {
097            DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
098            return new XQuery(db.parse(in));
099        } catch (ParserConfigurationException|SAXException ex) {
100            throw new IOException("Could not parse XML", ex);
101        }
102    }
104    /**
105     * Parses an XML source and returns an {@link XQuery} object representing the root of
106     * the document.
107     *
108     * @param in
109     *            {@link InputStream} of the XML document
110     * @return {@link XQuery} representing the root of the parsed document
111     * @throws IOException
112     *             if the XML source could not be read or parsed for any reason
113     */
114    public static @Nonnull XQuery parse(@WillClose InputStream in) throws IOException {
115        return parse(new InputSource(in));
116    }
118    /**
119     * Parses an XML source and returns an {@link XQuery} object representing the root of
120     * the document.
121     *
122     * @param r
123     *            {@link Reader} providing the XML document
124     * @return {@link XQuery} representing the root of the parsed document
125     * @throws IOException
126     *             if the XML source could not be read or parsed for any reason
127     */
128    public static @Nonnull XQuery parse(@WillClose Reader r) throws IOException {
129        return parse(new InputSource(r));
130    }
132    /**
133     * Parses an XML source and returns an {@link XQuery} object representing the root of
134     * the document.
135     *
136     * @param xml
137     *            String containing the XML document
138     * @return {@link XQuery} representing the root of the parsed document
139     * @throws IOException
140     *             if the XML source could not be read or parsed for any reason
141     */
142    public static @Nonnull XQuery parse(String xml) throws IOException {
143        return parse(new StringReader(xml));
144    }
146    /**
147     * Streams all children of this element. Children elements are represented by
148     * {@link XQuery} objects as well.
149     *
150     * @return {@link Stream} of children
151     */
152    public @Nonnull Stream<XQuery> stream() {
153        return new NodeListSpliterator(node.getChildNodes()).stream()
154                        .filter(it -> it instanceof Element)
155                        .map(XQuery::new);
156    }
158    /**
159     * Returns the next sibling of this element.
160     *
161     * @return Next sibling element
162     * @since 1.1
163     */
164    public @Nonnull Optional<XQuery> nextSibling() {
165        return findElement(Node::getNextSibling);
166    }
168    /**
169     * Returns the previous sibling of this element.
170     *
171     * @return Previous sibling element
172     * @since 1.1
173     */
174    public @Nonnull Optional<XQuery> previousSibling() {
175        return findElement(Node::getPreviousSibling);
176    }
178    /**
179     * Selects elements based on the XPath expression that is applied to the tree
180     * represented by this {@link XQuery}.
181     *
182     * @param xpath
183     *            XPath expression
184     * @return Stream of selected nodes as {@link XQuery} object
185     */
186    public @Nonnull Stream<XQuery> select(String xpath) {
187        return new NodeListSpliterator(evaluate(xpath)).stream().map(XQuery::new);
188    }
190    /**
191     * Gets a single element based on the XPath expression that is applied to the tree
192     * represented by this {@link XQuery}. Exactly one element is expected to match the
193     * XPath expression, otherwise an exception is thrown.
194     *
195     * @param xpath
196     *            XPath expression
197     * @return Selected node
198     * @since 1.1
199     */
200    public @Nonnull XQuery get(String xpath) {
201        NodeList nl = evaluate(xpath);
202        if (nl.getLength() == 1) {
203            return new XQuery(nl.item(0));
204        } else if (nl.getLength() == 0) {
205            throw new IllegalArgumentException("XPath '" + xpath
206                + "' does not match any elements");
207        } else {
208            throw new IllegalArgumentException("XPath '" + xpath + "' matches "
209                + nl.getLength() + " elements");
210        }
211    }
213    /**
214     * Checks if there is at least one element matching the XPath expression.
215     *
216     * @param xpath
217     *            XPath expression
218     * @return {@code true} if there is at least one element, {@code false} if there is
219     *         none.
220     * @since 1.1
221     */
222    public boolean exists(String xpath) {
223        return select(xpath).findAny().isPresent();
224    }
226    /**
227     * Selects values based on the XPath expression that is applied to the tree
228     * represented by this {@link XQuery}.
229     *
230     * @param xpath
231     *            XPath expression
232     * @return Stream of strings containing the node values
233     */
234    public @Nonnull Stream<String> value(String xpath) {
235        return select(xpath).map(XQuery::text);
236    }
238    /**
239     * Selects values based on the XPath expression that is applied to the tree
240     * represented by this {@link XQuery}. In contrast to {@link #value(String)}, this
241     * method reads the element texts recursively, using {@link #allText()}.
242     *
243     * @param xpath
244     *            XPath expression
245     * @return Stream of strings containing the node values
246     */
247    public @Nonnull Stream<String> allValue(String xpath) {
248        return select(xpath).map(XQuery::allText);
249    }
251    /**
252     * Returns the text selected by the XPath expression.
253     *
254     * @param xpath
255     *            XPath expression
256     * @return Text selected by the expression
257     */
258    public @Nonnull String text(String xpath) {
259        return value(xpath).collect(joining());
260    }
262    /**
263     * Returns the tag name of this node.
264     *
265     * @return this {@link XQuery} node's tag name.
266     */
267    public @Nonnull String name() {
268        return node.getNodeName();
269    }
271    /**
272     * Returns the text content of this node.
273     *
274     * @return this {@link XQuery} node's text content, non recursively.
275     */
276    public @Nonnull String text() {
277        return new NodeListSpliterator(node.getChildNodes()).stream()
278                        .filter(it -> it instanceof Text)
279                        .map(it -> ((Text) it).getNodeValue())
280                        .collect(joining());
281    }
283    /**
284     * Returns the text content of the entire tree that is spawned by this node.
285     *
286     * @return this {@link XQuery} node's text content, recursively.
287     */
288    public @Nonnull String allText() {
289        return node.getTextContent();
290    }
292    /**
293     * Returns a map of attributes.
294     *
295     * @return a map of this node's attributes.
296     */
297    public @Nonnull Map<String, String> attr() {
298        synchronized (this) {
299            if (attrMap.get() == null) {
300                attrMap.set(
301                    Optional.ofNullable(node.getAttributes())
302                        .map(XQuery::attributesToMap)
303                        .map(Collections::unmodifiableMap)
304                        .orElseGet(Collections::emptyMap)
305                );
306            }
307        }
308        return attrMap.get();
309    }
311    /**
312     * Returns the parent node of this node, as {@link XQuery} object. A root node
313     * returns an empty optional instead.
314     *
315     * @return parent node
316     */
317    public @Nonnull Optional<XQuery> parent() {
318        synchronized (this) {
319            if (parent.get() == null) {
320                parent.set(Optional.ofNullable(node.getParentNode()).map(XQuery::new));
321            }
322        }
323        return parent.get();
324    }
326    /**
327     * Checks if this is a root node.
328     *
329     * @return {@code true} if this is a root node, {@code false} if there's a parent.
330     * @since 1.1
331     */
332    public boolean isRoot() {
333        return node.getParentNode() == null;
334    }
336    /**
337     * Returns the root node of this node, as {@link XQuery} object. A root node returns
338     * itself.
339     *
340     * @return root node
341     * @since 1.1
342     */
343    public @Nonnull XQuery root() {
344        if (isRoot()) {
345            return this;
346        } else {
347            return new XQuery(node.getOwnerDocument());
348        }
349    }
351    /**
352     * Evaluates the XPath expression and returns a list of nodes.
353     *
354     * @param xpath
355     *            XPath expression
356     * @return {@link NodeList} matching the expression
357     * @throws IllegalArgumentException
358     *             if the XPath expression was invalid
359     */
360    private @Nonnull NodeList evaluate(String xpath) {
361        try {
362            XPathExpression expr = xpf.newXPath().compile(xpath);
363            return (NodeList) expr.evaluate(node, XPathConstants.NODESET);
364        } catch (XPathExpressionException ex) {
365            throw new IllegalArgumentException("Invalid XPath '" + xpath + "'", ex);
366        }
367    }
369    /**
370     * Finds an Element node by applying the iterator function until another Element was
371     * found.
372     *
373     * @param iterator
374     *            Iterator to apply
375     * @return node that was found
376     */
377    private @Nonnull Optional<XQuery> findElement(Function<Node, Node> iterator) {
378        Node it = node;
379        do {
380            it = iterator.apply(it);
381        } while (it != null && !(it instanceof Element));
382        return Optional.ofNullable(it).map(XQuery::new);
383    }
385    /**
386     * Converts a {@link NamedNodeMap} to a standard {@link Map} of attributes.
387     *
388     * @param nnm {@link NamedNodeMap} to convert
389     * @return {@link Map} of attributes and their values
390     */
391    private static @Nonnull Map<String, String> attributesToMap(NamedNodeMap nnm) {
392        return IntStream.range(0, nnm.getLength())
393                    .mapToObj(nnm::item)
394                    .collect(toMap(Node::getNodeName, Node::getNodeValue));
395    }