001/* 002 * cilla - Blog Management System 003 * 004 * Copyright (C) 2012 Richard "Shred" Körber 005 * http://cilla.shredzone.org 006 * 007 * This program is free software: you can redistribute it and/or modify 008 * it under the terms of the GNU Affero General Public License as published 009 * by the Free Software Foundation, either version 3 of the License, or 010 * (at your option) any later version. 011 * 012 * This program is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 * GNU General Public License for more details. 016 * 017 * You should have received a copy of the GNU Affero General Public License 018 * along with this program. If not, see <http://www.gnu.org/licenses/>. 019 */ 020package org.shredzone.cilla.plugin.sitemap; 021 022import java.io.IOException; 023import java.io.OutputStream; 024import java.io.OutputStreamWriter; 025import java.text.DecimalFormat; 026import java.text.DecimalFormatSymbols; 027import java.text.SimpleDateFormat; 028import java.util.Date; 029import java.util.Locale; 030import java.util.TimeZone; 031 032/** 033 * A simple writer for sitemap.xml files. 034 * 035 * @author Richard "Shred" Körber 036 * @see <a href="http://www.sitemaps.org">sitemaps.org</a> 037 */ 038public class SitemapWriter extends OutputStreamWriter { 039 private static final char CR = '\n'; 040 041 private final DecimalFormat priorityFormat; 042 private final SimpleDateFormat dateFormat; 043 044 /** 045 * Enumeration of update frequencies. 046 * 047 * @see <a href="http://www.sitemaps.org/protocol.html#changefreqdef">sitemaps protocol</a> 048 */ 049 public static enum Frequency { 050 ALWAYS, HOURLY, DAILY, WEEKLY, MONTHLY, YEARLY, NEVER; 051 } 052 053 /** 054 * Instantiates a new {@link SitemapWriter}. 055 * 056 * @param out 057 * {@link OutputStream} to write to 058 */ 059 public SitemapWriter(OutputStream out) throws IOException { 060 super(out, "utf-8"); 061 062 priorityFormat = new DecimalFormat("0.0", DecimalFormatSymbols.getInstance(Locale.ENGLISH)); 063 064 dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ENGLISH); 065 dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); 066 } 067 068 /** 069 * Writes the XML header. Must be invoked once at the beginning of the stream. 070 */ 071 public void writeHeader() throws IOException { 072 write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + CR); 073 write("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">" + CR); 074 } 075 076 /** 077 * Writes an URL entry to the XML file. 078 * 079 * @param url 080 * URL of the page 081 * @param lastmod 082 * Last modification date, or {@code null} if unknown 083 * @param changeFreq 084 * Change frequency, or {@code null} if unknown 085 * @param priority 086 * Priority of the page in the sitemap (between 0f and 1f), or {@code null} 087 * for default priority 088 */ 089 public void writeUrl(String url, Date lastmod, Frequency changeFreq, Float priority) 090 throws IOException { 091 if (url == null || url.isEmpty()) { 092 throw new IllegalArgumentException("url must be set"); 093 } 094 095 if (priority != null && (priority < 0f || priority > 1f)) { 096 throw new IllegalArgumentException("priority out of range: " + priority); 097 } 098 099 write("<url>"); 100 101 write("<loc>"); 102 write(url); 103 write("</loc>"); 104 105 if (lastmod != null) { 106 write("<lastmod>"); 107 write(dateFormat.format(lastmod)); 108 write("</lastmod>"); 109 } 110 111 if (changeFreq != null) { 112 write("<changefreq>"); 113 write(changeFreq.name().toLowerCase()); 114 write("</changefreq>"); 115 } 116 117 if (priority != null) { 118 write("<priority>"); 119 write(priorityFormat.format(priority)); 120 write("</priority>"); 121 } 122 123 write("</url>" + CR); 124 } 125 126 /** 127 * Writes the XML footer. Must be invoked once before the stream is closed. 128 */ 129 public void writeFooter() throws IOException { 130 write("</urlset>" + CR); 131 } 132 133}