001/* 002 * Shredzone Commons 003 * 004 * Copyright (C) 2012 Richard "Shred" Körber 005 * http://commons.shredzone.org 006 * 007 * This program is free software: you can redistribute it and/or modify 008 * it under the terms of the GNU Library General Public License as 009 * published by the Free Software Foundation, either version 3 of the 010 * License, or (at your option) any later version. 011 * 012 * This program is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 * GNU General Public License for more details. 016 * 017 * You should have received a copy of the GNU Library General Public License 018 * along with this program. If not, see <http://www.gnu.org/licenses/>. 019 */ 020 021package org.shredzone.commons.view.util; 022 023import java.net.URLDecoder; 024import java.net.URLEncoder; 025import java.nio.charset.StandardCharsets; 026import java.text.Normalizer; 027import java.util.regex.Matcher; 028import java.util.regex.Pattern; 029 030import javax.annotation.Nonnull; 031import javax.annotation.ParametersAreNonnullByDefault; 032 033/** 034 * Utility methods for view path management. 035 * 036 * @author Richard "Shred" Körber 037 */ 038@ParametersAreNonnullByDefault 039public final class PathUtils { 040 041 private PathUtils() { 042 // Utility class without constructor 043 } 044 045 /** 046 * Simplifies a path part. The resulting string only contains numbers ([0-9]) and 047 * lowercase characters ([a-z]). One ore more consecutive whitespaces or a few 048 * non-ascii characters are converted into a single dash '-'. All other characters are 049 * either converted to ASCII characters, or removed. 050 * <p> 051 * This method can be used to convert e.g. titles into URL parts, for search engine 052 * optimization. 053 * <p> 054 * On accented characters, the accent is removed. However, currently German umlauts 055 * are converted into their respective ASCII counterparts ('ä' -> 'ae'). Future 056 * implementations may also contain translations for other language's accented 057 * characters. 058 * <p> 059 * Consider this method as one-way encoding. Future releases may return different 060 * strings. 061 * 062 * @param part 063 * path part to simplify 064 * @return simplified path part 065 */ 066 public static @Nonnull String simplify(String part) { 067 StringBuilder result = new StringBuilder(part.length()); 068 069 boolean lastWasDash = false; 070 071 for (char ch : part.toLowerCase().toCharArray()) { 072 if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'z')) { 073 result.append(ch); 074 lastWasDash = false; 075 076 } else if (ch == ' ' || ch == '+' || ch == '-' || ch == '_' || ch == '&') { 077 if (!lastWasDash) { 078 result.append('-'); 079 } 080 lastWasDash = true; 081 082 } else if (ch >= 128) { 083 // TODO: German-centric... Is there an international implementation? 084 switch (ch) { 085 case 'ä': 086 case 'Ä': 087 result.append("ae"); 088 lastWasDash = false; 089 break; 090 091 case 'ö': 092 case 'Ö': 093 result.append("oe"); 094 lastWasDash = false; 095 break; 096 097 case 'ü': 098 case 'Ü': 099 result.append("ue"); 100 lastWasDash = false; 101 break; 102 103 case 'ß': 104 result.append("ss"); 105 lastWasDash = false; 106 break; 107 108 default: 109 String normalized = Normalizer.normalize(Character.toString(ch), Normalizer.Form.NFKD); 110 for (char nch : normalized.toLowerCase().toCharArray()) { 111 if (Character.isLetterOrDigit(nch)) { 112 lastWasDash = false; 113 result.append(nch); 114 } 115 } 116 } 117 } 118 } 119 120 return result.toString(); 121 } 122 123 /** 124 * Suggests a file name suffix for the given content type. 125 * <p> 126 * The current implementation only detects the standard HTML image types. 127 * 128 * @param mime 129 * content type to find a suffix for 130 * @return suggested suffix, or "bin" if there is no known suffix 131 */ 132 public static @Nonnull String suffix(String mime) { 133 // Prominent Mime Types 134 switch (mime) { 135 case "image/png": return "png"; 136 case "image/jpeg": return "jpg"; 137 case "image/gif": return "gif"; 138 case "image/svg+xml": return "svg"; 139 case "image/tiff": return "tif"; 140 } 141 142 // Try to guess 143 Matcher m = Pattern.compile("^.*?/(.{1,6}?)(\\+.*)?$").matcher(mime); 144 if (m.matches()) { 145 return m.group(1); 146 } 147 148 // Is it a text? 149 if (mime.startsWith("text/")) { 150 return "txt"; 151 } 152 153 // Fallback to bin 154 return "bin"; 155 } 156 157 /** 158 * URL encodes a string. utf-8 charset is used for encoding. 159 * <p> 160 * This is a convenience call of {@link URLEncoder#encode(String, String)} with 161 * exception handling. 162 * 163 * @param string 164 * string to be URL encoded 165 * @return encoded string 166 */ 167 public static @Nonnull String encode(String string) { 168 return URLEncoder.encode(string, StandardCharsets.UTF_8); 169 } 170 171 /** 172 * URL decodes a string. utf-8 charset is used for decoding. 173 * <p> 174 * This is a convenience call of {@link URLDecoder#decode(String, String)} with 175 * exception handling. 176 * 177 * @param string 178 * the string to be URL decoded 179 * @return decoded string 180 */ 181 public static @Nonnull String decode(String string) { 182 return URLDecoder.decode(string, StandardCharsets.UTF_8); 183 } 184 185}