001/*
002 * Shredzone Commons
003 *
004 * Copyright (C) 2012 Richard "Shred" Körber
005 *   http://commons.shredzone.org
006 *
007 * This program is free software: you can redistribute it and/or modify
008 * it under the terms of the GNU Library General Public License as
009 * published by the Free Software Foundation, either version 3 of the
010 * License, or (at your option) any later version.
011 *
012 * This program is distributed in the hope that it will be useful,
013 * but WITHOUT ANY WARRANTY; without even the implied warranty of
014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
015 * GNU General Public License for more details.
016 *
017 * You should have received a copy of the GNU Library General Public License
018 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
019 */
020package org.shredzone.commons.text.filter;
021
022import org.shredzone.commons.text.TextFilter;
023
024/**
025 * A filter that normalizes EOL markers. CR and CRLF are converted to LF.
026 *
027 * @author Richard "Shred" Körber
028 */
029public class NormalizeFilter implements TextFilter {
030
031    @Override
032    public CharSequence apply(CharSequence text) {
033        StringBuilder sb = toStringBuilder(text);
034
035        // Benchmark says this is faster than replaceAll
036        int max = sb.length();
037        for (int ix = 0; ix < max; ix++) {
038            if (sb.charAt(ix) == '\r') {
039                if ((ix + 1) < max && sb.charAt(ix + 1) == '\n') {
040                    sb.deleteCharAt(ix);
041                    max--;
042                } else {
043                    sb.setCharAt(ix, '\n');
044                }
045            }
046        }
047
048        return sb;
049    }
050
051}