package de.consist.bmu.rule.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;

import de.consist.bmu.rule.xpath.XPathFassade;

/**
 * Checks and sanitizes strings against a regular expression that is read from
 * the pattern facet of the simple type {@code datatypeC} in the bundled schema
 * {@code /de/consist/bmu/schema-dt/din-91379-datatypes.xsd}.
 * <p>
 * The pattern is loaded lazily on first use. If loading or compiling fails the
 * error is logged and no pattern is available; in that case
 * {@link #matches(String)} returns -1 and {@link #replace(String, char)}
 * returns its input unchanged.
 */
public class DIN_SPEC_91379 {
	private static final Log LOGGER = LogFactory.getLog(DIN_SPEC_91379.class);

	/**
	 * Nominal number of chars handed to the regex engine per run in
	 * {@link #matches(String)}. NOTE(review): presumably chosen to bound the
	 * matcher's recursion depth on long inputs - confirm.
	 */
	private static final int MAX_STRING_LENGTH = 100;
	
	private static String datatypeC = null;
	private static Pattern pattern = null;

	/**
	 * Loads the pattern string from the schema resource and compiles it, unless
	 * a pattern string has already been obtained by an earlier call.
	 * Synchronized so that concurrent first callers never observe
	 * {@code datatypeC} set while {@code pattern} is still unassigned.
	 */
	private static synchronized void init() {
		if (datatypeC == null) {
			try {
				Document doc = XmlUtils.readFromResource("/de/consist/bmu/schema-dt/din-91379-datatypes.xsd", "UTF-8");
				XPathFassade xpf = XPathFassade.getInstance();
				datatypeC = xpf.evaluate(doc,
						"/xs:schema/xs:simpleType[@name='datatypeC']/xs:restriction/xs:pattern/@value");
				pattern = Pattern.compile(datatypeC, Pattern.CANON_EQ | Pattern.UNICODE_CHARACTER_CLASS);

			} catch (Exception e) {
				LOGGER.error("Fehler bei der Initialisierung", e);
			}
		}
	}

	/**
	 * Runs the pattern once against {@code text}.
	 *
	 * @param text The chunk to check, must not be null.
	 * @return End index of the first match found if that match stops before the
	 *         end of {@code text}; -1 if the match reaches the end, if nothing
	 *         matches, or if no pattern is available.
	 */
	private static int _matches(String text) {
		if (pattern == null) {
			return -1;
		}
		Matcher matcher = pattern.matcher(text);
		if (matcher.find() && matcher.end() < text.length()) {
			return matcher.end();
		}
		return -1;
	}

	/**
	 * Tells whether the code point is a combining mark (general categories Mn,
	 * Me, Mc), i.e. a character that belongs to the preceding base character.
	 */
	private static boolean isCombiningMark(int codePoint) {
		int type = Character.getType(codePoint);
		return type == Character.NON_SPACING_MARK || type == Character.ENCLOSING_MARK
				|| type == Character.COMBINING_SPACING_MARK;
	}

	/**
	 * Computes the exclusive end index of the chunk starting at {@code start}.
	 * The nominal end is {@code start + MAX_STRING_LENGTH}; it is moved forward
	 * while the chunk would otherwise be cut between a base character and its
	 * combining marks or in the middle of a surrogate pair, because the second
	 * half of such a sequence would be checked in isolation by the next run.
	 */
	private static int chunkEnd(String text, int start) {
		int len = text.length();
		int end = Math.min(len, start + MAX_STRING_LENGTH);
		while (end < len) {
			// end >= MAX_STRING_LENGTH > 0 here, so end - 1 is a valid index.
			if (Character.isLowSurrogate(text.charAt(end)) && Character.isHighSurrogate(text.charAt(end - 1))) {
				end++;
				continue;
			}
			int codePoint = text.codePointAt(end);
			if (!isCombiningMark(codePoint)) {
				break;
			}
			end += Character.charCount(codePoint);
		}
		return end;
	}

	/**
	 * Checks the text chunk by chunk against the pattern.
	 *
	 * @param text The string to check; {@code null} is treated as matching.
	 * @return Position (index) of the first non-matching character, -1 if the
	 *         whole text matches (or if no pattern is available).
	 */
	public static int matches(String text) {
		init();
		if (text == null) {
			return -1;
		}
		int len = text.length();
		int pos = 0;
		// do/while so that the empty string is still run through the pattern once.
		do {
			int end = chunkEnd(text, pos);
			int mismatch = _matches(text.substring(pos, end));
			if (mismatch >= 0) {
				return pos + mismatch;
			}
			pos = end;
		} while (pos < len);
		return -1;
	}

	/**
	 * Copies every non-empty match of the pattern and writes
	 * {@code replacementChar} for every empty match that is not the terminal
	 * one at the end of the text (an empty match marks a position where the
	 * pattern could not consume the character).
	 *
	 * @param text            The string to sanitize; {@code null} yields
	 *                        {@code null}.
	 * @param replacementChar The character written in place of each
	 *                        non-matching position.
	 * @return The string with the replaced characters; the unchanged input if
	 *         nothing was produced or no pattern is available.
	 */
	public static String replace(String text, char replacementChar) {
		init();
		if (text == null) {
			return null;
		}
		boolean debug = LOGGER.isDebugEnabled();
		StringBuilder buf = new StringBuilder(text.length());
		if (pattern != null) {
			Matcher matcher = pattern.matcher(text);
			if (debug) {
				LOGGER.debug("text: " + text + ", length: " + text.length());
			}
			while (matcher.find()) {
				int start = matcher.start();
				int end = matcher.end();
				String group = matcher.group();
				boolean hitEnd = matcher.hitEnd();
				if (debug) {
					LOGGER.debug("start: " + start + ", end: " + end + ", group: " + group + ", hitEnd: " + hitEnd);
				}
				if (!group.isEmpty()) {
					buf.append(group);
				} else if (!hitEnd || (end < text.length())) {
					buf.append(replacementChar);
				}
			}
		}
		String retVal = text;
		if (buf.length() > 0) {
			retVal = buf.toString();
		}
		if (debug) {
			LOGGER.debug("result: " + retVal + ", length: " + retVal.length());
		}
		return retVal;
	}

	/**
	 * Manual smoke test: prints the results of {@link #matches(String)} and
	 * {@link #replace(String, char)} for a few sample strings, some of which
	 * contain U+30AB (Katakana) and U+E053 (private use area).
	 */
	public static void main(String[] args) {
		String test = "ABCDEFGHIJKLMNOPQRSTUVW1234567890";
		System.out.println(matches(test));
		test = "ABCDEFGHIJKL\u30abMNOPQRSTUVW1234567890ABCDEFGHIJKL\uE053MNOPQRSTUVW1234567890";
		System.out.println(matches(test));
		System.out.println(replace(test, '_'));
		test = "11";
		System.out.println(matches(test));
		System.out.println(replace(test, '_'));
		test = "\uE053";
		System.out.println(matches(test));
		System.out.println(replace(test, '_'));
	}
}
