package freenet.support; import java.util.Arrays; import java.util.HashSet; public class StringValidityChecker { /** * Taken from http://kb.mozillazine.org/Network.IDN.blacklist_chars */ static HashSet idnBlacklist = new HashSet(Arrays.asList( new Character[] { 0x0020, /* SPACE */ 0x00A0, /* NO-BREAK SPACE */ 0x00BC, /* VULGAR FRACTION ONE QUARTER */ 0x00BD, /* VULGAR FRACTION ONE HALF */ 0x01C3, /* LATIN LETTER RETROFLEX CLICK */ 0x0337, /* COMBINING SHORT SOLIDUS OVERLAY */ 0x0338, /* COMBINING LONG SOLIDUS OVERLAY */ 0x05C3, /* HEBREW PUNCTUATION SOF PASUQ */ 0x05F4, /* HEBREW PUNCTUATION GERSHAYIM */ 0x06D4, /* ARABIC FULL STOP */ 0x0702, /* SYRIAC SUBLINEAR FULL STOP */ 0x115F, /* HANGUL CHOSEONG FILLER */ 0x1160, /* HANGUL JUNGSEONG FILLER */ 0x2000, /* EN QUAD */ 0x2001, /* EM QUAD */ 0x2002, /* EN SPACE */ 0x2003, /* EM SPACE */ 0x2004, /* THREE-PER-EM SPACE */ 0x2005, /* FOUR-PER-EM SPACE */ 0x2006, /* SIX-PER-EM-SPACE */ 0x2007, /* FIGURE SPACE */ 0x2008, /* PUNCTUATION SPACE */ 0x2009, /* THIN SPACE */ 0x200A, /* HAIR SPACE */ 0x200B, /* ZERO WIDTH SPACE */ 0x2024, /* ONE DOT LEADER */ 0x2027, /* HYPHENATION POINT */ 0x2028, /* LINE SEPARATOR */ 0x2029, /* PARAGRAPH SEPARATOR */ 0x202F, /* NARROW NO-BREAK SPACE */ 0x2039, /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */ 0x203A, /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */ 0x2044, /* FRACTION SLASH */ 0x205F, /* MEDIUM MATHEMATICAL SPACE */ 0x2154, /* VULGAR FRACTION TWO THIRDS */ 0x2155, /* VULGAR FRACTION ONE FIFTH */ 0x2156, /* VULGAR FRACTION TWO FIFTHS */ 0x2159, /* VULGAR FRACTION ONE SIXTH */ 0x215A, /* VULGAR FRACTION FIVE SIXTHS */ 0x215B, /* VULGAR FRACTION ONE EIGTH */ 0x215F, /* FRACTION NUMERATOR ONE */ 0x2215, /* DIVISION SLASH */ 0x23AE, /* INTEGRAL EXTENSION */ 0x29F6, /* SOLIDUS WITH OVERBAR */ 0x29F8, /* BIG SOLIDUS */ 0x2AFB, /* TRIPLE SOLIDUS BINARY RELATION */ 0x2AFD, /* DOUBLE SOLIDUS OPERATOR */ 0x2FF0, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT */ 0x2FF1, /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW */ 0x2FF2, /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT */ 0x2FF3, /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW */ 0x2FF4, /* IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND */ 0x2FF5, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE */ 0x2FF6, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW */ 0x2FF7, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT */ 0x2FF8, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER LEFT */ 0x2FF9, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT */ 0x2FFA, /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT */ 0x2FFB, /* IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID */ 0x3000, /* IDEOGRAPHIC SPACE */ 0x3002, /* IDEOGRAPHIC FULL STOP */ 0x3014, /* LEFT TORTOISE SHELL BRACKET */ 0x3015, /* RIGHT TORTOISE SHELL BRACKET */ 0x3033, /* VERTICAL KANA REPEAT MARK UPPER HALF */ 0x3164, /* HANGUL FILLER */ 0x321D, /* PARENTHESIZED KOREAN CHARACTER OJEON */ 0x321E, /* PARENTHESIZED KOREAN CHARACTER O HU */ 0x33AE, /* SQUARE RAD OVER S */ 0x33AF, /* SQUARE RAD OVER S SQUARED */ 0x33C6, /* SQUARE C OVER KG */ 0x33DF, /* SQUARE A OVER M */ 0xFE14, /* PRESENTATION FORM FOR VERTICAL SEMICOLON */ 0xFE15, /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */ 0xFE3F, /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */ 0xFE5D, /* SMALL LEFT TORTOISE SHELL BRACKET */ 0xFE5E, /* SMALL RIGHT TORTOISE SHELL BRACKET */ 0xFEFF, /* ZERO-WIDTH NO-BREAK SPACE */ 0xFF0E, /* FULLWIDTH FULL STOP */ 0xFF0F, /* FULL WIDTH SOLIDUS */ 0xFF61, /* HALFWIDTH IDEOGRAPHIC FULL STOP */ 0xFFA0, /* HALFWIDTH HANGUL FILLER */ 0xFFF9, /* INTERLINEAR ANNOTATION ANCHOR */ 0xFFFA, /* INTERLINEAR ANNOTATION SEPARATOR */ 0xFFFB, /* INTERLINEAR ANNOTATION TERMINATOR */ 0xFFFC, /* OBJECT REPLACEMENT CHARACTER */ 0xFFFD, /* REPLACEMENT CHARACTER */ })); public static boolean containsNoIDNBlacklistCharacters(String text) { for(Character c : text.toCharArray()) { if(idnBlacklist.contains(c)) return false; } return true; } public static boolean containsNoLinebreaks(String text) { for(Character c : text.toCharArray()) { if(Character.getType(c) == Character.LINE_SEPARATOR || Character.getType(c) == Character.PARAGRAPH_SEPARATOR || c == '\n' || c == '\r') return false; } return true; } /** * Check for any values in the string that are not valid Unicode * characters. */ public static boolean containsNoInvalidCharacters(String text) { for (int i = 0; i < text.length(); ) { int c = text.codePointAt(i); i += Character.charCount(c); if ((c & 0xFFFE) == 0xFFFE || Character.getType(c) == Character.SURROGATE) return false; } return true; } /** * Check for any control characters (including tab, LF, and CR) in * the string. */ public static boolean containsNoControlCharacters(String text) { for(Character c : text.toCharArray()) { if(Character.getType(c) == Character.CONTROL) return false; } return true; } /** * Check for any unpaired directional or annotation characters in * the string, or any nested annotations. */ public static boolean containsNoInvalidFormatting(String text) { int dirCount = 0; boolean inAnnotatedText = false; boolean inAnnotation = false; for (Character c : text.toCharArray()) { if (c == 0x202A // LEFT-TO-RIGHT EMBEDDING || c == 0x202B // RIGHT-TO-LEFT EMBEDDING || c == 0x202D // LEFT-TO-RIGHT OVERRIDE || c == 0x202E) { // RIGHT-TO-LEFT OVERRIDE dirCount++; } else if (c == 0x202C) { // POP DIRECTIONAL FORMATTING dirCount--; if (dirCount < 0) return false; } else if (c == 0xFFF9) { // INTERLINEAR ANNOTATION ANCHOR if (inAnnotatedText || inAnnotation) return false; inAnnotatedText = true; } else if (c == 0xFFFA) { // INTERLINEAR ANNOTATION SEPARATOR if (!inAnnotatedText) return false; inAnnotatedText = false; inAnnotation = true; } else if (c == 0xFFFB) { // INTERLINEAR ANNOTATION TERMINATOR if (!inAnnotation) return false; inAnnotation = false; } } return (dirCount == 0 && !inAnnotatedText && !inAnnotation); } }