Dies ist kein Update der Libidn auf IDNA2008. Ziel ist es, mit einfachen Mitteln das IDNA2003-Mapping von Codepoints der Kategorie PVALID (RFC 5892), insbesondere also des "ß" (U+00DF; LATIN SMALL LETTER SHARP S), bei Bedarf unterdrücken zu können. Ausgangspunkt ist das Erfordernis, kurzfristig Domainnamen mit "ß" innerhalb der DE-Zone verarbeiten zu können.
Die Änderungen werden hier nur für die C#-Version in libidn-1.9 erwähnt und sind ggf. auf andere Sprachen oder Versionen zu übertragen.
Ziel ist es, die Funktionen ToASCII und ToUnicode um einen Parameter bool useIDNA2008 zu erweitern, der bewirkt, dass PVALID-Codepoints vom Mapping ausgenommen werden.
(1) Hinzufügen einer Klasse IDNA2008, die die in RFC 5892, Abschnitt 2.6 (Exceptions), als PVALID ausgewiesenen Codepoints enthält:
// IDNA2008.cs
namespace Gnu.Inet.Encoding
{
    /// <summary>
    /// Holds code point data taken from RFC 5892 (IDNA2008).
    /// </summary>
    class IDNA2008
    {
        /// <summary>
        /// The code points that RFC 5892 lists as PVALID exceptions.
        /// Stringprep.NamePrep passes this array as the ignore list to
        /// Stringprep.Map when useIDNA2008 is set, so these characters are
        /// left unmapped.
        /// </summary>
        public static readonly char[] PVALID = new char[]
        {
            '\u00DF', // LATIN SMALL LETTER SHARP S
            '\u03C2', // GREEK SMALL LETTER FINAL SIGMA
            '\u06FD', // ARABIC SIGN SINDHI AMPERSAND
            '\u06FE', // ARABIC SIGN SINDHI POSTPOSITION ME
            '\u0F0B', // TIBETAN MARK INTERSYLLABIC TSHEG
            '\u3007'  // IDEOGRAPHIC NUMBER ZERO
        };
    }
}
(2) Überladen der Funktion Map (Stringprep.cs) mit einem vierten Parameter, der zu ignorierende Codepoints bezeichnet:
/// <summary>
/// Replaces, in place, every occurrence of each character in
/// <paramref name="search"/> with the string at the same index in
/// <paramref name="replace"/>, except for characters that also appear in
/// <paramref name="ignore"/>.
/// </summary>
/// <param name="s">Buffer that is modified in place.</param>
/// <param name="search">Characters to look for.</param>
/// <param name="replace">
/// Replacement for search[i] at index i; a null entry deletes the character.
/// </param>
/// <param name="ignore">
/// Characters that must be left untouched even if they occur in
/// <paramref name="search"/>. A null array is treated as empty.
/// </param>
internal static void Map(StringBuilder s, char[] search, string[] replace, char[] ignore)
{
    for (int i = 0; i < search.Length; i++)
    {
        char c = search[i];

        // Characters on the ignore list keep their original form.
        if (ignore != null && System.Array.IndexOf(ignore, c) >= 0)
        {
            continue;
        }

        string replacement = replace[i];
        int j = 0;
        while (j < s.Length)
        {
            if (c != s[j])
            {
                j++;
                continue;
            }

            s.Remove(j, 1);
            if (replacement != null)
            {
                s.Insert(j, replacement);
                // Continue after the inserted text. Stepping only Length - 1
                // would re-examine the last inserted character (an endless
                // loop if it equals c) and would move the cursor backwards
                // for an empty replacement.
                j += replacement.Length;
            }
            // With a null replacement the next character has moved to index j.
        }
    }
}
(3) Überladen der Funktion NamePrep (Stringprep.cs) mit einem dritten Parameter bool useIDNA2008:
/// <summary>
/// Runs the Nameprep steps on <paramref name="input"/>: unassigned check
/// (table A.1), filtering (B.1), mapping (B.2), NFKC normalization,
/// prohibited-character check (C.1.2 to C.8) and the bidi checks (D.1/D.2).
/// </summary>
/// <param name="input">String to prepare.</param>
/// <param name="allowUnassigned">
/// When false, input containing a table A.1 code point is rejected.
/// </param>
/// <param name="useIDNA2008">
/// When true, the B.2 mapping is called with IDNA2008.PVALID as its ignore
/// list, so those code points are not mapped.
/// </param>
/// <returns>The prepared string.</returns>
/// <exception cref="System.NullReferenceException">input is null.</exception>
/// <exception cref="StringprepException">
/// The input contains unassigned or prohibited code points, or violates the
/// bidi requirements.
/// </exception>
public static string NamePrep(string input, bool allowUnassigned, bool useIDNA2008)
{
    if (null == input)
    {
        throw new System.NullReferenceException();
    }

    StringBuilder buffer = new StringBuilder(input);

    bool hasUnassigned = !allowUnassigned && Contains(buffer, RFC3454.A1);
    if (hasUnassigned)
    {
        throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED);
    }

    Filter(buffer, RFC3454.B1);

    // Sharp-s hotfix: the PVALID code points are exempted from the B.2
    // mapping only when the caller asks for it.
    if (!useIDNA2008)
    {
        Map(buffer, RFC3454.B2search, RFC3454.B2replace);
    }
    else
    {
        Map(buffer, RFC3454.B2search, RFC3454.B2replace, IDNA2008.PVALID);
    }

    string normalized = NFKC.NormalizeNFKC(buffer.ToString());
    buffer = new StringBuilder(normalized);

    // Table B.3 is not applied here; the original note presumes it is only
    // needed when NFKC is not used.

    // Table C.9 is not checked; the original note says it only contains
    // code points above 0xFFFF.
    bool hasProhibited =
        Contains(buffer, RFC3454.C12)
        || Contains(buffer, RFC3454.C22)
        || Contains(buffer, RFC3454.C3)
        || Contains(buffer, RFC3454.C4)
        || Contains(buffer, RFC3454.C5)
        || Contains(buffer, RFC3454.C6)
        || Contains(buffer, RFC3454.C7)
        || Contains(buffer, RFC3454.C8);
    if (hasProhibited)
    {
        throw new StringprepException(StringprepException.CONTAINS_PROHIBITED);
    }

    // Bidi handling. RFC 3454, section 6, requirement 1 is already covered
    // by the table C.8 check above.
    bool hasD1 = Contains(buffer, RFC3454.D1);
    bool hasD2 = Contains(buffer, RFC3454.D2);

    // RFC 3454, section 6, requirement 2: D.1 and D.2 characters must not mix.
    if (hasD1 && hasD2)
    {
        throw new StringprepException(StringprepException.BIDI_BOTHRAL);
    }

    // RFC 3454, section 6, requirement 3: with a D.1 character present, both
    // the first and the last character must be from D.1.
    if (hasD1 && !(Contains(buffer[0], RFC3454.D1) && Contains(buffer[buffer.Length - 1], RFC3454.D1)))
    {
        throw new StringprepException(StringprepException.BIDI_LTRAL);
    }

    return buffer.ToString();
}
(4) Überladen der Funktionen ToASCII und ToUnicode (IDNA.cs), um den Parameter bool useIDNA2008 an NamePrep durchzureichen:
/// <summary>
/// Converts a single label to its ASCII form, following the numbered
/// ToASCII steps noted in the comments below.
/// </summary>
/// <param name="input">Label to convert.</param>
/// <param name="allowUnassigned">Passed through to Stringprep.NamePrep.</param>
/// <param name="useSTD3ASCIIRules">
/// When true, non-LDH ASCII characters and leading or trailing hyphens are rejected.
/// </param>
/// <param name="useIDNA2008">Passed through to Stringprep.NamePrep.</param>
/// <returns>
/// The input itself if it is pure ASCII after step 2, otherwise the ACE
/// prefix followed by the Punycode encoding.
/// </returns>
/// <exception cref="IDNAException">
/// Nameprep or Punycode failed, an STD3 rule was violated, the label already
/// starts with the ACE prefix, or the result length is outside 1..63.
/// </exception>
public static string ToASCII(string input, bool allowUnassigned, bool useSTD3ASCIIRules, bool useIDNA2008)
{
    // Step 1: Check if the string contains code points outside
    //         the ASCII range 0..0x7F.
    bool nonASCII = false;
    for (int i = 0; i < input.Length; i++)
    {
        int c = input[i];
        if (c > 0x7f)
        {
            nonASCII = true;
            break;
        }
    }

    // Step 2: Perform the nameprep operation.
    if (nonASCII)
    {
        try
        {
            input = Stringprep.NamePrep(input, allowUnassigned, useIDNA2008);
        }
        catch (StringprepException e)
        {
            // Callers only see IDNAException; the cause is wrapped.
            throw new IDNAException(e);
        }
    }

    // Step 3: - Verify the absence of non-LDH ASCII code points
    //           0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
    //         - Verify the absence of leading and trailing hyphen-minus
    if (useSTD3ASCIIRules)
    {
        for (int i = 0; i < input.Length; i++)
        {
            int c = input[i];
            if ((c <= 0x2c) || (c >= 0x2e && c <= 0x2f) || (c >= 0x3a && c <= 0x40) || (c >= 0x5b && c <= 0x60) || (c >= 0x7b && c <= 0x7f))
            {
                throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
            }
        }

        // Ordinal comparison: this is a check on the literal character,
        // not a linguistic match that may vary with the current culture.
        if (input.StartsWith("-", System.StringComparison.Ordinal) || input.EndsWith("-", System.StringComparison.Ordinal))
        {
            throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
        }
    }

    // Step 4: If all code points are inside 0..0x7f, skip to step 8.
    // Re-scan because step 2 may have changed the string.
    nonASCII = false;
    for (int i = 0; i < input.Length; i++)
    {
        int c = input[i];
        if (c > 0x7f)
        {
            nonASCII = true;
            break;
        }
    }

    string output = input;

    if (nonASCII)
    {
        // Step 5: Verify that the sequence does not begin with the ACE prefix.
        if (input.StartsWith(ACE_PREFIX, System.StringComparison.Ordinal))
        {
            throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
        }

        // Step 6: Punycode
        try
        {
            output = Punycode.Encode(input);
        }
        catch (PunycodeException e)
        {
            // Callers only see IDNAException; the cause is wrapped.
            throw new IDNAException(e);
        }

        // Step 7: Prepend the ACE prefix.
        output = ACE_PREFIX + output;
    }

    // Step 8: Check that the length is inside 1..63.
    if (output.Length < 1 || output.Length > 63)
    {
        throw new IDNAException(IDNAException.TOO_LONG);
    }

    return output;
}
/// <summary>
/// Converts a single label from its ACE form back to Unicode, following the
/// numbered ToUnicode steps noted in the comments below. The method does not
/// report conversion failures: whenever a step fails, the unmodified input
/// is returned.
/// </summary>
/// <param name="input">Label to convert.</param>
/// <param name="allowUnassigned">Passed through to Stringprep.NamePrep and ToASCII.</param>
/// <param name="useSTD3ASCIIRules">Passed through to ToASCII in step 6.</param>
/// <param name="useIDNA2008">Passed through to Stringprep.NamePrep and ToASCII.</param>
/// <returns>
/// The decoded label, or the original input if any step fails or the
/// round-trip check in step 7 does not match.
/// </returns>
public static string ToUnicode(string input, bool allowUnassigned, bool useSTD3ASCIIRules, bool useIDNA2008)
{
    string original = input;
    bool nonASCII = false;

    // Step 1: If all code points are inside 0..0x7f, skip to step 3.
    for (int i = 0; i < input.Length; i++)
    {
        int c = input[i];
        if (c > 0x7f)
        {
            nonASCII = true;
            break;
        }
    }

    // Step 2: Perform the Nameprep operation.
    if (nonASCII)
    {
        try
        {
            input = Stringprep.NamePrep(input, allowUnassigned, useIDNA2008);
        }
        catch (StringprepException)
        {
            // ToUnicode never fails!
            return original;
        }
    }

    // Step 3: Verify the sequence starts with the ACE prefix.
    // Ordinal: the prefix is protocol data, not text in the current culture.
    if (!input.StartsWith(ACE_PREFIX, System.StringComparison.Ordinal))
    {
        // ToUnicode never fails!
        return original;
    }

    string stored = input;

    // Step 4: Remove the ACE prefix.
    input = input.Substring(ACE_PREFIX.Length);

    // Step 5: Decode using punycode
    string output;
    try
    {
        output = Punycode.Decode(input);
    }
    catch (PunycodeException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 6: Apply toASCII
    string ascii;
    try
    {
        ascii = ToASCII(output, allowUnassigned, useSTD3ASCIIRules, useIDNA2008);
    }
    catch (IDNAException)
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 7: Compare case-insensitively.
    // OrdinalIgnoreCase instead of ToUpper(): culture-specific casing
    // (e.g. the Turkish dotted/dotless i) would make equal ASCII labels
    // compare as different.
    if (!string.Equals(ascii, stored, System.StringComparison.OrdinalIgnoreCase))
    {
        // ToUnicode never fails!
        return original;
    }

    // Step 8: Return the result.
    return output;
}
(5) Testen von ToASCII und ToUnicode mit und ohne Anwendung von IDNA2008:
/// <summary>
/// With useIDNA2008 switched off, the sharp s is mapped to "ss" as in
/// IDNA2003, in NamePrep as well as in the ToASCII/ToUnicode round trip.
/// </summary>
[TestMethod()]
public void Test030_EsZett_IDNA2003()
{
    string withEszett = "täßt";

    // IDNA2003 Nameprep is expected to turn "täßt" into "tässt".
    string prepared = Stringprep.NamePrep(withEszett, false, false);
    Assert.AreEqual("tässt", prepared);

    // IDNA2003 ToASCII is expected to give "xn--tsst-loa" for "täßt" ...
    string aceFromEszett = IDNA.ToASCII(withEszett, false, true, false);
    Assert.AreEqual("xn--tsst-loa", aceFromEszett);

    // ... and the same result for the already prepared "tässt".
    string aceFromPrepared = IDNA.ToASCII(prepared, false, true, false);
    Assert.AreEqual(aceFromEszett, aceFromPrepared);

    // IDNA2003 ToUnicode is expected to turn "xn--tsst-loa" back into "tässt".
    string decoded = IDNA.ToUnicode(aceFromEszett, false, true, false);
    Assert.AreEqual(prepared, decoded);
}
/// <summary>
/// With useIDNA2008 switched on, the sharp s is kept as it is, in NamePrep
/// as well as in the ToASCII/ToUnicode round trip.
/// </summary>
[TestMethod()]
public void Test040_EsZett_IDNA2008()
{
    string withEszett = "täßt";

    // With useIDNA2008, Nameprep is expected to leave "täßt" unchanged.
    string prepared = Stringprep.NamePrep(withEszett, false, true);
    Assert.AreEqual(withEszett, prepared);

    // With useIDNA2008, ToASCII is expected to give "xn--tt-giat" for "täßt" ...
    string aceFromEszett = IDNA.ToASCII(withEszett, false, true, true);
    Assert.AreEqual("xn--tt-giat", aceFromEszett);

    // ... and the same result for the prepared string.
    string aceFromPrepared = IDNA.ToASCII(prepared, false, true, true);
    Assert.AreEqual(aceFromEszett, aceFromPrepared);

    // With useIDNA2008, ToUnicode is expected to turn "xn--tt-giat" back into "täßt".
    string decoded = IDNA.ToUnicode(aceFromEszett, false, true, true);
    Assert.AreEqual(prepared, decoded);
}
Kommentare