Menu

Sebastian Böthin

Softwareentwicklung

Gnu Libidn EsZett Hotfix

01.11.2010

Dies ist kein Update der Libidn auf IDNA2008. Ziel ist es, mit einfachen Mitteln das IDNA2003-Mapping von Codepoints der Kategorie PVALID (RFC 5892), insbesondere also des "ß" (U+00DF; LATIN SMALL LETTER SHARP S), bei Bedarf unterdrücken zu können. Ausgangspunkt ist die Erfordernis, kurzfristig Domainnamen mit "ß" innerhalb der DE-Zone verabeiten zu können.

Die Änderungen werden hier nur für die C#-Version in libidn-1.9 erwähnt und sind ggf. auf andere Sprachen oder Versionen zu übertragen.

Ziel ist es, die Funktionen ToAscii und ToUnicode um einen Parameter bool useIDNA2008 zu erweitern, der die Wirkung hat, dass PVALID Codepoints vom Mapping ausgenommen werden.

(1) Hinzufügen einer Klasse IDNA2008, die alle PVALID Codepoints enthält:


  // IDNA2008.cs
  namespace Gnu.Inet.Encoding {
    class IDNA2008
    {
      // rfc5892 PVALID codepoints
      public static char[] PVALID = new char[] {
        'u00DF', // LATIN SMALL LETTER SHARP S
        'u03C2', // GREEK SMALL LETTER FINAL SIGMA
        'u06FD', // ARABIC SIGN SINDHI AMPERSAND
        'u06FE', // ARABIC SIGN SINDHI POSTPOSITION ME
        'u0F0B', // TIBETAN MARK INTERSYLLABIC TSHEG
        'u3007'  // IDEOGRAPHIC NUMBER ZERO
      };
    }
  }

(2) Überladen der Funktion Map (Stringprep.cs) mit einem dritten Parameter, der zu ignorierende Codepoints bezeichnet:


  internal static void Map(StringBuilder s, char[] search, string[] replace, char[] ignore)
  {
    for (int i = 0; i < search.Length; i++)
    {
      char c = search[i];

      // check if c should be ignored
      bool ign = false;
      for (int t = 0; t < ignore.Length; t++)
      {
        if (ignore[t] == c)
        {
          ign = true;
          break;
        }
      }
      if (ign)
        continue;

      int j = 0;
      while (j < s.Length)
      {
        if (c == s[j])
        {
          //s.deleteCharAt(j);
          s.Remove(j, 1);
          if (null != replace[i])
          {
            s.Insert(j, replace[i]);
            j += replace[i].Length - 1;
          }
        }
        else
        {
          j++;
        }
      }
    }
  }

(3) Überladen der Funktion Nameprep (Strinprep.cs) mit einem dritten Parameter bool useIDNA2008:


  public static string NamePrep(string input, bool allowUnassigned, bool useIDNA2008)
  {
    if (input == null)
    {
      throw new System.NullReferenceException();
    }

    StringBuilder s = new StringBuilder(input);

    if (!allowUnassigned && Contains(s, RFC3454.A1))
    {
      throw new StringprepException(StringprepException.CONTAINS_UNASSIGNED);
    }

    Filter(s, RFC3454.B1);

    // EsZett Hotfix
    if (useIDNA2008)
      Map(s, RFC3454.B2search, RFC3454.B2replace, IDNA2008.PVALID);
    else
      Map(s, RFC3454.B2search, RFC3454.B2replace);


    s = new StringBuilder(NFKC.NormalizeNFKC(s.ToString()));
    // B.3 is only needed if NFKC is not used, right?
    // map(s, RFC3454.B3search, RFC3454.B3replace);

    if (Contains(s, RFC3454.C12) || Contains(s, RFC3454.C22) || Contains(s, RFC3454.C3) || Contains(s, RFC3454.C4) || Contains(s, RFC3454.C5) || Contains(s, RFC3454.C6) || Contains(s, RFC3454.C7) || Contains(s, RFC3454.C8))
    {
      // Table C.9 only contains code points > 0xFFFF which Java
      // doesn't handle
      throw new StringprepException(StringprepException.CONTAINS_PROHIBITED);
    }

    // Bidi handling
    bool r = Contains(s, RFC3454.D1);
    bool l = Contains(s, RFC3454.D2);

    // RFC 3454, section 6, requirement 1: already handled above (table C.8)

    // RFC 3454, section 6, requirement 2
    if (r && l)
    {
      throw new StringprepException(StringprepException.BIDI_BOTHRAL);
    }

    // RFC 3454, section 6, requirement 3
    if (r)
    {
      if (!Contains(s[0], RFC3454.D1) || !Contains(s[s.Length - 1], RFC3454.D1))
      {
        throw new StringprepException(StringprepException.BIDI_LTRAL);
      }
    }

    return s.ToString();
  }

(4) Überladen der Funktionen ToAscii und ToUnicode (IDNA.cs), um den Parameter
bool useIDNA2008 an Nameprep durchzureichen:


  public static string ToASCII(string input, bool allowUnassigned, bool useSTD3ASCIIRules, bool useIDNA2008)
  {
    // Step 1: Check if the string contains code points outside
    //         the ASCII range 0..0x7c.

    bool nonASCII = false;

    for (int i = 0; i < input.Length; i++)
    {
      int c = input[i];
      if (c > 0x7f)
      {
        nonASCII = true;
        break;
      }
    }

    // Step 2: Perform the nameprep operation.

    if (nonASCII)
    {
      try
      {
        input = Stringprep.NamePrep(input, allowUnassigned, useIDNA2008);
      }
      catch (StringprepException e)
      {
        // TODO
        throw new IDNAException(e);
      }
    }

    // Step 3: - Verify the absence of non-LDH ASCII code points
    //    (char) 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60,
    //    (char) 0x7b..0x7f
    //         - Verify the absence of leading and trailing
    //           hyphen-minus

    if (useSTD3ASCIIRules)
    {
      for (int i = 0; i < input.Length; i++)
      {
        int c = input[i];
        if ((c <= 0x2c) || (c >= 0x2e && c <= 0x2f) || (c >= 0x3a && c <= 0x40) || (c >= 0x5b && c <= 0x60) || (c >= 0x7b && c <= 0x7f))
        {
          throw new IDNAException(IDNAException.CONTAINS_NON_LDH);
        }
      }

      if (input.StartsWith("-") || input.EndsWith("-"))
      {
        throw new IDNAException(IDNAException.CONTAINS_HYPHEN);
      }
    }

    // Step 4: If all code points are inside 0..0x7f, skip to step 8

    nonASCII = false;

    for (int i = 0; i < input.Length; i++)
    {
      int c = input[i];
      if (c > 0x7f)
      {
        nonASCII = true;
        break;
      }
    }

    string output = input;

    if (nonASCII)
    {

      // Step 5: Verify that the sequence does not begin with the ACE prefix.

      if (input.StartsWith(ACE_PREFIX))
      {
        throw new IDNAException(IDNAException.CONTAINS_ACE_PREFIX);
      }

      // Step 6: Punycode

      try
      {
        output = Punycode.Encode(input);
      }
      catch (PunycodeException e)
      {
        // TODO
        throw new IDNAException(e);
      }

      // Step 7: Prepend the ACE prefix.

      output = ACE_PREFIX + output;
    }

    // Step 8: Check that the length is inside 1..63.

    if (output.Length < 1 || output.Length > 63)
    {
      throw new IDNAException(IDNAException.TOO_LONG);
    }

    return output;
  }


  public static string ToUnicode(string input, bool allowUnassigned, bool useSTD3ASCIIRules, bool useIDNA2008)
  {
    string original = input;
    bool nonASCII = false;

    // Step 1: If all code points are inside 0..0x7f, skip to step 3.

    for (int i = 0; i < input.Length; i++)
    {
      int c = input[i];
      if (c > 0x7f)
      {
        nonASCII = true;
        break;
      }
    }

    // Step 2: Perform the Nameprep operation.

    if (nonASCII)
    {
      try
      {
        input = Stringprep.NamePrep(input, allowUnassigned, useIDNA2008);
      }
      catch (StringprepException e)
      {
        // ToUnicode never fails!
        return original;
      }
    }

    // Step 3: Verify the sequence starts with the ACE prefix.

    if (!input.StartsWith(ACE_PREFIX))
    {
      // ToUnicode never fails!
      return original;
    }

    string stored = input;

    // Step 4: Remove the ACE prefix.

    input = input.Substring(ACE_PREFIX.Length);

    // Step 5: Decode using punycode

    string output;

    try
    {
      output = Punycode.Decode(input);
    }
    catch (PunycodeException e)
    {
      // ToUnicode never fails!
      return original;
    }

    // Step 6: Apply toASCII

    string ascii;

    try
    {
      ascii = ToASCII(output, allowUnassigned, useSTD3ASCIIRules, useIDNA2008);
    }
    catch (IDNAException e)
    {
      // ToUnicode never fails!
      return original;
    }

    // Step 7: Compare case-insensitively.

    if (!ascii.ToUpper().Equals(stored.ToUpper()))
    {
      // ToUnicode never fails!
      return original;
    }

    // Step 8: Return the result.

    return output;
  }

(5) Testen von ToAscii und ToUnicode mit und ohne Anwendung von IDNA2008:


  [TestMethod()]
  public void Test030_EsZett_IDNA2003()
  {

    string u1 = "täßt";

    // Nameprep IDNA2203 should send "täßt" to "tässt"
    string u2 = Stringprep.NamePrep(u1, false, false);
    Assert.AreEqual("tässt", u2);

    // ToAscii IDNA2003 should send both "täßt" and "tässt" to "xn--tsst-loa"
    string a1 = IDNA.ToASCII(u1, false, true, false);
    Assert.AreEqual("xn--tsst-loa", a1);

    string a2 = IDNA.ToASCII(u2, false, true, false);
    Assert.AreEqual(a1, a2);

    // ToUnicode IDNA2003 should send "xn--tsst-loa" to "tässt"
    string u3 = IDNA.ToUnicode(a1, false, true, false);
    Assert.AreEqual(u2, u3);

  }

  [TestMethod()]
  public void Test040_EsZett_IDNA2008()
  {

    string u1 = "täßt";

    // Nameprep IDNA2208 should send "täßt" to "täßt"
    string u2 = Stringprep.NamePrep(u1, false, true);
    Assert.AreEqual(u1, u2);

    // ToAscii IDNA2008 should send "täßt" to "xn--tt-giat"
    string a1 = IDNA.ToASCII(u1, false, true, true);
    Assert.AreEqual("xn--tt-giat", a1);

    string a2 = IDNA.ToASCII(u2, false, true, true);
    Assert.AreEqual(a1, a2);

    // ToUnicode IDNA2003 should send "xn--tt-giat.de" to "täßt"
    string u3 = IDNA.ToUnicode(a1, false, true, true);
    Assert.AreEqual(u2, u3);

  }

Go Back

Kommentar