# HG changeset patch
# User Simon Montagu <smontagu@smontagu.org>
# Date 1334833171 -10800
# Node ID 3b8a84e1a1035391f062607d9754866e632a6101
# Parent  2f0d8ab5b3cb0b75dbb91f18939a1fcc37e95762
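[mq]: 746900

Reject the octets 0xC0, 0xC1 and 0xF5-0xFD as first octets of a UTF-8
sequence in nsUTF8ToUnicode::Convert, so that each one is reported as
illegal input on its own instead of starting a multi-octet sequence.
Update test_utf8_illegals.js to expect one U+FFFD per octet of such
sequences.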

diff --git a/intl/uconv/src/nsUTF8ToUnicode.cpp b/intl/uconv/src/nsUTF8ToUnicode.cpp
--- a/intl/uconv/src/nsUTF8ToUnicode.cpp
+++ b/intl/uconv/src/nsUTF8ToUnicode.cpp
@@ -249,53 +249,34 @@ NS_IMETHODIMP nsUTF8ToUnicode::Convert(c
     if (0 == mState) {
       // When mState is zero we expect either a US-ASCII character or a
       // multi-octet sequence.
       if (0 == (0x80 & (*in))) {
         PRInt32 max_loops = NS_MIN(inend - in, outend - out);
         Convert_ascii_run(in, out, max_loops);
         --in; // match the rest of the cases
         mBytes = 1;
-      } else if (0xC0 == (0xE0 & (*in))) {
-        // First octet of 2 octet sequence
+      } else if (0xC0 == (0xE0 & (*in)) && (unsigned char)*in > 0xC1) {
+        // First octet of 2 octet sequence (0xC0/0xC1: overlong ASCII, illegal)
         mUcs4 = (PRUint32)(*in);
         mUcs4 = (mUcs4 & 0x1F) << 6;
         mState = 1;
         mBytes = 2;
       } else if (0xE0 == (0xF0 & (*in))) {
         // First octet of 3 octet sequence
         mUcs4 = (PRUint32)(*in);
         mUcs4 = (mUcs4 & 0x0F) << 12;
         mState = 2;
         mBytes = 3;
-      } else if (0xF0 == (0xF8 & (*in))) {
-        // First octet of 4 octet sequence
+      } else if (0xF0 == (0xF8 & (*in)) && (unsigned char)*in < 0xF5) {
+        // First octet of 4 octet sequence, 0xF0-0xF4 (0xF5+ exceeds U+10FFFF)
         mUcs4 = (PRUint32)(*in);
         mUcs4 = (mUcs4 & 0x07) << 18;
         mState = 3;
         mBytes = 4;
-      } else if (0xF8 == (0xFC & (*in))) {
-        /* First octet of 5 octet sequence.
-         *
-         * This is illegal because the encoded codepoint must be either
-         * (a) not the shortest form or
-         * (b) outside the Unicode range of 0-0x10FFFF.
-         * Rather than trying to resynchronize, we will carry on until the end
-         * of the sequence and let the later error handling code catch it.
-         */
-        mUcs4 = (PRUint32)(*in);
-        mUcs4 = (mUcs4 & 0x03) << 24;
-        mState = 4;
-        mBytes = 5;
-      } else if (0xFC == (0xFE & (*in))) {
-        // First octet of 6 octet sequence, see comments for 5 octet sequence.
-        mUcs4 = (PRUint32)(*in);
-        mUcs4 = (mUcs4 & 1) << 30;
-        mState = 5;
-        mBytes = 6;
       } else {
         /* Current octet is neither in the US-ASCII range nor a legal first
          * octet of a multi-octet sequence.
          *
          * Return an error condition. Caller is responsible for flushing and
          * refilling the buffer and resetting state.
          */
         res = NS_ERROR_ILLEGAL_INPUT;
diff --git a/intl/uconv/tests/unit/test_utf8_illegals.js b/intl/uconv/tests/unit/test_utf8_illegals.js
--- a/intl/uconv/tests/unit/test_utf8_illegals.js
+++ b/intl/uconv/tests/unit/test_utf8_illegals.js
@@ -1,18 +1,15 @@
 // Tests illegal UTF-8 sequences
 
 const Cc = Components.Constructor;
 const Ci = Components.interfaces;
         
-const inStrings1 = new Array("%c0%af",              // long forms of 0x2F
-                             "%e0%80%af",
+const inStrings1 = new Array("%e0%80%af",           // overlong forms of 0x2F
                              "%f0%80%80%af",
-                             "%f8%80%80%80%af",
-                             "%fc%80%80%80%80%af",
                                                     // lone surrogates
                              "%ed%a0%80",           // D800
                              "%ed%ad%bf",           // DB7F
                              "%ed%ae%80",           // DB80
                              "%ed%af%bf",           // DBFF
                              "%ed%b0%80",           // DC00
                              "%ed%be%80",           // DF80
                              "%ed%bf%bf");          // DFFF
@@ -23,16 +20,36 @@ const inStrings2 = new Array("%ed%a0%80%
                              "%ed%ad%bf%ed%b0%80",  // DB7F DC00
                              "%ed%ad%bf%ed%bf%bf",  // DB7F DFFF
                              "%ed%ae%80%ed%b0%80",  // DB80 DC00
                              "%ed%ae%80%ed%bf%bf",  // DB80 DFFF
                              "%ed%af%bf%ed%b0%80",  // DBFF DC00
-                             "%ed%ad%bf%ed%bf%bf"); // DBFF DFFF
+                             "%ed%af%bf%ed%bf%bf"); // DBFF DFFF
 const expected2 = "ABC\ufffd\ufffdXYZ";
 
+const inStrings3 = new Array("%c0%af",              // Illegal bytes in 2-octet
+                             "%c1%af");             //  sequences
+const expected3 = "ABC\ufffd\ufffdXYZ"; // one U+FFFD per octet
+
+const inStrings4 = new Array("%f5%80%80%80",        // Illegal bytes in 4-octet
+                             "%f7%bf%bf%bf");       //  sequences
+const expected4 = "ABC\ufffd\ufffd\ufffd\ufffdXYZ"; // one U+FFFD per octet
+
+const inStrings5 = new Array("%f8%80%80%80%80",     // Illegal bytes in 5-octet
+                             "%f8%80%80%80%af",     //  sequences
+                             "%fb%bf%bf%bf%bf");
+const expected5 = "ABC\ufffd\ufffd\ufffd\ufffd\ufffdXYZ"; // one U+FFFD per octet
+
+const inStrings6 = new Array("%fc%80%80%80%80%80",  // Illegal bytes in 6-octet
+                             "%fc%80%80%80%80%af",  //  sequences
+                             "%fd%bf%bf%bf%bf%bf");
+const expected6 = "ABC\ufffd\ufffd\ufffd\ufffd\ufffd\ufffdXYZ"; // one U+FFFD per octet
+
+
+
 function testCaseInputStream(inStr, expected)
 {
   var dataURI = "data:text/plain; charset=UTF-8,ABC" + inStr + "XYZ"
   dump(inStr + "==>");
 
   var IOService = Cc("@mozilla.org/network/io-service;1",
 		     "nsIIOService");
   var ConverterInputStream =
@@ -69,9 +86,25 @@ function run_test() {
     for (var i = 0; i < inStrings1.length; ++i) {
 	var inStr = inStrings1[i];
 	testCaseInputStream(inStr, expected1);
     }
     for (var i = 0; i < inStrings2.length; ++i) {
 	var inStr = inStrings2[i];
 	testCaseInputStream(inStr, expected2);
     }
+    for (var i = 0; i < inStrings3.length; ++i) { // illegal 2-octet forms
+	var inStr = inStrings3[i];
+	testCaseInputStream(inStr, expected3);
+    }
+    for (var i = 0; i < inStrings4.length; ++i) { // illegal 4-octet forms
+	var inStr = inStrings4[i];
+	testCaseInputStream(inStr, expected4);
+    }
+    for (var i = 0; i < inStrings5.length; ++i) { // illegal 5-octet forms
+	var inStr = inStrings5[i];
+	testCaseInputStream(inStr, expected5);
+    }
+    for (var i = 0; i < inStrings6.length; ++i) { // illegal 6-octet forms
+	var inStr = inStrings6[i];
+	testCaseInputStream(inStr, expected6);
+    }
 }