Skip to content

Commit 07cf795

Browse files
committed
add some utf8 decoding convenience functions
- decodeUtf8Either (alias for decodeUtf8')
- decodeUtf8Maybe
- decodeUtf8Lenient (= decodeUtf8With lenientDecode)
1 parent 8d1b6ff commit 07cf795

File tree

1 file changed

+25
-1
lines changed

1 file changed

+25
-1
lines changed

src/Data/Text/Encoding.hs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,12 @@ module Data.Text.Encoding
3131

3232
-- ** Catchable failure
3333
, decodeUtf8'
34+
, decodeUtf8Either
35+
, decodeUtf8Maybe
3436

3537
-- ** Controllable error handling
3638
, decodeUtf8With
39+
, decodeUtf8Lenient
3740
, decodeUtf16LEWith
3841
, decodeUtf16BEWith
3942
, decodeUtf32LEWith
@@ -64,7 +67,7 @@ import Control.Monad.ST (runST)
6467
import Data.Bits ((.&.))
6568
import Data.ByteString as B
6669
import qualified Data.ByteString.Internal as B
67-
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
70+
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode, lenientDecode)
6871
import Data.Text.Internal (Text(..), safe, text)
6972
import Data.Text.Internal.Functions
7073
import Data.Text.Internal.Private (runText)
@@ -353,6 +356,27 @@ decodeUtf8' :: ByteString -> Either UnicodeException Text
353356
decodeUtf8' = unsafeDupablePerformIO . try . evaluate . decodeUtf8With strictDecode
354357
{-# INLINE decodeUtf8' #-}
355358

359+
-- | Decode a UTF-8 encoded 'ByteString', reporting failure as a value.
--
-- This is a synonym for 'decodeUtf8''. Invalid UTF-8 input produces a
-- 'Left' carrying the relevant 'UnicodeException'; valid input produces
-- a 'Right' carrying the decoded 'Text'.
decodeUtf8Either :: ByteString -> Either UnicodeException Text
decodeUtf8Either bytes = decodeUtf8' bytes
365+
366+
-- | Decode a UTF-8 encoded 'ByteString', discarding the error details.
--
-- Delegates to 'decodeUtf8'': the result is 'Nothing' when the input
-- contains any invalid UTF-8 data, and 'Just' the decoded 'Text'
-- otherwise.
decodeUtf8Maybe :: ByteString -> Maybe Text
decodeUtf8Maybe bytes =
  case decodeUtf8' bytes of
    Left _        -> Nothing
    Right decoded -> Just decoded
372+
373+
-- | Decode a UTF-8 encoded 'ByteString' leniently.
--
-- Equivalent to 'decodeUtf8With' 'lenientDecode': each invalid input
-- byte is replaced with the Unicode replacement character U+FFFD
-- rather than being reported as an error.
decodeUtf8Lenient :: ByteString -> Text
decodeUtf8Lenient bytes = decodeUtf8With lenientDecode bytes
379+
356380
-- | Encode text to a ByteString 'B.Builder' using UTF-8 encoding.
357381
--
358382
-- @since 1.1.0.0

0 commit comments

Comments
 (0)