Skip to content

Commit efca305

Browse files
clyringhs-viktor
authored andcommitted
Add NonEmpty variants of inits and tails (haskell#557)
* Add basic benchmarks for inits/tails * Add NonEmpty variants of inits and tails The lazy versions use new implementations: - Lazy tails got about 10% faster with ghc-9.2. (A happy accident!) - Lazy inits got much faster: - For the first few chunks it is about 1.5x faster, due to better list fusion. - When there are many chunks it is about 4x faster. * Formatting and comments, as suggested in review * Add link to a relevant CLC issue about NonEmpty - haskell/core-libraries-committee#107
1 parent 635560f commit efca305

File tree

6 files changed

+102
-20
lines changed

6 files changed

+102
-20
lines changed

Data/ByteString.hs

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ module Data.ByteString (
130130
groupBy,
131131
inits,
132132
tails,
133+
initsNE,
134+
tailsNE,
133135
stripPrefix,
134136
stripSuffix,
135137

@@ -235,6 +237,8 @@ import Data.ByteString.Lazy.Internal (fromStrict, toStrict)
235237
import Data.ByteString.Unsafe
236238

237239
import qualified Data.List as List
240+
import qualified Data.List.NonEmpty as NE
241+
import Data.List.NonEmpty (NonEmpty(..))
238242

239243
import Data.Word (Word8)
240244

@@ -427,7 +431,7 @@ last ps@(BS x l)
427431
unsafeWithForeignPtr x $ \p -> peekByteOff p (l-1)
428432
{-# INLINE last #-}
429433

430-
-- | /O(1)/ Return all the elements of a 'ByteString' except the last one.
434+
-- | /O(1)/ Returns all the elements of a 'ByteString' except the last one.
431435
-- An exception will be thrown in the case of an empty ByteString.
432436
--
433437
-- This is a partial function, consider using 'unsnoc' instead.
@@ -1686,17 +1690,47 @@ unzip ls = (pack (P.map fst ls), pack (P.map snd ls))
16861690
-- ---------------------------------------------------------------------
16871691
-- Special lists
16881692

1689-
-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1693+
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
16901694
inits :: ByteString -> [ByteString]
1691-
inits (BS x l) = [BS x n | n <- [0..l]]
1695+
-- see Note [Avoid NonEmpty combinators]
1696+
inits bs = NE.toList $! initsNE bs
16921697

1693-
-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1698+
-- | /O(n)/ Returns all initial segments of the given 'ByteString', shortest first.
1699+
--
1700+
-- @since 0.11.4.0
1701+
initsNE :: ByteString -> NonEmpty ByteString
1702+
-- see Note [Avoid NonEmpty combinators]
1703+
initsNE (BS x len) = empty :| [BS x n | n <- [1..len]]
1704+
1705+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
16941706
tails :: ByteString -> [ByteString]
1695-
tails p | null p = [empty]
1696-
| otherwise = p : tails (unsafeTail p)
1707+
-- see Note [Avoid NonEmpty combinators]
1708+
tails bs = NE.toList $! tailsNE bs
1709+
1710+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
1711+
--
1712+
-- @since 0.11.4.0
1713+
tailsNE :: ByteString -> NonEmpty ByteString
1714+
-- see Note [Avoid NonEmpty combinators]
1715+
tailsNE p | null p = empty :| []
1716+
| otherwise = p :| tails (unsafeTail p)
16971717

16981718
-- less efficent spacewise: tails (BS x l) = [BS (plusForeignPtr x n) (l-n) | n <- [0..l]]
16991719

1720+
{-
1721+
Note [Avoid NonEmpty combinators]
1722+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1723+
1724+
As of base-4.17, most of the NonEmpty API is surprisingly lazy.
1725+
Using it without forcing the arguments yourself is just begging GHC
1726+
to make your code waste time allocating useless selector thunks.
1727+
This may change in the future. See also this CLC issue:
1728+
https://github.com/haskell/core-libraries-committee/issues/107
1729+
But until then, "refactor" with care!
1730+
-}
1731+
1732+
1733+
17001734
-- ---------------------------------------------------------------------
17011735
-- ** Ordered 'ByteString's
17021736

Data/ByteString/Char8.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ module Data.ByteString.Char8 (
129129
groupBy,
130130
inits,
131131
tails,
132+
initsNE,
133+
tailsNE,
132134
strip,
133135
stripPrefix,
134136
stripSuffix,
@@ -261,7 +263,7 @@ import qualified Data.ByteString.Unsafe as B
261263

262264
-- Listy functions transparently exported
263265
import Data.ByteString (null,length,tail,init,append
264-
,inits,tails,reverse,transpose
266+
,inits,tails,initsNE,tailsNE,reverse,transpose
265267
,concat,take,takeEnd,drop,dropEnd,splitAt
266268
,intercalate,sort,isPrefixOf,isSuffixOf
267269
,isInfixOf,stripPrefix,stripSuffix

Data/ByteString/Lazy.hs

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@ module Data.ByteString.Lazy (
145145
groupBy,
146146
inits,
147147
tails,
148+
initsNE,
149+
tailsNE,
148150
stripPrefix,
149151
stripSuffix,
150152

@@ -228,6 +230,8 @@ import Prelude hiding
228230
,getContents,getLine,putStr,putStrLn ,zip,zipWith,unzip,notElem)
229231

230232
import qualified Data.List as List
233+
import qualified Data.List.NonEmpty as NE
234+
import Data.List.NonEmpty (NonEmpty(..))
231235
import qualified Data.Bifunctor as BF
232236
import qualified Data.ByteString as P (ByteString) -- type name only
233237
import qualified Data.ByteString as S -- S for strict (hmm...)
@@ -384,7 +388,7 @@ last (Chunk c0 cs0) = go c0 cs0
384388
go _ (Chunk c cs) = go c cs
385389
-- XXX Don't inline this. Something breaks with 6.8.2 (haven't investigated yet)
386390

387-
-- | /O(n\/c)/ Return all the elements of a 'ByteString' except the last one.
391+
-- | /O(n\/c)/ Returns all the elements of a 'ByteString' except the last one.
388392
--
389393
-- This is a partial function, consider using 'unsnoc' instead.
390394
init :: HasCallStack => ByteString -> ByteString
@@ -1433,19 +1437,39 @@ unzip ls = (pack (List.map fst ls), pack (List.map snd ls))
14331437
-- ---------------------------------------------------------------------
14341438
-- Special lists
14351439

1436-
-- | /O(n)/ Return all initial segments of the given 'ByteString', shortest first.
1440+
-- | Returns all initial segments of the given 'ByteString', shortest first.
14371441
inits :: ByteString -> [ByteString]
1438-
inits = (Empty :) . inits'
1439-
where inits' Empty = []
1440-
inits' (Chunk c cs) = List.map (`Chunk` Empty) (List.drop 1 (S.inits c))
1441-
++ List.map (Chunk c) (inits' cs)
1442+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1443+
inits bs = NE.toList $! initsNE bs
14421444

1443-
-- | /O(n)/ Return all final segments of the given 'ByteString', longest first.
1445+
-- | Returns all initial segments of the given 'ByteString', shortest first.
1446+
--
1447+
-- @since 0.11.4.0
1448+
initsNE :: ByteString -> NonEmpty ByteString
1449+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1450+
initsNE = (Empty :|) . inits' id
1451+
where
1452+
inits' :: (ByteString -> ByteString) -> ByteString -> [ByteString]
1453+
-- inits' f bs === map f (tail (inits bs))
1454+
inits' _ Empty = []
1455+
inits' f (Chunk c@(S.BS x len) cs)
1456+
= [f (S.BS x n `Chunk` Empty) | n <- [1..len]]
1457+
++ inits' (f . Chunk c) cs
1458+
1459+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
14441460
tails :: ByteString -> [ByteString]
1445-
tails Empty = [Empty]
1446-
tails cs@(Chunk c cs')
1447-
| S.length c == 1 = cs : tails cs'
1448-
| otherwise = cs : tails (Chunk (S.unsafeTail c) cs')
1461+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1462+
tails bs = NE.toList $! tailsNE bs
1463+
1464+
-- | /O(n)/ Returns all final segments of the given 'ByteString', longest first.
1465+
--
1466+
-- @since 0.11.4.0
1467+
tailsNE :: ByteString -> NonEmpty ByteString
1468+
-- see Note [Avoid NonEmpty combinators] in Data.ByteString
1469+
tailsNE bs = case uncons bs of
1470+
Nothing -> Empty :| []
1471+
Just (_, tl) -> bs :| tails tl
1472+
14491473

14501474
-- ---------------------------------------------------------------------
14511475
-- Low level constructors

Data/ByteString/Lazy/Char8.hs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ module Data.ByteString.Lazy.Char8 (
124124
groupBy,
125125
inits,
126126
tails,
127+
initsNE,
128+
tailsNE,
127129
stripPrefix,
128130
stripSuffix,
129131

@@ -233,7 +235,7 @@ import Data.ByteString.Lazy
233235
(fromChunks, toChunks
234236
,empty,null,length,tail,init,append,reverse,transpose,cycle
235237
,concat,take,takeEnd,drop,dropEnd,splitAt,intercalate
236-
,isPrefixOf,isSuffixOf,group,inits,tails,copy
238+
,isPrefixOf,isSuffixOf,group,inits,tails,initsNE,tailsNE,copy
237239
,stripPrefix,stripSuffix
238240
,hGetContents, hGet, hPut, getContents
239241
,hGetNonBlocking, hPutNonBlocking

bench/BenchAll.hs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,12 @@ lazyByteStringData :: L.ByteString
9797
lazyByteStringData = case S.splitAt (nRepl `div` 2) byteStringData of
9898
(bs1, bs2) -> L.fromChunks [bs1, bs2]
9999

100+
{-# NOINLINE smallChunksData #-}
101+
smallChunksData :: L.ByteString
102+
smallChunksData
103+
= L.fromChunks [S.take sz (S.drop n byteStringData)
104+
| let sz = 48, n <- [0, sz .. S.length byteStringData]]
105+
100106
{-# NOINLINE byteStringChunksData #-}
101107
byteStringChunksData :: [S.ByteString]
102108
byteStringChunksData = map (S.pack . replicate (4 ) . fromIntegral) intData
@@ -404,6 +410,15 @@ main = do
404410
, bench "balancedSlow" $ partitionLazy (\x -> hashWord8 x < w 128)
405411
]
406412
]
413+
, bgroup "inits"
414+
[ bench "strict" $ nf S.inits byteStringData
415+
, bench "lazy" $ nf L.inits lazyByteStringData
416+
, bench "lazy (small chunks)" $ nf L.inits smallChunksData
417+
]
418+
, bgroup "tails"
419+
[ bench "strict" $ nf S.tails byteStringData
420+
, bench "lazy" $ nf L.tails lazyByteStringData
421+
]
407422
, bgroup "sort" $ map (\s -> bench (S8.unpack s) $ nf S.sort s) sortInputs
408423
, bgroup "stimes" $ let st = stimes :: Int -> S.ByteString -> S.ByteString
409424
in

tests/Properties/ByteString.hs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,19 @@ import qualified Data.ByteString.Lazy.Internal as B (invariant)
5959
#define BYTESTRING_TYPE B.ByteString
6060
#endif
6161

62-
import Prelude hiding (head, tail)
6362
import Data.Int
6463
import Numeric.Natural (Natural)
6564

6665
import Text.Read
6766

6867
#endif
6968

69+
import Prelude hiding (head, tail)
7070
import Control.Arrow
7171
import Data.Char
7272
import Data.Foldable
7373
import qualified Data.List as List
74+
import qualified Data.List.NonEmpty as NE
7475
import Data.Semigroup
7576
import Data.String
7677
import Data.Tuple
@@ -231,6 +232,10 @@ tests =
231232
\x -> map B.unpack (B.inits x) === List.inits (B.unpack x)
232233
, testProperty "tails" $
233234
\x -> map B.unpack (B.tails x) === List.tails (B.unpack x)
235+
, testProperty "initsNE" $
236+
\x -> NE.map B.unpack (B.initsNE x) === NE.inits (B.unpack x)
237+
, testProperty "tailsNE" $
238+
\x -> NE.map B.unpack (B.tailsNE x) === NE.tails (B.unpack x)
234239
#endif
235240
, testProperty "all" $
236241
\f x -> B.all f x === all f (B.unpack x)

0 commit comments

Comments
 (0)