java I/O FilterInputStream及其子类源码分析

最新推荐文章于 2023-10-20 17:28:34 发布

原创最新推荐文章于 2023-10-20 17:28:34 发布 · 1.9k 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#DataInputStream #BufferedInputStream #PushbackInputStream #源码实现

本文深入探讨了 Java 中 FilterInputStream 的子类 DataInputStream、BufferedInputStream 和 PushbackInputStream 的实现原理与应用场景。DataInputStream 用于从流中读取基本数据类型；BufferedInputStream 利用缓冲区提高读取效率；PushbackInputStream 支持回退读取。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

FilterInputStream

作用：用来提供装饰器类接口，以控制特定输入流

我们先来看看其定义：

/**
 * Base class for input-stream decorators: it wraps another InputStream and
 * forwards calls to it. (Abridged listing; only read() and close() are shown.)
 */
public class FilterInputStream extends InputStream {

    // The wrapped stream that every call is forwarded to.
    protected volatile InputStream in;

    // Stores the stream to be decorated; protected, so only subclasses can call it.
    protected FilterInputStream(InputStream in) {
        this.in = in;
    }

    // Reads a single byte by delegating to the wrapped stream.
    public int read() throws IOException {
        return in.read();
    }

    // Closes the wrapped stream.
    public void close() throws IOException {
        in.close();
    }

    /**
     * The remaining methods (read, skip, available, mark, reset,
     * markSupported) are omitted from this listing; per the article they also
     * just delegate to the corresponding method of the wrapped InputStream.
     * This class only provides the decorator interface and adds no behaviour
     * of its own; the actual extensions are left to its subclasses.
     */
}

现在再来看看其子类都有哪些：

FilterInputStream是所有装饰器类的基类，用于提供特殊的输入流控制
其子类包含 DataInputStream ：与DataOutputStream搭配使用，可按照可移植方式从流中读取基本数据类型（int char long 等）
BufferedInputStream ：使用其可以防止每次读取时都得进行实际的读操作，代表使用缓冲区
LineNumberInputStream ：跟踪输入流中的行号，可以调用getLineNumber()和setLineNumber(int)

PushbackInputStream :具有“能弹出一个字节的缓冲区”因此可以将读到的最后一个字符回退。

下面再来一个一个的分析其子类的详细特点以及实现方式

我们先来看看 DataInputStream的类定义

public class DataInputStream extends FilterInputStream implements DataInput

其继承自 FilterInputStream，并实现了 DataInput 接口。DataInput 接口定义了在 InputStream 基础上增强的读取方法（按基本数据类型读取），我们直接看 DataInputStream 的定义就好了。

import java.io.*;

/**
 * DataInputStream 的作用就是允许应用程序以与机器无关方式从底层输入流中读取基本 Java 数据类型。
 * 应用程序可以使用数据输出流写入稍后由数据输入流读取的数据。
 */
/**
 * Input-stream decorator that reads primitive Java values (big-endian) and
 * modified-UTF-8 strings from the wrapped stream, i.e. the data written by a
 * data output stream.
 *
 * <p>Every {@code readXxx} method throws {@link EOFException} when the wrapped
 * stream ends before the value has been read completely.
 */
public class DataInputStream extends FilterInputStream implements DataInput {

    /** Scratch byte buffer reused by {@link #readUTF(DataInput)}; grown on demand. */
    private byte[] bytearr = new byte[80];

    /** Scratch char buffer reused by {@link #readUTF(DataInput)}; grown on demand. */
    private char[] chararr = new char[80];

    /** Scratch buffer holding the eight bytes of a long. */
    private final byte[] readBuffer = new byte[8];

    /** Line buffer used by {@link #readLine()}; allocated lazily. */
    private char[] lineBuffer;

    /**
     * Creates a data input stream on top of the given stream.
     *
     * @param in the stream to read from
     */
    public DataInputStream(InputStream in) {
        super(in);
    }

    /**
     * Fills the whole array from the stream.
     *
     * @param b destination array
     * @throws EOFException if the stream ends before {@code b} is full
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final void readFully(byte[] b) throws IOException {
        readFully(b, 0, b.length);
    }

    /**
     * Reads exactly {@code len} bytes into {@code b} starting at {@code off}.
     * The loop keeps calling the wrapped stream until the requested amount has
     * arrived, so it blocks for as long as the wrapped stream blocks; it is
     * meant for data whose length is known in advance.
     *
     * @param b   destination array
     * @param off start offset in {@code b}
     * @param len number of bytes to read
     * @throws IndexOutOfBoundsException if {@code len} is negative
     * @throws EOFException              if the stream ends first
     * @throws IOException               if the wrapped stream fails
     */
    @Override
    public final void readFully(byte[] b, int off, int len) throws IOException {
        if (len < 0) {
            throw new IndexOutOfBoundsException();
        }
        int n = 0;
        while (n < len) {
            int count = in.read(b, off + n, len - n);
            if (count < 0) {
                throw new EOFException();
            }
            n += count;
        }
    }

    /**
     * Tries to skip {@code n} bytes of the wrapped stream.
     *
     * @param n number of bytes to skip
     * @return the number of bytes actually skipped, possibly fewer than {@code n}
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public final int skipBytes(int n) throws IOException {
        int total = 0;
        int cur;
        while ((total < n) && ((cur = (int) in.skip(n - total)) > 0)) {
            total += cur;
        }
        return total;
    }

    /**
     * Reads one byte and interprets it as a boolean; zero means {@code false}.
     *
     * @return {@code true} if the byte is non-zero
     * @throws EOFException if the stream has ended
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final boolean readBoolean() throws IOException {
        int ch = in.read();
        if (ch < 0) {
            throw new EOFException();
        }
        return (ch != 0);
    }

    /**
     * Reads one signed byte.
     *
     * @return the byte read
     * @throws EOFException if the stream has ended
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final byte readByte() throws IOException {
        int ch = in.read();
        // Without this check the -1 end-of-stream marker would be returned as the byte 0xFF.
        if (ch < 0) {
            throw new EOFException();
        }
        return (byte) (ch);
    }

    /**
     * Reads one byte as an unsigned value.
     *
     * @return the byte read, in the range 0..255
     * @throws EOFException if the stream has ended
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final int readUnsignedByte() throws IOException {
        int ch = in.read();
        if (ch < 0) {
            throw new EOFException();
        }
        return ch;
    }

    /**
     * Reads two bytes, high byte first, as a signed short.
     *
     * @return the short read
     * @throws EOFException if the stream ends before both bytes are read
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final short readShort() throws IOException {
        int ch1 = in.read();
        int ch2 = in.read();
        // read() returns -1 at end of stream, so a negative OR means at least one byte is missing.
        if ((ch1 | ch2) < 0) {
            throw new EOFException();
        }
        return (short) ((ch1 << 8) + (ch2 << 0));
    }

    /**
     * Reads two bytes, high byte first, as an unsigned 16-bit value.
     *
     * @return the value read, in the range 0..65535
     * @throws EOFException if the stream ends before both bytes are read
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final int readUnsignedShort() throws IOException {
        int ch1 = in.read();
        int ch2 = in.read();
        if ((ch1 | ch2) < 0) {
            throw new EOFException();
        }
        return (ch1 << 8) + (ch2 << 0);
    }

    /**
     * Reads two bytes, high byte first, as a char.
     *
     * @return the char read
     * @throws EOFException if the stream ends before both bytes are read
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final char readChar() throws IOException {
        int ch1 = in.read();
        int ch2 = in.read();
        if ((ch1 | ch2) < 0) {
            throw new EOFException();
        }
        return (char) ((ch1 << 8) + (ch2 << 0));
    }

    /**
     * Reads four bytes, most significant first, as an int.
     *
     * @return the int read
     * @throws EOFException if the stream ends before all four bytes are read
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final int readInt() throws IOException {
        int ch1 = in.read();
        int ch2 = in.read();
        int ch3 = in.read();
        int ch4 = in.read();
        if ((ch1 | ch2 | ch3 | ch4) < 0) {
            throw new EOFException();
        }
        return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));
    }

    /**
     * Reads eight bytes, most significant first, as a long.
     *
     * @return the long read
     * @throws EOFException if the stream ends before all eight bytes are read
     * @throws IOException  if the wrapped stream fails
     */
    @Override
    public final long readLong() throws IOException {
        readFully(readBuffer, 0, 8);
        // The top byte keeps its sign; the others are masked so sign extension cannot corrupt the sum.
        return (((long) readBuffer[0] << 56)
                + ((long) (readBuffer[1] & 255) << 48)
                + ((long) (readBuffer[2] & 255) << 40)
                + ((long) (readBuffer[3] & 255) << 32)
                + ((long) (readBuffer[4] & 255) << 24)
                + ((readBuffer[5] & 255) << 16)
                + ((readBuffer[6] & 255) << 8)
                + ((readBuffer[7] & 255) << 0));
    }

    /**
     * Reads four bytes and reinterprets the int bit pattern as a float.
     *
     * @return the float read
     * @throws IOException if reading the int fails
     */
    @Override
    public final float readFloat() throws IOException {
        return Float.intBitsToFloat(readInt());
    }

    /**
     * Reads eight bytes and reinterprets the long bit pattern as a double.
     *
     * @return the double read
     * @throws IOException if reading the long fails
     */
    @Override
    public final double readDouble() throws IOException {
        return Double.longBitsToDouble(readLong());
    }

    /**
     * Reads one line, treating every byte as one char. A line ends at
     * {@code '\n'}, {@code '\r'}, {@code "\r\n"} or end of stream.
     *
     * @return the line without its terminator, or {@code null} if the stream
     *         had already ended
     * @throws IOException if the wrapped stream fails
     * @deprecated bytes are not converted to characters properly; wrap the
     *             stream in a reader instead.
     */
    @Deprecated
    @Override
    public final String readLine() throws IOException {
        char[] buf = lineBuffer;
        if (buf == null) {
            buf = lineBuffer = new char[128];
        }

        int room = buf.length;
        int offset = 0;
        int c;

        loop:
        while (true) {
            switch (c = in.read()) {
                case -1:
                case '\n':
                    break loop;
                case '\r':
                    int c2 = in.read();
                    if ((c2 != '\n') && (c2 != -1)) {
                        // The byte after a lone '\r' belongs to the next line: push it back.
                        if (!(in instanceof PushbackInputStream)) {
                            this.in = new PushbackInputStream(in);
                        }
                        ((PushbackInputStream) in).unread(c2);
                    }
                    break loop;
                default:
                    if (--room < 0) {
                        // Line buffer is full: grow it by 128 chars.
                        buf = new char[offset + 128];
                        room = buf.length - offset - 1;
                        System.arraycopy(lineBuffer, 0, buf, 0, offset);
                        lineBuffer = buf;
                    }
                    buf[offset++] = (char) c;
                    break;
            }
        }
        if ((c == -1) && (offset == 0)) {
            return null;
        }
        return String.copyValueOf(buf, 0, offset);
    }

    /**
     * Reads a modified-UTF-8 encoded string from this stream.
     *
     * @return the decoded string
     * @throws IOException if reading or decoding fails
     */
    @Override
    public final String readUTF() throws IOException {
        return readUTF(this);
    }

    /**
     * Reads a modified-UTF-8 encoded string from {@code in}: an unsigned
     * 16-bit byte length followed by that many encoded bytes.
     *
     * @param in the source to read from
     * @return the decoded string
     * @throws UTFDataFormatException if the bytes are not valid modified UTF-8
     * @throws EOFException           if the input ends before all bytes are read
     * @throws IOException            if the source fails
     */
    public static final String readUTF(DataInput in) throws IOException {

        // The first two bytes hold the length, in bytes, of the encoded string.
        int utflen = in.readUnsignedShort();
        byte[] bytearr = null;
        char[] chararr = null;

        if (in instanceof DataInputStream) {
            // Reuse (and, if needed, grow) the scratch buffers of the stream to avoid allocating per call.
            DataInputStream dis = (DataInputStream) in;
            if (dis.bytearr.length < utflen) {
                dis.bytearr = new byte[utflen * 2];
                dis.chararr = new char[utflen * 2];
            }
            chararr = dis.chararr;
            bytearr = dis.bytearr;
        } else {
            // No reusable buffers on a foreign DataInput: allocate exactly what is needed.
            bytearr = new byte[utflen];
            chararr = new char[utflen];
        }

        int c, char2, char3;
        int count = 0;
        int chararr_count = 0;

        // Pull all encoded bytes in at once.
        in.readFully(bytearr, 0, utflen);

        // Fast path: copy the leading run of single-byte (ASCII, <= 127) characters.
        // Bytes above 127 start or continue a multi-byte sequence and are handled below.
        while (count < utflen) {
            // Mask with 0xff so a negative byte is not sign-extended when widened to int.
            c = (int) bytearr[count] & 0xff;
            if (c > 127) break;
            count++;
            chararr[chararr_count++] = (char) c;
        }

        // General path: decode 1-, 2- and 3-byte sequences, selected by the high nibble of the lead byte.
        while (count < utflen) {
            c = (int) bytearr[count] & 0xff;
            switch (c >> 4) {
                case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
                    /* 0xxxxxxx*/
                    count++;
                    chararr[chararr_count++] = (char) c;
                    break;
                case 12: case 13:
                    /* 110x xxxx   10xx xxxx*/
                    count += 2;
                    if (count > utflen)
                        throw new UTFDataFormatException(
                                "malformed input: partial character at end");
                    char2 = (int) bytearr[count - 1];
                    if ((char2 & 0xC0) != 0x80)
                        throw new UTFDataFormatException(
                                "malformed input around byte " + count);
                    chararr[chararr_count++] = (char) (((c & 0x1F) << 6) |
                            (char2 & 0x3F));
                    break;
                case 14:
                    /* 1110 xxxx  10xx xxxx  10xx xxxx */
                    count += 3;
                    if (count > utflen)
                        throw new UTFDataFormatException(
                                "malformed input: partial character at end");
                    char2 = (int) bytearr[count - 2];
                    char3 = (int) bytearr[count - 1];
                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
                        throw new UTFDataFormatException(
                                "malformed input around byte " + (count - 1));
                    chararr[chararr_count++] = (char) (((c & 0x0F) << 12) |
                            ((char2 & 0x3F) << 6) |
                            ((char3 & 0x3F) << 0));
                    break;
                default:
                    /* 10xx xxxx,  1111 xxxx */
                    throw new UTFDataFormatException(
                            "malformed input around byte " + count);
            }
        }
        // The number of chars produced may be less than utflen
        return new String(chararr, 0, chararr_count);
    }
}

通过分析该源码可以看出DataInputStream的作用：主要用来从流中读取基本类型的数据，以及通过最后一个方法从流中读取UTF-8（准确地说是“修改版UTF-8”）编码的数据并以String字符串的形式返回。以上就是DataInputStream针对InputStream所做的增强特性了。

再来看看 BufferedInputStream

import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;

/**
 * 作用：使用其可以防止每次读取时都得进行实际的读操作，代表使用缓冲区
 */
/**
 * Input-stream decorator that reads from the wrapped stream in chunks into an
 * internal byte array and serves later reads from that array, so that not
 * every read call has to go to the wrapped stream. It also implements
 * mark/reset on top of the buffer.
 */
class BufferedInputStream extends FilterInputStream {

    /** Buffer size used by the single-argument constructor. */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Upper bound for the buffer length when {@link #fill()} has to grow it. */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /** The buffer; set to {@code null} by {@link #close()}. */
    protected volatile byte[] buf;

    /**
     * Compare-and-set access to {@link #buf}: {@link #close()} is not
     * synchronized, so {@link #fill()} and {@code close()} swap the array via CAS.
     */
    private static final AtomicReferenceFieldUpdater<BufferedInputStream, byte[]> bufUpdater =
            AtomicReferenceFieldUpdater.newUpdater(BufferedInputStream.class, byte[].class, "buf");

    /** Index one past the last valid byte in {@link #buf}. */
    protected int count;

    /** Index of the next byte to hand out from {@link #buf}. */
    protected int pos;

    /** Buffer index of the current mark, or -1 when no mark is set. */
    protected int markpos = -1;

    /** Read-ahead limit passed to the last {@link #mark(int)} call. */
    protected int marklimit;

    /**
     * Creates a buffered stream with the default buffer size.
     *
     * @param in the stream to read from
     */
    public BufferedInputStream(InputStream in) {
        this(in, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Creates a buffered stream with a buffer of the given size.
     *
     * @param in   the stream to read from
     * @param size initial buffer size in bytes
     * @throws IllegalArgumentException if {@code size <= 0}
     */
    public BufferedInputStream(InputStream in, int size) {
        super(in);
        if (size <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        buf = new byte[size];
    }

    /** Returns the wrapped stream, or throws if this stream has been closed. */
    private InputStream getInIfOpen() throws IOException {
        InputStream input = in;
        if (input == null) {
            throw new IOException("Stream closed");
        }
        return input;
    }

    /** Returns the buffer, or throws if this stream has been closed. */
    private byte[] getBufIfOpen() throws IOException {
        byte[] buffer = buf;
        if (buffer == null) {
            throw new IOException("Stream closed");
        }
        return buffer;
    }

    /**
     * Refills the buffer from the wrapped stream. Callers invoke it only when
     * all buffered bytes have been consumed. Bytes between the mark and
     * {@code pos} are preserved so that a later {@link #reset()} still works.
     */
    private void fill() throws IOException {
        byte[] buffer = getBufIfOpen();
        if (markpos < 0) {
            // No mark: nothing has to be kept, start again at the front.
            pos = 0;
        } else if (pos >= buffer.length) {
            // Buffer is full and a mark is set.
            if (markpos > 0) {
                // Bytes before the mark are no longer needed: slide mark..pos to the front.
                int sz = pos - markpos;
                System.arraycopy(buffer, markpos, buffer, 0, sz);
                pos = sz;
                markpos = 0;
            } else if (buffer.length >= marklimit) {
                // The buffer has reached marklimit: drop the mark and the buffered bytes.
                markpos = -1;
                pos = 0;
            } else if (buffer.length >= MAX_BUFFER_SIZE) {
                throw new OutOfMemoryError("Required array size too large");
            } else {
                // Grow the buffer (doubling, capped by MAX_BUFFER_SIZE and marklimit).
                int nsz = (pos <= MAX_BUFFER_SIZE - pos) ?
                        pos * 2 : MAX_BUFFER_SIZE;
                if (nsz > marklimit) {
                    nsz = marklimit;
                }
                byte[] nbuf = new byte[nsz];
                System.arraycopy(buffer, 0, nbuf, 0, pos);
                // The CAS fails if close() replaced buf with null in the meantime.
                if (!bufUpdater.compareAndSet(this, buffer, nbuf)) {
                    throw new IOException("Stream closed");
                }
                buffer = nbuf;
            }
        }
        count = pos;
        int n = getInIfOpen().read(buffer, pos, buffer.length - pos);
        if (n > 0) {
            count = n + pos;
        }
    }

    /**
     * Reads one byte, refilling the buffer when it is exhausted.
     *
     * @return the byte as 0..255, or -1 at end of stream
     * @throws IOException if the stream is closed or the wrapped stream fails
     */
    @Override
    public synchronized int read() throws IOException {
        if (pos >= count) {
            fill();
            if (pos >= count) {
                // Still nothing after the refill: end of stream.
                return -1;
            }
        }
        return getBufIfOpen()[pos++] & 0xff;
    }

    /**
     * Reads up to {@code len} bytes, going to the wrapped stream at most once.
     *
     * @return number of bytes copied into {@code b}, or -1 at end of stream
     */
    private int read1(byte[] b, int off, int len) throws IOException {
        int avail = count - pos;
        if (avail <= 0) {
            // Buffer is empty. If the request is at least a full buffer and no mark
            // has to be preserved, read straight into the caller's array and skip
            // the extra copy through the buffer.
            if (len >= getBufIfOpen().length && markpos < 0) {
                return getInIfOpen().read(b, off, len);
            }
            fill();
            avail = count - pos;
            if (avail <= 0) return -1;
        }
        int cnt = (avail < len) ? avail : len;
        System.arraycopy(getBufIfOpen(), pos, b, off, cnt);
        pos += cnt;
        return cnt;
    }

    /**
     * Reads up to {@code len} bytes by calling {@code read1} repeatedly until
     * the request is satisfied, end of stream is reached, or the wrapped
     * stream reports no more available bytes.
     *
     * @param b   destination array
     * @param off start offset in {@code b}
     * @param len maximum number of bytes to read
     * @return number of bytes read, or -1 if the stream had already ended
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit {@code b}
     * @throws IOException               if the stream is closed or the wrapped stream fails
     */
    @Override
    public synchronized int read(byte[] b, int off, int len)
            throws IOException {
        getBufIfOpen(); // Check for closed stream
        if ((off | len | (off + len) | (b.length - (off + len))) < 0) {
            throw new IndexOutOfBoundsException();
        } else if (len == 0) {
            return 0;
        }

        int n = 0;
        for (;;) {
            int nread = read1(b, off + n, len - n);
            if (nread <= 0) {
                // Nothing more to read: report what was gathered so far, or the EOF marker.
                return (n == 0) ? nread : n;
            }
            n += nread;
            if (n >= len) {
                return n;
            }
            // if not closed but no bytes available, return
            InputStream input = in;
            if (input != null && input.available() <= 0) {
                return n;
            }
        }
    }

    /**
     * Skips up to {@code n} bytes. This is best effort: it skips only what is
     * buffered, or delegates to the wrapped stream when the buffer is empty
     * and no mark is set.
     *
     * @param n number of bytes to skip
     * @return number of bytes actually skipped
     * @throws IOException if the stream is closed or the wrapped stream fails
     */
    @Override
    public synchronized long skip(long n) throws IOException {
        getBufIfOpen(); // Check for closed stream
        if (n <= 0) {
            return 0;
        }
        long avail = count - pos;

        if (avail <= 0) {
            // If no mark position set then don't keep in buffer
            if (markpos < 0) {
                return getInIfOpen().skip(n);
            }

            // Fill in buffer to save bytes for reset
            fill();
            avail = count - pos;
            if (avail <= 0) {
                return 0;
            }
        }

        long skipped = (avail < n) ? avail : n;
        pos += skipped;
        return skipped;
    }

    /**
     * Estimates the readable bytes: buffered bytes plus what the wrapped
     * stream reports, saturating at {@code Integer.MAX_VALUE}.
     *
     * @throws IOException if the stream is closed or the wrapped stream fails
     */
    @Override
    public synchronized int available() throws IOException {
        int n = count - pos;
        int avail = getInIfOpen().available();
        return n > (Integer.MAX_VALUE - avail)
                ? Integer.MAX_VALUE
                : n + avail;
    }

    /**
     * Marks the current position so that {@link #reset()} can return to it.
     *
     * @param readlimit read-ahead limit stored as {@code marklimit}
     */
    @Override
    public synchronized void mark(int readlimit) {
        marklimit = readlimit;
        markpos = pos;
    }

    /**
     * Moves the read position back to the mark.
     *
     * @throws IOException if the stream is closed or no valid mark exists
     */
    @Override
    public synchronized void reset() throws IOException {
        getBufIfOpen(); // Cause exception if closed
        if (markpos < 0) {
            throw new IOException("Resetting to invalid mark");
        }
        pos = markpos;
    }

    /** Always {@code true}: mark/reset is implemented by this class. */
    @Override
    public boolean markSupported() {
        return true;
    }

    /**
     * Releases the buffer and closes the wrapped stream. Closing an already
     * closed stream does nothing.
     *
     * @throws IOException if closing the wrapped stream fails
     */
    @Override
    public void close() throws IOException {
        byte[] buffer;
        while ((buffer = buf) != null) {
            if (bufUpdater.compareAndSet(this, buffer, null)) {
                InputStream input = in;
                in = null;
                if (input != null) {
                    input.close();
                }
                return;
            }
            // Else retry in case a new buf was CASed in fill()
        }
    }
}

通过上面的源码分析可以知道：主要的实现就是，先将底层流中的数据读入到缓冲区中，读取的时候如果能够在缓冲区中获取到就直接从缓冲区中获取，否则重新填充缓冲区，然后再从缓冲区中读取。

此外还附加了一些标记（mark）与重置（reset）的功能。

再来看看：LineNumberInputStream

@Deprecated
public class LineNumberInputStream extends FilterInputStream

可以看到该类使用了过时的注解，因而我们就不需要浪费时间分析了。

下面再来看看回退缓冲输入流：

PushbackInputStream：

定义如下：

/**
 * PushbackInputStream存在的意义就是允许我试探性的读取数据流，
 * 如果不是我们想要的则返还回去，之所以能够这样，因为其内部维护了一个pushback buf缓冲区
 */
/**
 * Input-stream decorator that lets the caller "unread" bytes: bytes pushed
 * back are stored in an internal buffer and are returned by the next reads
 * before any further data is taken from the wrapped stream.
 */
public class PushbackInputStream extends FilterInputStream {

    /**
     * Pushback buffer. It is filled from the end towards the front, and only
     * by the {@code unread} methods.
     */
    protected byte[] buf;

    /**
     * Index of the next pushed-back byte to return; equal to
     * {@code buf.length} when the buffer is empty, 0 when it is full.
     */
    protected int pos;

    /**
     * Creates a stream with a pushback buffer of {@code size} bytes.
     *
     * @param in   the stream to read from
     * @param size capacity of the pushback buffer
     * @throws IllegalArgumentException if {@code size <= 0}
     */
    public PushbackInputStream(InputStream in, int size) {
        super(in);
        if (size <= 0) {
            throw new IllegalArgumentException("size <= 0");
        }
        buf = new byte[size];
        // Start with an empty pushback buffer.
        pos = size;
    }

    /**
     * Creates a stream that can push back a single byte.
     *
     * @param in the stream to read from
     */
    public PushbackInputStream(InputStream in) {
        this(in, 1);
    }

    /** Throws if the wrapped stream reference is {@code null}. */
    private void ensureOpen() throws IOException {
        if (in == null) {
            throw new IOException("Stream closed");
        }
    }

    /**
     * Reads one byte: a pushed-back byte if there is one, otherwise the next
     * byte of the wrapped stream.
     *
     * @return the byte as 0..255, or -1 at end of stream
     * @throws IOException if the stream is closed or the wrapped stream fails
     */
    public int read() throws IOException {
        ensureOpen();
        boolean pushbackEmpty = pos >= buf.length;
        if (pushbackEmpty) {
            return super.read();
        }
        int value = buf[pos] & 0xff;
        pos++;
        return value;
    }

    /**
     * Reads up to {@code len} bytes: pushed-back bytes first, then, if more
     * are wanted, bytes from the wrapped stream.
     *
     * @param b   destination array
     * @param off start offset in {@code b}
     * @param len maximum number of bytes to read
     * @return number of bytes read, or -1 if nothing was pushed back and the
     *         wrapped stream has ended
     * @throws NullPointerException      if {@code b} is {@code null}
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit {@code b}
     * @throws IOException               if the stream is closed or the wrapped stream fails
     */
    public int read(byte[] b, int off, int len) throws IOException {
        ensureOpen();
        if (b == null) {
            throw new NullPointerException();
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }

        // Serve as much as possible from the pushback buffer.
        int copied = buf.length - pos;
        if (copied > 0) {
            copied = Math.min(copied, len);
            System.arraycopy(buf, pos, b, off, copied);
            pos += copied;
            off += copied;
            len -= copied;
        }
        if (len <= 0) {
            return copied;
        }

        // The pushback buffer did not satisfy the request: read the remainder from the stream.
        int fromStream = super.read(b, off, len);
        if (fromStream == -1) {
            return (copied == 0) ? -1 : copied;
        }
        return copied + fromStream;
    }

    /**
     * Pushes one byte back; it becomes the next byte returned by a read.
     *
     * @param b the byte to push back (only the low eight bits are kept)
     * @throws IOException if the stream is closed or the pushback buffer is full
     */
    public void unread(int b) throws IOException {
        ensureOpen();
        if (pos == 0) {
            throw new IOException("Push back buffer is full");
        }
        pos--;
        buf[pos] = (byte) b;
    }

    /**
     * Pushes back {@code len} bytes of {@code b} starting at {@code off};
     * {@code b[off]} becomes the next byte returned by a read.
     *
     * @param b   source array
     * @param off start offset in {@code b}
     * @param len number of bytes to push back
     * @throws IOException if the stream is closed or the pushback buffer lacks room
     */
    public void unread(byte[] b, int off, int len) throws IOException {
        ensureOpen();
        int free = pos;
        if (len > free) {
            throw new IOException("Push back buffer is full");
        }
        pos = free - len;
        System.arraycopy(b, off, buf, pos, len);
    }

    /**
     * Pushes back the whole array.
     *
     * @param b the bytes to push back
     * @throws IOException if the stream is closed or the pushback buffer lacks room
     */
    public void unread(byte[] b) throws IOException {
        unread(b, 0, b.length);
    }
}

通过分析源码可以很清楚的发现其用法，就是可以预先从流中获取部分数据，来判断该数据是否是自己需要的，如果不是还可以再回退到流中重新获取，而实现的方式就是通过维护一个buf数组，需要在初始化的时候指明可以回退的大小，默认为1。

例如：

一次从文件中读取两个字节，并检查两个字节合并后的整数值是否在0xA440--0xFFFF之间，这样可以简单地判断其两个字节合并后是否为BIG5码。如果是BIG5码则使用这两个字节产生String实例以显示汉字字符；如果不在这个范围之内，则可能是个ASCII范围内的字符，您可以显示第一个字节的字符表示，并将第二个字节推回流，以待下一次可以重新读取。

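但是阅读该源码的时候一直有一种困惑，就是不知道这个回退缓冲区里面的数据是何时被初始化的，按照我的想法，应该是发生在read方法中读取的时候顺便初始化回退缓冲区，但是在源码中并没有发现该操作。。。（补充说明：回退缓冲区并不会在read时被填充，其中的数据只来自调用者显式调用的unread()；构造时pos被设置为buf.length，表示缓冲区为空，此时read会直接委托给底层流，只有unread之后pos才会小于buf.length，后续的read才会先从缓冲区中取数据。）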

编写此博客时参考了诸多博客，因而发表的时候，已经搞乱了，不知道参考了哪些博客，在此表示抱歉，也对CSDN的各位博主表示感谢。