BufferedInputStream实现原理

FileInputStream源码分析

/**
 * A <code>FileInputStream</code> obtains input bytes
 * from a file in a file system. What files
 * are  available depends on the host environment.
 *
 * <p><code>FileInputStream</code> is meant for reading streams of raw bytes
 * such as image data. For reading streams of characters, consider using
 * <code>FileReader</code>.
 */
public class FileInputSream {

    // ---------------------------------------------------------------
    // Native primitives. Their bodies live outside Java: the notes this
    // excerpt belongs to explain that Java cannot talk to the operating
    // system or hardware directly, so disk access is delegated to
    // lower-level (C/C++) code.
    // ---------------------------------------------------------------

    /**
     * Reads a single byte from the input stream.
     *
     * <p>Only the no-argument {@link #read()} delegates to this routine; the
     * array overloads go through {@link #readBytes(byte[], int, int)}.
     *
     * @return the value reported by the native routine (presumably the byte
     *         read, or -1 at end of file - confirm against the native code)
     * @throws IOException if the native read fails
     */
    private native int read0() throws IOException;

    /**
     * Reads several bytes from the input stream into a byte array in one call.
     * This bulk primitive is what a buffering wrapper builds on.
     *
     * @param b   destination array
     * @param off index in {@code b} at which to start storing bytes
     * @param len maximum number of bytes to read
     * @return the value reported by the native routine (presumably the number
     *         of bytes read, or -1 at end of file - confirm against the native code)
     * @throws IOException if the native read fails
     */
    private native int readBytes(byte[] b, int off, int len) throws IOException;

    /**
     * Reads one byte by calling {@code read0()} - one native call per byte.
     *
     * @return the result of {@code read0()}
     * @throws IOException if the native read fails
     */
    public int read() throws IOException {
        final int value = read0();
        return value;
    }

    /**
     * Reads into the whole of {@code b}; equivalent to asking the native bulk
     * routine for {@code b.length} bytes starting at index 0.
     *
     * @param b destination array
     * @return the result of {@code readBytes(b, 0, b.length)}
     * @throws IOException if the native read fails
     */
    public int read(byte[] b) throws IOException {
        final int capacity = b.length;
        return readBytes(b, 0, capacity);
    }

    /**
     * Reads up to {@code len} bytes into {@code b} starting at {@code off},
     * forwarding the arguments unchanged to the native bulk routine.
     *
     * @param b   destination array
     * @param off index in {@code b} at which to start storing bytes
     * @param len maximum number of bytes to read
     * @return the result of {@code readBytes(b, off, len)}
     * @throws IOException if the native read fails
     */
    public int read(byte[] b, int off, int len) throws IOException {
        final int transferred = readBytes(b, off, len);
        return transferred;
    }
}

通过对FileInputStream的源码分析可以看出，如果用read()方法读取一个文件，每读取一个字节就需要访问一次磁盘，这种读取方式是极其低效的。
即使使用read(byte[])方法读取，虽然在一定程度上可以提升效率，但是当文件特别大时，仍然会频繁地对磁盘进行访问。为了提高输入流的工作效率，Java提供了BufferedInputStream类。

BufferedInputStream

/**
 * A <code>BufferedInputStream</code> adds
 * functionality to another input stream-namely,
 * the ability to buffer the input and to
 * support the <code>mark</code> and <code>reset</code>
 * methods. When  the <code>BufferedInputStream</code>
 * is created, an internal buffer array is
 * created. As bytes  from the stream are read
 * or skipped, the internal buffer is refilled
 * as necessary  from the contained input stream,
 * many bytes at a time. The <code>mark</code>
 * operation  remembers a point in the input
 * stream and the <code>reset</code> operation
 * causes all the  bytes read since the most
 * recent <code>mark</code> operation to be
 * reread before new bytes are  taken from
 * the contained input stream.
 */
public class BufferedInputStream extends FilterInputStream {

    /** Default buffer capacity in bytes: 8192 bytes, i.e. 8 KiB (not 8 MB). */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /**
     * Largest size the buffer may grow to in {@link #fill()}. Kept a few bytes
     * below {@code Integer.MAX_VALUE}, presumably to leave headroom for array
     * header words on some VMs (the same value the JDK uses).
     */
    private static final int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8;

    /**
     * Atomic updater for {@link #buf}. {@link #fill()} uses a compare-and-set
     * when it swaps in a larger array, so the swap is rejected if some other
     * code replaced {@code buf} in the meantime.
     */
    private static final java.util.concurrent.atomic.AtomicReferenceFieldUpdater<BufferedInputStream, byte[]>
            bufUpdater = java.util.concurrent.atomic.AtomicReferenceFieldUpdater.newUpdater(
                    BufferedInputStream.class, byte[].class, "buf");

    /**
     * Internal buffer array. Data is pulled from the wrapped stream in chunks
     * of up to {@code buf.length} bytes and served to callers from here.
     * Its size is 8 KiB by default and can be chosen through the constructor.
     */
    protected volatile byte[] buf;

    /**
     * Index one past the last valid byte in {@link #buf}. The bytes still
     * waiting to be read are {@code buf[pos] .. buf[count - 1]}; the buffer is
     * exhausted when {@code pos >= count}.
     */
    protected int count;

    /**
     * Index in {@link #buf} of the next byte to hand out. It is compared with
     * {@link #count} to decide whether the buffer must be refilled.
     */
    protected int pos;

    /**
     * Value of {@link #pos} at the time of the last mark, or -1 when there is
     * no mark. This excerpt does not override {@code mark}/{@code reset}, so
     * the value stays -1 unless a subclass changes it.
     */
    protected int markpos = -1;

    /**
     * Maximum read-ahead allowed after a mark before the mark is invalidated
     * by {@link #fill()}.
     */
    protected int marklimit;

    /**
     * Wraps {@code in} with a buffer of the default size (8192 bytes).
     *
     * @param in the underlying stream being decorated
     */
    public BufferedInputStream(InputStream in) {
        this(in, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Wraps {@code in} (decorator pattern) with a buffer of the given size.
     *
     * @param in   the underlying stream being decorated
     * @param size buffer capacity in bytes; must be positive
     * @throws IllegalArgumentException if {@code size <= 0}
     */
    public BufferedInputStream(InputStream in, int size) {
        super(in);
        if (size <= 0) {
            throw new IllegalArgumentException("Buffer size <= 0");
        }
        buf = new byte[size];
    }

    /**
     * Returns the wrapped stream, or fails if the {@code in} field is null.
     *
     * @throws IOException if {@code in} is null
     */
    private InputStream getInIfOpen() throws IOException {
        InputStream input = in;
        if (input == null) {
            throw new IOException("Stream closed");
        }
        return input;
    }

    /**
     * Returns the internal buffer, or fails if the {@code buf} field is null.
     *
     * @throws IOException if {@code buf} is null
     */
    private byte[] getBufIfOpen() throws IOException {
        byte[] buffer = buf;
        if (buffer == null) {
            throw new IOException("Stream closed");
        }
        return buffer;
    }

    /**
     * Refills the buffer from the wrapped stream. Callers invoke it only when
     * every buffered byte has been consumed ({@code pos >= count}).
     *
     * <p>Without a mark the buffer is simply reused from index 0. With a mark
     * and a full buffer, the marked bytes are slid to the front, the mark is
     * dropped once the buffer has reached {@code marklimit}, or the buffer is
     * grown (at most to {@code marklimit} / {@code MAX_BUFFER_SIZE}).
     *
     * @throws IOException      if the stream is closed or the underlying read fails
     * @throws OutOfMemoryError if the buffer would have to exceed {@code MAX_BUFFER_SIZE}
     */
    private void fill() throws IOException {
        byte[] buffer = getBufIfOpen();
        if (markpos < 0) {
            pos = 0;                        // no mark: throw away the buffer
        } else if (pos >= buffer.length) {  // no room left in buffer
            if (markpos > 0) {
                // The bytes before the mark are no longer needed: slide the rest down.
                int sz = pos - markpos;
                System.arraycopy(buffer, markpos, buffer, 0, sz);
                pos = sz;
                markpos = 0;
            } else if (buffer.length >= marklimit) {
                markpos = -1;               // read past the mark limit: invalidate mark
                pos = 0;                    // drop buffer contents
            } else if (buffer.length >= MAX_BUFFER_SIZE) {
                throw new OutOfMemoryError("Required array size too large");
            } else {
                // Grow the buffer: double it, without overflowing int or passing marklimit.
                int nsz = (pos <= MAX_BUFFER_SIZE - pos) ? pos * 2 : MAX_BUFFER_SIZE;
                if (nsz > marklimit) {
                    nsz = marklimit;
                }
                byte[] nbuf = new byte[nsz];
                System.arraycopy(buffer, 0, nbuf, 0, pos);
                if (!bufUpdater.compareAndSet(this, buffer, nbuf)) {
                    // buf was replaced behind our back; treat the stream as closed.
                    throw new IOException("Stream closed");
                }
                buffer = nbuf;
            }
        }
        count = pos;
        // Delegate to the stream passed to the constructor to load bytes into the buffer.
        int n = getInIfOpen().read(buffer, pos, buffer.length - pos);
        if (n > 0) {
            count = n + pos;
        }
    }

    /**
     * Reads one byte. Unlike an unbuffered stream, the byte is normally taken
     * from the in-memory buffer; the wrapped stream is consulted only when the
     * buffer is exhausted.
     *
     * @return the next byte as a value in 0..255, or -1 at end of stream
     * @throws IOException if the stream is closed or the underlying read fails
     */
    @Override
    public synchronized int read() throws IOException {
        // pos >= count means every buffered byte has been consumed: refill.
        if (pos >= count) {
            fill();
            if (pos >= count) {
                return -1;
            }
        }
        // Mask to 0..255 so that a 0xff data byte is not mistaken for -1 (EOF).
        return getBufIfOpen()[pos++] & 0xff;
    }

    /**
     * Copies up to {@code len} bytes into {@code b}, performing at most one
     * read on the wrapped stream.
     *
     * <p>If the buffer is empty, no mark is set and the request is at least as
     * large as the buffer, the bytes are read straight into {@code b},
     * skipping the intermediate copy.
     *
     * @return number of bytes copied, or -1 at end of stream
     * @throws IOException if the stream is closed or the underlying read fails
     */
    private int read1(byte[] b, int off, int len) throws IOException {
        int avail = count - pos;
        if (avail <= 0) {
            if (len >= getBufIfOpen().length && markpos < 0) {
                return getInIfOpen().read(b, off, len);
            }
            fill();
            avail = count - pos;
            if (avail <= 0) {
                return -1;
            }
        }
        int cnt = (avail < len) ? avail : len;
        System.arraycopy(getBufIfOpen(), pos, b, off, cnt);
        pos += cnt;
        return cnt;
    }

    /**
     * Reads up to {@code len} bytes into {@code b} starting at {@code off},
     * serving them from the buffer first.
     *
     * <p>This override is required for correctness: the inherited
     * {@code FilterInputStream} version reads from the wrapped stream directly
     * and would skip any bytes already sitting in the buffer.
     * It keeps calling {@link #read1} until {@code len} bytes are delivered,
     * end of stream is reached, or the wrapped stream reports that nothing
     * more is available.
     *
     * @param b   destination array
     * @param off index in {@code b} at which to start storing bytes
     * @param len maximum number of bytes to read
     * @return number of bytes read, 0 if {@code len} is 0, or -1 at end of stream
     * @throws IndexOutOfBoundsException if {@code off}/{@code len} do not fit in {@code b}
     * @throws IOException if the stream is closed or the underlying read fails
     */
    @Override
    public synchronized int read(byte[] b, int off, int len) throws IOException {
        getBufIfOpen(); // fail fast if the buffer is gone
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException();
        }
        if (len == 0) {
            return 0;
        }

        int total = 0;
        for (;;) {
            int nread = read1(b, off + total, len - total);
            if (nread <= 0) {
                return (total == 0) ? nread : total;
            }
            total += nread;
            if (total >= len) {
                return total;
            }
            // Stop early if more data is not immediately available.
            InputStream input = in;
            if (input != null && input.available() <= 0) {
                return total;
            }
        }
    }
}