InputStreamReaderEx

標準のInputStreamReaderは内部でバイト列をバッファリング(先読み)しており、readメソッドで読み込んだ文字が実際に何バイト分なのかわからないので、それがわかるものを作ってみた。

/**
 * A {@link Reader} that decodes bytes from an {@link InputStream} and keeps
 * track of how many bytes the characters returned so far were decoded from.
 *
 * <p>Bytes are pulled from the stream through a very small internal buffer so
 * that read-ahead stays minimal. {@link #getByteCount()} excludes bytes that
 * were fetched from the stream but not yet consumed by the decoder.
 *
 * <p>Limitation: {@link #read(char[], int, int)} asserts that the caller asks
 * for at least two characters (so a surrogate pair always fits). The
 * single-character {@code read()} inherited from {@link Reader} delegates with
 * a length of one and therefore trips that assertion when assertions are
 * enabled.
 *
 * <p>The decoder is used with its default error actions, so malformed or
 * unmappable input surfaces as a {@code CharacterCodingException}.
 */
public class InputStreamReaderEx extends Reader {

  /** Capacity of the byte buffer placed between the stream and the decoder. */
  private static final int BUFFER_SIZE = 4;

  private final InputStream stream;
  private final CharsetDecoder decoder;
  /** Pending input bytes; kept in "read mode" (flipped) outside of readBytes(). */
  private final ByteBuffer bb;
  /** Total number of bytes actually obtained from the underlying stream. */
  private long byteCount;
  /** Total number of chars handed back to callers. */
  private long charCount;

  /**
   * Creates a reader that decodes with the platform default charset.
   *
   * @param stream the byte source
   */
  public InputStreamReaderEx(InputStream stream) {
    this(Charset.defaultCharset(), stream);
  }

  /**
   * Creates a reader that decodes with the named charset.
   *
   * @param stream the byte source
   * @param encoding the charset name
   * @throws UnsupportedEncodingException if the name is null, illegal, or not
   *     supported by this JVM
   */
  public InputStreamReaderEx(InputStream stream, String encoding)
      throws UnsupportedEncodingException {
    this(lookupCharset(encoding), stream);
  }

  /** Shared initialisation; the buffer starts out empty and in read mode. */
  private InputStreamReaderEx(Charset charset, InputStream stream) {
    this.stream = stream;
    this.decoder = charset.newDecoder();
    this.bb = ByteBuffer.allocate(BUFFER_SIZE);
    this.bb.flip();
  }

  /**
   * Resolves a charset name, converting the unchecked lookup failures of
   * {@link Charset#forName(String)} into the checked exception this class
   * declares. (forName never returns null, so a null check cannot detect an
   * unknown name.)
   */
  private static Charset lookupCharset(String encoding)
      throws UnsupportedEncodingException {
    try {
      return Charset.forName(encoding);
    } catch (IllegalArgumentException e) {
      // Covers IllegalCharsetNameException, UnsupportedCharsetException and
      // the plain IllegalArgumentException thrown for a null name.
      UnsupportedEncodingException unsupported = new UnsupportedEncodingException(encoding);
      unsupported.initCause(e);
      throw unsupported;
    }
  }

  /**
   * Decodes characters into {@code cbuf}.
   *
   * @param cbuf destination array
   * @param off index of the first char to write
   * @param len maximum number of chars to read; must be 0 or at least 2
   * @return the number of chars stored, 0 if {@code len} is 0, or -1 at end of
   *     stream
   * @throws IOException if the underlying stream fails or the input cannot be
   *     decoded
   */
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    // Wrapping first keeps the bounds validation for every call.
    CharBuffer cb = CharBuffer.wrap(cbuf, off, len);
    if (len == 0) {
      return 0;
    }

    // In order to handle surrogate pairs, this method requires that
    // the invoker attempt to read at least two characters. Saving the
    // extra character, if any, at a higher level is easier than trying
    // to deal with it here.
    assert (len > 1);

    if (cb.position() != 0) {
      // Ensure that cb[0] == cbuf[off], so cb.position() is the char count
      cb = cb.slice();
    }

    boolean eof = false;
    for (;;) {
      CoderResult cr = decoder.decode(bb, cb, eof);
      if (cr.isUnderflow()) {
        if (eof) {
          break;
        }
        if (!cb.hasRemaining()) {
          break;
        }
        if ((cb.position() > 0) && !inReady()) {
          break; // Block at most once
        }
        int n = readBytes();
        if (n < 0) {
          eof = true;
          if ((cb.position() == 0) && (!bb.hasRemaining())) {
            break;
          }
          decoder.reset();
        }
        continue;
      }
      if (cr.isOverflow()) {
        assert cb.position() > 0;
        break;
      }
      // Malformed or unmappable input
      cr.throwException();
    }

    if (eof) {
      // ## Need to flush decoder
      decoder.reset();
    }

    int produced = cb.position();
    charCount += produced;

    if (produced == 0) {
      if (eof) {
        return -1;
      }
      assert false;
    }
    return produced;
  }

  /**
   * Tells whether more bytes can probably be fetched without blocking.
   * A FileInputStream is always treated as ready; an IOException from
   * available() is treated as "not ready".
   */
  private boolean inReady() {
    try {
      return (((stream != null) && (stream.available() > 0))
          || (stream instanceof FileInputStream)); // ## RBC.available()?
    } catch (IOException x) {
      return false;
    }
  }

  /**
   * Tops up the byte buffer from the stream, keeping any undecoded bytes.
   *
   * @return the number of bytes now available to the decoder, or a negative
   *     value if the stream is at end of file
   * @throws IOException if the stream fails or returns zero bytes
   */
  private int readBytes() throws IOException {
    bb.compact();
    try {
      // Read from the input stream, and then update the buffer
      int pos = bb.position();
      int rem = bb.limit() - pos;
      assert rem > 0;
      int n = stream.read(bb.array(), bb.arrayOffset() + pos, rem);
      if (n < 0) {
        return n;
      }
      if (n == 0) {
        throw new IOException("Underlying input stream returned zero bytes");
      }
      assert (n <= rem) : "n = " + n + ", rem = " + rem;
      bb.position(pos + n);
      // Count what the stream actually delivered, not what was requested;
      // the stream may legitimately return fewer bytes than asked for.
      byteCount += n;
    } finally {
      // Flip even when an IOException is thrown,
      // otherwise the stream will stutter
      bb.flip();
    }

    int rem = bb.remaining();
    assert (rem != 0) : rem;
    return rem;
  }

  /** Closes the underlying stream. */
  @Override
  public void close() throws IOException {
    stream.close();
  }

  /**
   * Returns the number of bytes consumed by the decoder so far.
   *
   * <p>Strictly speaking this is not the number of bytes fetched from the
   * source stream: bytes that were fetched but are still waiting in the
   * internal buffer are excluded, so the value corresponds to the characters
   * already returned by {@code read}.
   *
   * @return the consumed byte count
   */
  public long getByteCount() {
    return byteCount - bb.remaining();
  }

  /**
   * Returns the total number of chars returned by {@code read} so far.
   *
   * @return the char count
   */
  public long getCharCount() {
    return charCount;
  }

}