AutoLFInputStream.java

  1. /*
  2.  * Copyright (C) 2010, 2013 Marc Strapetz <marc.strapetz@syntevo.com>
  3.  * Copyright (C) 2015, 2020 Ivan Motsch <ivan.motsch@bsiag.com> and others
  4.  *
  5.  * This program and the accompanying materials are made available under the
  6.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  7.  * https://www.eclipse.org/org/documents/edl-v10.php.
  8.  *
  9.  * SPDX-License-Identifier: BSD-3-Clause
  10.  */

  11. package org.eclipse.jgit.util.io;

  12. import java.io.IOException;
  13. import java.io.InputStream;
  14. import java.util.Arrays;
  15. import java.util.EnumSet;
  16. import java.util.Set;

  17. import org.eclipse.jgit.diff.RawText;

  18. /**
  19.  * An InputStream that normalizes CRLF to LF.
  20.  * <p>
  21.  * Existing single CR are not changed to LF but are retained as is.
  22.  * </p>
  23.  * <p>
  24.  * Optionally, a binary check on the first {@link RawText#getBufferSize()} bytes
  25.  * is performed and in case of binary files, canonicalization is turned off (for
  26.  * the complete file). If binary checking determines that the input is
  27.  * CR/LF-delimited text and the stream has been created for checkout,
  28.  * canonicalization is also turned off.
  29.  * </p>
  30.  *
  31.  * @since 4.3
  32.  */
  33. public class AutoLFInputStream extends InputStream {

  34.     // This is the former EolCanonicalizingInputStream with a new name in order
  35.     // to have same naming for all LF / CRLF streams.

  36.     /**
  37.      * Flags for controlling auto-detection of binary vs. text content (for
  38.      * text=auto).
  39.      *
  40.      * @since 5.9
  41.      */
  42.     public enum StreamFlag {
  43.         /**
  44.          * Check the first 8kB for binary content and switch off
  45.          * canonicalization off for the whole file if so.
  46.          */
  47.         DETECT_BINARY,
  48.         /**
  49.          * If {@link #DETECT_BINARY} is set, throw an {@link IsBinaryException}
  50.          * if binary content is detected.
  51.          */
  52.         ABORT_IF_BINARY,
  53.         /**
  54.          * If {@link #DETECT_BINARY} is set and content is found to be CR-LF
  55.          * delimited text, switch off canonicalization.
  56.          */
  57.         FOR_CHECKOUT
  58.     }

  59.     private final byte[] single = new byte[1];

  60.     private final byte[] buf = new byte[RawText.getBufferSize()];

  61.     private final InputStream in;

  62.     private int cnt;

  63.     private int ptr;

  64.     /**
  65.      * Set to {@code true} if no CR/LF processing is to be done: if the input is
  66.      * binary data, or CR/LF-delimited text and {@link StreamFlag#FOR_CHECKOUT}
  67.      * was given.
  68.      */
  69.     private boolean passAsIs;

  70.     /**
  71.      * Set to {@code true} if the input was detected to be binary data.
  72.      */
  73.     private boolean isBinary;

  74.     private boolean detectBinary;

  75.     private final boolean abortIfBinary;

  76.     private final boolean forCheckout;

  77.     /**
  78.      * A special exception thrown when {@link AutoLFInputStream} is told to
  79.      * throw an exception when attempting to read a binary file. The exception
  80.      * may be thrown at any stage during reading.
  81.      *
  82.      * @since 3.3
  83.      */
  84.     public static class IsBinaryException extends IOException {
  85.         private static final long serialVersionUID = 1L;

  86.         IsBinaryException() {
  87.             super();
  88.         }
  89.     }

  90.     /**
  91.      * Factory method for creating an {@link AutoLFInputStream} with the
  92.      * specified {@link StreamFlag flags}.
  93.      *
  94.      * @param in
  95.      *            raw input stream
  96.      * @param flags
  97.      *            {@link StreamFlag}s controlling the stream behavior
  98.      * @return a new {@link AutoLFInputStream}
  99.      * @since 5.9
  100.      */
  101.     public static AutoLFInputStream create(InputStream in,
  102.             StreamFlag... flags) {
  103.         if (flags == null) {
  104.             return new AutoLFInputStream(in, null);
  105.         }
  106.         EnumSet<StreamFlag> set = EnumSet.noneOf(StreamFlag.class);
  107.         set.addAll(Arrays.asList(flags));
  108.         return new AutoLFInputStream(in, set);
  109.     }

  110.     /**
  111.      * Creates a new InputStream, wrapping the specified stream.
  112.      *
  113.      * @param in
  114.      *            raw input stream
  115.      * @param flags
  116.      *            {@link StreamFlag}s controlling the stream behavior;
  117.      *            {@code null} is treated as an empty set
  118.      * @since 5.9
  119.      */
  120.     public AutoLFInputStream(InputStream in, Set<StreamFlag> flags) {
  121.         this.in = in;
  122.         this.detectBinary = flags != null
  123.                 && flags.contains(StreamFlag.DETECT_BINARY);
  124.         this.abortIfBinary = flags != null
  125.                 && flags.contains(StreamFlag.ABORT_IF_BINARY);
  126.         this.forCheckout = flags != null
  127.                 && flags.contains(StreamFlag.FOR_CHECKOUT);
  128.     }

  129.     /**
  130.      * Creates a new InputStream, wrapping the specified stream.
  131.      *
  132.      * @param in
  133.      *            raw input stream
  134.      * @param detectBinary
  135.      *            whether binaries should be detected
  136.      * @since 2.0
  137.      * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
  138.      *             instead
  139.      */
  140.     @Deprecated
  141.     public AutoLFInputStream(InputStream in, boolean detectBinary) {
  142.         this(in, detectBinary, false);
  143.     }

  144.     /**
  145.      * Creates a new InputStream, wrapping the specified stream.
  146.      *
  147.      * @param in
  148.      *            raw input stream
  149.      * @param detectBinary
  150.      *            whether binaries should be detected
  151.      * @param abortIfBinary
  152.      *            throw an IOException if the file is binary
  153.      * @since 3.3
  154.      * @deprecated since 5.9, use {@link #create(InputStream, StreamFlag...)}
  155.      *             instead
  156.      */
  157.     @Deprecated
  158.     public AutoLFInputStream(InputStream in, boolean detectBinary,
  159.             boolean abortIfBinary) {
  160.         this.in = in;
  161.         this.detectBinary = detectBinary;
  162.         this.abortIfBinary = abortIfBinary;
  163.         this.forCheckout = false;
  164.     }

  165.     /** {@inheritDoc} */
  166.     @Override
  167.     public int read() throws IOException {
  168.         final int read = read(single, 0, 1);
  169.         return read == 1 ? single[0] & 0xff : -1;
  170.     }

  171.     /** {@inheritDoc} */
  172.     @Override
  173.     public int read(byte[] bs, int off, int len)
  174.             throws IOException {
  175.         if (len == 0)
  176.             return 0;

  177.         if (cnt == -1)
  178.             return -1;

  179.         int i = off;
  180.         final int end = off + len;

  181.         while (i < end) {
  182.             if (ptr == cnt && !fillBuffer()) {
  183.                 break;
  184.             }

  185.             byte b = buf[ptr++];
  186.             if (passAsIs || b != '\r') {
  187.                 // Logic for binary files ends here
  188.                 bs[i++] = b;
  189.                 continue;
  190.             }

  191.             if (ptr == cnt && !fillBuffer()) {
  192.                 bs[i++] = '\r';
  193.                 break;
  194.             }

  195.             if (buf[ptr] == '\n') {
  196.                 bs[i++] = '\n';
  197.                 ptr++;
  198.             } else
  199.                 bs[i++] = '\r';
  200.         }

  201.         return i == off ? -1 : i - off;
  202.     }

  203.     /**
  204.      * Whether the stream has detected as a binary so far.
  205.      *
  206.      * @return true if the stream has detected as a binary so far.
  207.      * @since 3.3
  208.      */
  209.     public boolean isBinary() {
  210.         return isBinary;
  211.     }

  212.     /** {@inheritDoc} */
  213.     @Override
  214.     public void close() throws IOException {
  215.         in.close();
  216.     }

  217.     private boolean fillBuffer() throws IOException {
  218.         cnt = 0;
  219.         while (cnt < buf.length) {
  220.             int n = in.read(buf, cnt, buf.length - cnt);
  221.             if (n < 0) {
  222.                 break;
  223.             }
  224.             cnt += n;
  225.         }
  226.         if (cnt < 1) {
  227.             cnt = -1;
  228.             return false;
  229.         }
  230.         if (detectBinary) {
  231.             isBinary = RawText.isBinary(buf, cnt, cnt < buf.length);
  232.             passAsIs = isBinary;
  233.             detectBinary = false;
  234.             if (isBinary && abortIfBinary) {
  235.                 throw new IsBinaryException();
  236.             }
  237.             if (!passAsIs && forCheckout) {
  238.                 passAsIs = RawText.isCrLfText(buf, cnt, cnt < buf.length);
  239.             }
  240.         }
  241.         ptr = 0;
  242.         return true;
  243.     }
  244. }