RawSubStringPattern.java

  1. /*
  2.  * Copyright (C) 2009, Google Inc.
  3.  * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
  4.  *
  5.  * This program and the accompanying materials are made available under the
  6.  * terms of the Eclipse Distribution License v. 1.0 which is available at
  7.  * https://www.eclipse.org/org/documents/edl-v10.php.
  8.  *
  9.  * SPDX-License-Identifier: BSD-3-Clause
  10.  */

  11. package org.eclipse.jgit.util;

  12. import org.eclipse.jgit.internal.JGitText;
  13. import org.eclipse.jgit.lib.Constants;

  14. /**
  15.  * Searches text using only substring search.
  16.  * <p>
  17.  * Instances are thread-safe. Multiple concurrent threads may perform matches on
  18.  * different character sequences at the same time.
  19.  */
  20. public class RawSubStringPattern {
  21.     private final String needleString;

  22.     private final byte[] needle;

  23.     /**
  24.      * Construct a new substring pattern.
  25.      *
  26.      * @param patternText
  27.      *            text to locate. This should be a literal string, as no
  28.      *            meta-characters are supported by this implementation. The
  29.      *            string may not be the empty string.
  30.      */
  31.     public RawSubStringPattern(String patternText) {
  32.         if (patternText.length() == 0)
  33.             throw new IllegalArgumentException(JGitText.get().cannotMatchOnEmptyString);
  34.         needleString = patternText;

  35.         final byte[] b = Constants.encode(patternText);
  36.         needle = new byte[b.length];
  37.         for (int i = 0; i < b.length; i++)
  38.             needle[i] = lc(b[i]);
  39.     }

  40.     /**
  41.      * Match a character sequence against this pattern.
  42.      *
  43.      * @param rcs
  44.      *            the sequence to match. Must not be null but the length of the
  45.      *            sequence is permitted to be 0.
  46.      * @return offset within <code>rcs</code> of the first occurrence of this
  47.      *         pattern; -1 if this pattern does not appear at any position of
  48.      *         <code>rcs</code>.
  49.      */
  50.     public int match(RawCharSequence rcs) {
  51.         final int needleLen = needle.length;
  52.         final byte first = needle[0];

  53.         final byte[] text = rcs.buffer;
  54.         int matchPos = rcs.startPtr;
  55.         final int maxPos = rcs.endPtr - needleLen;

  56.         OUTER: for (; matchPos <= maxPos; matchPos++) {
  57.             if (neq(first, text[matchPos])) {
  58.                 while (++matchPos <= maxPos && neq(first, text[matchPos])) {
  59.                     /* skip */
  60.                 }
  61.                 if (matchPos > maxPos)
  62.                     return -1;
  63.             }

  64.             int si = matchPos + 1;
  65.             for (int j = 1; j < needleLen; j++, si++) {
  66.                 if (neq(needle[j], text[si]))
  67.                     continue OUTER;
  68.             }
  69.             return matchPos;
  70.         }
  71.         return -1;
  72.     }

  73.     private static final boolean neq(byte a, byte b) {
  74.         return a != b && a != lc(b);
  75.     }

  76.     private static final byte lc(byte q) {
  77.         return (byte) StringUtils.toLowerCase((char) (q & 0xff));
  78.     }

  79.     /**
  80.      * Get the literal pattern string this instance searches for.
  81.      *
  82.      * @return the pattern string given to our constructor.
  83.      */
  84.     public String pattern() {
  85.         return needleString;
  86.     }

  87.     /** {@inheritDoc} */
  88.     @Override
  89.     public String toString() {
  90.         return pattern();
  91.     }
  92. }