package org.apache.fontbox.ttf.gsub;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.tools.parser.Parse;

/* loaded from: input_file:org/apache/fontbox/ttf/gsub/CompoundCharacterTokenizer.class */
/**
 * Splits a string into tokens using a regular expression: every match of the
 * expression becomes its own token, and every non-empty stretch of text
 * between two matches (or before the first / after the last match) becomes a
 * token as well. Tokens are returned in the order they occur in the input.
 */
public class CompoundCharacterTokenizer {
    /** Expression whose matches are emitted as separate tokens. */
    private final Pattern regexExpression;

    /**
     * Creates a tokenizer that recognises any of the given words.
     *
     * <p>The words are combined into a single alternation of the form
     * {@code (w1)|(w2)|...} and compiled as a regular expression. The words
     * are inserted verbatim (they are not quoted), so regex metacharacters
     * inside a word keep their special meaning.
     *
     * @param set the words to recognise; must not be {@code null}
     * @throws java.util.regex.PatternSyntaxException if the combined
     *         expression is not a valid regular expression
     */
    public CompoundCharacterTokenizer(Set<String> set) {
        this.regexExpression = Pattern.compile(getRegexFromTokens(set));
    }

    /**
     * Creates a tokenizer from a regular expression given as a string.
     *
     * @param str the regular expression to compile
     * @throws java.util.regex.PatternSyntaxException if {@code str} is not a
     *         valid regular expression
     * @deprecated use one of the other constructors instead.
     */
    @Deprecated
    public CompoundCharacterTokenizer(String str) {
        this.regexExpression = Pattern.compile(str);
    }

    /**
     * Creates a tokenizer that uses an already compiled pattern.
     *
     * @param pattern the pattern whose matches are emitted as tokens
     */
    public CompoundCharacterTokenizer(Pattern pattern) {
        this.regexExpression = pattern;
    }

    /**
     * Splits {@code str} into matched and unmatched pieces.
     *
     * <p>Each match of the pattern is added as a token. Text lying between
     * matches is added as a token only when it is non-empty; the match itself
     * is always added, even if it is zero-length.
     *
     * @param str the text to tokenize
     * @return the tokens in input order; empty if {@code str} is empty and
     *         the pattern does not match it
     */
    public List<String> tokenize(String str) {
        List<String> tokens = new ArrayList<>();
        Matcher matcher = this.regexExpression.matcher(str);

        // Index just past the previous match; start of the next unmatched gap.
        int gapStart = 0;
        while (matcher.find()) {
            String gap = str.substring(gapStart, matcher.start());
            if (!gap.isEmpty()) {
                tokens.add(gap);
            }
            tokens.add(matcher.group());
            gapStart = matcher.end();
        }

        // Whatever follows the last match (or the whole input if none matched).
        String tail = str.substring(gapStart);
        if (!tail.isEmpty()) {
            tokens.add(tail);
        }
        return tokens;
    }

    /**
     * Builds the alternation {@code (w1)|(w2)|...} from the given words, in
     * the iteration order of the set. An empty set yields {@code ()}.
     *
     * @param set the words to join
     * @return the regular expression source text
     */
    private static String getRegexFromTokens(Set<String> set) {
        StringJoiner alternation = new StringJoiner(")|(", "(", ")");
        set.forEach(alternation::add);
        return alternation.toString();
    }
}
