/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.tokenizer.retokenizer;

import ai.grazie.nlp.tokenizer.NonDestructiveTokenizer;
import ai.grazie.nlp.tokenizer.Tokenizer;
import ai.grazie.nlp.tokenizer.sequence.CharSequenceRetokenizer;
import ai.grazie.nlp.tokenizer.sequence.ExtensionsKt;
import ai.grazie.nlp.tokenizer.sequence.NonDestructiveCharSequenceTokenizer;
import ai.grazie.nlp.utils.Symbols;
import ai.grazie.text.Text;
import ai.grazie.text.TextRange;
import ai.grazie.utils.TextJVMKt;
import java.util.Set;
import kotlin.Metadata;
import kotlin.collections.IndexedValue;
import kotlin.collections.SetsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.ranges.IntRange;
import kotlin.sequences.Sequence;
import kotlin.sequences.SequencesKt;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

/**
 * Retokenizer that emits a single-character token for every quote-like character in the text.
 *
 * <p>A character is emitted when it is either
 * <ul>
 *   <li>a member of {@code quotesSet} (the double-quote character), or</li>
 *   <li>a member of {@code apostropheSet} that is the first or last character of the text, has a
 *       non-letter neighbour on either side, or sits between an ideographic letter and a
 *       non-ideographic letter.</li>
 * </ul>
 * An apostrophe surrounded by letters of the same kind (both ideographic or both non-ideographic)
 * is therefore not emitted.
 */
@Metadata(mv={2, 1, 0}, k=1, xi=48, d1={"\u0000:\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\r\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0000\n\u0002\u0010\b\n\u0002\b\u0002\u0018\u0000 \u00122\u00020\u0001:\u0001\u0012B\u000f\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\u0004\b\u0004\u0010\u0005B\u0011\b\u0016\u0012\u0006\u0010\u0002\u001a\u00020\u0006\u00a2\u0006\u0004\b\u0004\u0010\u0007J$\u0010\b\u001a\b\u0012\u0004\u0012\u00020\n0\t2\u0006\u0010\u000b\u001a\u00020\f2\f\u0010\r\u001a\b\u0012\u0004\u0012\u00020\n0\tH\u0016J\u0018\u0010\u000e\u001a\u00020\u000f2\u0006\u0010\u000b\u001a\u00020\f2\u0006\u0010\u0010\u001a\u00020\u0011H\u0002\u00a8\u0006\u0013"}, d2={"Lai/grazie/nlp/tokenizer/retokenizer/QuotesRetokenizer;", "Lai/grazie/nlp/tokenizer/sequence/CharSequenceRetokenizer;", "tokenizer", "Lai/grazie/nlp/tokenizer/sequence/NonDestructiveCharSequenceTokenizer;", "<init>", "(Lai/grazie/nlp/tokenizer/sequence/NonDestructiveCharSequenceTokenizer;)V", "Lai/grazie/nlp/tokenizer/NonDestructiveTokenizer;", "(Lai/grazie/nlp/tokenizer/NonDestructiveTokenizer;)V", "retokenizeAsSequence", "Lkotlin/sequences/Sequence;", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "text", "", "tokens", "isBetweenCJKAndNonCJK", "", "index", "", "Companion", "nlp-tokenizer"})
public final class QuotesRetokenizer
extends CharSequenceRetokenizer {
    @NotNull
    public static final Companion Companion = new Companion(null);
    /** Apostrophe characters, as provided by {@code Symbols.INSTANCE.getApostrophes()}. */
    @NotNull
    private static final Set<Character> apostropheSet = Symbols.INSTANCE.getApostrophes();
    /** Characters that are always emitted as their own token: only the double quote. */
    @NotNull
    private static final Set<Character> quotesSet = SetsKt.setOf(Character.valueOf('\"'));

    /**
     * Creates a retokenizer on top of a char-sequence tokenizer.
     *
     * @param tokenizer the underlying tokenizer handed to the superclass; must not be null
     */
    public QuotesRetokenizer(@NotNull NonDestructiveCharSequenceTokenizer tokenizer) {
        // The null check runs inside the argument expression because the explicit
        // constructor call has to be the first statement of the constructor body.
        super(QuotesRetokenizer.requireTokenizer(tokenizer));
    }

    /**
     * Creates a retokenizer on top of a plain tokenizer by first adapting it with
     * {@code ExtensionsKt.toInefficientCharSequenceTokenizer}.
     *
     * @param tokenizer the tokenizer to adapt; must not be null
     */
    public QuotesRetokenizer(@NotNull NonDestructiveTokenizer tokenizer) {
        this(ExtensionsKt.toInefficientCharSequenceTokenizer(QuotesRetokenizer.requireTokenizer(tokenizer)));
    }

    /** Applies the Kotlin non-null parameter check to {@code tokenizer} and returns it unchanged. */
    private static <T> T requireTokenizer(T tokenizer) {
        Intrinsics.checkNotNullParameter(tokenizer, "tokenizer");
        return tokenizer;
    }

    /**
     * Lazily produces one single-character token per quote-like character of {@code text2}.
     *
     * @param text2 the text to scan
     * @param tokens incoming tokens; only null-checked here, this method does not read them
     * @return a sequence of tokens, each covering exactly one character of {@code text2}
     */
    @Override
    @NotNull
    public Sequence<Tokenizer.Token> retokenizeAsSequence(@NotNull CharSequence text2, @NotNull Sequence<Tokenizer.Token> tokens) {
        Intrinsics.checkNotNullParameter(text2, "text");
        Intrinsics.checkNotNullParameter(tokens, "tokens");
        Sequence<IndexedValue<Character>> indexedChars = SequencesKt.withIndex(StringsKt.asSequence(text2));
        Sequence<IndexedValue<Character>> quoteChars = SequencesKt.filter(indexedChars, it -> this.isQuoteOrStandaloneApostrophe(text2, it));
        Sequence<IntRange> ranges = SequencesKt.map(quoteChars, QuotesRetokenizer::singleCharRange);
        return SequencesKt.map(ranges, range -> QuotesRetokenizer.tokenAt(text2, range));
    }

    /**
     * Tells whether the character at {@code index} has a letter on each side and exactly one of
     * those two letters is ideographic.
     *
     * @param text2 the text being scanned
     * @param index position of the candidate character
     * @return {@code false} at either end of the text or when a neighbour is not a letter
     */
    private boolean isBetweenCJKAndNonCJK(CharSequence text2, int index) {
        if (index == 0 || index == text2.length() - 1) {
            return false;
        }
        // NOTE(review): neighbours are read as single UTF-16 chars, so a supplementary-plane
        // ideograph (a surrogate pair) would presumably not count as a letter here - confirm
        // whether that matters for the supported inputs.
        char previous = text2.charAt(index - 1);
        char next = text2.charAt(index + 1);
        if (!Character.isLetter(previous) || !Character.isLetter(next)) {
            return false;
        }
        return TextJVMKt.isCodePointIdeographic(previous) != TextJVMKt.isCodePointIdeographic(next);
    }

    /**
     * Filter predicate: accepts every character of {@code quotesSet}, and those characters of
     * {@code apostropheSet} that are not enclosed by two letters of the same kind.
     */
    private boolean isQuoteOrStandaloneApostrophe(CharSequence text, IndexedValue<Character> it) {
        Intrinsics.checkNotNullParameter(it, "it");
        Character value = it.getValue();
        int index = it.getIndex();
        boolean isQuote = quotesSet.contains(value);
        // The edge checks come first so that charAt(index + 1) / charAt(index - 1) are only
        // reached for interior positions.
        boolean isApostrophe = apostropheSet.contains(value)
                && (index == text.length() - 1
                || index == 0
                || !Character.isLetter(text.charAt(index + 1))
                || !Character.isLetter(text.charAt(index - 1))
                || this.isBetweenCJKAndNonCJK(text, index));
        return isQuote || isApostrophe;
    }

    /** Maps an indexed character to the closed range {@code index..index}. */
    private static IntRange singleCharRange(IndexedValue<Character> it) {
        Intrinsics.checkNotNullParameter(it, "it");
        int index = it.getIndex();
        return new IntRange(index, index);
    }

    /** Builds the token whose text is the substring of {@code text} covered by {@code range}. */
    private static Tokenizer.Token tokenAt(CharSequence text, IntRange range) {
        Intrinsics.checkNotNullParameter(range, "it");
        Text tokenText = new Text(StringsKt.substring(text, range));
        return new Tokenizer.Token(tokenText, TextRange.Companion.invoke(range));
    }

    @Metadata(mv={2, 1, 0}, k=1, xi=48, d1={"\u0000\u0018\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0003\n\u0002\u0010\"\n\u0002\u0010\f\n\u0002\b\u0002\b\u0086\u0003\u0018\u00002\u00020\u0001B\t\b\u0002\u00a2\u0006\u0004\b\u0002\u0010\u0003R\u0014\u0010\u0004\u001a\b\u0012\u0004\u0012\u00020\u00060\u0005X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0014\u0010\u0007\u001a\b\u0012\u0004\u0012\u00020\u00060\u0005X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\b"}, d2={"Lai/grazie/nlp/tokenizer/retokenizer/QuotesRetokenizer$Companion;", "", "<init>", "()V", "apostropheSet", "", "", "quotesSet", "nlp-tokenizer"})
    public static final class Companion {
        private Companion() {
        }

        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }
    }
}

