/*
 * Decompiled with CFR 0.152.
 */
package org.elasticsearch.xpack.esql.expression.function.grouping;

import java.io.IOException;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.NamedWriteable;
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.BytesRefHash;
import org.elasticsearch.compute.operator.EvalOperator;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.xpack.esql.capabilities.Validatable;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.TypeResolutions;
import org.elasticsearch.xpack.esql.core.tree.Node;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper;
import org.elasticsearch.xpack.esql.expression.function.FunctionInfo;
import org.elasticsearch.xpack.esql.expression.function.Param;
import org.elasticsearch.xpack.esql.expression.function.grouping.CategorizeEvaluator;
import org.elasticsearch.xpack.esql.expression.function.grouping.GroupingFunction;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationBytesRefHash;
import org.elasticsearch.xpack.ml.aggs.categorization.CategorizationPartOfSpeechDictionary;
import org.elasticsearch.xpack.ml.aggs.categorization.TokenListCategorizer;
import org.elasticsearch.xpack.ml.job.categorization.CategorizationAnalyzer;

/**
 * ES|QL grouping function that maps a string value to an integer category id.
 *
 * <p>The function takes a single {@code text}/{@code keyword} expression and reports
 * {@link DataType#INTEGER} as its result type. The per-row work is done by
 * {@link #process}, which tokenizes the value and asks a
 * {@link TokenListCategorizer.CloseableTokenListCategorizer} for the category it belongs to.
 */
public class Categorize extends GroupingFunction implements Validatable {
    /** Registry entry used to deserialize this expression by its writeable name. */
    public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(
        Expression.class,
        "Categorize",
        Categorize::new
    );

    /** The expression whose string values are categorized. */
    private final Expression field;

    /**
     * Creates the function over the given field.
     *
     * @param source location of this function in the query text
     * @param field  expression producing the strings to categorize
     */
    @FunctionInfo(returnType = { "integer" }, description = "Categorizes text messages.")
    public Categorize(
        Source source,
        @Param(name = "field", type = { "text", "keyword" }, description = "Expression to categorize") Expression field
    ) {
        super(source, List.of(field));
        this.field = field;
    }

    /**
     * Deserialization constructor: reads the source and then the field expression,
     * mirroring the order used by {@link #writeTo(StreamOutput)}.
     *
     * @param in stream to read from; it is cast to {@link PlanStreamInput}
     * @throws IOException if reading from the stream fails
     */
    private Categorize(StreamInput in) throws IOException {
        this(Source.readFrom((PlanStreamInput) in), in.readNamedWriteable(Expression.class));
    }

    /**
     * Serializes the source followed by the field expression.
     *
     * @param out stream to write to
     * @throws IOException if writing to the stream fails
     */
    @Override
    public void writeTo(StreamOutput out) throws IOException {
        source().writeTo(out);
        out.writeNamedWriteable(field);
    }

    /** Returns the name under which {@link #ENTRY} is registered. */
    @Override
    public String getWriteableName() {
        return ENTRY.name;
    }

    /** This function is foldable exactly when its field expression is. */
    @Override
    public boolean foldable() {
        return field.foldable();
    }

    /**
     * Computes the category id for a single value.
     *
     * <p>The bytes are decoded as UTF-8, tokenized by {@code analyzer} under the field name
     * {@code "text"}, and handed to {@code categorizer} together with the decoded string's
     * length and a count of one.
     *
     * @param v           UTF-8 encoded value to categorize
     * @param analyzer    analyzer that produces the token stream for the value
     * @param categorizer categorizer that assigns the token stream to a category
     * @return the id of the category returned by the categorizer
     * @throws RuntimeException wrapping any {@link IOException} raised while categorizing
     *                          or while closing the token stream
     */
    static int process(BytesRef v, CategorizationAnalyzer analyzer, TokenListCategorizer.CloseableTokenListCategorizer categorizer) {
        String s = v.utf8ToString();
        // try-with-resources closes the stream on both the success and the failure path, and the
        // single catch below also covers an IOException thrown by close() itself.
        try (TokenStream ts = analyzer.tokenStream("text", s)) {
            return categorizer.computeCategory(ts, s.length(), 1L).getId();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Builds the evaluator factory for this function.
     *
     * <p>Each evaluator gets its own analyzer (a {@link CustomAnalyzer} with a whitespace
     * tokenizer and no char or token filters) and its own categorizer backed by a
     * {@link BytesRefHash} allocated from the driver context's big arrays.
     *
     * @param toEvaluator mapper used to turn the field expression into an evaluator factory
     * @return factory producing evaluators that call {@link #process}
     */
    @Override
    public EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) {
        return new CategorizeEvaluator.Factory(
            source(),
            toEvaluator.apply(field),
            // NOTE(review): the trailing `true` presumably asks CategorizationAnalyzer to close the
            // wrapped analyzer - confirm against CategorizationAnalyzer's constructor.
            context -> new CategorizationAnalyzer(
                new CustomAnalyzer(
                    TokenizerFactory.newFactory("whitespace", WhitespaceTokenizer::new),
                    new CharFilterFactory[0],
                    new TokenFilterFactory[0]
                ),
                true
            ),
            // NOTE(review): 2048 looks like an initial hash capacity and 0.7f a similarity
            // threshold - confirm against BytesRefHash / TokenListCategorizer.
            context -> new TokenListCategorizer.CloseableTokenListCategorizer(
                new CategorizationBytesRefHash(new BytesRefHash(2048L, context.bigArrays())),
                CategorizationPartOfSpeechDictionary.getInstance(),
                0.7f
            )
        );
    }

    /** Resolves successfully only when the field is a string type. */
    @Override
    protected Expression.TypeResolution resolveType() {
        return TypeResolutions.isString(field(), sourceText(), TypeResolutions.ParamOrdinal.DEFAULT);
    }

    /** The category id is reported as an integer. */
    @Override
    public DataType dataType() {
        return DataType.INTEGER;
    }

    /**
     * Returns a copy of this function with the first element of {@code newChildren} as its field.
     *
     * @param newChildren replacement children; only index 0 is read
     */
    @Override
    public Expression replaceChildren(List<Expression> newChildren) {
        return new Categorize(source(), newChildren.get(0));
    }

    /** Describes how to rebuild this node from its source and field. */
    @Override
    protected NodeInfo<? extends Expression> info() {
        return NodeInfo.create(this, Categorize::new, field);
    }

    /** Returns the expression whose values are categorized. */
    public Expression field() {
        return field;
    }

    @Override
    public String toString() {
        return "Categorize{field=" + field + "}";
    }
}

