package com.xy.nlp.tokenizer.seg.FShort;

import com.xy.nlp.tokenizer.corpus.tag.Nature;
import java.util.Arrays;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;

/* loaded from: classes4.dex */
/**
 * Segmenter that tracks three kinds of runs while the {@link AnalyzeContext} cursor advances
 * and adds a {@code Lexeme} to the context each time a run ends:
 * <ul>
 *   <li>runs of "English" characters (char type 8), tagged {@code Nature.nx};</li>
 *   <li>runs of "Arabic" characters (char type 9), tagged {@code Nature.m}, which may be
 *       bridged by numeric connector characters;</li>
 *   <li>mixed runs of either kind, tagged {@code Nature.nx}, which may be extended by
 *       letter connector characters.</li>
 * </ul>
 *
 * <p>The instance keeps the start/end cursor of each in-progress run between calls to
 * {@link #analyze(AnalyzeContext)}; {@link #reset()} discards all of them.
 */
public class LetterSegmenter implements ISegmenter {
    public static final String SEGMENTER_NAME = "LETTER_SEGMENTER";

    // Char-type codes compared against AnalyzeContext#getCurrentCharType().
    // NOTE(review): the names are inferred from how this class uses the values; confirm them
    // against the project's character-type definitions.
    private static final int CHAR_TYPE_ENGLISH = 8;
    private static final int CHAR_TYPE_ARABIC = 9;
    private static final int CHAR_TYPE_CONNECTOR_CANDIDATE = 17;

    // Third argument handed to the Lexeme constructor for each kind of run.
    // NOTE(review): names inferred from usage; confirm against Lexeme's type constants.
    private static final int LEXEME_TYPE_ENGLISH = 1;
    private static final int LEXEME_TYPE_ARABIC = 2;
    private static final int LEXEME_TYPE_MIX = 3;

    /** Sentinel stored in a start/end field while no run of that kind is in progress. */
    private static final int NOT_TRACKING = -1;

    /**
     * Characters that may extend a mixed run. Sorted once at class initialization because
     * {@link Arrays#binarySearch(char[], char)} requires a sorted array. {@code '\uFF1A'} is
     * the full-width colon.
     */
    private static final char[] LETTER_CONNECTORS = sorted(
            '#', '&', '+', '-', '.', '@', '_', ':', '\uFF1A', '/', '?', '~', '%', '=', '!', '*', ' ');

    /** Characters that may sit inside an Arabic run without terminating it; sorted once. */
    private static final char[] NUM_CONNECTORS = sorted(',', '.', '-', '_', ' ', '/');

    private int arabicEnd = NOT_TRACKING;
    private int arabicStart = NOT_TRACKING;
    private int end = NOT_TRACKING;
    private int englishEnd = NOT_TRACKING;
    private int englishStart = NOT_TRACKING;
    private int start = NOT_TRACKING;

    /**
     * Creates a segmenter with no run in progress.
     *
     * <p>The connector tables are sorted during class initialization rather than here, so
     * constructing an instance never mutates the shared static arrays while another instance
     * may be searching them.
     */
    public LetterSegmenter() {
    }

    /** Returns the given characters as a sorted array, ready for binary search. */
    private static char[] sorted(char... chars) {
        Arrays.sort(chars);
        return chars;
    }

    /** Returns whether {@code c} is one of the letter connector characters. */
    private static boolean isLetterConnector(char c) {
        return Arrays.binarySearch(LETTER_CONNECTORS, c) >= 0;
    }

    /** Returns whether {@code c} is one of the numeric connector characters. */
    private static boolean isNumConnector(char c) {
        return Arrays.binarySearch(NUM_CONNECTORS, c) >= 0;
    }

    /**
     * Builds a lexeme from the run {@code [first, last]}, tags it and adds it to the context.
     * The second constructor argument is {@code last + 1}, exactly as every call site in this
     * class has always passed it.
     */
    private static void emit(AnalyzeContext analyzeContext, int first, int last, int lexemeType, Nature nature) {
        Lexeme lexeme = new Lexeme(first, last + 1, lexemeType);
        lexeme.setNature(nature);
        analyzeContext.addLexeme(lexeme);
    }

    /**
     * Advances the Arabic-run state by the character under the cursor and emits the run when
     * it ends or when the buffer is consumed.
     */
    private void processArabicLetter(AnalyzeContext analyzeContext) {
        int charType = analyzeContext.getCurrentCharType();
        if (this.arabicStart == NOT_TRACKING) {
            if (charType == CHAR_TYPE_ARABIC) {
                int cursor = analyzeContext.getCursor();
                this.arabicStart = cursor;
                this.arabicEnd = cursor;
            }
        } else if (charType == CHAR_TYPE_ARABIC) {
            this.arabicEnd = analyzeContext.getCursor();
        } else if (!(charType == CHAR_TYPE_CONNECTOR_CANDIDATE
                && isNumConnector(analyzeContext.getCurrentChar()))) {
            // A numeric connector neither ends the run nor moves arabicEnd, so a trailing
            // connector is never part of the emitted lexeme. Anything else ends the run.
            flushArabic(analyzeContext);
        }
        if (analyzeContext.isBufferConsumed()
                && this.arabicStart != NOT_TRACKING
                && this.arabicEnd != NOT_TRACKING) {
            // No further characters will arrive for this buffer: emit what is pending.
            flushArabic(analyzeContext);
        }
    }

    /** Emits the pending Arabic run and clears its state. */
    private void flushArabic(AnalyzeContext analyzeContext) {
        emit(analyzeContext, this.arabicStart, this.arabicEnd, LEXEME_TYPE_ARABIC, Nature.m);
        this.arabicStart = NOT_TRACKING;
        this.arabicEnd = NOT_TRACKING;
    }

    /**
     * Advances the English-run state by the character under the cursor and emits the run when
     * a non-English character is seen or when the buffer is consumed.
     */
    private void processEnglishLetter(AnalyzeContext analyzeContext) {
        int charType = analyzeContext.getCurrentCharType();
        if (this.englishStart == NOT_TRACKING) {
            if (charType == CHAR_TYPE_ENGLISH) {
                int cursor = analyzeContext.getCursor();
                this.englishStart = cursor;
                this.englishEnd = cursor;
            }
        } else if (charType == CHAR_TYPE_ENGLISH) {
            this.englishEnd = analyzeContext.getCursor();
        } else {
            flushEnglish(analyzeContext);
        }
        if (analyzeContext.isBufferConsumed()
                && this.englishStart != NOT_TRACKING
                && this.englishEnd != NOT_TRACKING) {
            flushEnglish(analyzeContext);
        }
    }

    /** Emits the pending English run and clears its state. */
    private void flushEnglish(AnalyzeContext analyzeContext) {
        emit(analyzeContext, this.englishStart, this.englishEnd, LEXEME_TYPE_ENGLISH, Nature.nx);
        this.englishStart = NOT_TRACKING;
        this.englishEnd = NOT_TRACKING;
    }

    /**
     * Advances the mixed-run state by the character under the cursor. A mixed run starts on an
     * English or Arabic character, is extended by either kind and by letter connectors, and is
     * emitted when any other character is seen or when the buffer is consumed.
     */
    private void processMixLetter(AnalyzeContext analyzeContext) {
        int charType = analyzeContext.getCurrentCharType();
        boolean letterOrDigit = charType == CHAR_TYPE_ARABIC || charType == CHAR_TYPE_ENGLISH;
        if (this.start == NOT_TRACKING) {
            if (letterOrDigit) {
                int cursor = analyzeContext.getCursor();
                this.start = cursor;
                this.end = cursor;
            }
        } else if (letterOrDigit) {
            this.end = analyzeContext.getCursor();
        } else if (charType == CHAR_TYPE_CONNECTOR_CANDIDATE
                && isLetterConnector(analyzeContext.getCurrentChar())) {
            // Unlike the Arabic case, a connector DOES move the end, so it can become part of
            // the emitted lexeme.
            this.end = analyzeContext.getCursor();
        } else {
            // Drop a single trailing space from the run before emitting it.
            // NOTE(review): only one space is removed here, and the buffer-consumed path below
            // removes none; kept as-is to preserve existing output — confirm this is intended.
            if (analyzeContext.getSegmentBuff()[this.end] == ' ') {
                this.end--;
            }
            flushMix(analyzeContext);
        }
        if (analyzeContext.isBufferConsumed()
                && this.start != NOT_TRACKING
                && this.end != NOT_TRACKING) {
            flushMix(analyzeContext);
        }
    }

    /** Emits the pending mixed run and clears its state. */
    private void flushMix(AnalyzeContext analyzeContext) {
        emit(analyzeContext, this.start, this.end, LEXEME_TYPE_MIX, Nature.nx);
        this.start = NOT_TRACKING;
        this.end = NOT_TRACKING;
    }

    /**
     * Feeds the character under the context's cursor to the English, Arabic and mixed-run
     * trackers, in that order. Each tracker may add a lexeme to {@code analyzeContext}.
     *
     * @param analyzeContext the context supplying the current character and receiving lexemes
     */
    @Override // com.xy.nlp.tokenizer.seg.FShort.ISegmenter
    public void analyze(AnalyzeContext analyzeContext) {
        processEnglishLetter(analyzeContext);
        processArabicLetter(analyzeContext);
        processMixLetter(analyzeContext);
    }

    /** Discards every in-progress run without emitting it. */
    @Override // com.xy.nlp.tokenizer.seg.FShort.ISegmenter
    public void reset() {
        this.start = NOT_TRACKING;
        this.end = NOT_TRACKING;
        this.englishStart = NOT_TRACKING;
        this.englishEnd = NOT_TRACKING;
        this.arabicStart = NOT_TRACKING;
        this.arabicEnd = NOT_TRACKING;
    }
}
