package com.microsoft.mobile.polymer.datamodel.ml;

import com.microsoft.mobile.polymer.datamodel.ml.common.WordToken;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/* loaded from: classes.dex */
public class MessageTokenizer {
    private static final String LOG_TAG = "MessageTokenizer";
    private static final int MAX_MESSAGE_LENGTH = 1000;
    private static final int MAX_WORD_LENGTH = 25;
    private static final String STOPWORD_FILENAME = "stop_words.txt";
    private HashSet<String> stopWords;
    private Pattern wordMatcherRegex = Pattern.compile("[A-Za-z]+");

    public MessageTokenizer() {
        loadStopWords();
    }

    /* JADX WARN: Removed duplicated region for block: B:36:0x0068 A[EXC_TOP_SPLITTER, SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    private void loadStopWords() {
        /*
            r5 = this;
            r2 = 0
            android.content.Context r0 = com.microsoft.mobile.polymer.util.ContextHolder.getAppContext()     // Catch: java.lang.Throwable -> L64 java.io.IOException -> L77
            java.io.BufferedReader r1 = new java.io.BufferedReader     // Catch: java.lang.Throwable -> L64 java.io.IOException -> L77
            java.io.InputStreamReader r3 = new java.io.InputStreamReader     // Catch: java.lang.Throwable -> L64 java.io.IOException -> L77
            android.content.res.AssetManager r0 = r0.getAssets()     // Catch: java.lang.Throwable -> L64 java.io.IOException -> L77
            java.lang.String r4 = "stop_words.txt"
            java.io.InputStream r0 = r0.open(r4)     // Catch: java.lang.Throwable -> L64 java.io.IOException -> L77
            java.lang.String r4 = "UTF-8"
            r3.<init>(r0, r4)     // Catch: java.lang.Throwable -> L64 java.io.IOException -> L77
            r1.<init>(r3)     // Catch: java.lang.Throwable -> L64 java.io.IOException -> L77
            java.util.HashSet r0 = new java.util.HashSet     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
            r0.<init>()     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
            r5.stopWords = r0     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
        L22:
            java.lang.String r0 = r1.readLine()     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
            if (r0 == 0) goto L4c
            java.util.HashSet<java.lang.String> r2 = r5.stopWords     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
            java.lang.String r0 = r0.trim()     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
            java.lang.String r0 = r0.toLowerCase()     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
            r2.add(r0)     // Catch: java.io.IOException -> L36 java.lang.Throwable -> L75
            goto L22
        L36:
            r0 = move-exception
        L37:
            java.lang.String r2 = "MessageTokenizer"
            java.lang.String r3 = "Exception while reading stop word file."
            com.microsoft.mobile.common.trace.a.a(r2, r3)     // Catch: java.lang.Throwable -> L75
            java.lang.String r2 = "MessageTokenizer"
            com.microsoft.mobile.polymer.util.CommonUtils.RecordOrThrowException(r2, r0)     // Catch: java.lang.Throwable -> L75
            r0 = 0
            r5.stopWords = r0     // Catch: java.lang.Throwable -> L75
            if (r1 == 0) goto L4b
            r1.close()     // Catch: java.io.IOException -> L5b
        L4b:
            return
        L4c:
            if (r1 == 0) goto L4b
            r1.close()     // Catch: java.io.IOException -> L52
            goto L4b
        L52:
            r0 = move-exception
            java.lang.String r0 = "MessageTokenizer"
            java.lang.String r1 = "Could not close the Buffered Reader."
            com.microsoft.mobile.common.trace.a.a(r0, r1)
            goto L4b
        L5b:
            r0 = move-exception
            java.lang.String r0 = "MessageTokenizer"
            java.lang.String r1 = "Could not close the Buffered Reader."
            com.microsoft.mobile.common.trace.a.a(r0, r1)
            goto L4b
        L64:
            r0 = move-exception
            r1 = r2
        L66:
            if (r1 == 0) goto L6b
            r1.close()     // Catch: java.io.IOException -> L6c
        L6b:
            throw r0
        L6c:
            r1 = move-exception
            java.lang.String r1 = "MessageTokenizer"
            java.lang.String r2 = "Could not close the Buffered Reader."
            com.microsoft.mobile.common.trace.a.a(r1, r2)
            goto L6b
        L75:
            r0 = move-exception
            goto L66
        L77:
            r0 = move-exception
            r1 = r2
            goto L37
        */
        throw new UnsupportedOperationException("Method not decompiled: com.microsoft.mobile.polymer.datamodel.ml.MessageTokenizer.loadStopWords():void");
    }

    public List<WordToken> tokenizeAndClean(String str) {
        ArrayList arrayList = new ArrayList();
        if (str != null && str.length() != 0) {
            if (str.length() > 1000) {
                str = str.substring(0, 1000);
            }
            Matcher matcher = this.wordMatcherRegex.matcher(str);
            while (matcher.find()) {
                String group = matcher.group();
                if (group.length() != 0 && group.length() <= 25) {
                    String lowerCase = group.toLowerCase();
                    WordToken wordToken = new WordToken(lowerCase);
                    if (this.stopWords != null && this.stopWords.contains(lowerCase)) {
                        wordToken.setIsStopWord(true);
                    }
                    arrayList.add(wordToken);
                }
            }
        }
        return arrayList;
    }
}
