Support elongated words

2024-06-06 22:09:04 -04:00 · 2024-06-06 22:09:04 -04:00 · cc80a10088
commit cc80a10088
parent 237b42afa5
2 changed files with 15 additions and 7 deletions
--- a/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java
+++ b/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java
@ -28,6 +28,7 @@ public final class VulgarityProcessor extends TextProcessor {
     * Substitutions for characters in profane words.
     */
    private static final Map<Character, List<Character>> charSubstitutions = Collections.synchronizedMap(new HashMap<>());
+
    static { // Populate char substitutions
        charSubstitutions.put('e', Collections.singletonList('3'));
        charSubstitutions.put('i', List.of('1', '!'));
@ -75,8 +76,9 @@ public final class VulgarityProcessor extends TextProcessor {
                matched.add(word);
                int start = offset + matcher.start();
                int end = offset + matcher.end();
-                replacement.replace(start, end, Character.toString(replaceChar).repeat(word.length()));
-                offset += word.length() - (end - start);
+                String matchedWord = matcher.group();
+                replacement.replace(start, end, Character.toString(replaceChar).repeat(matchedWord.length()));
+                offset += matchedWord.length() - (end - start);
            }
        }

@ -122,7 +124,7 @@ public final class VulgarityProcessor extends TextProcessor {
                for (Character substitution : charSubstitutions.get(lowerChar)) {
                    chars.append(substitution);
                }
-                obfuscatedWordRegex.append('[').append(chars).append(']');
+                obfuscatedWordRegex.append('[').append(chars).append("]+");
            } else {
                obfuscatedWordRegex.append(lowerChar);
            }
--- a/API/src/main/java/me/braydon/profanity/service/FiltrationService.java
+++ b/API/src/main/java/me/braydon/profanity/service/FiltrationService.java
@ -88,16 +88,22 @@ public final class FiltrationService {
        log.info("Downloaded lists in {}ms", System.currentTimeMillis() - before);
    }

+    /**
+     * Filter the content in the given input.
+     *
+     * @param input the input to filter
+     * @return the response from filtering the content
+     */
    @NonNull
    public ContentProcessResponse process(@NonNull ContentProcessInput input) {
+        String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter
+
        List<String> matched = new ArrayList<>(); // The content that was matched
        List<ContentTag> tags = new ArrayList<>(); // Tags obtained from the processed content
        StringBuilder replacement = new StringBuilder(input.getContent());

        // Handle filtering if a profanity list is present
        if (profanityList != null) {
-            String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter
-
            // Invoke each text processor on the content
            for (TextProcessor textProcessor : textProcessors) {
                int before = matched.size();
@ -109,8 +115,8 @@ public final class FiltrationService {
        }

        // Calculate the score based on
-        // the matched profane content
-        double score = 0D;
+        // the matched profane content, that cannot be bypassed by changing the content length
+        double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D);

        return new ContentProcessResponse(replacement.toString(), matched, tags, score);
    }