From cc80a10088c8fd873fb683d978476e9280f4c408 Mon Sep 17 00:00:00 2001 From: Rainnny7 Date: Thu, 6 Jun 2024 22:09:04 -0400 Subject: [PATCH] Support elongated words --- .../processor/impl/VulgarityProcessor.java | 8 +++++--- .../profanity/service/FiltrationService.java | 14 ++++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java b/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java index 3f48636..3118b4b 100644 --- a/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java +++ b/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java @@ -28,6 +28,7 @@ public final class VulgarityProcessor extends TextProcessor { * Substitutions for characters in profane words. */ private static final Map> charSubstitutions = Collections.synchronizedMap(new HashMap<>()); + static { // Populate char substitutions charSubstitutions.put('e', Collections.singletonList('3')); charSubstitutions.put('i', List.of('1', '!')); @@ -75,8 +76,9 @@ public final class VulgarityProcessor extends TextProcessor { matched.add(word); int start = offset + matcher.start(); int end = offset + matcher.end(); - replacement.replace(start, end, Character.toString(replaceChar).repeat(word.length())); - offset += word.length() - (end - start); + String matchedWord = matcher.group(); + replacement.replace(start, end, Character.toString(replaceChar).repeat(matchedWord.length())); + offset += matchedWord.length() - (end - start); } } @@ -122,7 +124,7 @@ public final class VulgarityProcessor extends TextProcessor { for (Character substitution : charSubstitutions.get(lowerChar)) { chars.append(substitution); } - obfuscatedWordRegex.append('[').append(chars).append(']'); + obfuscatedWordRegex.append('[').append(chars).append("]+"); } else { obfuscatedWordRegex.append(lowerChar); } diff --git a/API/src/main/java/me/braydon/profanity/service/FiltrationService.java b/API/src/main/java/me/braydon/profanity/service/FiltrationService.java index 6e55826..f77be0a 100644 --- a/API/src/main/java/me/braydon/profanity/service/FiltrationService.java +++ b/API/src/main/java/me/braydon/profanity/service/FiltrationService.java @@ -88,16 +88,22 @@ public final class FiltrationService { log.info("Downloaded lists in {}ms", System.currentTimeMillis() - before); } + /** + * Filter the content in the given input. + * + * @param input the input to filter + * @return the response from filtering the content + */ @NonNull public ContentProcessResponse process(@NonNull ContentProcessInput input) { + String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter + List matched = new ArrayList<>(); // The content that was matched List tags = new ArrayList<>(); // Tags obtained from the processed content StringBuilder replacement = new StringBuilder(input.getContent()); // Handle filtering if a profanity list is present if (profanityList != null) { - String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter - // Invoke each text processor on the content for (TextProcessor textProcessor : textProcessors) { int before = matched.size(); @@ -109,8 +115,8 @@ public final class FiltrationService { } // Calculate the score based on - // the matched profane content - double score = 0D; + // the matched profane content, that cannot be bypassed by changing the content length + double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D); return new ContentProcessResponse(replacement.toString(), matched, tags, score); }