Support elongated words

This commit is contained in:
Braydon 2024-06-06 22:09:04 -04:00
parent 237b42afa5
commit cc80a10088
2 changed files with 15 additions and 7 deletions

@ -28,6 +28,7 @@ public final class VulgarityProcessor extends TextProcessor {
* Substitutions for characters in profane words.
*/
private static final Map<Character, List<Character>> charSubstitutions = Collections.synchronizedMap(new HashMap<>());
static { // Populate char substitutions
charSubstitutions.put('e', Collections.singletonList('3'));
charSubstitutions.put('i', List.of('1', '!'));
@ -75,8 +76,9 @@ public final class VulgarityProcessor extends TextProcessor {
matched.add(word);
int start = offset + matcher.start();
int end = offset + matcher.end();
replacement.replace(start, end, Character.toString(replaceChar).repeat(word.length()));
offset += word.length() - (end - start);
String matchedWord = matcher.group();
replacement.replace(start, end, Character.toString(replaceChar).repeat(matchedWord.length()));
offset += matchedWord.length() - (end - start);
}
}
@ -122,7 +124,7 @@ public final class VulgarityProcessor extends TextProcessor {
for (Character substitution : charSubstitutions.get(lowerChar)) {
chars.append(substitution);
}
obfuscatedWordRegex.append('[').append(chars).append(']');
obfuscatedWordRegex.append('[').append(chars).append("]+");
} else {
obfuscatedWordRegex.append(lowerChar);
}

@ -88,16 +88,22 @@ public final class FiltrationService {
log.info("Downloaded lists in {}ms", System.currentTimeMillis() - before);
}
/**
* Filter the content in the given input.
*
* @param input the input to filter
* @return the response from filtering the content
*/
@NonNull
public ContentProcessResponse process(@NonNull ContentProcessInput input) {
String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter
List<String> matched = new ArrayList<>(); // The content that was matched
List<ContentTag> tags = new ArrayList<>(); // Tags obtained from the processed content
StringBuilder replacement = new StringBuilder(input.getContent());
// Handle filtering if a profanity list is present
if (profanityList != null) {
String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter
// Invoke each text processor on the content
for (TextProcessor textProcessor : textProcessors) {
int before = matched.size();
@ -109,8 +115,8 @@ public final class FiltrationService {
}
// Calculate the score based on
// the matched profane content
double score = 0D;
// the matched profane content, that cannot be bypassed by changing the content length
double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D);
return new ContentProcessResponse(replacement.toString(), matched, tags, score);
}