Support elongated words
This commit is contained in:
parent
237b42afa5
commit
cc80a10088
@ -28,6 +28,7 @@ public final class VulgarityProcessor extends TextProcessor {
|
||||
* Substitutions for characters in profane words.
|
||||
*/
|
||||
private static final Map<Character, List<Character>> charSubstitutions = Collections.synchronizedMap(new HashMap<>());
|
||||
|
||||
static { // Populate char substitutions
|
||||
charSubstitutions.put('e', Collections.singletonList('3'));
|
||||
charSubstitutions.put('i', List.of('1', '!'));
|
||||
@ -75,8 +76,9 @@ public final class VulgarityProcessor extends TextProcessor {
|
||||
matched.add(word);
|
||||
int start = offset + matcher.start();
|
||||
int end = offset + matcher.end();
|
||||
replacement.replace(start, end, Character.toString(replaceChar).repeat(word.length()));
|
||||
offset += word.length() - (end - start);
|
||||
String matchedWord = matcher.group();
|
||||
replacement.replace(start, end, Character.toString(replaceChar).repeat(matchedWord.length()));
|
||||
offset += matchedWord.length() - (end - start);
|
||||
}
|
||||
}
|
||||
|
||||
@ -122,7 +124,7 @@ public final class VulgarityProcessor extends TextProcessor {
|
||||
for (Character substitution : charSubstitutions.get(lowerChar)) {
|
||||
chars.append(substitution);
|
||||
}
|
||||
obfuscatedWordRegex.append('[').append(chars).append(']');
|
||||
obfuscatedWordRegex.append('[').append(chars).append("]+");
|
||||
} else {
|
||||
obfuscatedWordRegex.append(lowerChar);
|
||||
}
|
||||
|
@ -88,16 +88,22 @@ public final class FiltrationService {
|
||||
log.info("Downloaded lists in {}ms", System.currentTimeMillis() - before);
|
||||
}
|
||||
|
||||
/**
|
||||
* Filter the content in the given input.
|
||||
*
|
||||
* @param input the input to filter
|
||||
* @return the response from filtering the content
|
||||
*/
|
||||
@NonNull
|
||||
public ContentProcessResponse process(@NonNull ContentProcessInput input) {
|
||||
String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter
|
||||
|
||||
List<String> matched = new ArrayList<>(); // The content that was matched
|
||||
List<ContentTag> tags = new ArrayList<>(); // Tags obtained from the processed content
|
||||
StringBuilder replacement = new StringBuilder(input.getContent());
|
||||
|
||||
// Handle filtering if a profanity list is present
|
||||
if (profanityList != null) {
|
||||
String content = StringEscapeUtils.escapeJava(input.getContent()).toLowerCase().trim(); // The content to filter
|
||||
|
||||
// Invoke each text processor on the content
|
||||
for (TextProcessor textProcessor : textProcessors) {
|
||||
int before = matched.size();
|
||||
@ -109,8 +115,8 @@ public final class FiltrationService {
|
||||
}
|
||||
|
||||
// Calculate the score based on
|
||||
// the matched profane content
|
||||
double score = 0D;
|
||||
// the matched profane content: total matched length relative to the content length, capped at 1
|
||||
double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D);
|
||||
|
||||
return new ContentProcessResponse(replacement.toString(), matched, tags, score);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user