URL and IP filtering
This commit is contained in:
parent
f5b07e9c12
commit
1cc854f0da
@ -12,6 +12,11 @@ import java.util.List;
|
||||
*/
|
||||
@AllArgsConstructor @Getter
|
||||
public final class ContentProcessResponse {
|
||||
/**
|
||||
* Does the content contain profanity?
|
||||
*/
|
||||
private final boolean containsProfanity;
|
||||
|
||||
/**
|
||||
* The replacement for the content.
|
||||
*/
|
||||
|
@ -0,0 +1,58 @@
|
||||
package me.braydon.profanity.processor.impl;
|
||||
|
||||
import lombok.NonNull;
|
||||
import me.braydon.profanity.common.ContentTag;
|
||||
import me.braydon.profanity.model.ProfanityList;
|
||||
import me.braydon.profanity.processor.TextProcessor;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.function.Consumer;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* A text processor to filter advertisement content.
|
||||
*
|
||||
* @author Braydon
|
||||
*/
|
||||
public final class AdTextProcessor extends TextProcessor {
|
||||
private static final Pattern URL_REGEX = Pattern.compile("(?i)\\b((?:https?://)?(?:www\\.)?[a-z0-9.-]+(?:\\.[a-z]{2,})+(?:/\\S*)?)\\b");
|
||||
private static final Pattern IPV4_REGEX = Pattern.compile("(([0-1]?[0-9]{1,2}\\.)|(2[0-4][0-9]\\.)|(25[0-5]\\.)){3}(([0-1]?[0-9]{1,2})|(2[0-4][0-9])|(25[0-5]))");
|
||||
|
||||
public AdTextProcessor() {
|
||||
super(ContentTag.ADVERTISEMENT);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processor the given content.
|
||||
*
|
||||
* @param profanityList the profanity list to use
|
||||
* @param content the content to process
|
||||
* @param replacement the replacement content to modify
|
||||
* @param replaceChar the replace char to use
|
||||
* @param matched the matched content to add to
|
||||
* @return the replaced content
|
||||
*/
|
||||
@Override @NonNull
|
||||
public StringBuilder process(@NonNull ProfanityList profanityList, @NonNull String content,
|
||||
@NonNull StringBuilder replacement, int replaceChar, @NonNull List<String> matched) {
|
||||
AtomicInteger offset = new AtomicInteger();
|
||||
Consumer<Matcher> handleReplacements = matcher -> {
|
||||
while (matcher.find()) {
|
||||
String matchedGroup = matcher.group();
|
||||
matched.add(matchedGroup);
|
||||
|
||||
// Replace the matched group with the replace char
|
||||
int start = offset.get() + matcher.start();
|
||||
int end = offset.get() + matcher.end();
|
||||
String matchedWord = matcher.group();
|
||||
replacement.replace(start, end, Character.toString(replaceChar).repeat(matchedWord.length()));
|
||||
offset.set(offset.get() + (matchedWord.length() - (end - start)));
|
||||
}
|
||||
};
|
||||
handleReplacements.accept(URL_REGEX.matcher(content)); // Handle URLs
|
||||
handleReplacements.accept(IPV4_REGEX.matcher(content)); // Handle IPs
|
||||
return replacement;
|
||||
}
|
||||
}
|
@ -18,7 +18,7 @@ import java.util.regex.Pattern;
|
||||
*
|
||||
* @author Braydon
|
||||
*/
|
||||
public final class VulgarityProcessor extends TextProcessor {
|
||||
public final class VulgarityTextProcessor extends TextProcessor {
|
||||
private static final String PUNCTUATION_PATTERN = "[\\p{Punct}]*";
|
||||
|
||||
/**
|
||||
@ -44,7 +44,7 @@ public final class VulgarityProcessor extends TextProcessor {
|
||||
charSubstitutions.put('1', Collections.singletonList('!'));
|
||||
}
|
||||
|
||||
public VulgarityProcessor() {
|
||||
public VulgarityTextProcessor() {
|
||||
super(ContentTag.VULGARITY);
|
||||
}
|
||||
|
||||
@ -74,6 +74,8 @@ public final class VulgarityProcessor extends TextProcessor {
|
||||
|
||||
while (matcher.find()) {
|
||||
matched.add(word);
|
||||
|
||||
// Replace the matched group with the replace char
|
||||
int start = offset + matcher.start();
|
||||
int end = offset + matcher.end();
|
||||
String matchedWord = matcher.group();
|
@ -13,7 +13,8 @@ import me.braydon.profanity.model.ProfanityList;
|
||||
import me.braydon.profanity.model.input.ContentProcessInput;
|
||||
import me.braydon.profanity.model.response.ContentProcessResponse;
|
||||
import me.braydon.profanity.processor.TextProcessor;
|
||||
import me.braydon.profanity.processor.impl.VulgarityProcessor;
|
||||
import me.braydon.profanity.processor.impl.AdTextProcessor;
|
||||
import me.braydon.profanity.processor.impl.VulgarityTextProcessor;
|
||||
import me.braydon.profanity.repository.ProfanityListRepository;
|
||||
import org.apache.commons.text.StringEscapeUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
@ -51,7 +52,8 @@ public final class FiltrationService {
|
||||
this.profanityListRepository = profanityListRepository;
|
||||
|
||||
// Register text processors
|
||||
textProcessors.add(new VulgarityProcessor());
|
||||
textProcessors.add(new VulgarityTextProcessor());
|
||||
textProcessors.add(new AdTextProcessor());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -116,8 +118,12 @@ public final class FiltrationService {
|
||||
|
||||
// Calculate the score based on
|
||||
// the matched profane content, that cannot be bypassed by changing the content length
|
||||
double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D);
|
||||
double score = 0D;
|
||||
for (String match : matched) {
|
||||
score+= 2D / (double) match.length();
|
||||
}
|
||||
score = Math.min(score, 1D);
|
||||
|
||||
return new ContentProcessResponse(replacement.toString(), matched, tags, score);
|
||||
return new ContentProcessResponse(!matched.isEmpty(), replacement.toString(), matched, tags, score);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user