URL and IP filtering
This commit is contained in:
parent
f5b07e9c12
commit
1cc854f0da
@ -12,6 +12,11 @@ import java.util.List;
|
|||||||
*/
|
*/
|
||||||
@AllArgsConstructor @Getter
|
@AllArgsConstructor @Getter
|
||||||
public final class ContentProcessResponse {
|
public final class ContentProcessResponse {
|
||||||
|
/**
|
||||||
|
* Does the content contain profanity?
|
||||||
|
*/
|
||||||
|
private final boolean containsProfanity;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The replacement for the content.
|
* The replacement for the content.
|
||||||
*/
|
*/
|
||||||
|
@ -0,0 +1,58 @@
|
|||||||
|
package me.braydon.profanity.processor.impl;
|
||||||
|
|
||||||
|
import lombok.NonNull;
|
||||||
|
import me.braydon.profanity.common.ContentTag;
|
||||||
|
import me.braydon.profanity.model.ProfanityList;
|
||||||
|
import me.braydon.profanity.processor.TextProcessor;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.function.Consumer;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A text processor to filter advertisement content.
|
||||||
|
*
|
||||||
|
* @author Braydon
|
||||||
|
*/
|
||||||
|
public final class AdTextProcessor extends TextProcessor {
|
||||||
|
private static final Pattern URL_REGEX = Pattern.compile("(?i)\\b((?:https?://)?(?:www\\.)?[a-z0-9.-]+(?:\\.[a-z]{2,})+(?:/\\S*)?)\\b");
|
||||||
|
private static final Pattern IPV4_REGEX = Pattern.compile("(([0-1]?[0-9]{1,2}\\.)|(2[0-4][0-9]\\.)|(25[0-5]\\.)){3}(([0-1]?[0-9]{1,2})|(2[0-4][0-9])|(25[0-5]))");
|
||||||
|
|
||||||
|
public AdTextProcessor() {
|
||||||
|
super(ContentTag.ADVERTISEMENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Processor the given content.
|
||||||
|
*
|
||||||
|
* @param profanityList the profanity list to use
|
||||||
|
* @param content the content to process
|
||||||
|
* @param replacement the replacement content to modify
|
||||||
|
* @param replaceChar the replace char to use
|
||||||
|
* @param matched the matched content to add to
|
||||||
|
* @return the replaced content
|
||||||
|
*/
|
||||||
|
@Override @NonNull
|
||||||
|
public StringBuilder process(@NonNull ProfanityList profanityList, @NonNull String content,
|
||||||
|
@NonNull StringBuilder replacement, int replaceChar, @NonNull List<String> matched) {
|
||||||
|
AtomicInteger offset = new AtomicInteger();
|
||||||
|
Consumer<Matcher> handleReplacements = matcher -> {
|
||||||
|
while (matcher.find()) {
|
||||||
|
String matchedGroup = matcher.group();
|
||||||
|
matched.add(matchedGroup);
|
||||||
|
|
||||||
|
// Replace the matched group with the replace char
|
||||||
|
int start = offset.get() + matcher.start();
|
||||||
|
int end = offset.get() + matcher.end();
|
||||||
|
String matchedWord = matcher.group();
|
||||||
|
replacement.replace(start, end, Character.toString(replaceChar).repeat(matchedWord.length()));
|
||||||
|
offset.set(offset.get() + (matchedWord.length() - (end - start)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
handleReplacements.accept(URL_REGEX.matcher(content)); // Handle URLs
|
||||||
|
handleReplacements.accept(IPV4_REGEX.matcher(content)); // Handle IPs
|
||||||
|
return replacement;
|
||||||
|
}
|
||||||
|
}
|
@ -18,7 +18,7 @@ import java.util.regex.Pattern;
|
|||||||
*
|
*
|
||||||
* @author Braydon
|
* @author Braydon
|
||||||
*/
|
*/
|
||||||
public final class VulgarityProcessor extends TextProcessor {
|
public final class VulgarityTextProcessor extends TextProcessor {
|
||||||
private static final String PUNCTUATION_PATTERN = "[\\p{Punct}]*";
|
private static final String PUNCTUATION_PATTERN = "[\\p{Punct}]*";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -44,7 +44,7 @@ public final class VulgarityProcessor extends TextProcessor {
|
|||||||
charSubstitutions.put('1', Collections.singletonList('!'));
|
charSubstitutions.put('1', Collections.singletonList('!'));
|
||||||
}
|
}
|
||||||
|
|
||||||
public VulgarityProcessor() {
|
public VulgarityTextProcessor() {
|
||||||
super(ContentTag.VULGARITY);
|
super(ContentTag.VULGARITY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -74,6 +74,8 @@ public final class VulgarityProcessor extends TextProcessor {
|
|||||||
|
|
||||||
while (matcher.find()) {
|
while (matcher.find()) {
|
||||||
matched.add(word);
|
matched.add(word);
|
||||||
|
|
||||||
|
// Replace the matched group with the replace char
|
||||||
int start = offset + matcher.start();
|
int start = offset + matcher.start();
|
||||||
int end = offset + matcher.end();
|
int end = offset + matcher.end();
|
||||||
String matchedWord = matcher.group();
|
String matchedWord = matcher.group();
|
@ -13,7 +13,8 @@ import me.braydon.profanity.model.ProfanityList;
|
|||||||
import me.braydon.profanity.model.input.ContentProcessInput;
|
import me.braydon.profanity.model.input.ContentProcessInput;
|
||||||
import me.braydon.profanity.model.response.ContentProcessResponse;
|
import me.braydon.profanity.model.response.ContentProcessResponse;
|
||||||
import me.braydon.profanity.processor.TextProcessor;
|
import me.braydon.profanity.processor.TextProcessor;
|
||||||
import me.braydon.profanity.processor.impl.VulgarityProcessor;
|
import me.braydon.profanity.processor.impl.AdTextProcessor;
|
||||||
|
import me.braydon.profanity.processor.impl.VulgarityTextProcessor;
|
||||||
import me.braydon.profanity.repository.ProfanityListRepository;
|
import me.braydon.profanity.repository.ProfanityListRepository;
|
||||||
import org.apache.commons.text.StringEscapeUtils;
|
import org.apache.commons.text.StringEscapeUtils;
|
||||||
import org.springframework.beans.factory.annotation.Autowired;
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
@ -51,7 +52,8 @@ public final class FiltrationService {
|
|||||||
this.profanityListRepository = profanityListRepository;
|
this.profanityListRepository = profanityListRepository;
|
||||||
|
|
||||||
// Register text processors
|
// Register text processors
|
||||||
textProcessors.add(new VulgarityProcessor());
|
textProcessors.add(new VulgarityTextProcessor());
|
||||||
|
textProcessors.add(new AdTextProcessor());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -116,8 +118,12 @@ public final class FiltrationService {
|
|||||||
|
|
||||||
// Calculate the score based on
|
// Calculate the score based on
|
||||||
// the matched profane content, that cannot be bypassed by changing the content length
|
// the matched profane content, that cannot be bypassed by changing the content length
|
||||||
double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D);
|
double score = 0D;
|
||||||
|
for (String match : matched) {
|
||||||
|
score+= 2D / (double) match.length();
|
||||||
|
}
|
||||||
|
score = Math.min(score, 1D);
|
||||||
|
|
||||||
return new ContentProcessResponse(replacement.toString(), matched, tags, score);
|
return new ContentProcessResponse(!matched.isEmpty(), replacement.toString(), matched, tags, score);
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user