URL and IP filtering

This commit is contained in:
Braydon 2024-06-07 00:33:47 -04:00
parent f5b07e9c12
commit 1cc854f0da
4 changed files with 77 additions and 6 deletions

@ -12,6 +12,11 @@ import java.util.List;
*/ */
@AllArgsConstructor @Getter @AllArgsConstructor @Getter
public final class ContentProcessResponse { public final class ContentProcessResponse {
/**
* Does the content contain profanity?
*/
private final boolean containsProfanity;
/** /**
* The replacement for the content. * The replacement for the content.
*/ */

@ -0,0 +1,58 @@
package me.braydon.profanity.processor.impl;
import lombok.NonNull;
import me.braydon.profanity.common.ContentTag;
import me.braydon.profanity.model.ProfanityList;
import me.braydon.profanity.processor.TextProcessor;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A text processor to filter advertisement content.
*
* @author Braydon
*/
public final class AdTextProcessor extends TextProcessor {
    /**
     * Matches URLs, with or without a scheme and/or a "www." prefix (case-insensitive).
     */
    private static final Pattern URL_REGEX = Pattern.compile("(?i)\\b((?:https?://)?(?:www\\.)?[a-z0-9.-]+(?:\\.[a-z]{2,})+(?:/\\S*)?)\\b");

    /**
     * Matches dotted-quad IPv4 addresses.
     */
    private static final Pattern IPV4_REGEX = Pattern.compile("(([0-1]?[0-9]{1,2}\\.)|(2[0-4][0-9]\\.)|(25[0-5]\\.)){3}(([0-1]?[0-9]{1,2})|(2[0-4][0-9])|(25[0-5]))");

    public AdTextProcessor() {
        super(ContentTag.ADVERTISEMENT);
    }

    /**
     * Process the given content, masking every URL and IPv4 address found in it.
     * <p>
     * Each matched piece of text is appended to {@code matched} (URL matches first,
     * then IPv4 matches), and the corresponding range of {@code replacement} is
     * overwritten with one {@code replaceChar} per matched character.
     * </p>
     *
     * @param profanityList the profanity list to use (not consulted by this processor)
     * @param content the content to process
     * @param replacement the replacement content to modify
     * @param replaceChar the replace char to use, as a Unicode code point
     * @param matched the matched content to add to
     * @return the replaced content
     */
    @Override @NonNull
    public StringBuilder process(@NonNull ProfanityList profanityList, @NonNull String content,
                                 @NonNull StringBuilder replacement, int replaceChar, @NonNull List<String> matched) {
        // Both patterns are evaluated against the untouched content, so first flag
        // every position that has to be masked instead of editing as we go
        boolean[] flagged = new boolean[content.length()];
        recordMatches(URL_REGEX.matcher(content), matched, flagged); // Handle URLs
        recordMatches(IPV4_REGEX.matcher(content), matched, flagged); // Handle IPs

        // A supplementary code point becomes two chars here, so a mask may be longer
        // than the text it replaces
        String mask = Character.toString(replaceChar);

        // Replace the flagged runs from the back to the front: a replacement whose
        // length differs from the original run can then never shift a run that is
        // still pending, so no offset bookkeeping is needed.
        // NOTE(review): assumes the replacement is index-aligned with the content on entry - confirm against caller
        int end = flagged.length;
        while (end > 0) {
            if (!flagged[end - 1]) {
                end--;
                continue;
            }
            int start = end - 1;
            while (start > 0 && flagged[start - 1]) {
                start--;
            }
            replacement.replace(start, end, mask.repeat(end - start));
            end = start;
        }
        return replacement;
    }

    /**
     * Record every match of the given matcher.
     *
     * @param matcher the matcher to drain
     * @param matched the matched content to add to
     * @param flagged the per-character flags to set for each matched position
     */
    private static void recordMatches(Matcher matcher, List<String> matched, boolean[] flagged) {
        while (matcher.find()) {
            matched.add(matcher.group());
            for (int index = matcher.start(); index < matcher.end(); index++) {
                flagged[index] = true;
            }
        }
    }
}

@ -18,7 +18,7 @@ import java.util.regex.Pattern;
* *
* @author Braydon * @author Braydon
*/ */
public final class VulgarityProcessor extends TextProcessor { public final class VulgarityTextProcessor extends TextProcessor {
private static final String PUNCTUATION_PATTERN = "[\\p{Punct}]*"; private static final String PUNCTUATION_PATTERN = "[\\p{Punct}]*";
/** /**
@ -44,7 +44,7 @@ public final class VulgarityProcessor extends TextProcessor {
charSubstitutions.put('1', Collections.singletonList('!')); charSubstitutions.put('1', Collections.singletonList('!'));
} }
public VulgarityProcessor() { public VulgarityTextProcessor() {
super(ContentTag.VULGARITY); super(ContentTag.VULGARITY);
} }
@ -74,6 +74,8 @@ public final class VulgarityProcessor extends TextProcessor {
while (matcher.find()) { while (matcher.find()) {
matched.add(word); matched.add(word);
// Replace the matched group with the replace char
int start = offset + matcher.start(); int start = offset + matcher.start();
int end = offset + matcher.end(); int end = offset + matcher.end();
String matchedWord = matcher.group(); String matchedWord = matcher.group();

@ -13,7 +13,8 @@ import me.braydon.profanity.model.ProfanityList;
import me.braydon.profanity.model.input.ContentProcessInput; import me.braydon.profanity.model.input.ContentProcessInput;
import me.braydon.profanity.model.response.ContentProcessResponse; import me.braydon.profanity.model.response.ContentProcessResponse;
import me.braydon.profanity.processor.TextProcessor; import me.braydon.profanity.processor.TextProcessor;
import me.braydon.profanity.processor.impl.VulgarityProcessor; import me.braydon.profanity.processor.impl.AdTextProcessor;
import me.braydon.profanity.processor.impl.VulgarityTextProcessor;
import me.braydon.profanity.repository.ProfanityListRepository; import me.braydon.profanity.repository.ProfanityListRepository;
import org.apache.commons.text.StringEscapeUtils; import org.apache.commons.text.StringEscapeUtils;
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Autowired;
@ -51,7 +52,8 @@ public final class FiltrationService {
this.profanityListRepository = profanityListRepository; this.profanityListRepository = profanityListRepository;
// Register text processors // Register text processors
textProcessors.add(new VulgarityProcessor()); textProcessors.add(new VulgarityTextProcessor());
textProcessors.add(new AdTextProcessor());
} }
/** /**
@ -116,8 +118,12 @@ public final class FiltrationService {
// Calculate the score based on // Calculate the score based on
// the matched profane content, that cannot be bypassed by changing the content length // the matched profane content, that cannot be bypassed by changing the content length
double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D); double score = 0D;
for (String match : matched) {
score+= 2D / (double) match.length();
}
score = Math.min(score, 1D);
return new ContentProcessResponse(replacement.toString(), matched, tags, score); return new ContentProcessResponse(!matched.isEmpty(), replacement.toString(), matched, tags, score);
} }
} }