From 1cc854f0dadf24757a675ed6a0eb5959dc4def8c Mon Sep 17 00:00:00 2001 From: Rainnny7 Date: Fri, 7 Jun 2024 00:33:47 -0400 Subject: [PATCH] URL and IP filtering --- .../response/ContentProcessResponse.java | 5 ++ .../processor/impl/AdTextProcessor.java | 58 +++++++++++++++++++ ...essor.java => VulgarityTextProcessor.java} | 6 +- .../profanity/service/FiltrationService.java | 14 +++-- 4 files changed, 77 insertions(+), 6 deletions(-) create mode 100644 API/src/main/java/me/braydon/profanity/processor/impl/AdTextProcessor.java rename API/src/main/java/me/braydon/profanity/processor/impl/{VulgarityProcessor.java => VulgarityTextProcessor.java} (96%) diff --git a/API/src/main/java/me/braydon/profanity/model/response/ContentProcessResponse.java b/API/src/main/java/me/braydon/profanity/model/response/ContentProcessResponse.java index 4ecd94a..4851583 100644 --- a/API/src/main/java/me/braydon/profanity/model/response/ContentProcessResponse.java +++ b/API/src/main/java/me/braydon/profanity/model/response/ContentProcessResponse.java @@ -12,6 +12,11 @@ import java.util.List; */ @AllArgsConstructor @Getter public final class ContentProcessResponse { + /** + * Does the content contain profanity? + */ + private final boolean containsProfanity; + /** * The replacement for the content. 
*/ diff --git a/API/src/main/java/me/braydon/profanity/processor/impl/AdTextProcessor.java b/API/src/main/java/me/braydon/profanity/processor/impl/AdTextProcessor.java new file mode 100644 index 0000000..85b73ee --- /dev/null +++ b/API/src/main/java/me/braydon/profanity/processor/impl/AdTextProcessor.java @@ -0,0 +1,58 @@ +package me.braydon.profanity.processor.impl; + +import lombok.NonNull; +import me.braydon.profanity.common.ContentTag; +import me.braydon.profanity.model.ProfanityList; +import me.braydon.profanity.processor.TextProcessor; + +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * A text processor to filter advertisement content. + * + * @author Braydon + */ +public final class AdTextProcessor extends TextProcessor { + private static final Pattern URL_REGEX = Pattern.compile("(?i)\\b((?:https?://)?(?:www\\.)?[a-z0-9.-]+(?:\\.[a-z]{2,})+(?:/\\S*)?)\\b"); + private static final Pattern IPV4_REGEX = Pattern.compile("(([0-1]?[0-9]{1,2}\\.)|(2[0-4][0-9]\\.)|(25[0-5]\\.)){3}(([0-1]?[0-9]{1,2})|(2[0-4][0-9])|(25[0-5]))"); + + public AdTextProcessor() { + super(ContentTag.ADVERTISEMENT); + } + + /** + * Process the given content. 
+ * + * @param profanityList the profanity list to use + * @param content the content to process + * @param replacement the replacement content to modify + * @param replaceChar the replace char to use + * @param matched the matched content to add to + * @return the replaced content + */ + @Override @NonNull + public StringBuilder process(@NonNull ProfanityList profanityList, @NonNull String content, + @NonNull StringBuilder replacement, int replaceChar, @NonNull List matched) { + AtomicInteger offset = new AtomicInteger(); + Consumer handleReplacements = matcher -> { + while (matcher.find()) { + String matchedGroup = matcher.group(); + matched.add(matchedGroup); + + // Replace the matched group with the replace char + int start = offset.get() + matcher.start(); + int end = offset.get() + matcher.end(); + String matchedWord = matcher.group(); + replacement.replace(start, end, Character.toString(replaceChar).repeat(matchedWord.length())); + offset.set(offset.get() + (matchedWord.length() - (end - start))); + } + }; + handleReplacements.accept(URL_REGEX.matcher(content)); // Handle URLs + handleReplacements.accept(IPV4_REGEX.matcher(content)); // Handle IPs + return replacement; + } +} \ No newline at end of file diff --git a/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java b/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityTextProcessor.java similarity index 96% rename from API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java rename to API/src/main/java/me/braydon/profanity/processor/impl/VulgarityTextProcessor.java index 556469c..b8b9291 100644 --- a/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityProcessor.java +++ b/API/src/main/java/me/braydon/profanity/processor/impl/VulgarityTextProcessor.java @@ -18,7 +18,7 @@ import java.util.regex.Pattern; * * @author Braydon */ -public final class VulgarityProcessor extends TextProcessor { +public final class VulgarityTextProcessor extends 
TextProcessor { private static final String PUNCTUATION_PATTERN = "[\\p{Punct}]*"; /** @@ -44,7 +44,7 @@ public final class VulgarityProcessor extends TextProcessor { charSubstitutions.put('1', Collections.singletonList('!')); } - public VulgarityProcessor() { + public VulgarityTextProcessor() { super(ContentTag.VULGARITY); } @@ -74,6 +74,8 @@ public final class VulgarityProcessor extends TextProcessor { while (matcher.find()) { matched.add(word); + + // Replace the matched group with the replace char int start = offset + matcher.start(); int end = offset + matcher.end(); String matchedWord = matcher.group(); diff --git a/API/src/main/java/me/braydon/profanity/service/FiltrationService.java b/API/src/main/java/me/braydon/profanity/service/FiltrationService.java index f77be0a..92fcb5f 100644 --- a/API/src/main/java/me/braydon/profanity/service/FiltrationService.java +++ b/API/src/main/java/me/braydon/profanity/service/FiltrationService.java @@ -13,7 +13,8 @@ import me.braydon.profanity.model.ProfanityList; import me.braydon.profanity.model.input.ContentProcessInput; import me.braydon.profanity.model.response.ContentProcessResponse; import me.braydon.profanity.processor.TextProcessor; -import me.braydon.profanity.processor.impl.VulgarityProcessor; +import me.braydon.profanity.processor.impl.AdTextProcessor; +import me.braydon.profanity.processor.impl.VulgarityTextProcessor; import me.braydon.profanity.repository.ProfanityListRepository; import org.apache.commons.text.StringEscapeUtils; import org.springframework.beans.factory.annotation.Autowired; @@ -51,7 +52,8 @@ public final class FiltrationService { this.profanityListRepository = profanityListRepository; // Register text processors - textProcessors.add(new VulgarityProcessor()); + textProcessors.add(new VulgarityTextProcessor()); + textProcessors.add(new AdTextProcessor()); } /** @@ -116,8 +118,12 @@ public final class FiltrationService { // Calculate the score based on // the matched profane content, that cannot 
be bypassed by changing the content length - double score = Math.min(matched.stream().mapToDouble(String::length).sum() / content.length(), 1D); + double score = 0D; + for (String match : matched) { + score+= 2D / (double) match.length(); + } + score = Math.min(score, 1D); - return new ContentProcessResponse(replacement.toString(), matched, tags, score); + return new ContentProcessResponse(!matched.isEmpty(), replacement.toString(), matched, tags, score); } } \ No newline at end of file