Fix URL regex pattern matching wrong TLDs

This commit is contained in:
Braydon 2024-06-07 13:59:24 -04:00
parent 1cc854f0da
commit ef1fa30b45

@@ -17,7 +17,7 @@ import java.util.regex.Pattern;
* @author Braydon * @author Braydon
*/ */
public final class AdTextProcessor extends TextProcessor { public final class AdTextProcessor extends TextProcessor {
private static final Pattern URL_REGEX = Pattern.compile("(?i)\\b((?:https?://)?(?:www\\.)?[a-z0-9.-]+(?:\\.[a-z]{2,})+(?:/\\S*)?)\\b"); private static final Pattern URL_REGEX = Pattern.compile("(?i)\\b((?:https?://)?(?:www\\.)?[a-z0-9.-]+\\.[a-z]{2,10}(?:/\\S*)?)\\b");
private static final Pattern IPV4_REGEX = Pattern.compile("(([0-1]?[0-9]{1,2}\\.)|(2[0-4][0-9]\\.)|(25[0-5]\\.)){3}(([0-1]?[0-9]{1,2})|(2[0-4][0-9])|(25[0-5]))"); private static final Pattern IPV4_REGEX = Pattern.compile("(([0-1]?[0-9]{1,2}\\.)|(2[0-4][0-9]\\.)|(25[0-5]\\.)){3}(([0-1]?[0-9]{1,2})|(2[0-4][0-9])|(25[0-5]))");
public AdTextProcessor() { public AdTextProcessor() {