From 6b0b0b9b572c6c7d4bc5ffdb4e9195292541bf7c Mon Sep 17 00:00:00 2001
From: Fedor Katurov
Date: Mon, 24 Feb 2025 19:48:21 +0700
Subject: [PATCH] fix url extraction

---
 src/utils/__tests__/extractURLs.test.ts | 21 +++++++++++++++++++++
 src/utils/extract.ts                    |  2 +-
 2 files changed, 22 insertions(+), 1 deletion(-)
 create mode 100644 src/utils/__tests__/extractURLs.test.ts

diff --git a/src/utils/__tests__/extractURLs.test.ts b/src/utils/__tests__/extractURLs.test.ts
new file mode 100644
index 0000000..ffabfda
--- /dev/null
+++ b/src/utils/__tests__/extractURLs.test.ts
@@ -0,0 +1,21 @@
+import { extractURLs } from "../extract";
+
+describe("extractURLs", () => {
+  it("extracts simple urls", () => {
+    const result = extractURLs(
+      "Trying out links https://map.vault48.org/test 123"
+    );
+
+    expect(result.length).toBe(1);
+    expect(result[0].href).toBe("https://map.vault48.org/test");
+  });
+
+  it("works with that weird new VK urls", () => {
+    const result = extractURLs(
+      "Trying out links: [#alias|map.vault48.org/test|https://map.vault48.org/test]"
+    );
+
+    expect(result.length).toBe(1);
+    expect(result[0].href).toBe("https://map.vault48.org/test");
+  });
+});
diff --git a/src/utils/extract.ts b/src/utils/extract.ts
index 5da454d..70e6279 100644
--- a/src/utils/extract.ts
+++ b/src/utils/extract.ts
@@ -1,6 +1,6 @@
 import { URL } from "url";
 
-const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/gim;
+const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim;
 
 export const extractURLs = (text: string): URL[] => {
   const matches = text.match(urlRe) || [];