diff --git a/src/utils/__tests__/extractURLs.test.ts b/src/utils/__tests__/extractURLs.test.ts
index ebf2b4a..6fddfd1 100644
--- a/src/utils/__tests__/extractURLs.test.ts
+++ b/src/utils/__tests__/extractURLs.test.ts
@@ -18,4 +18,38 @@ describe("extractURLs", () => {
     expect(result.length).toBe(1);
     expect(result[0].href).toBe("https://map.vault48.org/test");
   });
+
+  it("works with that weird new VK urls without scheme", () => {
+    const result = extractURLs(
+      "Trying out links: [#alias|map.vault48.org/test|map.vault48.org/test]"
+    );
+
+    expect(result.length).toBe(1);
+    expect(result[0].href).toBe("https://map.vault48.org/test");
+  });
+
+  it("extracts the target of a VK link, not its title", () => {
+    const result = extractURLs(
+      "Trying out links: [#alias|some title|map.vault48.org/test]"
+    );
+
+    expect(result.length).toBe(1);
+    expect(result[0].href).toBe("https://map.vault48.org/test");
+  });
+
+  it("deduplicates matching urls", () => {
+    const result = extractURLs(
+      `Trying out links: [#alias|map.vault48.org/test|map.vault48.org/test] map.vault48.org/test https://map.vault48.org/test map.vault48.org/test2 https://map.vault48.org/test3
+      [#alias|map.vault48.org/test2|map.vault48.org/test2] [#alias|map.vault48.org/test3|map.vault48.org/test3] [#alias|map.vault48.org/test4|map.vault48.org/test4] https://map.vault48.org/test5
+    `
+    ).map((it) => it.href);
+
+    expect(result).toEqual([
+      "https://map.vault48.org/test",
+      "https://map.vault48.org/test2",
+      "https://map.vault48.org/test3",
+      "https://map.vault48.org/test4",
+      "https://map.vault48.org/test5",
+    ]);
+  });
 });
diff --git a/src/utils/__tests__/transformMDLinks.test.ts b/src/utils/__tests__/transformMDLinks.test.ts
index c46e4dd..387cbfe 100644
--- a/src/utils/__tests__/transformMDLinks.test.ts
+++ b/src/utils/__tests__/transformMDLinks.test.ts
@@ -2,21 +2,27 @@ import { transformMDLinks } from "../links";
 
 describe("transformMDLinks", () => {
   it("extracts simple urls", () => {
-    const result = transformMDLinks(
-      "Trying out links https://map.vault48.org/test 123"
-    );
-
-    expect(result).toBe(
+    expect(
+      transformMDLinks("Trying out links https://map.vault48.org/test 123")
+    ).toBe(
       "Trying out links [https://map.vault48…](https://map.vault48.org/test) 123"
     );
   });
 
   it("works with that weird new VK urls", () => {
-    const result = transformMDLinks(
-      "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123"
+    expect(
+      transformMDLinks(
+        "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123"
+      )
+    ).toBe(
+      "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123"
     );
 
-    expect(result).toBe(
+    expect(
+      transformMDLinks(
+        "Trying out links [#alias|12345678901234567890123|map.vault48.org/test_abc_def_ghi] 123"
+      )
+    ).toBe(
       "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123"
     );
   });
diff --git a/src/utils/links.ts b/src/utils/links.ts
index 8bd2429..b2514a2 100644
--- a/src/utils/links.ts
+++ b/src/utils/links.ts
@@ -1,13 +1,36 @@
 import { URL } from "url";
 
 const simpleUrlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim;
-const weirdLongUrlRegex = /\[(.*)\|(.*)\|(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})\]/g;
+
+/**
+ * Yep, that's how VK posts its links: `[#alias|title|url]`.
+ * Capture group 1 is the link title, capture group 2 is the link target,
+ * which may come without a scheme.
+ */
+const weirdLongUrlRegex = /\[\#alias\|([^\|]+)\|([^\]]+)\]/gim;
+
+/**
+ * Prepends `https://` to a URL that has no http(s) scheme.
+ * Empty input is returned unchanged.
+ */
+const fixUrl = (url: string): string =>
+  !url || /^https?:\/\//i.test(url) ? url : `https://${url}`;
 
 /** Extracts URLs from text */
 export const extractURLs = (text: string): URL[] => {
-  const matches = text.match(simpleUrlRegex) || [];
+  // A Set keeps first-seen order while dropping repeated URLs.
+  const urls = new Set<string>();
 
-  return matches
+  // `$2` is the link target; `$1` is only the human-readable title.
+  text
+    .match(weirdLongUrlRegex)
+    ?.forEach((match) =>
+      urls.add(fixUrl(match.replace(weirdLongUrlRegex, "$2")))
+    );
+
+  text.match(simpleUrlRegex)?.forEach((match) => urls.add(match));
+
+  return Array.from(urls)
     .map((m) => {
       try {
         return new URL(m);
@@ -30,7 +53,10 @@ export const transformMDLinks = (value: string) =>
         return val;
       }
 
-      return `[${trimTo(args[1], 20)}](${args[2]})`;
+      const title = trimTo(args[0] ?? args[1], 20);
+      const url = fixUrl(args[1]);
+
+      return `[${title}](${url})`;
     })
     .replace(simpleUrlRegex, (val) => {
       if (val.endsWith(")")) {