From 96fea09b3f5d9ada43d7fa17387067dc8a019473 Mon Sep 17 00:00:00 2001 From: Fedor Katurov Date: Mon, 24 Feb 2025 19:48:12 +0700 Subject: [PATCH 1/7] fix some startup problems --- README.md | 5 ++++- package.json | 1 + src/config/index.ts | 7 +++++-- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 91d6ad9..557c571 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # Vk to Telegram bot ### Configuring + Copy `config.example.yml` to `config.yml` and set it up. #### Running -`yarn && yarn build && node ./dist/index.js` + +Setup environment: `yarn environment`, then run the application: `yarn && yarn build && node ./dist/index.js` #### Custom arguments + ```bash node ./dist/index.js \ --config ./config-dev.yml diff --git a/package.json b/package.json index 8bc631e..bf14297 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "main": "index.js", "license": "MIT", "scripts": { + "environment": "docker-compose -f ./docker/docker-compose.yml up db -d", "start": "node ./dist/index.js", "dev": "NODE_ENV=dev node -r ts-node/register ./src/index.ts --config=./config.yml", "build": "rm -rf ./dist && tsc && copyfiles -f ./config*.yml ./dist && copyfiles ./templates/*.md ./dist", diff --git a/src/config/index.ts b/src/config/index.ts index dc3c6c0..c483e43 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -30,8 +30,11 @@ export default function prepareConfig() { const key = JSON.parse( fs.readFileSync(config.calendar?.keyFile).toString() ) as CalendarKeyFile; - calendarKeyValidator.validateSync(key); - config.calendarKey = key; + + if (key) { + calendarKeyValidator.validateSync(key); + config.calendarKey = key; + } } catch (error) { console.warn("tried to parse calendar key, got error", error); } From 6b0b0b9b572c6c7d4bc5ffdb4e9195292541bf7c Mon Sep 17 00:00:00 2001 From: Fedor Katurov Date: Mon, 24 Feb 2025 19:48:21 +0700 Subject: [PATCH 2/7] fix url extraction --- src/utils/__tests__/extractURLs.test.ts | 21 +++++++++++++++++++++ src/utils/extract.ts | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 src/utils/__tests__/extractURLs.test.ts diff --git a/src/utils/__tests__/extractURLs.test.ts b/src/utils/__tests__/extractURLs.test.ts new file mode 100644 index 0000000..ffabfda --- /dev/null +++ b/src/utils/__tests__/extractURLs.test.ts @@ -0,0 +1,21 @@ +import { extractURLs } from "../extract"; + +describe("extractURLs", () => { + it("extracts simple urls", () => { + const result = extractURLs( + "Trying out links https://map.vault48.org/test 123" + ); + + expect(result.length).toBe(1); + expect(result[0].href).toBe("https://map.vault48.org/test"); + }); + + it("works with that weird new VK urls", () => { + const result = extractURLs( + "Trying out links: [#alias|map.vault48.org/test|https://map.vault48.org/test]" + ); + + expect(result.length).toBe(1); + expect(result[0].href).toBe("https://map.vault48.org/test"); + }); +}); diff --git a/src/utils/extract.ts b/src/utils/extract.ts index 5da454d..70e6279 100644 --- a/src/utils/extract.ts +++ b/src/utils/extract.ts @@ -1,6 +1,6 @@ import { URL } from "url"; -const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/gim; +const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim; export const extractURLs = (text: string): URL[] => { const matches = text.match(urlRe) || []; From 3a0b21bc196cb27f7efc5b85430890cccf1b6413 Mon Sep 17 00:00:00 2001 From: Fedor Katurov Date: Mon, 24 Feb 2025 19:52:02 +0700 Subject: [PATCH 3/7] add some tests --- src/utils/date/__tests__/getDateFromText.real.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/utils/date/__tests__/getDateFromText.real.json b/src/utils/date/__tests__/getDateFromText.real.json index 1058a2b..846da1d 100644 --- a/src/utils/date/__tests__/getDateFromText.real.json +++ b/src/utils/date/__tests__/getDateFromText.real.json @@ -498,5 +498,15 @@ "text": "Каракан 11 – 12 июня 2023: Итоги \n \n+ Покат был максимально лайтовым. Никакого лосизма, никаких продираний сквозь заросли, никаких тасканий велов, никаких преодолений. \n+ Путь от дома до пункта назначения занял всего 9 часов. Примерно в 15:10 были на берегу, а это значит, что времени на отдых, многократное употребление еды и исследование окрестностей осталось предостаточно. \n+ Пыльная дорога, отсыпанная щебнем между Быстровкой и Завьялово настолько короткая (около 7 км и преодолевается примерно за 20 минут) что неудобствами, которые она доставляет, можно пренебречь. \n+ Лесная дорога от Факела Революции до берега безупречна. Только ради неё стоит ехать в Каракан. Каждый велосипедист хотя бы раз в жизни должен по ней проехать. \n+ С клещами не сталкивались, комары встречались редко, были добрые и совсем не кусались. \n+ Обратно выехали не очень рано, часов в 10. В город приехали не очень поздно, часов в 19 и это, пожалуй, идеально. Достаточно времени на то чтобы выспаться и разобрать снарягу. \n \nНевыносимую печаль в этой поездке вызывали вереницы лесовозов и горы из стволов деревьев вдоль дороги. Не упускайте возможности скатать в Караканский бор, пока его не выпилили. \n \nФотографии с поката: https://vk.com/album-124752609_293941505\nПрямые трансляции НВС покатов: https://t.me/pogonia_live \nПодразделение НВС для тех, кто любит писать, что не приедет на покат: https://vk.com/nvs_sportloto", "created": "2023-06-13 23:13:27", "date": "2023-06-11 15:10:00" + }, + { + "text": "Ленивый Ленинкат\n\n22.02.2025\n\nСтарт в 11:00 с площади Ленина.", + "created": "2025-02-20 23:13:27", + "date": "2025-02-22 11:00:00" + }, + { + "text": "Ленивый Ленинкат\n\n22.02.25\n\nСтарт в 11:00 с площади Ленина.", + "created": "2025-02-20 23:13:27", + "date": "2025-02-22 11:00:00" } ] From 21314479390784ab0e6004e1816635fa95d29c12 Mon Sep 17 00:00:00 2001 From: Fedor Katurov Date: Mon, 24 Feb 2025 20:02:11 +0700 Subject: [PATCH 4/7] add ncc builder --- docker/Dockerfile | 7 +++++++ package.json | 3 ++- tsconfig.json | 2 +- yarn.lock | 36 +++++++++++++++++++++++++++++++++--- 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 1fdd0e0..50689ff 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -10,8 +10,15 @@ RUN yarn COPY . . RUN yarn build +FROM node:18-bookworm AS runner + + +COPY --from=builder /app/dist ./ + WORKDIR /app/dist + COPY ./docker/wait-for-it.sh . + EXPOSE 80 CMD ["node", "./index.js"] diff --git a/package.json b/package.json index bf14297..0970327 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "environment": "docker-compose -f ./docker/docker-compose.yml up db -d", "start": "node ./dist/index.js", "dev": "NODE_ENV=dev node -r ts-node/register ./src/index.ts --config=./config.yml", - "build": "rm -rf ./dist && tsc && copyfiles -f ./config*.yml ./dist && copyfiles ./templates/*.md ./dist", + "build": "rm -rf ./output ./dist && tsc && yarn ncc build ./output/index.js -o ./dist && rm -rf ./output && copyfiles -f ./config*.yml ./dist && copyfiles ./templates/*.md ./dist", "test": "jest" }, "dependencies": { @@ -56,6 +56,7 @@ "@types/winston": "^2.4.4", "@types/yargs": "^16.0.1", "@types/yup": "^0.29.11", + "@vercel/ncc": "^0.38.3", "copyfiles": "^2.4.1", "jest": "^29.7.0", "prettier": "^2.2.1", diff --git a/tsconfig.json b/tsconfig.json index e4aa9e0..bd2d97a 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,7 +4,7 @@ "esModuleInterop": true, "emitDecoratorMetadata": true, "experimentalDecorators": true, - "outDir": "./dist/", + "outDir": "./output/", "sourceMap": true, "noImplicitAny": false, "allowSyntheticDefaultImports": true, diff --git a/yarn.lock b/yarn.lock index ce41d7a..10e8c41 100644 --- a/yarn.lock +++ b/yarn.lock @@ -830,6 +830,11 @@ resolved "https://registry.yarnpkg.com/@types/yup/-/yup-0.29.11.tgz#d654a112973f5e004bf8438122bd7e56a8e5cd7e" integrity sha512-9cwk3c87qQKZrT251EDoibiYRILjCmxBvvcb4meofCmx1vdnNcR9gyildy5vOHASpOKMsn42CugxUvcwK5eu1g== +"@vercel/ncc@^0.38.3": + version "0.38.3" + resolved "https://registry.yarnpkg.com/@vercel/ncc/-/ncc-0.38.3.tgz#5475eeee3ac0f1a439f237596911525a490a88b5" + integrity sha512-rnK6hJBS6mwc+Bkab+PGPs9OiS0i/3kdTO+CkI8V0/VrW3vmz7O2Pxjw/owOlmo6PKEIxRSeZKv/kuL9itnpYA== + abort-controller@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/abort-controller/-/abort-controller-3.0.0.tgz#eaf54d53b62bae4138e809ca225c8439a6efb392" @@ -3817,7 +3822,7 @@ string-length@^4.0.1: char-regex "^1.0.2" strip-ansi "^6.0.0" -"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0": version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -3835,6 +3840,15 @@ string-width@^4.1.0, string-width@^4.2.0: is-fullwidth-code-point "^3.0.0" strip-ansi "^6.0.0" +string-width@^4.2.3: + version "4.2.3" + resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + string-width@^5.0.1, string-width@^5.1.2: version "5.1.2" resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794" @@ -3863,7 +3877,7 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1": version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -3877,6 +3891,13 @@ strip-ansi@^6.0.0: dependencies: ansi-regex "^5.0.0" +strip-ansi@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + strip-ansi@^7.0.1: version "7.1.0" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.1.0.tgz#d5b6568ca689d8561370b0707685d22434faff45" @@ -4381,7 +4402,16 @@ wordwrap@^1.0.0: resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb" integrity sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus= -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== From f28f291ac21b210e733f7b8b6681ae71b92df9cd Mon Sep 17 00:00:00 2001 From: Fedor Katurov Date: Mon, 24 Feb 2025 20:30:24 +0700 Subject: [PATCH 5/7] fix links parsing --- docker/Dockerfile | 4 +- src/service/template/index.ts | 2 +- src/service/vk/handlers/PostNewHandler.ts | 2 +- src/utils/__tests__/extractURLs.test.ts | 2 +- src/utils/__tests__/transformMDLinks.test.ts | 23 +++++++++++ src/utils/extract.ts | 17 -------- src/utils/links.ts | 41 ++++++++++++++++++++ src/utils/transformMDLinks.ts | 10 ----- 8 files changed, 68 insertions(+), 33 deletions(-) create mode 100644 src/utils/__tests__/transformMDLinks.test.ts delete mode 100644 src/utils/extract.ts create mode 100644 src/utils/links.ts delete mode 100644 src/utils/transformMDLinks.ts diff --git a/docker/Dockerfile b/docker/Dockerfile index 50689ff..90f1e44 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -12,11 +12,9 @@ RUN yarn build FROM node:18-bookworm AS runner - -COPY --from=builder /app/dist ./ - WORKDIR /app/dist +COPY --from=builder /app/dist ./ COPY ./docker/wait-for-it.sh . EXPOSE 80 diff --git a/src/service/template/index.ts b/src/service/template/index.ts index 4771bf8..ed211f8 100644 --- a/src/service/template/index.ts +++ b/src/service/template/index.ts @@ -9,7 +9,7 @@ import path from "path"; import hb from "handlebars"; import strip from "strip-markdown"; import { VFileCompatible } from "vfile"; -import transformMDLinks from "../../utils/transformMDLinks"; +import { transformMDLinks } from "../../utils/links"; const removeFrontmatter = () => (tree) => { tree.children = tree.children.filter((item) => item.type !== "yaml"); diff --git a/src/service/vk/handlers/PostNewHandler.ts b/src/service/vk/handlers/PostNewHandler.ts index b8a6095..a5f5daa 100644 --- a/src/service/vk/handlers/PostNewHandler.ts +++ b/src/service/vk/handlers/PostNewHandler.ts @@ -11,7 +11,7 @@ import { User, } from "typegram"; import { keys } from "lodash"; -import { extractURLs } from "../../../utils/extract"; +import { extractURLs } from "../../../utils/links"; import logger from "../../logger"; import Composer from "telegraf"; import { Template } from "../../template"; diff --git a/src/utils/__tests__/extractURLs.test.ts b/src/utils/__tests__/extractURLs.test.ts index ffabfda..ebf2b4a 100644 --- a/src/utils/__tests__/extractURLs.test.ts +++ b/src/utils/__tests__/extractURLs.test.ts @@ -1,4 +1,4 @@ -import { extractURLs } from "../extract"; +import { extractURLs } from "../links"; describe("extractURLs", () => { it("extracts simple urls", () => { diff --git a/src/utils/__tests__/transformMDLinks.test.ts b/src/utils/__tests__/transformMDLinks.test.ts new file mode 100644 index 0000000..c46e4dd --- /dev/null +++ b/src/utils/__tests__/transformMDLinks.test.ts @@ -0,0 +1,23 @@ +import { transformMDLinks } from "../links"; + +describe("transformMDLinks", () => { + it("extracts simple urls", () => { + const result = transformMDLinks( + "Trying out links https://map.vault48.org/test 123" + ); + + expect(result).toBe( + "Trying out links [https://map.vault48…](https://map.vault48.org/test) 123" + ); + }); + + it("works with that weird new VK urls", () => { + const result = transformMDLinks( + "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123" + ); + + expect(result).toBe( + "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123" + ); + }); +}); diff --git a/src/utils/extract.ts b/src/utils/extract.ts deleted file mode 100644 index 70e6279..0000000 --- a/src/utils/extract.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { URL } from "url"; - -const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim; - -export const extractURLs = (text: string): URL[] => { - const matches = text.match(urlRe) || []; - - return matches - .map((m) => { - try { - return new URL(m); - } catch (e) { - return; - } - }) - .filter((el) => el) as URL[]; -}; diff --git a/src/utils/links.ts b/src/utils/links.ts new file mode 100644 index 0000000..8bd2429 --- /dev/null +++ b/src/utils/links.ts @@ -0,0 +1,41 @@ +import { URL } from "url"; + +const simpleUrlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim; +const weirdLongUrlRegex = /\[(.*)\|(.*)\|(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})\]/g; + +/** Extracts URLs from text */ +export const extractURLs = (text: string): URL[] => { + const matches = text.match(simpleUrlRegex) || []; + + return matches + .map((m) => { + try { + return new URL(m); + } catch (e) { + return; + } + }) + .filter((el) => el) as URL[]; +}; + +/** Adds ... to text if its length exceeds maxLength */ +const trimTo = (val: string, maxLength: number) => + val.length > maxLength ? val.substring(0, maxLength - 1).concat("…") : val; + +/** Formatting all links in markdown output, trimming them to reasonable length */ +export const transformMDLinks = (value: string) => + value + .replace(weirdLongUrlRegex, (val, ...args) => { + if (args.length < 2) { + return val; + } + + return `[${trimTo(args[1], 20)}](${args[2]})`; + }) + .replace(simpleUrlRegex, (val) => { + if (val.endsWith(")")) { + return val; + } + + return `[${trimTo(val, 20)}](${val})`; + }); diff --git a/src/utils/transformMDLinks.ts b/src/utils/transformMDLinks.ts deleted file mode 100644 index 2b036bc..0000000 --- a/src/utils/transformMDLinks.ts +++ /dev/null @@ -1,10 +0,0 @@ -const urlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/g; - -const trimTo = (val: string, maxLength: number) => - val.length > maxLength ? val.substring(0, maxLength - 1).concat("…") : val; - -/** Formatting all links in markdown output, trimming them to reasonable length */ -export default (value: string) => - value.replace(urlRegex, (val) => { - return `[${trimTo(val, 20)}](${val})`; - }); From 6ac6ca93562084d0aea919418b0bf2744928de74 Mon Sep 17 00:00:00 2001 From: Fedor Katurov Date: Mon, 24 Feb 2025 21:00:07 +0700 Subject: [PATCH 6/7] fix link extraction --- src/utils/__tests__/extractURLs.test.ts | 25 ++++++++++++++++++++ src/utils/__tests__/transformMDLinks.test.ts | 22 ++++++++++------- src/utils/links.ts | 24 +++++++++++++++---- 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/src/utils/__tests__/extractURLs.test.ts b/src/utils/__tests__/extractURLs.test.ts index ebf2b4a..6fddfd1 100644 --- a/src/utils/__tests__/extractURLs.test.ts +++ b/src/utils/__tests__/extractURLs.test.ts @@ -18,4 +18,29 @@ describe("extractURLs", () => { expect(result.length).toBe(1); expect(result[0].href).toBe("https://map.vault48.org/test"); }); + + it("works with that weird new VK urls without scheme", () => { + const result = extractURLs( + "Trying out links: [#alias|map.vault48.org/test|map.vault48.org/test]" + ); + + expect(result.length).toBe(1); + expect(result[0].href).toBe("https://map.vault48.org/test"); + }); + + it("deduplicates matching urls", () => { + const result = extractURLs( + `Trying out links: [#alias|map.vault48.org/test|map.vault48.org/test] map.vault48.org/test https://map.vault48.org/test map.vault48.org/test2 https://map.vault48.org/test3 + [#alias|map.vault48.org/test2|map.vault48.org/test2] [#alias|map.vault48.org/test3|map.vault48.org/test3] [#alias|map.vault48.org/test4|map.vault48.org/test4] https://map.vault48.org/test5 + ` + ).map((it) => it.href); + + expect(result).toEqual([ + "https://map.vault48.org/test", + "https://map.vault48.org/test2", + "https://map.vault48.org/test3", + "https://map.vault48.org/test4", + "https://map.vault48.org/test5", + ]); + }); }); diff --git a/src/utils/__tests__/transformMDLinks.test.ts b/src/utils/__tests__/transformMDLinks.test.ts index c46e4dd..387cbfe 100644 --- a/src/utils/__tests__/transformMDLinks.test.ts +++ b/src/utils/__tests__/transformMDLinks.test.ts @@ -2,21 +2,27 @@ import { transformMDLinks } from "../links"; describe("transformMDLinks", () => { it("extracts simple urls", () => { - const result = transformMDLinks( - "Trying out links https://map.vault48.org/test 123" - ); - - expect(result).toBe( + expect( + transformMDLinks("Trying out links https://map.vault48.org/test 123") + ).toBe( "Trying out links [https://map.vault48…](https://map.vault48.org/test) 123" ); }); it("works with that weird new VK urls", () => { - const result = transformMDLinks( - "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123" + expect( + transformMDLinks( + "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123" + ) + ).toBe( + "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123" ); - expect(result).toBe( + expect( + transformMDLinks( + "Trying out links [#alias|12345678901234567890123|map.vault48.org/test_abc_def_ghi] 123" + ) + ).toBe( "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123" ); }); diff --git a/src/utils/links.ts b/src/utils/links.ts index 8bd2429..b2514a2 100644 --- a/src/utils/links.ts +++ b/src/utils/links.ts @@ -1,13 +1,26 @@ import { URL } from "url"; const simpleUrlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim; -const weirdLongUrlRegex = /\[(.*)\|(.*)\|(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})\]/g; + +/** Yep, that's how VK posts it's links */ +const weirdLongUrlRegex = /\[\#alias\|([^\|]+)\|([^\]]+)\]/gim; + +const fixUrl = (url: string) => + url.startsWith("http") || !url ? url : `https://${url}`; /** Extracts URLs from text */ export const extractURLs = (text: string): URL[] => { - const matches = text.match(simpleUrlRegex) || []; + const urls = new Set(); - return matches + text + .match(weirdLongUrlRegex) + ?.forEach((match) => + urls.add(fixUrl(match.replace(weirdLongUrlRegex, "$1"))) + ); + + text.match(simpleUrlRegex)?.forEach((match) => urls.add(match)); + + return Array.from(urls) .map((m) => { try { return new URL(m); @@ -30,7 +43,10 @@ export const transformMDLinks = (value: string) => return val; } - return `[${trimTo(args[1], 20)}](${args[2]})`; + const title = trimTo(args[0] ?? args[1], 20); + const url = fixUrl(args[1]); + + return `[${title}](${url})`; }) .replace(simpleUrlRegex, (val) => { if (val.endsWith(")")) { From eb63618aeb35ee3112bc78d6bfc75ae77a21b709 Mon Sep 17 00:00:00 2001 From: Fedor Katurov Date: Mon, 24 Feb 2025 21:05:25 +0700 Subject: [PATCH 7/7] fix dockerfile --- docker/Dockerfile | 2 ++ package.json | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 90f1e44..245cd3a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -15,6 +15,8 @@ FROM node:18-bookworm AS runner WORKDIR /app/dist COPY --from=builder /app/dist ./ +COPY --from=builder /app/templates /templates + COPY ./docker/wait-for-it.sh . EXPOSE 80 diff --git a/package.json b/package.json index 0970327..a396a16 100644 --- a/package.json +++ b/package.json @@ -8,7 +8,7 @@ "environment": "docker-compose -f ./docker/docker-compose.yml up db -d", "start": "node ./dist/index.js", "dev": "NODE_ENV=dev node -r ts-node/register ./src/index.ts --config=./config.yml", - "build": "rm -rf ./output ./dist && tsc && yarn ncc build ./output/index.js -o ./dist && rm -rf ./output && copyfiles -f ./config*.yml ./dist && copyfiles ./templates/*.md ./dist", + "build": "rm -rf ./output ./dist && tsc && yarn ncc build ./output/index.js -o ./dist && rm -rf ./output", "test": "jest" }, "dependencies": {