diff --git a/README.md b/README.md index 91d6ad9..557c571 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,15 @@ # Vk to Telegram bot ### Configuring + Copy `config.example.yml` to `config.yml` and set it up. #### Running -`yarn && yarn build && node ./dist/index.js` + +Set up the environment: `yarn environment`, then run the application: `yarn && yarn build && node ./dist/index.js` #### Custom arguments + ```bash node ./dist/index.js \ --config ./config-dev.yml diff --git a/docker/Dockerfile b/docker/Dockerfile index 1fdd0e0..245cd3a 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -10,8 +10,15 @@ RUN yarn COPY . . RUN yarn build +FROM node:18-bookworm AS runner + WORKDIR /app/dist + +COPY --from=builder /app/dist ./ +COPY --from=builder /app/templates /templates + COPY ./docker/wait-for-it.sh . + EXPOSE 80 CMD ["node", "./index.js"] diff --git a/package.json b/package.json index 8bc631e..a396a16 100644 --- a/package.json +++ b/package.json @@ -5,9 +5,10 @@ "main": "index.js", "license": "MIT", "scripts": { + "environment": "docker-compose -f ./docker/docker-compose.yml up db -d", "start": "node ./dist/index.js", "dev": "NODE_ENV=dev node -r ts-node/register ./src/index.ts --config=./config.yml", - "build": "rm -rf ./dist && tsc && copyfiles -f ./config*.yml ./dist && copyfiles ./templates/*.md ./dist", + "build": "rm -rf ./output ./dist && tsc && yarn ncc build ./output/index.js -o ./dist && rm -rf ./output", "test": "jest" }, "dependencies": { @@ -55,6 +56,7 @@ "@types/winston": "^2.4.4", "@types/yargs": "^16.0.1", "@types/yup": "^0.29.11", + "@vercel/ncc": "^0.38.3", "copyfiles": "^2.4.1", "jest": "^29.7.0", "prettier": "^2.2.1", diff --git a/src/config/index.ts b/src/config/index.ts index dc3c6c0..c483e43 100644 --- a/src/config/index.ts +++ b/src/config/index.ts @@ -30,8 +30,11 @@ export default function prepareConfig() { const key = JSON.parse( fs.readFileSync(config.calendar?.keyFile).toString() ) as CalendarKeyFile; - 
calendarKeyValidator.validateSync(key); - config.calendarKey = key; + + if (key) { + calendarKeyValidator.validateSync(key); + config.calendarKey = key; + } } catch (error) { console.warn("tried to parse calendar key, got error", error); } diff --git a/src/service/template/index.ts b/src/service/template/index.ts index 4771bf8..ed211f8 100644 --- a/src/service/template/index.ts +++ b/src/service/template/index.ts @@ -9,7 +9,7 @@ import path from "path"; import hb from "handlebars"; import strip from "strip-markdown"; import { VFileCompatible } from "vfile"; -import transformMDLinks from "../../utils/transformMDLinks"; +import { transformMDLinks } from "../../utils/links"; const removeFrontmatter = () => (tree) => { tree.children = tree.children.filter((item) => item.type !== "yaml"); diff --git a/src/service/vk/handlers/PostNewHandler.ts b/src/service/vk/handlers/PostNewHandler.ts index b8a6095..a5f5daa 100644 --- a/src/service/vk/handlers/PostNewHandler.ts +++ b/src/service/vk/handlers/PostNewHandler.ts @@ -11,7 +11,7 @@ import { User, } from "typegram"; import { keys } from "lodash"; -import { extractURLs } from "../../../utils/extract"; +import { extractURLs } from "../../../utils/links"; import logger from "../../logger"; import Composer from "telegraf"; import { Template } from "../../template"; diff --git a/src/utils/__tests__/extractURLs.test.ts b/src/utils/__tests__/extractURLs.test.ts new file mode 100644 index 0000000..6fddfd1 --- /dev/null +++ b/src/utils/__tests__/extractURLs.test.ts @@ -0,0 +1,46 @@ +import { extractURLs } from "../links"; + +describe("extractURLs", () => { + it("extracts simple urls", () => { + const result = extractURLs( + "Trying out links https://map.vault48.org/test 123" + ); + + expect(result.length).toBe(1); + expect(result[0].href).toBe("https://map.vault48.org/test"); + }); + + it("works with that weird new VK urls", () => { + const result = extractURLs( + "Trying out links: 
[#alias|map.vault48.org/test|https://map.vault48.org/test]" + ); + + expect(result.length).toBe(1); + expect(result[0].href).toBe("https://map.vault48.org/test"); + }); + + it("works with that weird new VK urls without scheme", () => { + const result = extractURLs( + "Trying out links: [#alias|map.vault48.org/test|map.vault48.org/test]" + ); + + expect(result.length).toBe(1); + expect(result[0].href).toBe("https://map.vault48.org/test"); + }); + + it("deduplicates matching urls", () => { + const result = extractURLs( + `Trying out links: [#alias|map.vault48.org/test|map.vault48.org/test] map.vault48.org/test https://map.vault48.org/test map.vault48.org/test2 https://map.vault48.org/test3 + [#alias|map.vault48.org/test2|map.vault48.org/test2] [#alias|map.vault48.org/test3|map.vault48.org/test3] [#alias|map.vault48.org/test4|map.vault48.org/test4] https://map.vault48.org/test5 + ` + ).map((it) => it.href); + + expect(result).toEqual([ + "https://map.vault48.org/test", + "https://map.vault48.org/test2", + "https://map.vault48.org/test3", + "https://map.vault48.org/test4", + "https://map.vault48.org/test5", + ]); + }); +}); diff --git a/src/utils/__tests__/transformMDLinks.test.ts b/src/utils/__tests__/transformMDLinks.test.ts new file mode 100644 index 0000000..387cbfe --- /dev/null +++ b/src/utils/__tests__/transformMDLinks.test.ts @@ -0,0 +1,29 @@ +import { transformMDLinks } from "../links"; + +describe("transformMDLinks", () => { + it("extracts simple urls", () => { + expect( + transformMDLinks("Trying out links https://map.vault48.org/test 123") + ).toBe( + "Trying out links [https://map.vault48…](https://map.vault48.org/test) 123" + ); + }); + + it("works with that weird new VK urls", () => { + expect( + transformMDLinks( + "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123" + ) + ).toBe( + "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123" + ); + + expect( + transformMDLinks( + 
"Trying out links [#alias|12345678901234567890123|map.vault48.org/test_abc_def_ghi] 123" + ) + ).toBe( + "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123" + ); + }); +}); diff --git a/src/utils/date/__tests__/getDateFromText.real.json b/src/utils/date/__tests__/getDateFromText.real.json index 1058a2b..846da1d 100644 --- a/src/utils/date/__tests__/getDateFromText.real.json +++ b/src/utils/date/__tests__/getDateFromText.real.json @@ -498,5 +498,15 @@ "text": "Каракан 11 – 12 июня 2023: Итоги \n \n+ Покат был максимально лайтовым. Никакого лосизма, никаких продираний сквозь заросли, никаких тасканий велов, никаких преодолений. \n+ Путь от дома до пункта назначения занял всего 9 часов. Примерно в 15:10 были на берегу, а это значит, что времени на отдых, многократное употребление еды и исследование окрестностей осталось предостаточно. \n+ Пыльная дорога, отсыпанная щебнем между Быстровкой и Завьялово настолько короткая (около 7 км и преодолевается примерно за 20 минут) что неудобствами, которые она доставляет, можно пренебречь. \n+ Лесная дорога от Факела Революции до берега безупречна. Только ради неё стоит ехать в Каракан. Каждый велосипедист хотя бы раз в жизни должен по ней проехать. \n+ С клещами не сталкивались, комары встречались редко, были добрые и совсем не кусались. \n+ Обратно выехали не очень рано, часов в 10. В город приехали не очень поздно, часов в 19 и это, пожалуй, идеально. Достаточно времени на то чтобы выспаться и разобрать снарягу. \n \nНевыносимую печаль в этой поездке вызывали вереницы лесовозов и горы из стволов деревьев вдоль дороги. Не упускайте возможности скатать в Караканский бор, пока его не выпилили. 
\n \nФотографии с поката: https://vk.com/album-124752609_293941505\nПрямые трансляции НВС покатов: https://t.me/pogonia_live \nПодразделение НВС для тех, кто любит писать, что не приедет на покат: https://vk.com/nvs_sportloto", "created": "2023-06-13 23:13:27", "date": "2023-06-11 15:10:00" + }, + { + "text": "Ленивый Ленинкат\n\n22.02.2025\n\nСтарт в 11:00 с площади Ленина.", + "created": "2025-02-20 23:13:27", + "date": "2025-02-22 11:00:00" + }, + { + "text": "Ленивый Ленинкат\n\n22.02.25\n\nСтарт в 11:00 с площади Ленина.", + "created": "2025-02-20 23:13:27", + "date": "2025-02-22 11:00:00" } ] diff --git a/src/utils/extract.ts b/src/utils/extract.ts deleted file mode 100644 index 5da454d..0000000 --- a/src/utils/extract.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { URL } from "url"; - -const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/gim; - -export const extractURLs = (text: string): URL[] => { - const matches = text.match(urlRe) || []; - - return matches - .map((m) => { - try { - return new URL(m); - } catch (e) { - return; - } - }) - .filter((el) => el) as URL[]; -}; diff --git a/src/utils/links.ts b/src/utils/links.ts new file mode 100644 index 0000000..b2514a2 --- /dev/null +++ b/src/utils/links.ts @@ -0,0 +1,57 @@ +import { URL } from "url"; + +const simpleUrlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim; + +/** Yep, that's how VK posts it's links */ +const weirdLongUrlRegex = /\[\#alias\|([^\|]+)\|([^\]]+)\]/gim; + +const fixUrl = (url: string) => + url.startsWith("http") || !url ? 
url : `https://${url}`; + +/** Extracts unique URLs from text: first the URL part (second field) of VK "[#alias|title|url]" links, then plain URLs; candidates that fail to parse are dropped */ +export const extractURLs = (text: string): URL[] => { + const urls = new Set<string>(); + + text + .match(weirdLongUrlRegex) + ?.forEach((match) => + urls.add(fixUrl(match.replace(weirdLongUrlRegex, "$2"))) + ); + + text.match(simpleUrlRegex)?.forEach((match) => urls.add(match)); + + return Array.from(urls) + .map((m) => { + try { + return new URL(m); + } catch (e) { + return; + } + }) + .filter((el) => el) as URL[]; +}; + +/** Adds ... to text if its length exceeds maxLength */ +const trimTo = (val: string, maxLength: number) => + val.length > maxLength ? val.substring(0, maxLength - 1).concat("…") : val; + +/** Formatting all links in markdown output, trimming them to reasonable length */ +export const transformMDLinks = (value: string) => + value + .replace(weirdLongUrlRegex, (val, ...args) => { + if (args.length < 2) { + return val; + } + + const title = trimTo(args[0] ?? args[1], 20); + const url = fixUrl(args[1]); + + return `[${title}](${url})`; + }) + .replace(simpleUrlRegex, (val) => { + if (val.endsWith(")")) { + return val; + } + + return `[${trimTo(val, 20)}](${val})`; + }); diff --git a/src/utils/transformMDLinks.ts b/src/utils/transformMDLinks.ts deleted file mode 100644 index 2b036bc..0000000 --- a/src/utils/transformMDLinks.ts +++ /dev/null @@ -1,10 +0,0 @@ -const urlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/g; - -const trimTo = (val: string, maxLength: number) => - val.length > maxLength ? 
val.substring(0, maxLength - 1).concat("…") : val; - -/** Formatting all links in markdown output, trimming them to reasonable length */ -export default (value: string) => - value.replace(urlRegex, (val) => { - return `[${trimTo(val, 20)}](${val})`; - }); diff --git a/tsconfig.json b/tsconfig.json index e4aa9e0..bd2d97a 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -4,7 +4,7 @@ "esModuleInterop": true, "emitDecoratorMetadata": true, "experimentalDecorators": true, - "outDir": "./dist/", + "outDir": "./output/", "sourceMap": true, "noImplicitAny": false, "allowSyntheticDefaultImports": true, diff --git a/yarn.lock b/yarn.lock index ce41d7a..10e8c41 100644 --- a/yarn.lock +++ b/yarn.lock @@ -830,6 +830,11 @@ resolved "https://registry.yarnpkg.com/@types/yup/-/yup-0.29.11.tgz#d654a112973f5e004bf8438122bd7e56a8e5cd7e" integrity sha512-9cwk3c87qQKZrT251EDoibiYRILjCmxBvvcb4meofCmx1vdnNcR9gyildy5vOHASpOKMsn42CugxUvcwK5eu1g== +"@vercel/ncc@^0.38.3": + version "0.38.3" + resolved "https://registry.yarnpkg.com/@vercel/ncc/-/ncc-0.38.3.tgz#5475eeee3ac0f1a439f237596911525a490a88b5" + integrity sha512-rnK6hJBS6mwc+Bkab+PGPs9OiS0i/3kdTO+CkI8V0/VrW3vmz7O2Pxjw/owOlmo6PKEIxRSeZKv/kuL9itnpYA== + abort-controller@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/abort-controller/-/abort-controller-3.0.0.tgz#eaf54d53b62bae4138e809ca225c8439a6efb392" @@ -3817,7 +3822,7 @@ string-length@^4.0.1: char-regex "^1.0.2" strip-ansi "^6.0.0" -"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0": version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -3835,6 +3840,15 @@ string-width@^4.1.0, string-width@^4.2.0: is-fullwidth-code-point "^3.0.0" strip-ansi "^6.0.0" +string-width@^4.2.3: + version "4.2.3" + resolved 
"https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" + integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== + dependencies: + emoji-regex "^8.0.0" + is-fullwidth-code-point "^3.0.0" + strip-ansi "^6.0.1" + string-width@^5.0.1, string-width@^5.1.2: version "5.1.2" resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794" @@ -3863,7 +3877,7 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1": version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -3877,6 +3891,13 @@ strip-ansi@^6.0.0: dependencies: ansi-regex "^5.0.0" +strip-ansi@^6.0.1: + version "6.0.1" + resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" + integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== + dependencies: + ansi-regex "^5.0.1" + strip-ansi@^7.0.1: version "7.1.0" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.1.0.tgz#d5b6568ca689d8561370b0707685d22434faff45" @@ -4381,7 +4402,16 @@ wordwrap@^1.0.0: resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb" integrity sha1-J1hIEIkUVqQXHI0CJkQa3pDLyus= -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": + version "7.0.0" + resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" + integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== + dependencies: + 
ansi-styles "^4.0.0" + string-width "^4.1.0" + strip-ansi "^6.0.0" + +wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==