mirror of
https://github.com/muerwre/vk-tg-bot.git
synced 2025-04-24 22:46:41 +07:00
fix links parsing
This commit is contained in:
parent
2131447939
commit
f28f291ac2
8 changed files with 68 additions and 33 deletions
|
@ -12,11 +12,9 @@ RUN yarn build
|
||||||
|
|
||||||
FROM node:18-bookworm AS runner
|
FROM node:18-bookworm AS runner
|
||||||
|
|
||||||
|
|
||||||
COPY --from=builder /app/dist ./
|
|
||||||
|
|
||||||
WORKDIR /app/dist
|
WORKDIR /app/dist
|
||||||
|
|
||||||
|
COPY --from=builder /app/dist ./
|
||||||
COPY ./docker/wait-for-it.sh .
|
COPY ./docker/wait-for-it.sh .
|
||||||
|
|
||||||
EXPOSE 80
|
EXPOSE 80
|
||||||
|
|
|
@ -9,7 +9,7 @@ import path from "path";
|
||||||
import hb from "handlebars";
|
import hb from "handlebars";
|
||||||
import strip from "strip-markdown";
|
import strip from "strip-markdown";
|
||||||
import { VFileCompatible } from "vfile";
|
import { VFileCompatible } from "vfile";
|
||||||
import transformMDLinks from "../../utils/transformMDLinks";
|
import { transformMDLinks } from "../../utils/links";
|
||||||
|
|
||||||
const removeFrontmatter = () => (tree) => {
|
const removeFrontmatter = () => (tree) => {
|
||||||
tree.children = tree.children.filter((item) => item.type !== "yaml");
|
tree.children = tree.children.filter((item) => item.type !== "yaml");
|
||||||
|
|
|
@ -11,7 +11,7 @@ import {
|
||||||
User,
|
User,
|
||||||
} from "typegram";
|
} from "typegram";
|
||||||
import { keys } from "lodash";
|
import { keys } from "lodash";
|
||||||
import { extractURLs } from "../../../utils/extract";
|
import { extractURLs } from "../../../utils/links";
|
||||||
import logger from "../../logger";
|
import logger from "../../logger";
|
||||||
import Composer from "telegraf";
|
import Composer from "telegraf";
|
||||||
import { Template } from "../../template";
|
import { Template } from "../../template";
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
import { extractURLs } from "../extract";
|
import { extractURLs } from "../links";
|
||||||
|
|
||||||
describe("extractURLs", () => {
|
describe("extractURLs", () => {
|
||||||
it("extracts simple urls", () => {
|
it("extracts simple urls", () => {
|
||||||
|
|
23
src/utils/__tests__/transformMDLinks.test.ts
Normal file
23
src/utils/__tests__/transformMDLinks.test.ts
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
import { transformMDLinks } from "../links";
|
||||||
|
|
||||||
|
describe("transformMDLinks", () => {
  // A bare URL becomes a markdown link; the visible label is cut to 20
  // characters (19 + "…") while the link target keeps the full URL.
  it("wraps simple urls into markdown links", () => {
    const result = transformMDLinks(
      "Trying out links https://map.vault48.org/test 123"
    );

    expect(result).toBe(
      "Trying out links [https://map.vault48…](https://map.vault48.org/test) 123"
    );
  });

  // Bracketed "[first|second|url]" links: the second segment is used as the
  // (trimmed) label and the third one as the link target.
  it("works with that weird new VK urls", () => {
    const result = transformMDLinks(
      "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123"
    );

    expect(result).toBe(
      "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123"
    );
  });
});
|
|
@ -1,17 +0,0 @@
|
||||||
import { URL } from "url";
|
|
||||||
|
|
||||||
/**
 * Matches bare URLs in free text: `http(s)://host.tld/...` or a scheme-less
 * `www.host.tld/...`. The tail stops at whitespace or `]`.
 */
const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim;

/**
 * Extracts URLs from text.
 *
 * Every regex match is handed to the `URL` constructor; matches it rejects
 * are skipped. Scheme-less matches (`www.example.com`) are rejected by the
 * constructor because they are not absolute URLs, so only `http(s)://` links
 * end up in the result.
 *
 * @param text - arbitrary text to scan
 * @returns parsed URLs in order of appearance (empty array when none)
 */
export const extractURLs = (text: string): URL[] => {
  const urls: URL[] = [];

  for (const candidate of text.match(urlRe) || []) {
    try {
      urls.push(new URL(candidate));
    } catch {
      // Not parseable as an absolute URL — deliberately skipped.
    }
  }

  return urls;
};
|
|
41
src/utils/links.ts
Normal file
41
src/utils/links.ts
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
import { URL } from "url";
|
||||||
|
|
||||||
|
/**
 * Matches bare URLs in free text: `http(s)://host.tld/...` or a scheme-less
 * `www.host.tld/...`. The tail stops at whitespace or `]`, so a URL placed
 * right before a closing bracket does not swallow the bracket.
 * Single capture group: the whole URL.
 */
const simpleUrlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim;

/**
 * Matches bracketed links of the form `[first|second|url]`.
 * Capture groups: 1 — first segment, 2 — second segment, 3 — URL.
 *
 * The two leading segments exclude `[`, `]`, `|` and line breaks rather than
 * using a greedy `.*`: with `.*` two links on the same line were merged into
 * one match (everything up to the last `|...|url]` landed in group 1), and an
 * unrelated `[...]` earlier on the line was swallowed as well.
 */
const weirdLongUrlRegex = /\[([^\[\]|\r\n]*)\|([^\[\]|\r\n]*)\|(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})\]/g;
|
||||||
|
|
||||||
|
/**
 * Extracts URLs from text.
 *
 * Every `simpleUrlRegex` match is handed to the `URL` constructor; matches it
 * rejects are skipped. Scheme-less matches (`www.example.com`) are rejected
 * by the constructor because they are not absolute URLs, so only `http(s)://`
 * links end up in the result.
 *
 * @param text - arbitrary text to scan
 * @returns parsed URLs in order of appearance (empty array when none)
 */
export const extractURLs = (text: string): URL[] => {
  const urls: URL[] = [];

  for (const candidate of text.match(simpleUrlRegex) || []) {
    try {
      urls.push(new URL(candidate));
    } catch {
      // Not parseable as an absolute URL — deliberately skipped.
    }
  }

  return urls;
};
|
||||||
|
|
||||||
|
/**
 * Shortens `val` to at most `maxLength` characters, replacing the cut-off
 * tail with "…". Strings that already fit are returned untouched.
 *
 * Length is counted in Unicode code points, not UTF-16 units, so the cut can
 * never land in the middle of a surrogate pair (e.g. an emoji) and leave a
 * broken character in front of the ellipsis.
 *
 * @param val - text to shorten
 * @param maxLength - maximum length of the result, ellipsis included
 */
const trimTo = (val: string, maxLength: number): string => {
  const chars = Array.from(val);

  if (chars.length <= maxLength) {
    return val;
  }

  // Clamp at 0 so a non-positive maxLength yields just "…" (a negative end
  // index would otherwise make slice() count from the end of the array).
  return chars
    .slice(0, Math.max(0, maxLength - 1))
    .join("")
    .concat("…");
};
|
||||||
|
|
||||||
|
/**
 * Tells whether `source[start, end)` lies inside the label part of a markdown
 * link, i.e. between a `[` and the `](` that closes the label.
 */
const isInsideMarkdownLabel = (
  source: string,
  start: number,
  end: number
): boolean => {
  const open = source.lastIndexOf("[", start);
  const close = source.indexOf("]", end);

  if (open === -1 || close === -1) {
    return false;
  }

  return (
    source.startsWith("(", close + 1) &&
    // the nearest "[" must still be open when the match starts...
    !source.slice(open + 1, start).includes("]") &&
    // ...and no other bracket may open before the label closes
    !source.slice(end, close).includes("[")
  );
};

/**
 * Formats all links in markdown output, trimming their labels to a
 * reasonable length (20 characters).
 *
 * Runs two passes over `value`:
 *  1. bracketed `[first|second|url]` links become `[second](url)`;
 *  2. remaining bare URLs become `[url](url)`.
 *
 * The second pass leaves alone anything that already belongs to a markdown
 * link — both the `(target)` and a URL-looking `[label]` — so output of the
 * first pass is never wrapped twice.
 *
 * @param value - text that may contain links
 * @returns the text with links rewritten as markdown
 */
export const transformMDLinks = (value: string): string =>
  value
    .replace(
      weirdLongUrlRegex,
      // Groups: 1 — first segment (unused), 2 — label, 3 — URL.
      (_match: string, _first: string, label: string, url: string) =>
        `[${trimTo(label, 20)}](${url})`
    )
    .replace(
      simpleUrlRegex,
      // simpleUrlRegex has a single capture group, hence offset is the 3rd arg.
      (val: string, _url: string, offset: number, source: string) => {
        // The URL pattern also consumes a trailing ")", so a match ending
        // with it is the target of an existing markdown link.
        if (val.endsWith(")")) {
          return val;
        }

        // A label that itself looks like a URL (e.g. "www.site.com/…")
        // must not be turned into a nested link.
        if (isInsideMarkdownLabel(source, offset, offset + val.length)) {
          return val;
        }

        return `[${trimTo(val, 20)}](${val})`;
      }
    );
|
|
@ -1,10 +0,0 @@
|
||||||
/**
 * Matches bare URLs in free text: `http(s)://host.tld/...` or a scheme-less
 * `www.host.tld/...`. The tail stops at whitespace or `]` so that a URL
 * sitting right before a closing bracket does not swallow the bracket.
 */
const urlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/g;

/**
 * Shortens `val` to at most `maxLength` characters, replacing the cut-off
 * tail with "…". Length is counted in Unicode code points so the cut never
 * splits a surrogate pair.
 */
const trimTo = (val: string, maxLength: number): string => {
  const chars = Array.from(val);

  if (chars.length <= maxLength) {
    return val;
  }

  // Clamp at 0: a negative end index would make slice() count from the end.
  return chars
    .slice(0, Math.max(0, maxLength - 1))
    .join("")
    .concat("…");
};

/**
 * Formats all links in markdown output, trimming them to reasonable length:
 * every bare URL becomes `[trimmed url](url)`.
 *
 * @param value - text that may contain bare URLs
 * @returns the text with URLs rewritten as markdown links
 */
const transformMDLinks = (value: string): string =>
  value.replace(urlRegex, (val: string) => `[${trimTo(val, 20)}](${val})`);

export default transformMDLinks;
|
|
Loading…
Add table
Add a link
Reference in a new issue