mirror of
https://github.com/muerwre/vk-tg-bot.git
synced 2025-04-24 22:46:41 +07:00
fix links parsing
This commit is contained in:
parent
2131447939
commit
f28f291ac2
8 changed files with 68 additions and 33 deletions
|
@ -12,11 +12,9 @@ RUN yarn build
|
|||
|
||||
FROM node:18-bookworm AS runner
|
||||
|
||||
|
||||
COPY --from=builder /app/dist ./
|
||||
|
||||
WORKDIR /app/dist
|
||||
|
||||
COPY --from=builder /app/dist ./
|
||||
COPY ./docker/wait-for-it.sh .
|
||||
|
||||
EXPOSE 80
|
||||
|
|
|
@ -9,7 +9,7 @@ import path from "path";
|
|||
import hb from "handlebars";
|
||||
import strip from "strip-markdown";
|
||||
import { VFileCompatible } from "vfile";
|
||||
import transformMDLinks from "../../utils/transformMDLinks";
|
||||
import { transformMDLinks } from "../../utils/links";
|
||||
|
||||
const removeFrontmatter = () => (tree) => {
|
||||
tree.children = tree.children.filter((item) => item.type !== "yaml");
|
||||
|
|
|
@ -11,7 +11,7 @@ import {
|
|||
User,
|
||||
} from "typegram";
|
||||
import { keys } from "lodash";
|
||||
import { extractURLs } from "../../../utils/extract";
|
||||
import { extractURLs } from "../../../utils/links";
|
||||
import logger from "../../logger";
|
||||
import Composer from "telegraf";
|
||||
import { Template } from "../../template";
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import { extractURLs } from "../extract";
|
||||
import { extractURLs } from "../links";
|
||||
|
||||
describe("extractURLs", () => {
|
||||
it("extracts simple urls", () => {
|
||||
|
|
23
src/utils/__tests__/transformMDLinks.test.ts
Normal file
23
src/utils/__tests__/transformMDLinks.test.ts
Normal file
|
@ -0,0 +1,23 @@
|
|||
import { transformMDLinks } from "../links";
|
||||
|
||||
// Jest suite for transformMDLinks: each case feeds one line of text through
// the transformer and compares the produced markdown with a fixed string.
describe("transformMDLinks", () => {
  it("extracts simple urls", () => {
    // A bare URL becomes a markdown link whose label is cut to 20 characters.
    const input = "Trying out links https://map.vault48.org/test 123";
    const expected =
      "Trying out links [https://map.vault48…](https://map.vault48.org/test) 123";

    expect(transformMDLinks(input)).toBe(expected);
  });

  it("works with that weird new VK urls", () => {
    // A bracketed `[alias|title|url]` link uses the (trimmed) title as label.
    const input =
      "Trying out links [#alias|12345678901234567890123|https://map.vault48.org/test_abc_def_ghi] 123";
    const expected =
      "Trying out links [1234567890123456789…](https://map.vault48.org/test_abc_def_ghi) 123";

    expect(transformMDLinks(input)).toBe(expected);
  });
});
|
|
@ -1,17 +0,0 @@
|
|||
import { URL } from "url";
|
||||
|
||||
/**
 * Matches URLs in free text: `http(s)://…` links as well as scheme-less
 * `www.…` links. A match ends at the next whitespace character or `]`.
 */
const urlRe = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim;

/**
 * Collects every URL found in `text`, in order of appearance.
 *
 * @param text - arbitrary text to scan
 * @returns parsed URL objects; candidates rejected by the `URL` constructor
 * (for example scheme-less `www.` matches) are left out
 */
export const extractURLs = (text: string): URL[] => {
  const found: URL[] = [];

  for (const candidate of text.match(urlRe) || []) {
    try {
      found.push(new URL(candidate));
    } catch (e) {
      // Not parseable as an absolute URL: skip this candidate.
    }
  }

  return found;
};
|
41
src/utils/links.ts
Normal file
41
src/utils/links.ts
Normal file
|
@ -0,0 +1,41 @@
|
|||
import { URL } from "url";
|
||||
|
||||
/**
 * Matches a bare URL in free text: `http(s)://…` links as well as scheme-less
 * `www.…` links. A match ends at the next whitespace character or `]`, so a
 * URL inside a bracketed link stops where the bracket closes.
 */
const simpleUrlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s\]]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s\]]{2,}|www\.[a-zA-Z0-9]+\.[^\s\]]{2,})/gim;

/**
 * Pattern source for VK-style bracketed links: `[alias|title|url]`.
 *
 * Alias and title may not contain `[`, `]`, `|` or line breaks. Keeping them
 * that narrow (instead of a greedy `.*`) guarantees that two bracketed links
 * on the same line are matched separately rather than merged into one.
 * Capture groups: 1 = alias, 2 = title, 3 = url.
 */
const vkLinkPattern = String.raw`\[([^\[\]|\r\n]*)\|([^\[\]|\r\n]*)\|${simpleUrlRegex.source}\]`;

/** Pattern source for an already formatted markdown link: `[label](target)`. */
const markdownLinkPattern = String.raw`\[[^\]\r\n]*\]\([^)\s]*\)`;

/**
 * Single-pass matcher used by transformMDLinks. Alternatives are tried in
 * order at every position: existing markdown link (no groups), VK bracketed
 * link (groups 1-3), bare URL (group 4).
 */
const linkRegex = new RegExp(
  `${markdownLinkPattern}|${vkLinkPattern}|${simpleUrlRegex.source}`,
  "gi"
);

/**
 * Extracts URLs from text.
 *
 * @param text - arbitrary text to scan
 * @returns parsed URL objects in order of appearance; candidates rejected by
 * the `URL` constructor (for example scheme-less `www.` matches) are left out
 */
export const extractURLs = (text: string): URL[] => {
  const urls: URL[] = [];

  for (const candidate of text.match(simpleUrlRegex) || []) {
    try {
      urls.push(new URL(candidate));
    } catch (e) {
      // Not parseable as an absolute URL: skip this candidate.
    }
  }

  return urls;
};

/**
 * Shortens `val` to at most `maxLength` characters, replacing the cut-off
 * tail with a single `…`. Values that already fit are returned unchanged.
 */
const trimTo = (val: string, maxLength: number) =>
  val.length > maxLength ? val.substring(0, maxLength - 1).concat("…") : val;

/**
 * Formats all links in markdown output, trimming their labels to a reasonable
 * length (20 characters).
 *
 * - `[alias|title|url]` becomes `[title](url)`; an empty title falls back to
 *   the URL itself so the link never renders with an empty label.
 * - A bare URL becomes `[url](url)`.
 * - Existing `[label](target)` links are returned untouched.
 *
 * Everything happens in one pass, so text produced for one link is never
 * scanned again and wrapped a second time.
 *
 * @param value - markdown text to transform
 * @returns the text with links rewritten as described above
 */
export const transformMDLinks = (value: string): string =>
  value.replace(
    linkRegex,
    (
      match: string,
      _alias: string | undefined,
      title: string | undefined,
      vkUrl: string | undefined,
      plainUrl: string | undefined
    ): string => {
      if (vkUrl !== undefined) {
        return `[${trimTo(title || vkUrl, 20)}](${vkUrl})`;
      }

      // A bare match ending with ")" is most likely wrapped in parentheses or
      // part of hand-written markdown; leave it as it is.
      if (plainUrl !== undefined && !plainUrl.endsWith(")")) {
        return `[${trimTo(plainUrl, 20)}](${plainUrl})`;
      }

      return match;
    }
  );
|
|
@ -1,10 +0,0 @@
|
|||
/**
 * Matches URLs in free text: `http(s)://…` links as well as scheme-less
 * `www.…` links. A match runs up to the next whitespace character.
 */
const urlRegex = /(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})/g;

/**
 * Shortens `val` to at most `maxLength` characters, replacing the cut-off
 * tail with a single `…`. Values that already fit are returned unchanged.
 */
const trimTo = (val: string, maxLength: number) => {
  if (val.length > maxLength) {
    return `${val.substring(0, maxLength - 1)}…`;
  }

  return val;
};

/**
 * Rewrites every URL in `value` as a markdown link whose label is the URL
 * itself, cut to 20 characters.
 */
export default (value: string) =>
  value.replace(urlRegex, (url) => `[${trimTo(url, 20)}](${url})`);
|
Loading…
Add table
Add a link
Reference in a new issue