From 90abfb6de874f6c2c27240627a0398c802e9e3fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20LUDWIG?= Date: Fri, 26 Jul 2024 19:05:59 +0200 Subject: [PATCH] feat: try deep internal links of wikipedia pages --- TODO.md | 9 +-- apps/cli/package.json | 2 +- apps/cli/src/index.ts | 29 ++++++-- apps/cli/src/main.ts | 11 +++ .../src/WikipediaClient.tsx | 41 ++++++----- .../src/wikipedia-api.ts | 69 ++++++++++++++++++- 6 files changed, 129 insertions(+), 32 deletions(-) create mode 100755 apps/cli/src/main.ts diff --git a/TODO.md b/TODO.md index 08345ec..c603f41 100644 --- a/TODO.md +++ b/TODO.md @@ -2,15 +2,12 @@ - [x] chore: initial commit (+ mirror on GitHub) - [x] Deploy first staging version (v1.0.0-staging.1) -- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all articles to go from one to another, or none if it is not possible +- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all pages to go from one to another, or none if it is not possible +- [ ] Check, cache and store (in `.json` file) all Wikipedia Pages and its internal links, maybe use Wikipedia Dump ()? - [ ] Implement toast notifications for errors, warnings, and success messages -- [ ] v1.0.0-staging.2 -- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc. - [ ] Implement CLI (`cli`) -- [ ] v1.0.0-staging.3 - [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/)) -- [ ] v1.0.0-staging.4 -- [ ] v1.0.0 +- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc. ## Links diff --git a/apps/cli/package.json b/apps/cli/package.json index 5ba7115..2263ae9 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -11,7 +11,7 @@ }, "scripts": { "start": "node --import=tsx ./src/index.ts", - "dev": "node --import=tsx --watch --watch-preserve-output ./src/index.ts", + "dev-test": "node --import=tsx --watch --watch-preserve-output ./src/index.ts", "lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives", "lint:typescript": "tsc --noEmit" }, diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index 2cacb3e..cbc01cf 100755 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -1,11 +1,26 @@ #!/usr/bin/env -S node --import=tsx -import { add } from "#abc/def/add.js" +import { getWikipediaPageInternalLinks } from "@repo/wikipedia-game-solver/wikipedia-api" -import { VERSION } from "@repo/constants" -import { sum } from "@repo/wikipedia-game-solver/wikipedia-api" +const localeWikipedia = "en" -console.log("Hello, world!") -console.log(sum(1, 2)) -console.log(add(2, 3)) -console.log(`v${VERSION}`) +const fromPageInput = "Linux" +const toPageInput = "Node.js" +console.log({ + fromPageInput, + toPageInput, +}) +const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([ + getWikipediaPageInternalLinks({ + title: fromPageInput, + locale: localeWikipedia, + }), + getWikipediaPageInternalLinks({ + title: toPageInput, + locale: localeWikipedia, + }), +]) +console.log({ + fromPageWikipediaLinks, + toPageWikipediaLinks, +}) diff --git a/apps/cli/src/main.ts b/apps/cli/src/main.ts new file mode 100755 index 0000000..2cacb3e --- /dev/null +++ b/apps/cli/src/main.ts @@ -0,0 +1,11 @@ +#!/usr/bin/env -S node --import=tsx + +import { add } from "#abc/def/add.js" + +import { VERSION } from "@repo/constants" +import { sum } from "@repo/wikipedia-game-solver/wikipedia-api" + +console.log("Hello, world!") +console.log(sum(1, 2)) +console.log(add(2, 3)) +console.log(`v${VERSION}`) diff --git a/packages/wikipedia-game-solver/src/WikipediaClient.tsx b/packages/wikipedia-game-solver/src/WikipediaClient.tsx index 88944b3..77c931e 100644 --- a/packages/wikipedia-game-solver/src/WikipediaClient.tsx +++ b/packages/wikipedia-game-solver/src/WikipediaClient.tsx @@ -20,27 +20,34 @@ export const WikipediaClient: React.FC = () => { const handleClick: React.MouseEventHandler = async () => { setIsLoading(true) - const fromArticleInput = "Linux" - const toArticleInput = "Node.js" + const fromPageInput = "Linux" + const toPageInput = "Node.js" console.log({ - fromArticleInput, - toArticleInput, + fromPageInput, + toPageInput, }) - const [fromArticleWikipediaLinks, toArticleWikipediaLinks] = - await Promise.all([ - getWikipediaPageInternalLinks({ - title: fromArticleInput, - locale: localeWikipedia, - }), - getWikipediaPageInternalLinks({ - title: toArticleInput, - locale: localeWikipedia, - }), - ]) + const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([ + getWikipediaPageInternalLinks({ + title: fromPageInput, + locale: localeWikipedia, + }), + getWikipediaPageInternalLinks({ + title: toPageInput, + locale: localeWikipedia, + }), + ]) console.log({ - fromArticleWikipediaLinks, - toArticleWikipediaLinks, + fromPageWikipediaLinks, + toPageWikipediaLinks, }) + // const deepInternalLinks = await getDeepWikipediaPageInternalLinks({ + // locale: localeWikipedia, + // data: { + // [fromPageWikipediaLinks.title]: fromPageWikipediaLinks, + // [toPageWikipediaLinks.title]: toPageWikipediaLinks, + // }, + // }) + // console.log(deepInternalLinks) setIsLoading(false) } diff --git a/packages/wikipedia-game-solver/src/wikipedia-api.ts b/packages/wikipedia-game-solver/src/wikipedia-api.ts index 308eb1b..8d2b14b 100644 --- a/packages/wikipedia-game-solver/src/wikipedia-api.ts +++ b/packages/wikipedia-game-solver/src/wikipedia-api.ts @@ -4,13 +4,23 @@ export const sum = (a: number, b: number): number => { return a + b } +/** + * @see https://www.mediawiki.org/wiki/Wikimedia_REST_API#Terms_and_conditions + * To avoid impacting other API users, limit your clients to no more than 200 requests/sec to this API overall. Many entry points additionally specify and enforce more restrictive rate limits (HTTP 429 error). + */ + export const WIKIPEDIA_LOCALES = ["en", "fr"] as const export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number] +const WIKIPEDIA_LOCALES_MAP: Record = { + "en-US": "en", + "fr-FR": "fr", +} + export const fromLocaleToWikipediaLocale = ( locale: Locale, ): WikipediaLocale => { - return locale === "en-US" ? "en" : "fr" + return WIKIPEDIA_LOCALES_MAP[locale] } export const getWikipediaLink = (locale: WikipediaLocale): string => { @@ -48,15 +58,34 @@ interface GetWikipediaPageInternalLinksInput { } interface GetWikipediaPageInternalLinksOutput { + /** + * Title of the Wikipedia page. + */ title: string + + /** + * Page id is unique for each page on Wikipedia, can be used to link to the page. + * @example `https://${locale}.wikipedia.org/?curid=${pageId}` + */ + pageId: number + + /** + * List of internal links on the Wikipedia page. + */ links: string[] } +/** + * Get internal links from a Wikipedia page. + * @param input + * @returns + */ export const getWikipediaPageInternalLinks = async ( input: GetWikipediaPageInternalLinksInput, ): Promise => { const links: string[] = [] let title = input.title + let pageId = 0 let plcontinue: string | null = null const fetchLinks = async (): Promise => { @@ -94,6 +123,7 @@ export const getWikipediaPageInternalLinks = async ( break } title = pageData.title + pageId = pageData.pageid links.push( ...pageData.links.map((link) => { return link.title @@ -106,6 +136,43 @@ export const getWikipediaPageInternalLinks = async ( return { title, + pageId, links, } } + +export interface WikipediaPagesInternalLinks { + [key: string]: GetWikipediaPageInternalLinksOutput +} + +export interface GetDeepWikipediaPageInternalLinksInput { + locale: WikipediaLocale + data: WikipediaPagesInternalLinks +} + +export const getDeepWikipediaPageInternalLinks = async ( + input: GetDeepWikipediaPageInternalLinksInput, +): Promise => { + const pagesTitles = Object.keys(input.data) + await Promise.all( + pagesTitles.map(async (pageTitle) => { + const links = input.data[pageTitle]?.links ?? [] + await Promise.all( + links.map(async (pageTitleLink) => { + if (pageTitleLink in input.data) { + return + } + input.data[pageTitleLink] = await getWikipediaPageInternalLinks({ + locale: input.locale, + title: pageTitleLink, + }) + await getDeepWikipediaPageInternalLinks({ + locale: input.locale, + data: input.data, + }) + }), + ) + }), + ) + return input.data +}