feat: try deep internal links of wikipedia pages
Some checks failed
Chromatic / chromatic (push) Successful in 4m43s
CI / ci (push) Failing after 1m30s
CI / commitlint (push) Successful in 14s

This commit is contained in:
Théo LUDWIG 2024-07-26 19:05:59 +02:00
parent 0ee7b35530
commit 90abfb6de8
Signed by: theoludwig
GPG Key ID: ADFE5A563D718F3B
6 changed files with 129 additions and 32 deletions

View File

@ -2,15 +2,12 @@
- [x] chore: initial commit (+ mirror on GitHub)
- [x] Deploy first staging version (v1.0.0-staging.1)
- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all articles to go from one to another, or none if it is not possible
- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all pages to go from one to another, or none if it is not possible
- [ ] Check, cache and store (in a `.json` file) all Wikipedia pages and their internal links, maybe use Wikipedia Dump (<https://en.wikipedia.org/wiki/Wikipedia:Database_download>)?
- [ ] Implement toast notifications for errors, warnings, and success messages
- [ ] v1.0.0-staging.2
- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
- [ ] Implement CLI (`cli`)
- [ ] v1.0.0-staging.3
- [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/))
- [ ] v1.0.0-staging.4
- [ ] v1.0.0
- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
## Links

View File

@ -11,7 +11,7 @@
},
"scripts": {
"start": "node --import=tsx ./src/index.ts",
"dev": "node --import=tsx --watch --watch-preserve-output ./src/index.ts",
"dev-test": "node --import=tsx --watch --watch-preserve-output ./src/index.ts",
"lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives",
"lint:typescript": "tsc --noEmit"
},

View File

@ -1,11 +1,26 @@
#!/usr/bin/env -S node --import=tsx
import { add } from "#abc/def/add.js"
import { getWikipediaPageInternalLinks } from "@repo/wikipedia-game-solver/wikipedia-api"
import { VERSION } from "@repo/constants"
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
const localeWikipedia = "en"
console.log("Hello, world!")
console.log(sum(1, 2))
console.log(add(2, 3))
console.log(`v${VERSION}`)
const fromPageInput = "Linux"
const toPageInput = "Node.js"
console.log({
fromPageInput,
toPageInput,
})
const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
getWikipediaPageInternalLinks({
title: fromPageInput,
locale: localeWikipedia,
}),
getWikipediaPageInternalLinks({
title: toPageInput,
locale: localeWikipedia,
}),
])
console.log({
fromPageWikipediaLinks,
toPageWikipediaLinks,
})

11
apps/cli/src/main.ts Executable file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env -S node --import=tsx
import { add } from "#abc/def/add.js"
import { VERSION } from "@repo/constants"
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
// Smoke-test output: prints a greeting, then the results of the imported
// `sum` and `add` helpers, then the `VERSION` constant prefixed with "v".
console.log("Hello, world!")
console.log(sum(1, 2))
console.log(add(2, 3))
console.log(`v${VERSION}`)

View File

@ -20,27 +20,34 @@ export const WikipediaClient: React.FC = () => {
const handleClick: React.MouseEventHandler<HTMLButtonElement> = async () => {
setIsLoading(true)
const fromArticleInput = "Linux"
const toArticleInput = "Node.js"
const fromPageInput = "Linux"
const toPageInput = "Node.js"
console.log({
fromArticleInput,
toArticleInput,
fromPageInput,
toPageInput,
})
const [fromArticleWikipediaLinks, toArticleWikipediaLinks] =
await Promise.all([
const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
getWikipediaPageInternalLinks({
title: fromArticleInput,
title: fromPageInput,
locale: localeWikipedia,
}),
getWikipediaPageInternalLinks({
title: toArticleInput,
title: toPageInput,
locale: localeWikipedia,
}),
])
console.log({
fromArticleWikipediaLinks,
toArticleWikipediaLinks,
fromPageWikipediaLinks,
toPageWikipediaLinks,
})
// const deepInternalLinks = await getDeepWikipediaPageInternalLinks({
// locale: localeWikipedia,
// data: {
// [fromPageWikipediaLinks.title]: fromPageWikipediaLinks,
// [toPageWikipediaLinks.title]: toPageWikipediaLinks,
// },
// })
// console.log(deepInternalLinks)
setIsLoading(false)
}

View File

@ -4,13 +4,23 @@ export const sum = (a: number, b: number): number => {
return a + b
}
/**
 * Wikimedia REST API usage note: to avoid impacting other API users, limit your clients to no more than 200 requests/sec to this API overall. Many entry points additionally specify and enforce more restrictive rate limits (HTTP 429 error).
 * @see https://www.mediawiki.org/wiki/Wikimedia_REST_API#Terms_and_conditions
 */
/**
 * Wikipedia locale codes declared by this module.
 */
export const WIKIPEDIA_LOCALES = ["en", "fr"] as const
/**
 * Union of the entries of `WIKIPEDIA_LOCALES` (`"en" | "fr"`).
 */
export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number]
/**
 * Lookup table from an application `Locale` to the matching `WikipediaLocale`.
 * NOTE(review): assumes `Locale` is exactly `"en-US" | "fr-FR"` — confirm against its declaration.
 */
const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = {
"en-US": "en",
"fr-FR": "fr",
}
export const fromLocaleToWikipediaLocale = (
locale: Locale,
): WikipediaLocale => {
return locale === "en-US" ? "en" : "fr"
return WIKIPEDIA_LOCALES_MAP[locale]
}
export const getWikipediaLink = (locale: WikipediaLocale): string => {
@ -48,15 +58,34 @@ interface GetWikipediaPageInternalLinksInput {
}
/**
 * Result of fetching the internal links of a single Wikipedia page.
 */
interface GetWikipediaPageInternalLinksOutput {
/**
 * Title of the Wikipedia page.
 */
title: string
/**
 * Wikipedia page id. It is unique for each page on Wikipedia and can be used to link to the page.
 * @example `https://${locale}.wikipedia.org/?curid=${pageId}`
 */
pageId: number
/**
 * Titles of the internal links found on the Wikipedia page.
 */
links: string[]
}
/**
* Get internal links from a Wikipedia page.
* @param input
* @returns
*/
export const getWikipediaPageInternalLinks = async (
input: GetWikipediaPageInternalLinksInput,
): Promise<GetWikipediaPageInternalLinksOutput> => {
const links: string[] = []
let title = input.title
let pageId = 0
let plcontinue: string | null = null
const fetchLinks = async (): Promise<WikipediaQueryLinksResponse> => {
@ -94,6 +123,7 @@ export const getWikipediaPageInternalLinks = async (
break
}
title = pageData.title
pageId = pageData.pageid
links.push(
...pageData.links.map((link) => {
return link.title
@ -106,6 +136,43 @@ export const getWikipediaPageInternalLinks = async (
return {
title,
pageId,
links,
}
}
/**
 * Dictionary of fetched Wikipedia pages and their internal links, keyed by page title.
 */
export interface WikipediaPagesInternalLinks {
[key: string]: GetWikipediaPageInternalLinksOutput
}
/**
 * Input of `getDeepWikipediaPageInternalLinks`.
 */
export interface GetDeepWikipediaPageInternalLinksInput {
/**
 * Wikipedia locale used for every page request.
 */
locale: WikipediaLocale
/**
 * Pages that are already fetched, keyed by page title.
 * `getDeepWikipediaPageInternalLinks` adds the pages it fetches to this same object.
 */
data: WikipediaPagesInternalLinks
}
/**
 * Follows the internal links of the pages in `input.data`, fetching every linked
 * page that is not already present, then the links of those pages, and so on.
 *
 * `input.data` is filled in place (each fetched page is stored under the link
 * title that was requested) and the same object is returned.
 *
 * Each title is requested at most once per call: a title is recorded as
 * requested *before* its request is awaited, so concurrent branches that meet
 * the same link do not fetch it again, and only the links of a newly fetched
 * page are explored instead of re-scanning the whole dictionary.
 *
 * NOTE: there is no depth or page-count limit. The crawl only ends when no
 * unseen link title remains, or when a request rejects.
 *
 * @param input Locale to query and the already fetched pages to start from.
 * @returns `input.data`, extended with every page fetched during the crawl.
 * @throws Rejects with the error of the first page request that fails.
 */
export const getDeepWikipediaPageInternalLinks = async (
  input: GetDeepWikipediaPageInternalLinksInput,
): Promise<WikipediaPagesInternalLinks> => {
  const { locale, data } = input

  // Titles whose request has been started by this call (finished or still in flight).
  const requestedTitles = new Set<string>()

  const crawl = async (titles: readonly string[]): Promise<void> => {
    await Promise.all(
      titles.map(async (title) => {
        if (title in data || requestedTitles.has(title)) {
          return
        }
        // Claim the title synchronously, before the first `await`, so that no
        // other concurrent branch can start a second request for it.
        requestedTitles.add(title)
        const page = await getWikipediaPageInternalLinks({ locale, title })
        data[title] = page
        await crawl(page.links)
      }),
    )
  }

  // Snapshot of the pages present at call time; pages added later are crawled
  // from the branch that fetched them.
  const initialPages = Object.values(data)
  await Promise.all(
    initialPages.map(async (page) => {
      await crawl(page.links)
    }),
  )

  return data
}