feat: try deep internal links of wikipedia pages
Some checks failed
Chromatic / chromatic (push) Successful in 4m43s
CI / ci (push) Failing after 1m30s
CI / commitlint (push) Successful in 14s

This commit is contained in:
Théo LUDWIG 2024-07-26 19:05:59 +02:00
parent 0ee7b35530
commit 90abfb6de8
Signed by: theoludwig
GPG Key ID: ADFE5A563D718F3B
6 changed files with 129 additions and 32 deletions

View File

@ -2,15 +2,12 @@
- [x] chore: initial commit (+ mirror on GitHub) - [x] chore: initial commit (+ mirror on GitHub)
- [x] Deploy first staging version (v1.0.0-staging.1) - [x] Deploy first staging version (v1.0.0-staging.1)
- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all articles to go from one to another, or none if it is not possible - [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all pages to go from one to another, or none if it is not possible
- [ ] Check, cache and store (in `.json` file) all Wikipedia Pages and its internal links, maybe use Wikipedia Dump (<https://en.wikipedia.org/wiki/Wikipedia:Database_download>)?
- [ ] Implement toast notifications for errors, warnings, and success messages - [ ] Implement toast notifications for errors, warnings, and success messages
- [ ] v1.0.0-staging.2
- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
- [ ] Implement CLI (`cli`) - [ ] Implement CLI (`cli`)
- [ ] v1.0.0-staging.3
- [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/)) - [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/))
- [ ] v1.0.0-staging.4 - [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
- [ ] v1.0.0
## Links ## Links

View File

@ -11,7 +11,7 @@
}, },
"scripts": { "scripts": {
"start": "node --import=tsx ./src/index.ts", "start": "node --import=tsx ./src/index.ts",
"dev": "node --import=tsx --watch --watch-preserve-output ./src/index.ts", "dev-test": "node --import=tsx --watch --watch-preserve-output ./src/index.ts",
"lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives", "lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives",
"lint:typescript": "tsc --noEmit" "lint:typescript": "tsc --noEmit"
}, },

View File

@ -1,11 +1,26 @@
#!/usr/bin/env -S node --import=tsx #!/usr/bin/env -S node --import=tsx
import { add } from "#abc/def/add.js" import { getWikipediaPageInternalLinks } from "@repo/wikipedia-game-solver/wikipedia-api"
import { VERSION } from "@repo/constants" const localeWikipedia = "en"
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
console.log("Hello, world!") const fromPageInput = "Linux"
console.log(sum(1, 2)) const toPageInput = "Node.js"
console.log(add(2, 3)) console.log({
console.log(`v${VERSION}`) fromPageInput,
toPageInput,
})
const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
getWikipediaPageInternalLinks({
title: fromPageInput,
locale: localeWikipedia,
}),
getWikipediaPageInternalLinks({
title: toPageInput,
locale: localeWikipedia,
}),
])
console.log({
fromPageWikipediaLinks,
toPageWikipediaLinks,
})

11
apps/cli/src/main.ts Executable file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env -S node --import=tsx
import { add } from "#abc/def/add.js"
import { VERSION } from "@repo/constants"
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
// Print a greeting, then the results of the `sum` and `add` helpers, then the version from `@repo/constants`.
console.log("Hello, world!")
console.log(sum(1, 2))
console.log(add(2, 3))
// `VERSION` is printed with a leading "v".
console.log(`v${VERSION}`)

View File

@ -20,27 +20,34 @@ export const WikipediaClient: React.FC = () => {
const handleClick: React.MouseEventHandler<HTMLButtonElement> = async () => { const handleClick: React.MouseEventHandler<HTMLButtonElement> = async () => {
setIsLoading(true) setIsLoading(true)
const fromArticleInput = "Linux" const fromPageInput = "Linux"
const toArticleInput = "Node.js" const toPageInput = "Node.js"
console.log({ console.log({
fromArticleInput, fromPageInput,
toArticleInput, toPageInput,
}) })
const [fromArticleWikipediaLinks, toArticleWikipediaLinks] = const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
await Promise.all([ getWikipediaPageInternalLinks({
getWikipediaPageInternalLinks({ title: fromPageInput,
title: fromArticleInput, locale: localeWikipedia,
locale: localeWikipedia, }),
}), getWikipediaPageInternalLinks({
getWikipediaPageInternalLinks({ title: toPageInput,
title: toArticleInput, locale: localeWikipedia,
locale: localeWikipedia, }),
}), ])
])
console.log({ console.log({
fromArticleWikipediaLinks, fromPageWikipediaLinks,
toArticleWikipediaLinks, toPageWikipediaLinks,
}) })
// const deepInternalLinks = await getDeepWikipediaPageInternalLinks({
// locale: localeWikipedia,
// data: {
// [fromPageWikipediaLinks.title]: fromPageWikipediaLinks,
// [toPageWikipediaLinks.title]: toPageWikipediaLinks,
// },
// })
// console.log(deepInternalLinks)
setIsLoading(false) setIsLoading(false)
} }

View File

@ -4,13 +4,23 @@ export const sum = (a: number, b: number): number => {
return a + b return a + b
} }
/**
* @see https://www.mediawiki.org/wiki/Wikimedia_REST_API#Terms_and_conditions
* To avoid impacting other API users, limit your clients to no more than 200 requests/sec to this API overall. Many entry points additionally specify and enforce more restrictive rate limits (HTTP 429 error).
*/
export const WIKIPEDIA_LOCALES = ["en", "fr"] as const export const WIKIPEDIA_LOCALES = ["en", "fr"] as const
export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number] export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number]
/**
* Lookup table from an application `Locale` to the matching `WikipediaLocale`.
* Typed as `Record<Locale, WikipediaLocale>` so that every `Locale` must have an entry.
*/
const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = {
"en-US": "en",
"fr-FR": "fr",
}
export const fromLocaleToWikipediaLocale = ( export const fromLocaleToWikipediaLocale = (
locale: Locale, locale: Locale,
): WikipediaLocale => { ): WikipediaLocale => {
return locale === "en-US" ? "en" : "fr" return WIKIPEDIA_LOCALES_MAP[locale]
} }
export const getWikipediaLink = (locale: WikipediaLocale): string => { export const getWikipediaLink = (locale: WikipediaLocale): string => {
@ -48,15 +58,34 @@ interface GetWikipediaPageInternalLinksInput {
} }
interface GetWikipediaPageInternalLinksOutput { interface GetWikipediaPageInternalLinksOutput {
/**
* Title of the Wikipedia page.
*/
title: string title: string
/**
* The page id is unique for each page on Wikipedia and can be used to link to the page.
* @example `https://${locale}.wikipedia.org/?curid=${pageId}`
*/
pageId: number
/**
* List of internal links on the Wikipedia page.
*/
links: string[] links: string[]
} }
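/**
* Get internal links from a Wikipedia page.
* @param input Title of the Wikipedia page and the Wikipedia locale to query.
* @returns The title of the page, its page id, and the titles of the pages it links to.
*/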
export const getWikipediaPageInternalLinks = async ( export const getWikipediaPageInternalLinks = async (
input: GetWikipediaPageInternalLinksInput, input: GetWikipediaPageInternalLinksInput,
): Promise<GetWikipediaPageInternalLinksOutput> => { ): Promise<GetWikipediaPageInternalLinksOutput> => {
const links: string[] = [] const links: string[] = []
let title = input.title let title = input.title
let pageId = 0
let plcontinue: string | null = null let plcontinue: string | null = null
const fetchLinks = async (): Promise<WikipediaQueryLinksResponse> => { const fetchLinks = async (): Promise<WikipediaQueryLinksResponse> => {
@ -94,6 +123,7 @@ export const getWikipediaPageInternalLinks = async (
break break
} }
title = pageData.title title = pageData.title
pageId = pageData.pageid
links.push( links.push(
...pageData.links.map((link) => { ...pageData.links.map((link) => {
return link.title return link.title
@ -106,6 +136,43 @@ export const getWikipediaPageInternalLinks = async (
return { return {
title, title,
pageId,
links, links,
} }
} }
/**
* Dictionary of Wikipedia pages and their internal links, indexed by a string key.
* `getDeepWikipediaPageInternalLinks` uses page titles as keys.
*/
export interface WikipediaPagesInternalLinks {
[key: string]: GetWikipediaPageInternalLinksOutput
}
/**
 * Input of `getDeepWikipediaPageInternalLinks`.
 */
export interface GetDeepWikipediaPageInternalLinksInput {
  /**
   * Wikipedia locale forwarded to every page request.
   */
  locale: WikipediaLocale

  /**
   * Pages already known, keyed by page title.
   * This object is mutated in place: every newly fetched page is added to it.
   */
  data: WikipediaPagesInternalLinks

  /**
   * Maximum number of link hops to follow, starting from the pages initially in `data`.
   * `0` fetches nothing, `1` fetches only the pages directly linked from the initial pages, and so on.
   * When omitted, links are followed until no unknown title remains.
   */
  maxDepth?: number
}

/**
 * Maximum number of pages requested at the same time while crawling.
 * Keeps the number of in-flight requests bounded instead of requesting every link of every page at once.
 * NOTE(review): the value is a conservative guess, tune it against the API rate limits.
 */
const DEEP_INTERNAL_LINKS_MAX_CONCURRENT_PAGES = 10

/**
 * Follow the internal links of the pages in `input.data`, fetching every linked
 * page that is not already known, and then the pages linked from those, level by level.
 *
 * Each fetched page is stored in `input.data` under the link title exactly as it
 * appears in the `links` array of the page that referenced it.
 * Every title is requested at most once, even when several pages link to it.
 *
 * NOTE: without `maxDepth`, the traversal only stops once every reachable title
 * has been fetched, which can be a very large number of requests.
 *
 * @param input Locale, already known pages, and optional depth limit.
 * @returns The same object as `input.data`, completed with the fetched pages.
 * @throws Rejects with the error of the first failing `getWikipediaPageInternalLinks` call; pages stored before the failure remain in `input.data`.
 */
export const getDeepWikipediaPageInternalLinks = async (
  input: GetDeepWikipediaPageInternalLinksInput,
): Promise<WikipediaPagesInternalLinks> => {
  const { locale, data, maxDepth = Number.POSITIVE_INFINITY } = input

  // Titles already stored or already scheduled for fetching.
  // A title is claimed synchronously, before any `await`, so that two pages
  // linking to the same title can never trigger two requests for it.
  // Seeded from own keys only, so inherited object properties are not mistaken for pages.
  const claimedTitles = new Set<string>(Object.keys(data))

  const claimUnknownTitles = (links: readonly string[]): string[] => {
    const unknownTitles: string[] = []
    for (const link of links) {
      if (claimedTitles.has(link)) {
        continue
      }
      claimedTitles.add(link)
      unknownTitles.push(link)
    }
    return unknownTitles
  }

  // First level: every unknown title linked from the pages provided by the caller.
  let frontier = Object.keys(data).flatMap((pageTitle) => {
    return claimUnknownTitles(data[pageTitle]?.links ?? [])
  })

  for (let depth = 0; depth < maxDepth && frontier.length > 0; depth++) {
    const nextFrontier: string[] = []

    for (
      let start = 0;
      start < frontier.length;
      start += DEEP_INTERNAL_LINKS_MAX_CONCURRENT_PAGES
    ) {
      const batch = frontier.slice(
        start,
        start + DEEP_INTERNAL_LINKS_MAX_CONCURRENT_PAGES,
      )
      // Batches are awaited one after the other on purpose, to bound concurrency.
      await Promise.all(
        batch.map(async (title) => {
          const page = await getWikipediaPageInternalLinks({ locale, title })
          data[title] = page
          nextFrontier.push(...claimUnknownTitles(page.links))
        }),
      )
    }

    frontier = nextFrontier
  }

  return data
}