feat: try deep internal links of wikipedia pages
This commit is contained in:
parent
0ee7b35530
commit
90abfb6de8
9
TODO.md
9
TODO.md
@ -2,15 +2,12 @@
|
|||||||
|
|
||||||
- [x] chore: initial commit (+ mirror on GitHub)
|
- [x] chore: initial commit (+ mirror on GitHub)
|
||||||
- [x] Deploy first staging version (v1.0.0-staging.1)
|
- [x] Deploy first staging version (v1.0.0-staging.1)
|
||||||
- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all articles to go from one to another, or none if it is not possible
|
- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all pages to go from one to another, or none if it is not possible
|
||||||
|
- [ ] Check, cache and store (in a `.json` file) all Wikipedia pages and their internal links, maybe use a Wikipedia dump (<https://en.wikipedia.org/wiki/Wikipedia:Database_download>)?
|
||||||
- [ ] Implement toast notifications for errors, warnings, and success messages
|
- [ ] Implement toast notifications for errors, warnings, and success messages
|
||||||
- [ ] v1.0.0-staging.2
|
|
||||||
- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
|
|
||||||
- [ ] Implement CLI (`cli`)
|
- [ ] Implement CLI (`cli`)
|
||||||
- [ ] v1.0.0-staging.3
|
|
||||||
- [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/))
|
- [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/))
|
||||||
- [ ] v1.0.0-staging.4
|
- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
|
||||||
- [ ] v1.0.0
|
|
||||||
|
|
||||||
## Links
|
## Links
|
||||||
|
|
||||||
|
@ -11,7 +11,7 @@
|
|||||||
},
|
},
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node --import=tsx ./src/index.ts",
|
"start": "node --import=tsx ./src/index.ts",
|
||||||
"dev": "node --import=tsx --watch --watch-preserve-output ./src/index.ts",
|
"dev-test": "node --import=tsx --watch --watch-preserve-output ./src/index.ts",
|
||||||
"lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives",
|
"lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives",
|
||||||
"lint:typescript": "tsc --noEmit"
|
"lint:typescript": "tsc --noEmit"
|
||||||
},
|
},
|
||||||
|
@ -1,11 +1,26 @@
|
|||||||
#!/usr/bin/env -S node --import=tsx
|
#!/usr/bin/env -S node --import=tsx
|
||||||
|
|
||||||
import { add } from "#abc/def/add.js"
|
import { getWikipediaPageInternalLinks } from "@repo/wikipedia-game-solver/wikipedia-api"
|
||||||
|
|
||||||
import { VERSION } from "@repo/constants"
|
const localeWikipedia = "en"
|
||||||
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
|
|
||||||
|
|
||||||
console.log("Hello, world!")
|
const fromPageInput = "Linux"
|
||||||
console.log(sum(1, 2))
|
const toPageInput = "Node.js"
|
||||||
console.log(add(2, 3))
|
console.log({
|
||||||
console.log(`v${VERSION}`)
|
fromPageInput,
|
||||||
|
toPageInput,
|
||||||
|
})
|
||||||
|
const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
|
||||||
|
getWikipediaPageInternalLinks({
|
||||||
|
title: fromPageInput,
|
||||||
|
locale: localeWikipedia,
|
||||||
|
}),
|
||||||
|
getWikipediaPageInternalLinks({
|
||||||
|
title: toPageInput,
|
||||||
|
locale: localeWikipedia,
|
||||||
|
}),
|
||||||
|
])
|
||||||
|
console.log({
|
||||||
|
fromPageWikipediaLinks,
|
||||||
|
toPageWikipediaLinks,
|
||||||
|
})
|
||||||
|
11
apps/cli/src/main.ts
Executable file
11
apps/cli/src/main.ts
Executable file
@ -0,0 +1,11 @@
|
|||||||
|
#!/usr/bin/env -S node --import=tsx
|
||||||
|
|
||||||
|
import { add } from "#abc/def/add.js"
|
||||||
|
|
||||||
|
import { VERSION } from "@repo/constants"
|
||||||
|
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
|
||||||
|
|
||||||
|
// Prints a greeting, the results of `sum(1, 2)` and `add(2, 3)`, and the
// `VERSION` constant prefixed with "v".
console.log("Hello, world!")
|
||||||
|
console.log(sum(1, 2))
|
||||||
|
console.log(add(2, 3))
|
||||||
|
console.log(`v${VERSION}`)
|
@ -20,27 +20,34 @@ export const WikipediaClient: React.FC = () => {
|
|||||||
|
|
||||||
const handleClick: React.MouseEventHandler<HTMLButtonElement> = async () => {
|
const handleClick: React.MouseEventHandler<HTMLButtonElement> = async () => {
|
||||||
setIsLoading(true)
|
setIsLoading(true)
|
||||||
const fromArticleInput = "Linux"
|
const fromPageInput = "Linux"
|
||||||
const toArticleInput = "Node.js"
|
const toPageInput = "Node.js"
|
||||||
console.log({
|
console.log({
|
||||||
fromArticleInput,
|
fromPageInput,
|
||||||
toArticleInput,
|
toPageInput,
|
||||||
})
|
})
|
||||||
const [fromArticleWikipediaLinks, toArticleWikipediaLinks] =
|
const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
|
||||||
await Promise.all([
|
getWikipediaPageInternalLinks({
|
||||||
getWikipediaPageInternalLinks({
|
title: fromPageInput,
|
||||||
title: fromArticleInput,
|
locale: localeWikipedia,
|
||||||
locale: localeWikipedia,
|
}),
|
||||||
}),
|
getWikipediaPageInternalLinks({
|
||||||
getWikipediaPageInternalLinks({
|
title: toPageInput,
|
||||||
title: toArticleInput,
|
locale: localeWikipedia,
|
||||||
locale: localeWikipedia,
|
}),
|
||||||
}),
|
])
|
||||||
])
|
|
||||||
console.log({
|
console.log({
|
||||||
fromArticleWikipediaLinks,
|
fromPageWikipediaLinks,
|
||||||
toArticleWikipediaLinks,
|
toPageWikipediaLinks,
|
||||||
})
|
})
|
||||||
|
// const deepInternalLinks = await getDeepWikipediaPageInternalLinks({
|
||||||
|
// locale: localeWikipedia,
|
||||||
|
// data: {
|
||||||
|
// [fromPageWikipediaLinks.title]: fromPageWikipediaLinks,
|
||||||
|
// [toPageWikipediaLinks.title]: toPageWikipediaLinks,
|
||||||
|
// },
|
||||||
|
// })
|
||||||
|
// console.log(deepInternalLinks)
|
||||||
setIsLoading(false)
|
setIsLoading(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,13 +4,23 @@ export const sum = (a: number, b: number): number => {
|
|||||||
return a + b
|
return a + b
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @see https://www.mediawiki.org/wiki/Wikimedia_REST_API#Terms_and_conditions
|
||||||
|
* To avoid impacting other API users, limit your clients to no more than 200 requests/sec to this API overall. Many entry points additionally specify and enforce more restrictive rate limits (HTTP 429 error).
|
||||||
|
*/
|
||||||
|
|
||||||
export const WIKIPEDIA_LOCALES = ["en", "fr"] as const
|
export const WIKIPEDIA_LOCALES = ["en", "fr"] as const
|
||||||
export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number]
|
export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number]
|
||||||
|
|
||||||
|
/**
 * Lookup table from an application `Locale` to the matching `WikipediaLocale`
 * ("en-US" -> "en", "fr-FR" -> "fr").
 * Typed as `Record<Locale, WikipediaLocale>`, so every `Locale` key must have an entry.
 */
const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = {
|
||||||
|
"en-US": "en",
|
||||||
|
"fr-FR": "fr",
|
||||||
|
}
|
||||||
|
|
||||||
export const fromLocaleToWikipediaLocale = (
|
export const fromLocaleToWikipediaLocale = (
|
||||||
locale: Locale,
|
locale: Locale,
|
||||||
): WikipediaLocale => {
|
): WikipediaLocale => {
|
||||||
return locale === "en-US" ? "en" : "fr"
|
return WIKIPEDIA_LOCALES_MAP[locale]
|
||||||
}
|
}
|
||||||
|
|
||||||
export const getWikipediaLink = (locale: WikipediaLocale): string => {
|
export const getWikipediaLink = (locale: WikipediaLocale): string => {
|
||||||
@ -48,15 +58,34 @@ interface GetWikipediaPageInternalLinksInput {
|
|||||||
}
|
}
|
||||||
|
|
||||||
interface GetWikipediaPageInternalLinksOutput {
|
interface GetWikipediaPageInternalLinksOutput {
|
||||||
|
/**
|
||||||
|
* Title of the Wikipedia page.
|
||||||
|
*/
|
||||||
title: string
|
title: string
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Page id is unique for each page on Wikipedia, can be used to link to the page.
|
||||||
|
* @example `https://${locale}.wikipedia.org/?curid=${pageId}`
|
||||||
|
*/
|
||||||
|
pageId: number
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List of internal links on the Wikipedia page.
|
||||||
|
*/
|
||||||
links: string[]
|
links: string[]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get internal links from a Wikipedia page.
|
||||||
|
* @param input
|
||||||
|
* @returns
|
||||||
|
*/
|
||||||
export const getWikipediaPageInternalLinks = async (
|
export const getWikipediaPageInternalLinks = async (
|
||||||
input: GetWikipediaPageInternalLinksInput,
|
input: GetWikipediaPageInternalLinksInput,
|
||||||
): Promise<GetWikipediaPageInternalLinksOutput> => {
|
): Promise<GetWikipediaPageInternalLinksOutput> => {
|
||||||
const links: string[] = []
|
const links: string[] = []
|
||||||
let title = input.title
|
let title = input.title
|
||||||
|
let pageId = 0
|
||||||
let plcontinue: string | null = null
|
let plcontinue: string | null = null
|
||||||
|
|
||||||
const fetchLinks = async (): Promise<WikipediaQueryLinksResponse> => {
|
const fetchLinks = async (): Promise<WikipediaQueryLinksResponse> => {
|
||||||
@ -94,6 +123,7 @@ export const getWikipediaPageInternalLinks = async (
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
title = pageData.title
|
title = pageData.title
|
||||||
|
pageId = pageData.pageid
|
||||||
links.push(
|
links.push(
|
||||||
...pageData.links.map((link) => {
|
...pageData.links.map((link) => {
|
||||||
return link.title
|
return link.title
|
||||||
@ -106,6 +136,43 @@ export const getWikipediaPageInternalLinks = async (
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
title,
|
title,
|
||||||
|
pageId,
|
||||||
links,
|
links,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Pages keyed by the title they were requested with, each holding the
 * internal links that were fetched for that page.
 */
export interface WikipediaPagesInternalLinks {
  [key: string]: GetWikipediaPageInternalLinksOutput
}

export interface GetDeepWikipediaPageInternalLinksInput {
  /**
   * Wikipedia language edition to query.
   */
  locale: WikipediaLocale

  /**
   * Already-fetched pages used as the starting points of the crawl.
   * This object is mutated in place: every newly fetched page is added to it.
   */
  data: WikipediaPagesInternalLinks

  /**
   * Maximum number of link hops to follow from the starting pages
   * (`1` fetches only the pages directly linked from them, `0` fetches nothing).
   * @default Number.POSITIVE_INFINITY
   */
  maxDepth?: number

  /**
   * Maximum number of page requests in flight at the same time.
   * Values that are not a finite number >= 1 fall back to the default.
   * @default 20
   */
  concurrency?: number
}

/**
 * Recursively collect the internal links of every page reachable from the pages in `input.data`.
 *
 * The crawl is breadth-first: each pass fetches the not-yet-known pages linked
 * from the pages discovered by the previous pass. Every title is claimed
 * synchronously before its request starts, so a page linked from several
 * places is fetched only once, and requests are issued through a bounded
 * worker pool instead of all at once.
 *
 * @param input Locale, starting pages and optional crawl limits.
 * @returns The same object as `input.data`, extended with every fetched page.
 * @throws Rejects with the first error raised by `getWikipediaPageInternalLinks`.
 */
export const getDeepWikipediaPageInternalLinks = async (
  input: GetDeepWikipediaPageInternalLinksInput,
): Promise<WikipediaPagesInternalLinks> => {
  const defaultConcurrency = 20
  const { locale, data } = input
  const maxDepth = input.maxDepth ?? Number.POSITIVE_INFINITY
  const requestedConcurrency = input.concurrency ?? defaultConcurrency
  const concurrency =
    Number.isFinite(requestedConcurrency) && requestedConcurrency >= 1
      ? Math.floor(requestedConcurrency)
      : defaultConcurrency

  // A Set (rather than `title in data`) ignores keys inherited from
  // Object.prototype and lets a title be claimed before its fetch resolves.
  const claimedTitles = new Set<string>(Object.keys(data))
  let frontier = Object.keys(data)

  for (let depth = 0; depth < maxDepth && frontier.length > 0; depth++) {
    const titlesToFetch: string[] = []
    for (const pageTitle of frontier) {
      for (const linkTitle of data[pageTitle]?.links ?? []) {
        if (claimedTitles.has(linkTitle)) {
          continue
        }
        claimedTitles.add(linkTitle)
        titlesToFetch.push(linkTitle)
      }
    }

    // Workers share one cursor; reading and advancing it happens without an
    // intervening `await`, so no two workers ever pick the same title.
    let cursor = 0
    const worker = async (): Promise<void> => {
      while (cursor < titlesToFetch.length) {
        const title = titlesToFetch[cursor]
        cursor += 1
        if (title === undefined) {
          continue
        }
        data[title] = await getWikipediaPageInternalLinks({ locale, title })
      }
    }
    const workersCount = Math.min(concurrency, titlesToFetch.length)
    await Promise.all(
      Array.from({ length: workersCount }, async () => {
        await worker()
      }),
    )

    frontier = titlesToFetch
  }

  return data
}
|
||||||
|
Reference in New Issue
Block a user