feat: try deep internal links of wikipedia pages
This commit is contained in:
parent
0ee7b35530
commit
90abfb6de8
9
TODO.md
9
TODO.md
@ -2,15 +2,12 @@
|
||||
|
||||
- [x] chore: initial commit (+ mirror on GitHub)
|
||||
- [x] Deploy first staging version (v1.0.0-staging.1)
|
||||
- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all articles to go from one to another, or none if it is not possible
|
||||
- [ ] Implement Wikipedia Game Solver (`website`) with inputs, button to submit, and list all pages to go from one to another, or none if it is not possible
|
||||
- [ ] Check, cache and store (in a `.json` file) all Wikipedia pages and their internal links; maybe use a Wikipedia dump (<https://en.wikipedia.org/wiki/Wikipedia:Database_download>)?
|
||||
- [ ] Implement toast notifications for errors, warnings, and success messages
|
||||
- [ ] v1.0.0-staging.2
|
||||
- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
|
||||
- [ ] Implement CLI (`cli`)
|
||||
- [ ] v1.0.0-staging.3
|
||||
- [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/))
|
||||
- [ ] v1.0.0-staging.4
|
||||
- [ ] v1.0.0
|
||||
- [ ] Add docs to add locale/edit translations, create component, install a dependency in a package, create a new package, technology used, architecture, links where it's deployed, how to use/install for end users, how to update dependencies with `npx taze -l` etc.
|
||||
|
||||
## Links
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
},
|
||||
"scripts": {
|
||||
"start": "node --import=tsx ./src/index.ts",
|
||||
"dev": "node --import=tsx --watch --watch-preserve-output ./src/index.ts",
|
||||
"dev-test": "node --import=tsx --watch --watch-preserve-output ./src/index.ts",
|
||||
"lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives",
|
||||
"lint:typescript": "tsc --noEmit"
|
||||
},
|
||||
|
@ -1,11 +1,26 @@
|
||||
#!/usr/bin/env -S node --import=tsx
|
||||
|
||||
import { add } from "#abc/def/add.js"
|
||||
import { getWikipediaPageInternalLinks } from "@repo/wikipedia-game-solver/wikipedia-api"
|
||||
|
||||
import { VERSION } from "@repo/constants"
|
||||
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
|
||||
// Wikipedia language edition queried by this script.
const localeWikipedia = "en"

// Scaffolding output: greeting, two arithmetic helpers and the package version.
console.log("Hello, world!")
console.log(sum(1, 2))
console.log(add(2, 3))
console.log(`v${VERSION}`)

// Hard-coded start and target pages.
const fromPageInput = "Linux"
const toPageInput = "Node.js"
console.log({ fromPageInput, toPageInput })

// Request the internal links of both pages at the same time; the requests are
// issued in the same order as the titles listed here.
const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
  getWikipediaPageInternalLinks({ title: fromPageInput, locale: localeWikipedia }),
  getWikipediaPageInternalLinks({ title: toPageInput, locale: localeWikipedia }),
])
console.log({ fromPageWikipediaLinks, toPageWikipediaLinks })
|
||||
|
11
apps/cli/src/main.ts
Executable file
11
apps/cli/src/main.ts
Executable file
@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env -S node --import=tsx
|
||||
|
||||
import { add } from "#abc/def/add.js"
|
||||
|
||||
import { VERSION } from "@repo/constants"
|
||||
import { sum } from "@repo/wikipedia-game-solver/wikipedia-api"
|
||||
|
||||
// Scaffolding output: greeting first, then each computed value is logged
// right after it is evaluated, and finally the package version.
console.log("Hello, world!")

const sumResult = sum(1, 2)
console.log(sumResult)

const addResult = add(2, 3)
console.log(addResult)

const versionLabel = `v${VERSION}`
console.log(versionLabel)
|
@ -20,27 +20,34 @@ export const WikipediaClient: React.FC = () => {
|
||||
|
||||
/**
 * Click handler: fetches the internal links of the hard-coded start page
 * ("Linux") and target page ("Node.js") concurrently and logs the results.
 *
 * The loading flag is raised before the requests start and is always lowered
 * again, even when one of the requests rejects; the rejection itself is not
 * swallowed and still propagates to the caller.
 */
const handleClick: React.MouseEventHandler<HTMLButtonElement> = async () => {
  setIsLoading(true)
  try {
    const fromPageInput = "Linux"
    const toPageInput = "Node.js"
    console.log({
      fromPageInput,
      toPageInput,
    })

    // Each page is requested exactly once; both requests run in parallel.
    const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([
      getWikipediaPageInternalLinks({
        title: fromPageInput,
        locale: localeWikipedia,
      }),
      getWikipediaPageInternalLinks({
        title: toPageInput,
        locale: localeWikipedia,
      }),
    ])
    console.log({
      fromPageWikipediaLinks,
      toPageWikipediaLinks,
    })

    // Deep crawl intentionally left disabled for now:
    // const deepInternalLinks = await getDeepWikipediaPageInternalLinks({
    //   locale: localeWikipedia,
    //   data: {
    //     [fromPageWikipediaLinks.title]: fromPageWikipediaLinks,
    //     [toPageWikipediaLinks.title]: toPageWikipediaLinks,
    //   },
    // })
    // console.log(deepInternalLinks)
  } finally {
    // Without this, a failed request would leave the UI stuck in "loading".
    setIsLoading(false)
  }
}
|
||||
|
||||
|
@ -4,13 +4,23 @@ export const sum = (a: number, b: number): number => {
|
||||
return a + b
|
||||
}
|
||||
|
||||
/**
|
||||
* @see https://www.mediawiki.org/wiki/Wikimedia_REST_API#Terms_and_conditions
|
||||
* To avoid impacting other API users, limit your clients to no more than 200 requests/sec to this API overall. Many entry points additionally specify and enforce more restrictive rate limits (HTTP 429 error).
|
||||
*/
|
||||
|
||||
/** Wikipedia language codes known to this module. */
export const WIKIPEDIA_LOCALES = ["en", "fr"] as const
|
||||
/** Union of the literal values listed in `WIKIPEDIA_LOCALES`. */
export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number]
|
||||
|
||||
/** Lookup table from each `Locale` key to its `WikipediaLocale` value. */
const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = {
|
||||
"en-US": "en",
|
||||
"fr-FR": "fr",
|
||||
}
|
||||
|
||||
/**
 * Converts a `Locale` into the matching Wikipedia language code.
 *
 * The result comes from `WIKIPEDIA_LOCALES_MAP`, so adding a locale only
 * requires a new entry in that table (no hard-coded fallback to "fr").
 *
 * @param locale Locale to convert; must be a key of `WIKIPEDIA_LOCALES_MAP`.
 * @returns The Wikipedia language code mapped to `locale`.
 */
export const fromLocaleToWikipediaLocale = (
  locale: Locale,
): WikipediaLocale => {
  return WIKIPEDIA_LOCALES_MAP[locale]
}
|
||||
|
||||
export const getWikipediaLink = (locale: WikipediaLocale): string => {
|
||||
@ -48,15 +58,34 @@ interface GetWikipediaPageInternalLinksInput {
|
||||
}
|
||||
|
||||
/**
 * Result of fetching the internal links of a single Wikipedia page.
 */
interface GetWikipediaPageInternalLinksOutput {
  /** Title of the Wikipedia page. */
  title: string

  /**
   * Identifier that is unique to each Wikipedia page and can be used to build
   * a link to it.
   * @example `https://${locale}.wikipedia.org/?curid=${pageId}`
   */
  pageId: number

  /** Internal links found on the Wikipedia page. */
  links: string[]
}
|
||||
|
||||
/**
|
||||
* Get internal links from a Wikipedia page.
|
||||
* @param input
|
||||
* @returns
|
||||
*/
|
||||
export const getWikipediaPageInternalLinks = async (
|
||||
input: GetWikipediaPageInternalLinksInput,
|
||||
): Promise<GetWikipediaPageInternalLinksOutput> => {
|
||||
const links: string[] = []
|
||||
let title = input.title
|
||||
let pageId = 0
|
||||
let plcontinue: string | null = null
|
||||
|
||||
const fetchLinks = async (): Promise<WikipediaQueryLinksResponse> => {
|
||||
@ -94,6 +123,7 @@ export const getWikipediaPageInternalLinks = async (
|
||||
break
|
||||
}
|
||||
title = pageData.title
|
||||
pageId = pageData.pageid
|
||||
links.push(
|
||||
...pageData.links.map((link) => {
|
||||
return link.title
|
||||
@ -106,6 +136,43 @@ export const getWikipediaPageInternalLinks = async (
|
||||
|
||||
return {
|
||||
title,
|
||||
pageId,
|
||||
links,
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Dictionary of fetched pages: each key is a page title and each value is the
 * internal-links result stored for that title.
 */
export interface WikipediaPagesInternalLinks {
  [pageTitle: string]: GetWikipediaPageInternalLinksOutput
}
|
||||
|
||||
/**
 * Input of the deep internal-links crawl.
 */
export interface GetDeepWikipediaPageInternalLinksInput {
  /** Wikipedia language edition used for every request of the crawl. */
  locale: WikipediaLocale

  /** Pages already fetched, keyed by title; the crawl starts from their links. */
  data: WikipediaPagesInternalLinks
}
|
||||
|
||||
/**
 * Recursively completes `input.data` with the internal links of every page
 * reachable from the pages it already contains.
 *
 * The crawl proceeds level by level: first the links of the pages already in
 * `input.data`, then the links of those newly fetched pages, and so on. Every
 * title is requested at most once (a title is marked as seen before its
 * request starts, so concurrent branches cannot fetch it twice), and at most
 * `options.concurrency` requests are in flight at any time.
 *
 * `input.data` is mutated in place and is also the returned object. Each
 * fetched page is stored under the link title that was requested.
 *
 * @param input Locale to query and the pages to start from.
 * @param options Optional crawl limits.
 * @param options.concurrency Maximum number of simultaneous requests
 * (values below 1 are treated as 1). Defaults to 10.
 * @param options.maxDepth Maximum number of link levels to fetch beyond the
 * pages already present in `input.data`. Defaults to no limit.
 * @returns `input.data`, completed with every page fetched by the crawl.
 * @throws Rethrows the first rejection of `getWikipediaPageInternalLinks`.
 */
export const getDeepWikipediaPageInternalLinks = async (
  input: GetDeepWikipediaPageInternalLinksInput,
  options: { concurrency?: number; maxDepth?: number } = {},
): Promise<WikipediaPagesInternalLinks> => {
  const { data, locale } = input
  const concurrency = Math.max(1, Math.floor(options.concurrency ?? 10))
  const maxDepth = options.maxDepth ?? Number.POSITIVE_INFINITY

  // Titles that are already stored or already scheduled for fetching.
  const seenTitles = new Set<string>(Object.keys(data))

  // Appends to `target` the titles never seen before, marking them as seen.
  const scheduleUnseen = (titles: readonly string[], target: string[]): void => {
    for (const title of titles) {
      if (!seenTitles.has(title)) {
        seenTitles.add(title)
        target.push(title)
      }
    }
  }

  let frontier: string[] = []
  for (const page of Object.values(data)) {
    scheduleUnseen(page.links, frontier)
  }

  for (let depth = 0; frontier.length > 0 && depth < maxDepth; depth++) {
    const currentTitles = frontier
    const nextFrontier: string[] = []
    let nextIndex = 0

    // Each worker pulls the next pending title until the level is exhausted.
    const runWorker = async (): Promise<void> => {
      while (nextIndex < currentTitles.length) {
        const title = currentTitles[nextIndex++]
        if (title === undefined) {
          continue
        }
        const page = await getWikipediaPageInternalLinks({ locale, title })
        data[title] = page
        scheduleUnseen(page.links, nextFrontier)
      }
    }

    const workersCount = Math.min(concurrency, currentTitles.length)
    await Promise.all(
      Array.from({ length: workersCount }, async () => {
        await runWorker()
      }),
    )
    frontier = nextFrontier
  }

  return data
}
|
||||
|
Reference in New Issue
Block a user