From ccd44c10fac0fc719916dbbcead9879013488619 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20LUDWIG?= Date: Sun, 28 Jul 2024 17:57:41 +0200 Subject: [PATCH] feat: cache.json for wikipedia links --- .gitignore | 1 + TODO.md | 2 + apps/cli/src/index.ts | 72 ++++++++++++----- data/README.md | 17 ++++ packages/wikipedia-game-solver/package.json | 1 + .../src/WikipediaClient.tsx | 5 +- .../src/wikipedia-api.ts | 80 +++++++++++-------- pnpm-lock.yaml | 12 +++ pnpm-workspace.yaml | 1 + 9 files changed, 137 insertions(+), 54 deletions(-) create mode 100644 data/README.md diff --git a/.gitignore b/.gitignore index b8cc858..b5875ab 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,7 @@ build/ *.pem .turbo bin/ +cache.json # debug npm-debug.log* diff --git a/TODO.md b/TODO.md index c603f41..9e36e98 100644 --- a/TODO.md +++ b/TODO.md @@ -11,6 +11,8 @@ ## Links +- +- - - How to get all URLs in a Wikipedia page: - diff --git a/apps/cli/src/index.ts b/apps/cli/src/index.ts index cbc01cf..f500604 100755 --- a/apps/cli/src/index.ts +++ b/apps/cli/src/index.ts @@ -1,26 +1,62 @@ #!/usr/bin/env -S node --import=tsx +import type { WikipediaPagesInternalLinks } from "@repo/wikipedia-game-solver/wikipedia-api" import { getWikipediaPageInternalLinks } from "@repo/wikipedia-game-solver/wikipedia-api" +import fs from "node:fs" +import path from "node:path" const localeWikipedia = "en" +const cachePath = path.join(process.cwd(), "cache.json") -const fromPageInput = "Linux" -const toPageInput = "Node.js" -console.log({ - fromPageInput, - toPageInput, -}) -const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([ - getWikipediaPageInternalLinks({ - title: fromPageInput, +const fromPageInput = "New York City" +// const fromPageInput = "Linux" +// const toPageInput = "Node.js" +// console.log({ +// fromPageInput, +// toPageInput, +// }) +// const [fromPageWikipediaLinks, toPageWikipediaLinks] = await Promise.all([ +// getWikipediaPageInternalLinks({ +// title: fromPageInput, +// locale: localeWikipedia, +// }), +// getWikipediaPageInternalLinks({ +// title: toPageInput, +// locale: localeWikipedia, +// }), +// ]) +// console.log({ +// fromPageWikipediaLinks, +// toPageWikipediaLinks, +// }) +// const data = { +// [fromPageWikipediaLinks.title]: fromPageWikipediaLinks, +// [toPageWikipediaLinks.title]: toPageWikipediaLinks, +// } + +const data = JSON.parse( + await fs.promises.readFile(cachePath, { encoding: "utf-8" }), +) as WikipediaPagesInternalLinks + +// let maxLinks = { max: 0, title: "" } +// for (const [title, page] of Object.entries(data)) { +// if (page.links.length > maxLinks.max) { +// maxLinks = { max: page.links.length, title } +// } +// } +// console.log(maxLinks) + +const pageLinks = (data[fromPageInput]?.links ?? []).slice(0, 1100) +for (const pageLink of pageLinks) { + if (pageLink in data) { + continue + } + console.log("Fetching", pageLink) + data[pageLink] = await getWikipediaPageInternalLinks({ + title: pageLink, locale: localeWikipedia, - }), - getWikipediaPageInternalLinks({ - title: toPageInput, - locale: localeWikipedia, - }), -]) -console.log({ - fromPageWikipediaLinks, - toPageWikipediaLinks, + }) +} +await fs.promises.writeFile(cachePath, JSON.stringify(data, null, 2), { + encoding: "utf-8", }) diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..2509542 --- /dev/null +++ b/data/README.md @@ -0,0 +1,17 @@ +# Wikipedia data + +Database layout: + + + + + +## Dumps Links + +- + +- +- +- +- +- diff --git a/packages/wikipedia-game-solver/package.json b/packages/wikipedia-game-solver/package.json index ac1d1ff..4f321a8 100644 --- a/packages/wikipedia-game-solver/package.json +++ b/packages/wikipedia-game-solver/package.json @@ -17,6 +17,7 @@ "@repo/config-tailwind": "workspace:*", "@repo/i18n": "workspace:*", "@repo/ui": "workspace:*", + "ky": "catalog:", "next": "catalog:", "next-intl": "catalog:", "react": "catalog:", diff --git a/packages/wikipedia-game-solver/src/WikipediaClient.tsx b/packages/wikipedia-game-solver/src/WikipediaClient.tsx index 77c931e..f9e3168 100644 --- a/packages/wikipedia-game-solver/src/WikipediaClient.tsx +++ b/packages/wikipedia-game-solver/src/WikipediaClient.tsx @@ -1,10 +1,8 @@ "use client" -import type { Locale } from "@repo/i18n/config" import { Button } from "@repo/ui/design/Button" import { Link } from "@repo/ui/design/Link" import { Typography } from "@repo/ui/design/Typography" -import { useLocale } from "next-intl" import { useState } from "react" import { fromLocaleToWikipediaLocale, @@ -15,8 +13,7 @@ import { export const WikipediaClient: React.FC = () => { const [isLoading, setIsLoading] = useState(false) - const localeCurrent = useLocale() as Locale - const localeWikipedia = fromLocaleToWikipediaLocale(localeCurrent) + const localeWikipedia = fromLocaleToWikipediaLocale("en-US") const handleClick: React.MouseEventHandler = async () => { setIsLoading(true) diff --git a/packages/wikipedia-game-solver/src/wikipedia-api.ts b/packages/wikipedia-game-solver/src/wikipedia-api.ts index 8d2b14b..2a96672 100644 --- a/packages/wikipedia-game-solver/src/wikipedia-api.ts +++ b/packages/wikipedia-game-solver/src/wikipedia-api.ts @@ -1,4 +1,5 @@ import type { Locale } from "@repo/i18n/config" +import ky from "ky" export const sum = (a: number, b: number): number => { return a + b @@ -99,14 +100,11 @@ export const getWikipediaPageInternalLinks = async ( if (plcontinue != null) { url.searchParams.set("plcontinue", plcontinue) } - const response = await fetch(url, { - method: "GET", - }) - if (!response.ok) { - throw new Error(response.statusText) - } - const json = (await response.json()) as WikipediaQueryLinksResponse - return json + return await ky + .get(url, { + method: "GET", + }) + .json() } do { @@ -129,8 +127,10 @@ export const getWikipediaPageInternalLinks = async ( return link.title }), ) - } catch { - break + } catch (error) { + console.error("Error", error) + console.error("title", title) + throw error } } while (plcontinue != null) @@ -152,27 +152,43 @@ export interface GetDeepWikipediaPageInternalLinksInput { export const getDeepWikipediaPageInternalLinks = async ( input: GetDeepWikipediaPageInternalLinksInput, -): Promise => { +): Promise => { const pagesTitles = Object.keys(input.data) - await Promise.all( - pagesTitles.map(async (pageTitle) => { - const links = input.data[pageTitle]?.links ?? [] - await Promise.all( - links.map(async (pageTitleLink) => { - if (pageTitleLink in input.data) { - return - } - input.data[pageTitleLink] = await getWikipediaPageInternalLinks({ - locale: input.locale, - title: pageTitleLink, - }) - await getDeepWikipediaPageInternalLinks({ - locale: input.locale, - data: input.data, - }) - }), - ) - }), - ) - return input.data + for (const pageTitle of pagesTitles) { + const links = input.data[pageTitle]?.links ?? [] + for (const pageTitleLink of links) { + if (pageTitleLink in input.data) { + continue + } + input.data[pageTitleLink] = await getWikipediaPageInternalLinks({ + locale: input.locale, + title: pageTitleLink, + }) + // await getDeepWikipediaPageInternalLinks({ + // locale: input.locale, + // data: input.data, + // }) + } + } + + // await Promise.all( + // pagesTitles.map(async (pageTitle) => { + // const links = input.data[pageTitle]?.links ?? [] + // await Promise.all( + // links.map(async (pageTitleLink) => { + // if (pageTitleLink in input.data) { + // return + // } + // input.data[pageTitleLink] = await getWikipediaPageInternalLinks({ + // locale: input.locale, + // title: pageTitleLink, + // }) + // await getDeepWikipediaPageInternalLinks({ + // locale: input.locale, + // data: input.data, + // }) + // }), + // ) + // }), + // ) } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 63a1023..1c339f9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -111,6 +111,9 @@ catalogs: http-server: specifier: 14.1.1 version: 14.1.1 + ky: + specifier: 1.5.0 + version: 1.5.0 next: specifier: 14.2.5 version: 14.2.5 @@ -626,6 +629,9 @@ importers: '@repo/ui': specifier: workspace:* version: link:../ui + ky: + specifier: 'catalog:' + version: 1.5.0 next: specifier: 'catalog:' version: 14.2.5(@babel/core@7.24.9)(@playwright/test@1.45.3)(react-dom@18.3.1(react@18.3.1))(react@18.3.1) @@ -5448,6 +5454,10 @@ packages: resolution: {integrity: sha512-dhG34DXATL5hSxJbIexCft8FChFXtmskoZYnoPWjXQuebWYCNkVeV3KkGegCK9CP1oswI/vQibS2GY7Em/sJJA==} engines: {node: '>= 8'} + ky@1.5.0: + resolution: {integrity: sha512-bkQo+UqryW6Zmo/DsixYZE4Z9t2mzvNMhceyIhuMuInb3knm5Q+GNGMKveydJAj+Z6piN1SwI6eR/V0G+Z0BtA==} + engines: {node: '>=18'} + language-subtag-registry@0.3.23: resolution: {integrity: sha512-0K65Lea881pHotoGEa5gDlMxt3pctLi2RplBb7Ezh4rRdLEOtgi7n4EwK9lamnUCkKBqaeKRVebTq6BAxSkpXQ==} @@ -14005,6 +14015,8 @@ snapshots: klona@2.0.6: {} + ky@1.5.0: {} + language-subtag-registry@0.3.23: {} language-tags@1.0.9: diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 4999c52..3592a29 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -5,6 +5,7 @@ packages: catalog: # Utils "deepmerge": "4.3.1" + "ky": "1.5.0" # React.js/Next.js "next": "14.2.5"