From f0b22f6a063a9dafa52177aca8309f8b9b8a70d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20LUDWIG?= Date: Fri, 23 Aug 2024 23:21:42 +0200 Subject: [PATCH] feat(api): work in progress GET /wikipedia/shortest-paths?fromPageId=id&toPageId=id --- TODO.md | 2 +- apps/api/.env.example | 1 + .../shortest-paths/get_shortest_paths.ts | 226 ++++++++++++++++++ apps/api/config/database.ts | 2 +- apps/api/package.json | 1 + apps/api/shortest-paths-tests.ts | 170 +++++++++++++ apps/api/start/env.ts | 1 + data/.eslintrc.json | 14 +- 8 files changed, 414 insertions(+), 3 deletions(-) create mode 100644 apps/api/app/controllers/wikipedia/shortest-paths/get_shortest_paths.ts create mode 100644 apps/api/shortest-paths-tests.ts diff --git a/TODO.md b/TODO.md index 5bdd6a9..918af4a 100644 --- a/TODO.md +++ b/TODO.md @@ -33,7 +33,7 @@ - [x] Create Lucid models and migrations for Wikipedia Database Dump: `pages` and `internal_links` tables - [x] Implement `GET /wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists) - [x] Implement `GET /wikipedia/pages/[id]` to get a page and all its internal links with the pageId - - [ ] Implement `GET /wikipedia/internal-links/paths?fromPageId=id&toPageId=id` to get all the possible paths between 2 pages + - [ ] Implement `GET /wikipedia/shortest-paths?fromPageId=id&toPageId=id` to get all the possible paths between 2 pages (e.g: `Node.js` `26415635` => `Linux` `6097297`) - [x] Setup tests with database + add coverage - [x] Setup Health checks - [x] Setup Rate limiting diff --git a/apps/api/.env.example b/apps/api/.env.example index 903bffc..8e9d2fc 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -11,5 +11,6 @@ DATABASE_PASSWORD=password DATABASE_NAME=wikipedia DATABASE_HOST=127.0.0.1 DATABASE_PORT=5432 +DATABASE_DEBUG=false LIMITER_STORE=database diff --git a/apps/api/app/controllers/wikipedia/shortest-paths/get_shortest_paths.ts 
b/apps/api/app/controllers/wikipedia/shortest-paths/get_shortest_paths.ts
new file mode 100644
index 0000000..22cd214
--- /dev/null
+++ b/apps/api/app/controllers/wikipedia/shortest-paths/get_shortest_paths.ts
@@ -0,0 +1,168 @@
+import type { ExceptionMessage } from "#app/exceptions/handler.ts"
+import Page, { type PageRaw } from "#app/models/page.ts"
+import { throttle } from "#start/limiter.ts"
+import type { HttpContext } from "@adonisjs/core/http"
+import router from "@adonisjs/core/services/router"
+import vine from "@vinejs/vine"
+
+/**
+ * Validates the query string of `GET /wikipedia/shortest-paths`: both page
+ * ids must be positive integers.
+ */
+export const get_shortest_paths_validator = vine.compile(
+  vine.object({
+    fromPageId: vine.number().withoutDecimals().positive(),
+    toPageId: vine.number().withoutDecimals().positive(),
+  }),
+)
+
+interface get_shortest_paths_response {
+  /**
+   * Object to get page information by their id.
+   * - Key: Page id.
+   * - Value: Page information.
+   */
+  pages: Record<number, PageRaw>
+
+  /**
+   * Shortest path between the two pages using only internal links: an
+   * ordered array of page ids, from the start page to the end page.
+   * Empty when both ids are the same page or when no path exists.
+   */
+  paths: number[]
+}
+
+/**
+ * Builds the `pages` lookup of the response (page id => page information).
+ */
+const toPagesRecord = (pages: Page[]): Record<number, PageRaw> => {
+  const record: Record<number, PageRaw> = {}
+  for (const page of pages) {
+    record[page.id] = { id: page.id, title: page.title }
+  }
+  return record
+}
+
+/**
+ * Finds one shortest path between two pages with a breadth-first search
+ * that starts at `fromPage` and follows internal links in their real
+ * direction (page => linked page).
+ *
+ * The search is one-directional on purpose: growing a second frontier from
+ * `toPage` through its own outgoing links walks those links backwards and
+ * can report a path that does not exist.
+ *
+ * @param fromPage Page the path starts from.
+ * @param toPage Page the path must reach.
+ * @returns The pages on the path, ordered from `fromPage` to `toPage`;
+ * `[fromPage]` when both are the same page; `[]` when `toPage` is unreachable.
+ */
+const getPathsBetweenPages = async (
+  fromPage: Page,
+  toPage: Page,
+): Promise<Page[]> => {
+  if (fromPage.id === toPage.id) {
+    return [fromPage]
+  }
+
+  // Every discovered page, with the id of the page it was discovered from
+  // (`null` for the start page). Doubles as the "visited" set.
+  const discovered = new Map<
+    number,
+    { page: Page; parentId: number | null }
+  >([[fromPage.id, { page: fromPage, parentId: null }]])
+
+  // Pages are expanded one depth level at a time, so the first time `toPage`
+  // shows up, the path that led to it is a shortest one.
+  let frontier: Page[] = [fromPage]
+  while (frontier.length > 0) {
+    const nextFrontier: Page[] = []
+
+    for (const currentPage of frontier) {
+      await currentPage.load("internalLinks")
+
+      for (const link of currentPage.internalLinks) {
+        if (discovered.has(link.id)) {
+          continue
+        }
+        discovered.set(link.id, { page: link, parentId: currentPage.id })
+
+        if (link.id === toPage.id) {
+          // Walk the parent chain back to the start page, then flip it.
+          const path: Page[] = []
+          let step = discovered.get(link.id)
+          while (step !== undefined) {
+            path.push(step.page)
+            step =
+              step.parentId === null
+                ? undefined
+                : discovered.get(step.parentId)
+          }
+          return path.reverse()
+        }
+
+        nextFrontier.push(link)
+      }
+    }
+
+    frontier = nextFrontier
+  }
+
+  return []
+}
+
+export default class get_shortest_paths {
+  /**
+   * Handles `GET /wikipedia/shortest-paths?fromPageId=id&toPageId=id`.
+   *
+   * Responds with one shortest path between the two pages and, in `pages`,
+   * the information of every page that appears on it.
+   */
+  public async handle(context: HttpContext): Promise<
+    | {
+        __response: ExceptionMessage
+        __status: 404
+      }
+    | {
+        __response: ExceptionMessage
+        __status: 500
+      }
+    | {
+        __response: get_shortest_paths_response
+        __status: 200
+      }
+  > {
+    const payload = await context.request.validateUsing(
+      get_shortest_paths_validator,
+    )
+
+    const fromPage = await Page.findOrFail(payload.fromPageId)
+    const toPage = await Page.findOrFail(payload.toPageId)
+
+    // Same page on both ends: answered without searching, with an empty path.
+    if (fromPage.id === toPage.id) {
+      return context.response.ok({
+        pages: toPagesRecord([fromPage]),
+        paths: [],
+      })
+    }
+
+    const pagesOnPath = await getPathsBetweenPages(fromPage, toPage)
+
+    return context.response.ok({
+      pages: toPagesRecord([fromPage, toPage, ...pagesOnPath]),
+      paths: pagesOnPath.map((page) => {
+        return page.id
+      }),
+    })
+  }
+}
+
+router
+  .get("/wikipedia/shortest-paths", [get_shortest_paths])
+  .use(throttle)
+  .openapi({
+    description:
+      "Find the shortest paths between two Wikipedia pages, using only internal links.",
+    tags: ["wikipedia"],
+  })
diff --git a/apps/api/config/database.ts b/apps/api/config/database.ts
index e35049c..aeecb36 100644
--- a/apps/api/config/database.ts
+++ b/apps/api/config/database.ts
@@ -7,7 +7,7 @@ const databaseConfig = defineConfig({
   connection: app.inTest ? "sqlite" : "postgres",
   connections: {
     postgres: {
-      debug: app.inDev,
+      debug: env.get("DATABASE_DEBUG"),
       client: "pg",
       connection: {
         host: env.get("DATABASE_HOST"),
diff --git a/apps/api/package.json b/apps/api/package.json
index 1f3699a..0718ffe 100644
--- a/apps/api/package.json
+++ b/apps/api/package.json
@@ -16,6 +16,7 @@
     "tuyau": "node --run ace -- tuyau:generate && node --run ace -- tuyau:generate:openapi --destination=\".adonisjs/openapi.yaml\"",
     "build": "node --run tuyau",
     "test": "c8 node --import=tsx ./bin/test.ts",
+    "test-shortest-paths": "node --import=tsx ./shortest-paths-tests.ts",
     "lint:eslint": "eslint . 
--max-warnings 0 --report-unused-disable-directives",
     "lint:typescript": "tsc --noEmit"
   },
diff --git a/apps/api/shortest-paths-tests.ts b/apps/api/shortest-paths-tests.ts
new file mode 100644
index 0000000..7d2933f
--- /dev/null
+++ b/apps/api/shortest-paths-tests.ts
@@ -0,0 +1,191 @@
+import { type PageWithInternalLinksRaw } from "#app/models/page.ts"
+
+const DATA: { [key: number]: PageWithInternalLinksRaw } = {
+  0: {
+    id: 0,
+    title: "Page 0",
+    internalLinks: [
+      {
+        id: 1,
+        title: "Page 1",
+      },
+      {
+        id: 4,
+        title: "Page 4",
+      },
+    ],
+  },
+  1: {
+    id: 1,
+    title: "Page 1",
+    internalLinks: [
+      {
+        id: 2,
+        title: "Page 2",
+      },
+    ],
+  },
+  2: {
+    id: 2,
+    title: "Page 2",
+    internalLinks: [
+      {
+        id: 1,
+        title: "Page 1",
+      },
+      {
+        id: 3,
+        title: "Page 3",
+      },
+      {
+        id: 4,
+        title: "Page 4",
+      },
+    ],
+  },
+  3: {
+    id: 3,
+    title: "Page 3",
+    internalLinks: [],
+  },
+  4: {
+    id: 4,
+    title: "Page 4",
+    internalLinks: [
+      {
+        id: 2,
+        title: "Page 2",
+      },
+      {
+        id: 3,
+        title: "Page 3",
+      },
+    ],
+  },
+}
+
+/**
+ * Looks a page up in `DATA`.
+ *
+ * @throws Error when no page has the given id.
+ */
+const getPageFromData = async (
+  id: number,
+): Promise<PageWithInternalLinksRaw> => {
+  const page = DATA[id]
+  if (page === undefined) {
+    throw new Error(`Unknown page id: ${id}`)
+  }
+  return page
+}
+
+// Stack ("pile" in French): LIFO, `.pop()`
+// Queue ("file" in French): FIFO, `.shift()`
+// Depth-first traversal, or DFS (Depth-First Search)
+// All the possible paths from 0 to 3:
+// [[0, 1, 2, 3], [0, 1, 2, 4, 3], [0, 4, 2, 3], [0, 4, 3]]
+
+/**
+ * Collects every simple path (no page visited twice) from `fromPage` to
+ * `toPage` with a depth-first search.
+ *
+ * @param fromPage Page the paths start from.
+ * @param toPage Page the paths must reach.
+ * @param getPageById Loads a page, with its internal links, by id.
+ * @returns Every path found, each one an array of page ids.
+ */
+const getPathsBetweenPages = async (
+  fromPage: PageWithInternalLinksRaw,
+  toPage: PageWithInternalLinksRaw,
+  getPageById: (id: number) => Promise<PageWithInternalLinksRaw>,
+): Promise<number[][]> => {
+  const paths: number[][] = []
+
+  const depthFirstSearch = async (
+    currentPage: PageWithInternalLinksRaw,
+    currentPath: number[],
+  ): Promise<void> => {
+    currentPath.push(currentPage.id)
+    if (currentPage.id === toPage.id) {
+      // `currentPath` keeps being mutated while backtracking: store a copy.
+      paths.push([...currentPath])
+    } else {
+      for (const link of currentPage.internalLinks) {
+        const isAlreadyVisited = currentPath.includes(link.id)
+        if (!isAlreadyVisited) {
+          await depthFirstSearch(await getPageById(link.id), currentPath)
+        }
+      }
+    }
+    currentPath.pop()
+  }
+
+  await depthFirstSearch(fromPage, [])
+  return paths
+}
+
+/**
+ * Collects every shortest simple path from `fromPage` to `toPage` with a
+ * breadth-first search that advances one depth level at a time.
+ *
+ * All the paths of a level have the same length, so the first level that
+ * reaches `toPage` holds exactly the shortest paths and the search stops
+ * there instead of enumerating the longer ones.
+ *
+ * @param fromPage Page the paths start from.
+ * @param toPage Page the paths must reach.
+ * @param getPageById Loads a page, with its internal links, by id.
+ * @returns The shortest paths, each one an array of page ids; `[]` when
+ * `toPage` can't be reached.
+ */
+const getShortestPathsBetweenPages = async (
+  fromPage: PageWithInternalLinksRaw,
+  toPage: PageWithInternalLinksRaw,
+  getPageById: (id: number) => Promise<PageWithInternalLinksRaw>,
+): Promise<number[][]> => {
+  let level: Array<{ page: PageWithInternalLinksRaw; path: number[] }> = [
+    { page: fromPage, path: [fromPage.id] },
+  ]
+
+  while (level.length > 0) {
+    const arrivals = level.filter(({ page }) => {
+      return page.id === toPage.id
+    })
+    if (arrivals.length > 0) {
+      return arrivals.map(({ path }) => {
+        return path
+      })
+    }
+
+    const nextLevel: typeof level = []
+    for (const { page, path } of level) {
+      for (const link of page.internalLinks) {
+        if (!path.includes(link.id)) {
+          nextLevel.push({
+            page: await getPageById(link.id),
+            path: [...path, link.id],
+          })
+        }
+      }
+    }
+    level = nextLevel
+  }
+
+  return []
+}
+
+console.log(
+  await getPathsBetweenPages(
+    await getPageFromData(0),
+    await getPageFromData(3),
+    getPageFromData,
+  ),
+)
+
+console.log(
+  await getShortestPathsBetweenPages(
+    await getPageFromData(0),
+    await getPageFromData(3),
+    getPageFromData,
+  ),
+)
diff --git a/apps/api/start/env.ts b/apps/api/start/env.ts
index 0e424b4..0574a9e 100644
--- a/apps/api/start/env.ts
+++ b/apps/api/start/env.ts
@@ -28,6 +28,7 @@ export default await Env.create(new URL("..", import.meta.url), {
   DATABASE_USER: Env.schema.string(),
   DATABASE_PASSWORD: Env.schema.string(),
   DATABASE_NAME: Env.schema.string(),
+ 
DATABASE_DEBUG: Env.schema.boolean(), /** * Variables for configuring the limiter package. diff --git a/data/.eslintrc.json b/data/.eslintrc.json index 42c084e..3405063 100644 --- a/data/.eslintrc.json +++ b/data/.eslintrc.json @@ -1,4 +1,16 @@ { "root": true, - "extends": ["@repo/eslint-config"] + "extends": ["@repo/eslint-config"], + "rules": { + "import-x/extensions": [ + "error", + "ignorePackages", + { + "ts": "never", + "tsx": "never", + "js": "always", + "jsx": "never" + } + ] + } }