feat(api): work in progress GET /wikipedia/shortest-paths?fromPageId=id&toPageId=id

This commit is contained in:
Théo LUDWIG 2024-08-23 23:21:42 +02:00
parent 4e707008f8
commit f0b22f6a06
Signed by: theoludwig
GPG Key ID: ADFE5A563D718F3B
8 changed files with 414 additions and 3 deletions

View File

@ -33,7 +33,7 @@
- [x] Create Lucid models and migrations for Wikipedia Database Dump: `pages` and `internal_links` tables - [x] Create Lucid models and migrations for Wikipedia Database Dump: `pages` and `internal_links` tables
- [x] Implement `GET /wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists) - [x] Implement `GET /wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists)
- [x] Implement `GET /wikipedia/pages/[id]` to get a page and all its internal links with the pageId - [x] Implement `GET /wikipedia/pages/[id]` to get a page and all its internal links with the pageId
- [ ] Implement `GET /wikipedia/internal-links/paths?fromPageId=id&toPageId=id` to get all the possible paths between 2 pages - [ ] Implement `GET /wikipedia/shortest-paths?fromPageId=id&toPageId=id` to get all the possible paths between 2 pages (e.g: `Node.js` `26415635` => `Linux` `6097297`)
- [x] Setup tests with database + add coverage - [x] Setup tests with database + add coverage
- [x] Setup Health checks - [x] Setup Health checks
- [x] Setup Rate limiting - [x] Setup Rate limiting

View File

@ -11,5 +11,6 @@ DATABASE_PASSWORD=password
DATABASE_NAME=wikipedia DATABASE_NAME=wikipedia
DATABASE_HOST=127.0.0.1 DATABASE_HOST=127.0.0.1
DATABASE_PORT=5432 DATABASE_PORT=5432
DATABASE_DEBUG=false
LIMITER_STORE=database LIMITER_STORE=database

View File

@ -0,0 +1,226 @@
import type { ExceptionMessage } from "#app/exceptions/handler.ts"
import Page, { type PageRaw } from "#app/models/page.ts"
import { throttle } from "#start/limiter.ts"
import type { HttpContext } from "@adonisjs/core/http"
import router from "@adonisjs/core/services/router"
import vine from "@vinejs/vine"
/**
 * Builds the rule shared by both page id parameters:
 * a number, without decimals, that is positive.
 */
const pageIdRule = () => {
  return vine.number().withoutDecimals().positive()
}

/**
 * Compiled validator for the input of `GET /wikipedia/shortest-paths`:
 * an object holding `fromPageId` and `toPageId`.
 */
export const get_shortest_paths_validator = vine.compile(
  vine.object({
    fromPageId: pageIdRule(),
    toPageId: pageIdRule(),
  }),
)
/**
 * Body returned with the `200` status by the `get_shortest_paths` handler.
 */
interface get_shortest_paths_response {
/**
 * Object to get page information by their id.
 * - Key: Page id.
 * - Value: Page information.
 */
pages: Record<number, PageRaw>
/**
 * Path between the two pages using only internal links.
 *
 * NOTE: despite the plural name, the type is a flat `number[]`:
 * it holds one path, expressed as a sequence of page ids,
 * not a list of paths.
 */
paths: number[]
}
/**
 * Finds a shortest path from `fromPage` to `toPage`, following internal links
 * only in the direction they actually go (from a page to the pages it links to).
 *
 * Implemented as a breadth-first search starting at `fromPage`. Links are
 * directed, so the search must not be expanded from `toPage` through its own
 * outgoing links: that would produce sequences that can't be followed from
 * `fromPage` to `toPage`. (A two-sided search would need a lookup of the pages
 * linking *to* a given page.)
 *
 * @param fromPage Page the path starts from.
 * @param toPage Page the path must end on.
 * @returns The page ids of a shortest path, ordered from `fromPage.id` to
 * `toPage.id` (both included); `[fromPage.id]` when both pages are the same;
 * `[]` when `toPage` can't be reached.
 */
const getPathsBetweenPages = async (
  fromPage: Page,
  toPage: Page,
): Promise<number[]> => {
  if (fromPage.id === toPage.id) {
    return [fromPage.id]
  }

  // For each discovered page id, the id of the page it was first reached from.
  // The start page never gets an entry, which is what ends the walk back.
  const previousPageId = new Map<number, number>()
  const visited = new Set<number>([fromPage.id])

  // Walks the predecessor links back to the start page, then puts the ids
  // in start-to-end order.
  const buildPathTo = (lastPageId: number): number[] => {
    const reversedPath = [lastPageId]
    let currentId = previousPageId.get(lastPageId)
    while (currentId !== undefined) {
      reversedPath.push(currentId)
      currentId = previousPageId.get(currentId)
    }
    return reversedPath.reverse()
  }

  // Pages are expanded one depth level at a time, so the first time the
  // target is discovered, it is through a path of minimal length.
  let frontier: Page[] = [fromPage]
  while (frontier.length > 0) {
    const nextFrontier: Page[] = []
    for (const currentPage of frontier) {
      await currentPage.load("internalLinks")
      for (const link of currentPage.internalLinks) {
        if (visited.has(link.id)) {
          continue
        }
        visited.add(link.id)
        previousPageId.set(link.id, currentPage.id)
        if (link.id === toPage.id) {
          return buildPathTo(link.id)
        }
        nextFrontier.push(link)
      }
    }
    frontier = nextFrontier
  }

  return []
}
// const getPathsBetweenPages = async (
// fromPage: Page,
// toPage: Page,
// ): Promise<number[][]> => {
// const paths: number[][] = []
// const depthFirstSearch = async (
// currentPage: Page,
// currentPath: number[],
// ): Promise<void> => {
// currentPath.push(currentPage.id)
// if (currentPage.id === toPage.id) {
// paths.push([...currentPath])
// } else {
// for (const link of currentPage.internalLinks) {
// const isAlreadyVisited = currentPath.includes(link.id)
// if (!isAlreadyVisited) {
// await link.load("internalLinks")
// await depthFirstSearch(link, currentPath)
// }
// }
// }
// currentPath.pop()
// }
// await depthFirstSearch(fromPage, [])
// return paths
// }
/**
 * Controller for `GET /wikipedia/shortest-paths?fromPageId=id&toPageId=id`.
 */
export default class get_shortest_paths {
  /**
   * Validates the request with `get_shortest_paths_validator`, loads both
   * pages and answers with a path between them made of internal links.
   *
   * - Same page on both ends: the path is `[fromPage.id]`, consistent with
   *   what `getPathsBetweenPages` returns for that case.
   * - `toPage` directly linked from `fromPage`: the path is
   *   `[fromPage.id, toPage.id]`, without running the search.
   * - Otherwise the path comes from `getPathsBetweenPages`
   *   (empty when no path is found).
   *
   * `pages` contains an entry for `fromPage`, `toPage`, and every other page
   * id present in the returned path, so each id in `paths` can be resolved.
   *
   * NOTE(review): `Page.findOrFail` throws when the id doesn't exist;
   * presumably the exception handler turns that into the declared `404`
   * variant. Confirm. The `500` variant is declared but no code path below
   * produces it yet.
   *
   * @param context HTTP context of the current request.
   */
  public async handle(context: HttpContext): Promise<
    | {
        __response: ExceptionMessage
        __status: 404
      }
    | {
        __response: ExceptionMessage
        __status: 500
      }
    | {
        __response: get_shortest_paths_response
        __status: 200
      }
  > {
    const payload = await context.request.validateUsing(
      get_shortest_paths_validator,
    )
    const fromPage = await Page.findOrFail(payload.fromPageId)
    const toPage = await Page.findOrFail(payload.toPageId)

    // Single place where a page is reduced to what the response exposes.
    const toPageInformation = (page: Page) => {
      return { id: page.id, title: page.title }
    }

    const pages = {
      [fromPage.id]: toPageInformation(fromPage),
      [toPage.id]: toPageInformation(toPage),
    }

    const isDepth0 = fromPage.id === toPage.id
    if (isDepth0) {
      return context.response.ok({
        pages,
        paths: [fromPage.id],
      })
    }

    // Only the links of the start page are needed here;
    // `getPathsBetweenPages` loads the links of the pages it expands itself.
    await fromPage.load("internalLinks")
    const isDepth1 = fromPage.internalLinks.some((internalLink) => {
      return internalLink.id === toPage.id
    })
    if (isDepth1) {
      return context.response.ok({
        pages,
        paths: [fromPage.id, toPage.id],
      })
    }

    const paths = await getPathsBetweenPages(fromPage, toPage)

    // Add the information of the pages crossed between both ends.
    const missingPageIds = paths.filter((pageId) => {
      return !(pageId in pages)
    })
    const missingPages = await Promise.all(
      missingPageIds.map(async (pageId) => {
        return await Page.findOrFail(pageId)
      }),
    )
    for (const page of missingPages) {
      pages[page.id] = toPageInformation(page)
    }

    return context.response.ok({
      pages,
      paths,
    })
  }
}
// Registers `GET /wikipedia/shortest-paths`, handled by `get_shortest_paths`,
// with the `throttle` middleware applied, then attaches its OpenAPI metadata.
const shortestPathsRoute = router
  .get("/wikipedia/shortest-paths", [get_shortest_paths])
  .use(throttle)

shortestPathsRoute.openapi({
  description:
    "Find the shortest paths between two Wikipedia pages, using only internal links.",
  tags: ["wikipedia"],
})

View File

@ -7,7 +7,7 @@ const databaseConfig = defineConfig({
connection: app.inTest ? "sqlite" : "postgres", connection: app.inTest ? "sqlite" : "postgres",
connections: { connections: {
postgres: { postgres: {
debug: app.inDev, debug: env.get("DATABASE_DEBUG"),
client: "pg", client: "pg",
connection: { connection: {
host: env.get("DATABASE_HOST"), host: env.get("DATABASE_HOST"),

View File

@ -16,6 +16,7 @@
"tuyau": "node --run ace -- tuyau:generate && node --run ace -- tuyau:generate:openapi --destination=\".adonisjs/openapi.yaml\"", "tuyau": "node --run ace -- tuyau:generate && node --run ace -- tuyau:generate:openapi --destination=\".adonisjs/openapi.yaml\"",
"build": "node --run tuyau", "build": "node --run tuyau",
"test": "c8 node --import=tsx ./bin/test.ts", "test": "c8 node --import=tsx ./bin/test.ts",
"test-shortest-paths": "node --import=tsx ./shortest-paths-tests.ts",
"lint:eslint": "eslint . --max-warnings 0 --report-unused-disable-directives", "lint:eslint": "eslint . --max-warnings 0 --report-unused-disable-directives",
"lint:typescript": "tsc --noEmit" "lint:typescript": "tsc --noEmit"
}, },

View File

@ -0,0 +1,170 @@
import { type PageWithInternalLinksRaw } from "#app/models/page.ts"
/**
 * Builds the `{ id, title }` entry of a page, titled `Page <id>`.
 */
const makeLink = (id: number) => {
  return { id, title: `Page ${id}` }
}

/**
 * Builds a page of the sample graph together with the pages it links to.
 */
const makePage = (
  id: number,
  linkedPageIds: number[],
): PageWithInternalLinksRaw => {
  return {
    ...makeLink(id),
    internalLinks: linkedPageIds.map((linkedPageId) => {
      return makeLink(linkedPageId)
    }),
  }
}

/**
 * Small in-memory directed graph of pages, indexed by page id:
 * 0 -> 1, 4 · 1 -> 2 · 2 -> 1, 3, 4 · 3 -> (nothing) · 4 -> 2, 3
 */
const DATA: { [key: number]: PageWithInternalLinksRaw } = {
  0: makePage(0, [1, 4]),
  1: makePage(1, [2]),
  2: makePage(2, [1, 3, 4]),
  3: makePage(3, []),
  4: makePage(4, [2, 3]),
}
// Stack: LIFO, consumed with `.pop()`.
// Queue: FIFO, consumed with `.shift()`.
// Depth-first traversal, a.k.a. DFS (Depth-First Search).
// Example: all the possible paths from page 0 to page 3 in DATA are
// [[0, 1, 2, 3], [0, 1, 2, 4, 3], [0, 4, 2, 3], [0, 4, 3]]
// console.log(DATA)
/**
 * Lists every path from `fromPage` to `toPage` that never goes through the
 * same page id twice, using a depth-first traversal of the internal links.
 *
 * @param fromPage Page the paths start from.
 * @param toPage Page the paths must end on.
 * @param getPageById Resolves a page (with its internal links) from its id.
 * @returns The paths as arrays of page ids, in the order they are discovered.
 */
const getPathsBetweenPages = async (
  fromPage: PageWithInternalLinksRaw,
  toPage: PageWithInternalLinksRaw,
  getPageById: (id: number) => Promise<PageWithInternalLinksRaw>,
): Promise<number[][]> => {
  const foundPaths: number[][] = []

  // Each call receives its own trail, so nothing has to be undone on the way back.
  const explore = async (
    page: PageWithInternalLinksRaw,
    trail: readonly number[],
  ): Promise<void> => {
    const extendedTrail = [...trail, page.id]
    if (page.id === toPage.id) {
      foundPaths.push(extendedTrail)
      return
    }
    for (const { id: linkedPageId } of page.internalLinks) {
      // Skip pages already on the current trail to avoid looping forever.
      if (extendedTrail.includes(linkedPageId)) {
        continue
      }
      const linkedPage = await getPageById(linkedPageId)
      await explore(linkedPage, extendedTrail)
    }
  }

  await explore(fromPage, [])
  return foundPaths
}
/**
 * Lists all the shortest paths from `fromPage` to `toPage`, using a
 * breadth-first traversal of the internal links.
 *
 * Candidate paths are grown one link at a time, level by level, so every
 * path in a level has the same length. The first level holding at least one
 * path that ends on `toPage` therefore holds exactly the shortest paths, and
 * the search stops there instead of enumerating every longer path.
 *
 * @param fromPage Page the paths start from.
 * @param toPage Page the paths must end on.
 * @param getPageById Resolves a page (with its internal links) from its id.
 * @returns The shortest paths as arrays of page ids (all of the same length),
 * `[[fromPage.id]]` when both pages are the same, `[]` when `toPage` can't be reached.
 */
const getShortestPathsBetweenPages = async (
  fromPage: PageWithInternalLinksRaw,
  toPage: PageWithInternalLinksRaw,
  getPageById: (id: number) => Promise<PageWithInternalLinksRaw>,
): Promise<number[][]> => {
  let currentLevel: Array<{ page: PageWithInternalLinksRaw; path: number[] }> =
    [{ page: fromPage, path: [fromPage.id] }]

  while (currentLevel.length > 0) {
    const arrivals = currentLevel.filter(({ page }) => {
      return page.id === toPage.id
    })
    if (arrivals.length > 0) {
      // Any path found later would be longer: no need to go further.
      return arrivals.map(({ path }) => {
        return path
      })
    }

    const nextLevel: Array<{ page: PageWithInternalLinksRaw; path: number[] }> =
      []
    for (const { page, path } of currentLevel) {
      for (const link of page.internalLinks) {
        // A path never goes through the same page twice, which also
        // guarantees the search ends on graphs with cycles.
        if (!path.includes(link.id)) {
          nextLevel.push({
            page: await getPageById(link.id),
            path: [...path, link.id],
          })
        }
      }
    }
    currentLevel = nextLevel
  }

  return []
}
// Demo run on the in-memory graph: print every path from page 0 to page 3,
// then only the shortest ones.
const lookupPageInData = async (
  id: number,
): Promise<PageWithInternalLinksRaw> => {
  return DATA[id] as PageWithInternalLinksRaw
}
const startPage = DATA[0] as PageWithInternalLinksRaw
const endPage = DATA[3] as PageWithInternalLinksRaw

const allPaths = await getPathsBetweenPages(
  startPage,
  endPage,
  lookupPageInData,
)
console.log(allPaths)

const shortestPaths = await getShortestPathsBetweenPages(
  startPage,
  endPage,
  lookupPageInData,
)
console.log(shortestPaths)

View File

@ -28,6 +28,7 @@ export default await Env.create(new URL("..", import.meta.url), {
DATABASE_USER: Env.schema.string(), DATABASE_USER: Env.schema.string(),
DATABASE_PASSWORD: Env.schema.string(), DATABASE_PASSWORD: Env.schema.string(),
DATABASE_NAME: Env.schema.string(), DATABASE_NAME: Env.schema.string(),
DATABASE_DEBUG: Env.schema.boolean(),
/** /**
* Variables for configuring the limiter package. * Variables for configuring the limiter package.

View File

@ -1,4 +1,16 @@
{ {
"root": true, "root": true,
"extends": ["@repo/eslint-config"] "extends": ["@repo/eslint-config"],
"rules": {
"import-x/extensions": [
"error",
"ignorePackages",
{
"ts": "never",
"tsx": "never",
"js": "always",
"jsx": "never"
}
]
}
} }