From cdc8cf2b05b4d22a77a940e71115fc90f2f6c027 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20LUDWIG?= Date: Mon, 12 Aug 2024 00:32:43 +0100 Subject: [PATCH] feat(api): implement GET /wikipedia/pages?title=search_title --- .env.example | 2 +- TODO.md | 9 +- apps/api/.env.example | 2 +- apps/api/src/app/routes/index.ts | 3 +- apps/api/src/app/routes/wikipedia/index.ts | 1 + .../api/src/app/routes/wikipedia/pages/get.ts | 36 +++++ apps/api/src/config/database.ts | 3 + apps/api/src/start/kernel.ts | 7 +- packages/utils/src/strings.ts | 57 ++++++++ packages/utils/src/tests/strings.test.ts | 110 +++++++++++++- packages/wikipedia-game-solver/package.json | 4 +- .../src/WikipediaClient.tsx | 4 +- .../src/__tests__/basic.test.ts | 9 -- .../src/__tests__/wikipedia-utils.test.ts | 135 ++++++++++++++++++ .../src/wikipedia-api.ts | 24 +--- .../src/wikipedia-utils.ts | 70 +++++++++ pnpm-lock.yaml | 3 + 17 files changed, 433 insertions(+), 46 deletions(-) create mode 100644 apps/api/src/app/routes/wikipedia/index.ts create mode 100644 apps/api/src/app/routes/wikipedia/pages/get.ts delete mode 100644 packages/wikipedia-game-solver/src/__tests__/basic.test.ts create mode 100644 packages/wikipedia-game-solver/src/__tests__/wikipedia-utils.test.ts create mode 100644 packages/wikipedia-game-solver/src/wikipedia-utils.ts diff --git a/.env.example b/.env.example index e9a73ea..91584cc 100644 --- a/.env.example +++ b/.env.example @@ -5,4 +5,4 @@ DATABASE_USER=wikipedia_user DATABASE_PASSWORD=password DATABASE_NAME=wikipedia DATABASE_HOST=127.0.0.1 -DATABASE_PORT=3306 +DATABASE_PORT=5432 diff --git a/TODO.md b/TODO.md index 23aff22..e5120af 100644 --- a/TODO.md +++ b/TODO.md @@ -31,16 +31,21 @@ - [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/)) to get shortest paths between 2 pages - [x] Init AdonisJS project - [x] Create Lucid models and migrations for Wikipedia Database Dump: `pages` and `internal_links` tables - - [ ] Implement `GET 
/wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists) + - [x] Implement `GET /wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists) - [ ] Implement `GET /wikipedia/pages/internal-links/paths?from=Node.js&to=Linux` to get all the possible paths between 2 pages with titles sanitized + - [ ] Setup tests with database + add coverage + - [ ] Setup HTTP Requests logging in development (not needed in `test` mode) + - [ ] Setup Health checks + - [ ] Setup Rate limiting + - [ ] Share VineJS validators between `website` and `api` - [ ] Implement Wikipedia Game Solver (`website`) - [x] Init Next.js project - [ ] Try to use for API calls - [ ] Hard code 2 pages to test if it works with `console.log` in the browser - [ ] Implement a form with inputs, button to submit, and list all pages to go from one to another, or none if it is not possible - [ ] Add images, links to the pages + good UI/UX - - [ ] Implement toast notifications for errors, warnings, and success messages - [ ] Autocompletion page titles + - [ ] Implement toast notifications for errors, warnings, and success messages - [ ] Implement CLI (`cli`) - [ ] Init Clipanion project - [ ] Implement `wikipedia-game-solver internal-links --from="Node.js" --to="Linux"` command to get all the possible paths between 2 pages. 
diff --git a/apps/api/.env.example b/apps/api/.env.example index 41128fd..c5fd9cf 100644 --- a/apps/api/.env.example +++ b/apps/api/.env.example @@ -9,4 +9,4 @@ DATABASE_USER=wikipedia_user DATABASE_PASSWORD=password DATABASE_NAME=wikipedia DATABASE_HOST=127.0.0.1 -DATABASE_PORT=3306 +DATABASE_PORT=5432 diff --git a/apps/api/src/app/routes/index.ts b/apps/api/src/app/routes/index.ts index cd881e2..e7686a1 100644 --- a/apps/api/src/app/routes/index.ts +++ b/apps/api/src/app/routes/index.ts @@ -1 +1,2 @@ -import "./get.js" +import "#app/routes/get.js" +import "#app/routes/wikipedia/index.js" diff --git a/apps/api/src/app/routes/wikipedia/index.ts b/apps/api/src/app/routes/wikipedia/index.ts new file mode 100644 index 0000000..138acf6 --- /dev/null +++ b/apps/api/src/app/routes/wikipedia/index.ts @@ -0,0 +1 @@ +import "#app/routes/wikipedia/pages/get.js" diff --git a/apps/api/src/app/routes/wikipedia/pages/get.ts b/apps/api/src/app/routes/wikipedia/pages/get.ts new file mode 100644 index 0000000..e5130ab --- /dev/null +++ b/apps/api/src/app/routes/wikipedia/pages/get.ts @@ -0,0 +1,36 @@ +import Page from "#app/models/page.js" +import type { HttpContext } from "@adonisjs/core/http" +import router from "@adonisjs/core/services/router" +import { sanitizePageTitle } from "@repo/wikipedia-game-solver/wikipedia-utils" +import vine from "@vinejs/vine" + +const requestValidator = vine.compile( + vine.object({ + title: vine + .string() + .minLength(1) + .maxLength(255) + .transform((value) => { + return sanitizePageTitle(value) + }), + limit: vine + .number() + .parse((value) => { + return value ?? 
5 + }) + .withoutDecimals() + .range([1, 100]), + }), +) + +class Controller { + public async handle(context: HttpContext): Promise<Page[]> { + const payload = await context.request.validateUsing(requestValidator) + const pages = await Page.query() + .whereLike("title", `${payload.title}%`) + .limit(payload.limit) + return pages + } +} + +router.get("/wikipedia/pages", [Controller]) diff --git a/apps/api/src/config/database.ts b/apps/api/src/config/database.ts index 7b59fac..e34b97d 100644 --- a/apps/api/src/config/database.ts +++ b/apps/api/src/config/database.ts @@ -1,10 +1,13 @@ import env from "#start/env.js" +import app from "@adonisjs/core/services/app" import { defineConfig } from "@adonisjs/lucid" const databaseConfig = defineConfig({ + prettyPrintDebugQueries: !app.inProduction, connection: "postgres", connections: { postgres: { + debug: !app.inProduction, client: "pg", connection: { host: env.get("DATABASE_HOST"), diff --git a/apps/api/src/start/kernel.ts b/apps/api/src/start/kernel.ts index 7de2572..7b0e8ae 100644 --- a/apps/api/src/start/kernel.ts +++ b/apps/api/src/start/kernel.ts @@ -8,17 +8,14 @@ import router from "@adonisjs/core/services/router" import server from "@adonisjs/core/services/server" /** - * The error handler is used to convert an exception - * to a HTTP response. + * The error handler is used to convert an exception to a HTTP response. */ server.errorHandler(async () => { return await import("#app/exceptions/handler.js") }) /** - * The server middleware stack runs middleware on all the HTTP - * requests, even if there is no route registered for - * the request URL. + * The server middleware stack runs middleware on all the HTTP requests, even if there is no route registered for the requested URL.
*/ server.use([ async () => { diff --git a/packages/utils/src/strings.ts b/packages/utils/src/strings.ts index c857cb1..8adff1b 100644 --- a/packages/utils/src/strings.ts +++ b/packages/utils/src/strings.ts @@ -7,3 +7,60 @@ export const capitalize = (string: string): string => { return string.charAt(0).toUpperCase() + string.slice(1) } + +/** + * Trim any of the specified characters from the start and end of a string. + * @param string + * @param characters + * @returns + * @example trimAny("_____foo bar ", [" ", "_"]) // "foo bar" + */ +export const trimAny = (string: string, characters: string[]): string => { + let start = 0 + let end = string.length + + while ( + start < end && + (characters as Array<string | undefined>).includes(string[start]) + ) { + start += 1 + } + + while ( + end > start && + (characters as Array<string | undefined>).includes(string[end - 1]) + ) { + end -= 1 + } + + return start > 0 || end < string.length + ? string.substring(start, end) + : string +} + +/** + * Reduces consecutive occurrences of specified characters in a string to a single occurrence. + * + * @param input + * @param characters + * @returns + * @example reduceConsecutiveCharacters("Hello___there!!", ["_", "!"]) // "Hello_there!"
+ */ +export const reduceConsecutiveCharacters = ( + input: string, + characters: string[], +): string => { + let result = "" + let previousCharacter = "" + for (const currentCharacter of input) { + if (characters.includes(currentCharacter)) { + if (currentCharacter !== previousCharacter) { + result += currentCharacter + } + } else { + result += currentCharacter + } + previousCharacter = currentCharacter + } + return result +} diff --git a/packages/utils/src/tests/strings.test.ts b/packages/utils/src/tests/strings.test.ts index 7571ad0..5cb5c4f 100644 --- a/packages/utils/src/tests/strings.test.ts +++ b/packages/utils/src/tests/strings.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from "vitest" -import { capitalize } from "../strings.js" +import { capitalize, reduceConsecutiveCharacters, trimAny } from "../strings.js" describe("capitalize", () => { it("should capitalize the first letter of a string", () => { @@ -39,3 +39,111 @@ describe("capitalize", () => { expect(output).toEqual(expected) }) }) + +describe("trimAny", () => { + it("should trim any of the specified characters from the start and end of a string", () => { + // Arrange - Given + const input = "_____foo bar " + const characters = [" ", "_"] + + // Act - When + const output = trimAny(input, characters) + + // Assert - Then + const expected = "foo bar" + expect(output).toEqual(expected) + }) + + it("should trim any of the specified characters from the start and end of a string even if the start and end characters are different", () => { + // Arrange - Given + const input = "_ __ _foo bar _" + const characters = [" ", "_"] + + // Act - When + const output = trimAny(input, characters) + + // Assert - Then + const expected = "foo bar" + expect(output).toEqual(expected) + }) + + it("should return the same string when the input does not start or end with any of the specified characters", () => { + // Arrange - Given + const input = "foo bar" + const characters = [" ", "_"] + + // Act - When + const 
output = trimAny(input, characters) + + // Assert - Then + const expected = "foo bar" + expect(output).toEqual(expected) + }) + + it("should return an empty string when the input is an empty string", () => { + // Arrange - Given + const input = "" + const characters = [" ", "_"] + + // Act - When + const output = trimAny(input, characters) + + // Assert - Then + const expected = "" + expect(output).toEqual(expected) + }) + + it("should return an empty string when the input starts and ends with the specified characters", () => { + // Arrange - Given + const input = " _ " + const characters = [" ", "_"] + + // Act - When + const output = trimAny(input, characters) + + // Assert - Then + const expected = "" + expect(output).toEqual(expected) + }) +}) + +describe("reduceConsecutiveCharacters", () => { + it("should reduce consecutive occurrences of specified characters in a string to a single occurrence", () => { + // Arrange - Given + const input = "Hello___there!!" + const characters = ["_", "!"] + + // Act - When + const output = reduceConsecutiveCharacters(input, characters) + + // Assert - Then + const expected = "Hello_there!" + expect(output).toEqual(expected) + }) + + it("should return the same string when there are no consecutive occurrences of specified characters", () => { + // Arrange - Given + const input = "Hello there!" + const characters = ["_", "!"] + + // Act - When + const output = reduceConsecutiveCharacters(input, characters) + + // Assert - Then + const expected = "Hello there!" 
+ expect(output).toEqual(expected) + }) + + it("should return an empty string when the input is an empty string", () => { + // Arrange - Given + const input = "" + const characters = ["_", "!"] + + // Act - When + const output = reduceConsecutiveCharacters(input, characters) + + // Assert - Then + const expected = "" + expect(output).toEqual(expected) + }) +}) diff --git a/packages/wikipedia-game-solver/package.json b/packages/wikipedia-game-solver/package.json index f5f1382..ca180c3 100644 --- a/packages/wikipedia-game-solver/package.json +++ b/packages/wikipedia-game-solver/package.json @@ -5,7 +5,8 @@ "type": "module", "exports": { "./WikipediaClient": "./src/WikipediaClient.tsx", - "./wikipedia-api": "./src/wikipedia-api.ts" + "./wikipedia-api": "./src/wikipedia-api.ts", + "./wikipedia-utils": "./src/wikipedia-utils.ts" }, "scripts": { "lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives", @@ -17,6 +18,7 @@ "@repo/config-tailwind": "workspace:*", "@repo/i18n": "workspace:*", "@repo/ui": "workspace:*", + "@repo/utils": "workspace:*", "ky": "catalog:", "next": "catalog:", "next-intl": "catalog:", diff --git a/packages/wikipedia-game-solver/src/WikipediaClient.tsx b/packages/wikipedia-game-solver/src/WikipediaClient.tsx index a01ef8c..8b4de2e 100644 --- a/packages/wikipedia-game-solver/src/WikipediaClient.tsx +++ b/packages/wikipedia-game-solver/src/WikipediaClient.tsx @@ -4,11 +4,11 @@ import { Button } from "@repo/ui/Design/Button" import { Link } from "@repo/ui/Design/Link" import { Typography } from "@repo/ui/Design/Typography" import { useState } from "react" +import { getWikipediaPageInternalLinks } from "./wikipedia-api" import { fromLocaleToWikipediaLocale, getWikipediaLink, - getWikipediaPageInternalLinks, -} from "./wikipedia-api" +} from "./wikipedia-utils" export const WikipediaClient: React.FC = () => { const [isLoading, setIsLoading] = useState(false) diff --git a/packages/wikipedia-game-solver/src/__tests__/basic.test.ts 
b/packages/wikipedia-game-solver/src/__tests__/basic.test.ts deleted file mode 100644 index 1b2ceeb..0000000 --- a/packages/wikipedia-game-solver/src/__tests__/basic.test.ts +++ /dev/null @@ -1,9 +0,0 @@ -import { describe, expect, it } from "vitest" - -import { sum } from "../wikipedia-api" - -describe("sum", () => { - it("adds 1 + 2 to equal 3", () => { - expect(sum(1, 2)).toBe(3) - }) -}) diff --git a/packages/wikipedia-game-solver/src/__tests__/wikipedia-utils.test.ts b/packages/wikipedia-game-solver/src/__tests__/wikipedia-utils.test.ts new file mode 100644 index 0000000..fc0ca28 --- /dev/null +++ b/packages/wikipedia-game-solver/src/__tests__/wikipedia-utils.test.ts @@ -0,0 +1,135 @@ +import { describe, expect, it } from "vitest" +import { + fromLocaleToWikipediaLocale, + fromSanitizedPageTitleToPageTitle, + getWikipediaLink, + sanitizePageTitle, +} from "../wikipedia-utils" + +describe("fromLocaleToWikipediaLocale", () => { + it("should return the correct Wikipedia locale", () => { + // Arrange - Given + const input = "en-US" + + // Act - When + const output = fromLocaleToWikipediaLocale(input) + + // Assert - Then + const expected = "en" + expect(output).toEqual(expected) + }) +}) + +describe("getWikipediaLink", () => { + it("should return the correct Wikipedia link for the given locale", () => { + // Arrange - Given + const input = "en" + + // Act - When + const output = getWikipediaLink(input) + + // Assert - Then + const expected = "https://en.wikipedia.org" + expect(output).toEqual(expected) + }) +}) + +describe("sanitizePageTitle", () => { + it("should return the correct sanitized page title", () => { + // Arrange - Given + const input = "foo bar" + + // Act - When + const output = sanitizePageTitle(input) + + // Assert - Then + const expected = "Foo_bar" + expect(output).toEqual(expected) + }) + + it("should preserve the characters case", () => { + // Arrange - Given + const input = "Foo Bar" + + // Act - When + const output = sanitizePageTitle(input) 
+ + // Assert - Then + const expected = "Foo_Bar" + expect(output).toEqual(expected) + }) + + it("should remove leading and trailing spaces/underscores (rule 1)", () => { + // Arrange - Given + const input = " Abc_def__" + + // Act - When + const output = sanitizePageTitle(input) + + // Assert - Then + const expected = "Abc_def" + expect(output).toEqual(expected) + }) + + it("should reduce consecutive spaces/underscores to a single one (rule 2)", () => { + // Arrange - Given + const input = "Abc def" + + // Act - When + const output = sanitizePageTitle(input) + + // Assert - Then + const expected = "Abc_def" + expect(output).toEqual(expected) + }) + + it("should replace spaces by underscores (rule 3)", () => { + // Arrange - Given + const input = "Abc def" + + // Act - When + const output = sanitizePageTitle(input) + + // Assert - Then + const expected = "Abc_def" + expect(output).toEqual(expected) + }) + + it("should capitalize the first character (rule 4)", () => { + // Arrange - Given + const input = "abc_def" + + // Act - When + const output = sanitizePageTitle(input) + + // Assert - Then + const expected = "Abc_def" + expect(output).toEqual(expected) + }) + + it("should have a maximum of 255 characters (rule 5)", () => { + // Arrange - Given + const input = "a".repeat(256) + + // Act - When + const output = sanitizePageTitle(input) + + // Assert - Then + const expected = "A" + "a".repeat(254) + expect(output).toEqual(expected) + }) +}) + +describe("fromSanitizedPageTitleToPageTitle", () => { + it("should return the correct page title", () => { + // Arrange - Given + const input = "Foo_bar" + + // Act - When + const output = fromSanitizedPageTitleToPageTitle(input) + + // Assert - Then + const expected = "Foo bar" + expect(output).toEqual(expected) + }) +}) diff --git a/packages/wikipedia-game-solver/src/wikipedia-api.ts b/packages/wikipedia-game-solver/src/wikipedia-api.ts index 2a96672..88799d5 100644 --- a/packages/wikipedia-game-solver/src/wikipedia-api.ts 
+++ b/packages/wikipedia-game-solver/src/wikipedia-api.ts @@ -1,33 +1,11 @@ -import type { Locale } from "@repo/i18n/config" import ky from "ky" - -export const sum = (a: number, b: number): number => { - return a + b -} +import { getWikipediaLink, type WikipediaLocale } from "./wikipedia-utils" /** * @see https://www.mediawiki.org/wiki/Wikimedia_REST_API#Terms_and_conditions * To avoid impacting other API users, limit your clients to no more than 200 requests/sec to this API overall. Many entry points additionally specify and enforce more restrictive rate limits (HTTP 429 error). */ -export const WIKIPEDIA_LOCALES = ["en", "fr"] as const -export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number] - -const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = { - "en-US": "en", - "fr-FR": "fr", -} - -export const fromLocaleToWikipediaLocale = ( - locale: Locale, -): WikipediaLocale => { - return WIKIPEDIA_LOCALES_MAP[locale] -} - -export const getWikipediaLink = (locale: WikipediaLocale): string => { - return `https://${locale}.wikipedia.org` -} - interface WikipediaQueryLinksResponse { continue?: { plcontinue: string diff --git a/packages/wikipedia-game-solver/src/wikipedia-utils.ts b/packages/wikipedia-game-solver/src/wikipedia-utils.ts new file mode 100644 index 0000000..cd162c2 --- /dev/null +++ b/packages/wikipedia-game-solver/src/wikipedia-utils.ts @@ -0,0 +1,70 @@ +import type { Locale } from "@repo/i18n/config" +import { + capitalize, + reduceConsecutiveCharacters, + trimAny, +} from "@repo/utils/strings" + +export const WIKIPEDIA_LOCALES = ["en", "fr"] as const +export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number] + +const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = { + "en-US": "en", + "fr-FR": "fr", +} + +export const fromLocaleToWikipediaLocale = ( + locale: Locale, +): WikipediaLocale => { + return WIKIPEDIA_LOCALES_MAP[locale] +} + +export const getWikipediaLink = (locale: WikipediaLocale): string => { + return `https://${locale}.wikipedia.org` +} + +/** + * Converts
a page title to a sanitized version (also called "canonical form") that can be used in a URL. + * + * The sanitized page title is stored as text with the following restrictions: + * 1. leading and trailing spaces (` `) and underscores (`_`) are removed. + * 2. consecutive spaces/underscores are reduced to a single one. + * 3. spaces are replaced by underscores. + * 4. first character is capitalized. + * 5. maximum of 255 characters. + * @param pageTitle + * @see https://www.mediawiki.org/wiki/Manual:Page_title + * @see https://en.wikipedia.org/wiki/Wikipedia:Naming_conventions_(technical_restrictions) + * @returns + * @example sanitizePageTitle("foo bar") // "Foo_bar" + * @example sanitizePageTitle("Foo Bar") // "Foo_Bar" + */ +export const sanitizePageTitle = (pageTitle: string): string => { + const rule1 = trimAny(pageTitle, [" ", "_"]) + const rule2 = reduceConsecutiveCharacters(rule1, [" ", "_"]) + const rule3 = rule2.replaceAll(" ", "_") + const rule4 = capitalize(rule3) + const rule5 = rule4.slice(0, 255) + return rule5 +} + +/** + * Converts a sanitized page title to a page title. + * + * A page title is the title of a wiki page, which is a human-readable and unique identifier for a page. + * + * Underscores (`_`) are replaced by spaces (` `). + * + * A page title is not to be confused with a display title. + * A display title is the preferred title associated with a wiki page (stored separately), with fewer restrictions than a page title.
+ * @param sanitizedPageTitle + * @see https://www.mediawiki.org/wiki/Manual:Page_title + * @see https://www.mediawiki.org/wiki/Display_title + * @returns + * @example fromSanitizedPageTitleToPageTitle("Foo_bar") // "Foo bar" + */ +export const fromSanitizedPageTitleToPageTitle = ( + sanitizedPageTitle: string, +): string => { + return sanitizedPageTitle.replaceAll("_", " ") +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ec36480..dfaa41c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -810,6 +810,9 @@ importers: '@repo/ui': specifier: workspace:* version: link:../ui + '@repo/utils': + specifier: workspace:* + version: link:../utils ky: specifier: 'catalog:' version: 1.5.0