feat(api): implement GET /wikipedia/pages?title=search_title

This commit is contained in:
Théo LUDWIG 2024-08-12 00:32:43 +01:00
parent 02ee112de4
commit cdc8cf2b05
Signed by: theoludwig
GPG Key ID: ADFE5A563D718F3B
17 changed files with 433 additions and 46 deletions

View File

@ -5,4 +5,4 @@ DATABASE_USER=wikipedia_user
DATABASE_PASSWORD=password
DATABASE_NAME=wikipedia
DATABASE_HOST=127.0.0.1
DATABASE_PORT=3306
DATABASE_PORT=5432

View File

@ -31,16 +31,21 @@
- [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/)) to get shortest paths between 2 pages
- [x] Init AdonisJS project
- [x] Create Lucid models and migrations for Wikipedia Database Dump: `pages` and `internal_links` tables
- [ ] Implement `GET /wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists)
- [x] Implement `GET /wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists)
- [ ] Implement `GET /wikipedia/pages/internal-links/paths?from=Node.js&to=Linux` to get all the possible paths between 2 pages with titles sanitized
- [ ] Setup tests with database + add coverage
- [ ] Setup HTTP Requests logging in development (not needed in `test` mode)
- [ ] Setup Health checks
- [ ] Setup Rate limiting
- [ ] Share VineJS validators between `website` and `api`
- [ ] Implement Wikipedia Game Solver (`website`)
- [x] Init Next.js project
- [ ] Try to use <https://www.npmjs.com/package/@tuyau/client> for API calls
- [ ] Hard code 2 pages to test if it works with `console.log` in the browser
- [ ] Implement a form with inputs, button to submit, and list all pages to go from one to another, or none if it is not possible
- [ ] Add images, links to the pages + good UI/UX
- [ ] Implement toast notifications for errors, warnings, and success messages
- [ ] Autocomplete page titles
- [ ] Implement toast notifications for errors, warnings, and success messages
- [ ] Implement CLI (`cli`)
- [ ] Init Clipanion project
- [ ] Implement `wikipedia-game-solver internal-links --from="Node.js" --to="Linux"` command to get all the possible paths between 2 pages.

View File

@ -9,4 +9,4 @@ DATABASE_USER=wikipedia_user
DATABASE_PASSWORD=password
DATABASE_NAME=wikipedia
DATABASE_HOST=127.0.0.1
DATABASE_PORT=3306
DATABASE_PORT=5432

View File

@ -1 +1,2 @@
import "./get.js"
import "#app/routes/get.js"
import "#app/routes/wikipedia/index.js"

View File

@ -0,0 +1 @@
import "#app/routes/wikipedia/pages/get.js"

View File

@ -0,0 +1,36 @@
import Page from "#app/models/page.js"
import type { HttpContext } from "@adonisjs/core/http"
import router from "@adonisjs/core/services/router"
import { sanitizePageTitle } from "@repo/wikipedia-game-solver/wikipedia-utils"
import vine from "@vinejs/vine"
/**
 * Schema of the `title` query parameter: a string of 1 to 255 characters,
 * transformed with `sanitizePageTitle`.
 */
const titleSchema = vine
  .string()
  .minLength(1)
  .maxLength(255)
  .transform((title) => sanitizePageTitle(title))

/**
 * Schema of the `limit` query parameter: a number without decimals in the
 * range 1 to 100. A nullish input falls back to 5.
 */
const limitSchema = vine
  .number()
  .parse((rawLimit) => rawLimit ?? 5)
  .withoutDecimals()
  .range([1, 100])

/**
 * Compiled validator for the request data of `GET /wikipedia/pages`.
 */
const requestValidator = vine.compile(
  vine.object({
    title: titleSchema,
    limit: limitSchema,
  }),
)
/**
 * Escapes the characters that have a special meaning inside a SQL `LIKE`
 * pattern (`%`, `_` and the escape character `\` itself) so that they are
 * matched literally.
 * @param value Text to embed in a `LIKE` pattern.
 * @returns The text with every `\`, `%` and `_` prefixed by a backslash.
 * @example escapeLikePattern("Foo_bar") // "Foo\\_bar"
 */
const escapeLikePattern = (value: string): string => {
  return value.replace(/[\\%_]/g, "\\$&")
}

class Controller {
  /**
   * Handles `GET /wikipedia/pages`: validates the request with
   * `requestValidator`, then returns the pages whose title starts with the
   * sanitized `title`, limited to `limit` results.
   * @param context HTTP context of the current request.
   * @returns The matching pages.
   */
  public async handle(context: HttpContext): Promise<Page[]> {
    const payload = await context.request.validateUsing(requestValidator)
    // Sanitized titles contain `_` in place of spaces, and `_`/`%` are
    // wildcards in `LIKE`: escape them so that only the trailing `%` acts as
    // a wildcard (prefix search).
    const titlePrefix = escapeLikePattern(payload.title)
    const pages = await Page.query()
      .whereLike("title", `${titlePrefix}%`)
      .limit(payload.limit)
    return pages
  }
}
// Registers `Controller` as the handler of `GET /wikipedia/pages`.
router.get("/wikipedia/pages", [Controller])

View File

@ -1,10 +1,13 @@
import env from "#start/env.js"
import app from "@adonisjs/core/services/app"
import { defineConfig } from "@adonisjs/lucid"
const databaseConfig = defineConfig({
prettyPrintDebugQueries: !app.inProduction,
connection: "postgres",
connections: {
postgres: {
debug: !app.inProduction,
client: "pg",
connection: {
host: env.get("DATABASE_HOST"),

View File

@ -8,17 +8,14 @@ import router from "@adonisjs/core/services/router"
import server from "@adonisjs/core/services/server"
/**
* The error handler is used to convert an exception
* to a HTTP response.
* The error handler is used to convert an exception to an HTTP response.
*/
server.errorHandler(async () => {
return await import("#app/exceptions/handler.js")
})
/**
* The server middleware stack runs middleware on all the HTTP
* requests, even if there is no route registered for
* the request URL.
* The server middleware stack runs middleware on all the HTTP requests, even if there is no route registered for the requested URL.
*/
server.use([
async () => {

View File

@ -7,3 +7,60 @@
export const capitalize = (string: string): string => {
  // Work on the first code point rather than the first UTF-16 code unit:
  // a character outside the Basic Multilingual Plane is stored as a surrogate
  // pair, and upper-casing only its first half would leave it unchanged.
  const firstCodePoint = string.codePointAt(0)
  if (firstCodePoint === undefined) {
    // Empty string: nothing to capitalize.
    return string
  }
  const firstCharacter = String.fromCodePoint(firstCodePoint)
  return firstCharacter.toUpperCase() + string.slice(firstCharacter.length)
}
/**
 * Strips every leading and trailing occurrence of the given characters from a string.
 * Characters located between two kept characters are left untouched.
 * @param string Text to trim.
 * @param characters Single characters to remove from both ends.
 * @returns The trimmed text (the input itself when there is nothing to trim).
 * @example trimAny("_____foo bar ", [" ", "_"]) // "foo bar"
 */
export const trimAny = (string: string, characters: string[]): string => {
  const shouldTrim = (index: number): boolean => {
    return characters.includes(string.charAt(index))
  }

  // First index (from the left) holding a character to keep.
  let left = 0
  while (left < string.length && shouldTrim(left)) {
    left += 1
  }

  // Index just after the last character to keep, never going below `left`.
  let right = string.length
  while (right > left && shouldTrim(right - 1)) {
    right -= 1
  }

  return string.slice(left, right)
}
/**
 * Collapses each run of an identical character, among the specified ones, into one occurrence.
 *
 * Only repetitions of the same character are collapsed: two different specified
 * characters following each other are both kept.
 * @param input Text to process.
 * @param characters Characters whose consecutive repetitions are collapsed.
 * @returns The text without consecutive repetitions of the specified characters.
 * @example reduceConsecutiveCharacters("Hello___there!!", ["_", "!"]) // "Hello_there!"
 */
export const reduceConsecutiveCharacters = (
  input: string,
  characters: string[],
): string => {
  const symbols = [...input]
  return symbols
    .filter((symbol, index) => {
      const repeatsPrevious = symbol === symbols[index - 1]
      return !(characters.includes(symbol) && repeatsPrevious)
    })
    .join("")
}

View File

@ -1,6 +1,6 @@
import { describe, expect, it } from "vitest"
import { capitalize } from "../strings.js"
import { capitalize, reduceConsecutiveCharacters, trimAny } from "../strings.js"
describe("capitalize", () => {
it("should capitalize the first letter of a string", () => {
@ -39,3 +39,111 @@ describe("capitalize", () => {
expect(output).toEqual(expected)
})
})
describe("trimAny", () => {
  // Every case trims the same set of characters.
  const characters = [" ", "_"]

  it("should trim any of the specified characters from the start and end of a string", () => {
    expect(trimAny("_____foo bar ", characters)).toEqual("foo bar")
  })

  it("should trim any of the specified characters from the start and end of a string even if the start and end characters are different", () => {
    expect(trimAny("_ __ _foo bar _", characters)).toEqual("foo bar")
  })

  it("should return the same string when the input does not start or end with any of the specified characters", () => {
    expect(trimAny("foo bar", characters)).toEqual("foo bar")
  })

  it("should return an empty string when the input is an empty string", () => {
    expect(trimAny("", characters)).toEqual("")
  })

  it("should return an empty string when the input starts and ends with the specified characters", () => {
    expect(trimAny(" _ ", characters)).toEqual("")
  })
})
describe("reduceConsecutiveCharacters", () => {
  // Every case collapses the same set of characters.
  const characters = ["_", "!"]

  it("should reduce consecutive occurrences of specified characters in a string to a single occurrence", () => {
    expect(reduceConsecutiveCharacters("Hello___there!!", characters)).toEqual(
      "Hello_there!",
    )
  })

  it("should return the same string when there are no consecutive occurrences of specified characters", () => {
    expect(reduceConsecutiveCharacters("Hello there!", characters)).toEqual(
      "Hello there!",
    )
  })

  it("should return an empty string when the input is an empty string", () => {
    expect(reduceConsecutiveCharacters("", characters)).toEqual("")
  })
})

View File

@ -5,7 +5,8 @@
"type": "module",
"exports": {
"./WikipediaClient": "./src/WikipediaClient.tsx",
"./wikipedia-api": "./src/wikipedia-api.ts"
"./wikipedia-api": "./src/wikipedia-api.ts",
"./wikipedia-utils": "./src/wikipedia-utils.ts"
},
"scripts": {
"lint:eslint": "eslint src --max-warnings 0 --report-unused-disable-directives",
@ -17,6 +18,7 @@
"@repo/config-tailwind": "workspace:*",
"@repo/i18n": "workspace:*",
"@repo/ui": "workspace:*",
"@repo/utils": "workspace:*",
"ky": "catalog:",
"next": "catalog:",
"next-intl": "catalog:",

View File

@ -4,11 +4,11 @@ import { Button } from "@repo/ui/Design/Button"
import { Link } from "@repo/ui/Design/Link"
import { Typography } from "@repo/ui/Design/Typography"
import { useState } from "react"
import { getWikipediaPageInternalLinks } from "./wikipedia-api"
import {
fromLocaleToWikipediaLocale,
getWikipediaLink,
getWikipediaPageInternalLinks,
} from "./wikipedia-api"
} from "./wikipedia-utils"
export const WikipediaClient: React.FC = () => {
const [isLoading, setIsLoading] = useState(false)

View File

@ -1,9 +0,0 @@
import { describe, expect, it } from "vitest"
import { sum } from "../wikipedia-api"
describe("sum", () => {
it("adds 1 + 2 to equal 3", () => {
expect(sum(1, 2)).toBe(3)
})
})

View File

@ -0,0 +1,135 @@
import { describe, expect, it } from "vitest"
import {
fromLocaleToWikipediaLocale,
fromSanitizedPageTitleToPageTitle,
getWikipediaLink,
sanitizePageTitle,
} from "../wikipedia-utils"
describe("fromLocaleToWikipediaLocale", () => {
  it("should return the correct Wikipedia locale", () => {
    // The application locale "en-US" maps to the Wikipedia locale "en".
    expect(fromLocaleToWikipediaLocale("en-US")).toEqual("en")
  })
})
describe("getWikipediaLink", () => {
  it("should return the correct Wikipedia link for the given locale", () => {
    // The locale is used as the subdomain of wikipedia.org.
    expect(getWikipediaLink("en")).toEqual("https://en.wikipedia.org")
  })
})
describe("sanitizePageTitle", () => {
  it("should return the correct sanitized page title", () => {
    // Arrange - Given
    const input = "foo bar"

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "Foo_bar"
    expect(output).toEqual(expected)
  })

  it("should preserve the characters case", () => {
    // Arrange - Given
    const input = "Foo Bar"

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "Foo_Bar"
    expect(output).toEqual(expected)
  })

  it("should remove leading and trailing spaces/underscores (rule 1)", () => {
    // Arrange - Given
    const input = " Abc_def__"

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "Abc_def"
    expect(output).toEqual(expected)
  })

  it("should reduce consecutive spaces/underscores to a single one (rule 2)", () => {
    // Arrange - Given
    // The input must really contain a run of separators, otherwise this case
    // only duplicates the "rule 3" one and never exercises rule 2.
    const input = "Abc   def"

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "Abc_def"
    expect(output).toEqual(expected)
  })

  it("should reduce consecutive underscores to a single one (rule 2)", () => {
    // Arrange - Given
    const input = "Abc___def"

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "Abc_def"
    expect(output).toEqual(expected)
  })

  it("should replace spaces by underscores (rule 3)", () => {
    // Arrange - Given
    const input = "Abc def"

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "Abc_def"
    expect(output).toEqual(expected)
  })

  it("should capitalize the first character (rule 4)", () => {
    // Arrange - Given
    const input = "abc_def"

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "Abc_def"
    expect(output).toEqual(expected)
  })

  it("should have a maximum of 255 characters (rule 5)", () => {
    // Arrange - Given
    const input = "a".repeat(256)

    // Act - When
    const output = sanitizePageTitle(input)

    // Assert - Then
    const expected = "A" + "a".repeat(254)
    expect(output).toEqual(expected)
  })
})
describe("fromSanitizedPageTitleToPageTitle", () => {
  it("should return the correct page title", () => {
    // Underscores of the sanitized title become spaces again.
    expect(fromSanitizedPageTitleToPageTitle("Foo_bar")).toEqual("Foo bar")
  })
})

View File

@ -1,33 +1,11 @@
import type { Locale } from "@repo/i18n/config"
import ky from "ky"
export const sum = (a: number, b: number): number => {
return a + b
}
import { getWikipediaLink, type WikipediaLocale } from "./wikipedia-utils"
/**
* @see https://www.mediawiki.org/wiki/Wikimedia_REST_API#Terms_and_conditions
* To avoid impacting other API users, limit your clients to no more than 200 requests/sec to this API overall. Many entry points additionally specify and enforce more restrictive rate limits (HTTP 429 error).
*/
export const WIKIPEDIA_LOCALES = ["en", "fr"] as const
export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number]
const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = {
"en-US": "en",
"fr-FR": "fr",
}
export const fromLocaleToWikipediaLocale = (
locale: Locale,
): WikipediaLocale => {
return WIKIPEDIA_LOCALES_MAP[locale]
}
export const getWikipediaLink = (locale: WikipediaLocale): string => {
return `https://${locale}.wikipedia.org`
}
interface WikipediaQueryLinksResponse {
continue?: {
plcontinue: string

View File

@ -0,0 +1,70 @@
import type { Locale } from "@repo/i18n/config"
import {
capitalize,
reduceConsecutiveCharacters,
trimAny,
} from "@repo/utils/strings"
/** Wikipedia locales known to this module (used as the subdomain of `wikipedia.org` by `getWikipediaLink`). */
export const WIKIPEDIA_LOCALES = ["en", "fr"] as const
/** Union of the values listed in `WIKIPEDIA_LOCALES`. */
export type WikipediaLocale = (typeof WIKIPEDIA_LOCALES)[number]
/** Associates each application `Locale` with its `WikipediaLocale`. */
const WIKIPEDIA_LOCALES_MAP: Record<Locale, WikipediaLocale> = {
"en-US": "en",
"fr-FR": "fr",
}
/**
 * Looks up the Wikipedia locale associated with an application locale.
 * @param locale Application locale (a key of `WIKIPEDIA_LOCALES_MAP`).
 * @returns The Wikipedia locale mapped to `locale`.
 * @example fromLocaleToWikipediaLocale("en-US") // "en"
 */
export const fromLocaleToWikipediaLocale = (
  locale: Locale,
): WikipediaLocale => {
  const { [locale]: wikipediaLocale } = WIKIPEDIA_LOCALES_MAP
  return wikipediaLocale
}
/**
 * Builds the base URL of Wikipedia for a locale.
 * @param locale Wikipedia locale, used as the subdomain.
 * @returns The `https` origin, without trailing slash.
 * @example getWikipediaLink("en") // "https://en.wikipedia.org"
 */
export const getWikipediaLink = (locale: WikipediaLocale): string => {
  const host = [locale, "wikipedia", "org"].join(".")
  return "https://" + host
}
/**
 * Converts a page title to a sanitized version (also called "canonical form") that can be used in a URL.
 *
 * The sanitized page title is stored as text with the following restrictions:
 * 1. leading and trailing spaces (` `) and underscores (`_`) are removed.
 * 2. consecutive spaces/underscores (including mixed runs such as `" _ "`) are reduced to a single one.
 * 3. spaces are replaced by underscores.
 * 4. first character is capitalized.
 * 5. maximum of 255 characters.
 * @param pageTitle Page title as typed by a user.
 * @see https://www.mediawiki.org/wiki/Manual:Page_title
 * @see https://en.wikipedia.org/wiki/Wikipedia:Naming_conventions_(technical_restrictions)
 * @returns The sanitized page title.
 * @example sanitizePageTitle("foo bar") // "Foo_bar"
 * @example sanitizePageTitle("Foo Bar") // "Foo_Bar"
 * @example sanitizePageTitle("foo _ bar") // "Foo_bar"
 */
export const sanitizePageTitle = (pageTitle: string): string => {
  // Rule 3 is applied first: once every space is an underscore, a mixed run
  // such as " _ " is a plain run of underscores, so rules 1 and 2 only have
  // to deal with a single character. Reducing before replacing would keep
  // alternating spaces/underscores and produce "___".
  const rule3 = pageTitle.replaceAll(" ", "_")
  const rule1 = trimAny(rule3, ["_"])
  const rule2 = reduceConsecutiveCharacters(rule1, ["_"])
  const rule4 = capitalize(rule2)
  // Truncating can leave an underscore at the end: trim again so that
  // rule 1 still holds for the returned title.
  const rule5 = trimAny(rule4.slice(0, 255), ["_"])
  return rule5
}
/**
 * Turns a sanitized page title back into a page title.
 *
 * A page title is the title of a wiki page: a human-readable and unique identifier for a page.
 *
 * Every underscore (`_`) becomes a space (` `).
 *
 * A page title must not be confused with a display title.
 * A display title is the preferred title associated with a wiki page (stored separately), with fewer restrictions than a page title.
 * @param sanitizedPageTitle Sanitized page title (see `sanitizePageTitle`).
 * @see https://www.mediawiki.org/wiki/Manual:Page_title
 * @see https://www.mediawiki.org/wiki/Display_title
 * @returns The page title, with spaces in place of underscores.
 * @example fromSanitizedPageTitleToPageTitle("Foo_bar") // "Foo bar"
 */
export const fromSanitizedPageTitleToPageTitle = (
  sanitizedPageTitle: string,
): string => {
  const words = sanitizedPageTitle.split("_")
  return words.join(" ")
}

View File

@ -810,6 +810,9 @@ importers:
'@repo/ui':
specifier: workspace:*
version: link:../ui
'@repo/utils':
specifier: workspace:*
version: link:../utils
ky:
specifier: 'catalog:'
version: 1.5.0