feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB
commit 02ee112de4 (parent aa2fb4f5b9)
.gitignore (vendored, 10 lines changed)
@@ -21,11 +21,13 @@ build/
 *.pem
 .turbo
 tmp/
 cache.json
+# data
 data/dump
-data/sql/*
-!data/sql/0000-tables-create.sql
-!data/sql/0999-constraints.sql
+data/sql-pages-inserts/*
+!data/sql-pages-inserts/0000-pages.sh
+data/sql-internal-links-inserts/*
+!data/sql-internal-links-inserts/0000-internal-links.sh
 
 # debug
 npm-debug.log*
TODO.md (2 lines changed)
@@ -30,7 +30,7 @@
 - [ ] Handle redirects
 - [ ] Implement REST API (`api`) with JSON responses ([AdonisJS](https://adonisjs.com/)) to get shortest paths between 2 pages
 - [x] Init AdonisJS project
-- [ ] Create Lucid models and migrations for Wikipedia Database Dump: `pages` and `internal_links` tables
+- [x] Create Lucid models and migrations for Wikipedia Database Dump: `pages` and `internal_links` tables
 - [ ] Implement `GET /wikipedia/pages?title=Node.js` to search a page by title (not necessarily with the title sanitized, search with input by user to check if page exists)
 - [ ] Implement `GET /wikipedia/pages/internal-links/paths?from=Node.js&to=Linux` to get all the possible paths between 2 pages with titles sanitized
 - [ ] Implement Wikipedia Game Solver (`website`)
@@ -23,7 +23,7 @@
     "@repo/wikipedia-game-solver": "workspace:*",
     "@vinejs/vine": "catalog:",
     "luxon": "catalog:",
-    "mysql2": "catalog:",
+    "pg": "catalog:",
     "reflect-metadata": "catalog:",
     "tsx": "catalog:",
     "pino-pretty": "catalog:"
@@ -13,9 +13,9 @@ export default class HttpExceptionHandler extends ExceptionHandler {
    */
   public override async handle(
     error: unknown,
-    ctx: HttpContext,
+    context: HttpContext,
   ): Promise<unknown> {
-    return await super.handle(error, ctx)
+    return await super.handle(error, context)
   }
 
   /**
@@ -25,8 +25,8 @@ export default class HttpExceptionHandler extends ExceptionHandler {
    */
   public override async report(
     error: unknown,
-    ctx: HttpContext,
+    context: HttpContext,
   ): Promise<void> {
-    return await super.report(error, ctx)
+    return await super.report(error, context)
   }
 }
@@ -13,13 +13,13 @@ export default class AuthMiddleware {
   redirectTo = "/login"
 
   public async handle(
-    ctx: HttpContext,
+    context: HttpContext,
     next: NextFn,
     options: {
       guards?: Array<keyof Authenticators>
     } = {},
   ): Promise<void> {
-    await ctx.auth.authenticateUsing(options.guards, {
+    await context.auth.authenticateUsing(options.guards, {
       loginRoute: this.redirectTo,
     })
     return next()
@@ -5,13 +5,13 @@ import type { NextFn } from "@adonisjs/core/types/http"
 /**
  * The container bindings middleware binds classes to their request specific value using the container resolver.
  *
- * - We bind "HttpContext" class to the "ctx" object.
- * - And bind "Logger" class to the "ctx.logger" object.
+ * - We bind "HttpContext" class to the "context" object.
+ * - And bind "Logger" class to the "context.logger" object.
  */
 export default class ContainerBindingsMiddleware {
-  public async handle(ctx: HttpContext, next: NextFn): Promise<void> {
-    ctx.containerResolver.bindValue(HttpContext, ctx)
-    ctx.containerResolver.bindValue(Logger, ctx.logger)
+  public async handle(context: HttpContext, next: NextFn): Promise<void> {
+    context.containerResolver.bindValue(HttpContext, context)
+    context.containerResolver.bindValue(Logger, context.logger)
 
     return next()
   }
apps/api/src/app/models/page.ts (new file, 30 lines)
@@ -0,0 +1,30 @@
+import { BaseModel, column, manyToMany } from "@adonisjs/lucid/orm"
+import type { ManyToMany } from "@adonisjs/lucid/types/relations"
+
+export default class Page extends BaseModel {
+  protected tableName = "pages"
+
+  @column({ columnName: "id", serializeAs: "id", isPrimary: true })
+  declare id: number
+
+  @column({
+    columnName: "title",
+    serializeAs: "title",
+  })
+  declare title: string
+
+  @manyToMany(
+    () => {
+      return Page
+    },
+    {
+      pivotTable: "internal_links",
+      localKey: "id",
+      relatedKey: "id",
+      pivotForeignKey: "from_page_id",
+      pivotRelatedForeignKey: "to_page_id",
+      serializeAs: "internalLinks",
+    },
+  )
+  declare internalLinks: ManyToMany<typeof Page>
+}
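For context (not part of this commit's diff): a minimal sketch of how the self-referential `internalLinks` relation above could be queried once the tables are populated. `findByOrFail` and `related(...).query()` are standard Lucid APIs; the import path and the "Node.js" title are illustrative assumptions.

```ts
// Sketch: fetch a page by title and read a few of its outgoing internal links.
// The import alias is hypothetical; adjust to the project's subpath imports.
import Page from "#app/models/page.js"

const page = await Page.findByOrFail("title", "Node.js")

// `internal_links` can hold millions of rows for popular pages, so cap the query.
const links = await page.related("internalLinks").query().limit(10)

console.log(links.map((linkedPage) => linkedPage.title))
```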
@@ -25,7 +25,7 @@ export default class User extends compose(BaseModel, AuthFinder) {
     columnName: "full_name",
     serializeAs: "fullName",
   })
-  declare fullName: string | null
+  declare fullName: string
 
   @column({
     columnName: "email",
@@ -49,7 +49,7 @@ export default class User extends compose(BaseModel, AuthFinder) {
     autoCreate: true,
     autoUpdate: true,
   })
-  declare updatedAt: DateTime | null
+  declare updatedAt: DateTime
 
   static accessTokens = DbAccessTokensProvider.forModel(User)
 }
@@ -2,10 +2,10 @@ import env from "#start/env.js"
 import { defineConfig } from "@adonisjs/lucid"
 
 const databaseConfig = defineConfig({
-  connection: "mysql",
+  connection: "postgres",
   connections: {
-    mysql: {
-      client: "mysql2",
+    postgres: {
+      client: "pg",
       connection: {
         host: env.get("DATABASE_HOST"),
         port: env.get("DATABASE_PORT"),
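The hunk above cuts off after `port`. As a sketch only, the full connection block presumably continues with the remaining credentials; the `user`, `password`, and `database` keys below are assumptions inferred from the DATABASE_* env vars used in the compose files, not lines shown in the diff.

```ts
// Hedged reconstruction of the complete config file, not the diff's content.
import env from "#start/env.js"
import { defineConfig } from "@adonisjs/lucid"

const databaseConfig = defineConfig({
  connection: "postgres",
  connections: {
    postgres: {
      client: "pg",
      connection: {
        host: env.get("DATABASE_HOST"),
        port: env.get("DATABASE_PORT"),
        // Assumed keys below: they mirror the compose files' DATABASE_* vars.
        user: env.get("DATABASE_USER"),
        password: env.get("DATABASE_PASSWORD"),
        database: env.get("DATABASE_NAME"),
      },
    },
  },
})

export default databaseConfig
```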
@@ -6,12 +6,12 @@ export default class CreateUsersTable extends BaseSchema {
   public override async up(): Promise<void> {
     void this.schema.createTable(this.tableName, (table) => {
       table.increments("id").notNullable()
-      table.string("full_name").nullable()
+      table.string("full_name").notNullable()
       table.string("email", 254).notNullable().unique()
       table.string("password").notNullable()
 
       table.timestamp("created_at").notNullable()
-      table.timestamp("updated_at").nullable()
+      table.timestamp("updated_at").notNullable()
     })
   }
 
@@ -10,9 +10,9 @@ export default class CreateAccessTokensTable extends BaseSchema {
         .integer("tokenable_id")
         .notNullable()
         .unsigned()
-        .references("id")
-        .inTable("users")
+        .references("users.id")
         .onDelete("CASCADE")
+        .onUpdate("CASCADE")
 
       table.string("type").notNullable()
       table.string("name").nullable()
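Note on this hunk: `.references("users.id")` is the knex shorthand for `.references("id").inTable("users")`, so the two removed lines and the one added `references` line express the same foreign key. The behavioral change here is the added `ON UPDATE CASCADE`.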
@@ -0,0 +1,16 @@
+import { BaseSchema } from "@adonisjs/lucid/schema"
+
+export default class CreatePagesTable extends BaseSchema {
+  protected tableName = "pages"
+
+  public override async up(): Promise<void> {
+    void this.schema.createTable(this.tableName, (table) => {
+      table.increments("id").notNullable()
+      table.string("title", 255).notNullable().unique()
+    })
+  }
+
+  public override async down(): Promise<void> {
+    void this.schema.dropTable(this.tableName)
+  }
+}
@@ -0,0 +1,29 @@
+import { BaseSchema } from "@adonisjs/lucid/schema"
+
+export default class CreateInternalLinksTable extends BaseSchema {
+  protected tableName = "internal_links"
+
+  public override async up(): Promise<void> {
+    void this.schema.createTable(this.tableName, (table) => {
+      table.primary(["from_page_id", "to_page_id"])
+      table
+        .integer("from_page_id")
+        .unsigned()
+        .notNullable()
+        .references("pages.id")
+        .onDelete("CASCADE")
+        .onUpdate("CASCADE")
+      table
+        .integer("to_page_id")
+        .unsigned()
+        .notNullable()
+        .references("pages.id")
+        .onDelete("CASCADE")
+        .onUpdate("CASCADE")
+    })
+  }
+
+  public override async down(): Promise<void> {
+    void this.schema.dropTable(this.tableName)
+  }
+}
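These two tables are what the TODO's shortest-path endpoint will eventually traverse. Not part of this commit, but as a sketch under stated assumptions: a breadth-first search over `internal_links` could look like the following. `db.rawQuery` is a real Lucid service and the table and column names come from the migrations above; the function itself is hypothetical.

```ts
import db from "@adonisjs/lucid/services/db"

// Hypothetical BFS from one page id to another over the `internal_links` table.
// One query per BFS level; `= ANY(?)` binds a JS array as a PostgreSQL array.
const findShortestPath = async (fromPageId: number, toPageId: number) => {
  const previous = new Map<number, number>([[fromPageId, fromPageId]])
  let frontier = [fromPageId]

  while (frontier.length > 0 && !previous.has(toPageId)) {
    const result = await db.rawQuery(
      "SELECT from_page_id, to_page_id FROM internal_links WHERE from_page_id = ANY(?)",
      [frontier],
    )
    const nextFrontier: number[] = []
    for (const row of result.rows) {
      if (!previous.has(row.to_page_id)) {
        previous.set(row.to_page_id, row.from_page_id)
        nextFrontier.push(row.to_page_id)
      }
    }
    frontier = nextFrontier
  }

  if (!previous.has(toPageId)) {
    return null
  }
  // Walk the predecessor map back from the target to rebuild one shortest path.
  const path = [toPageId]
  while (path[0] !== fromPageId) {
    path.unshift(previous.get(path[0])!)
  }
  return path
}
```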
@@ -1,27 +1,20 @@
 services:
   wikipedia-solver-dev-database:
     container_name: "wikipedia-solver-dev-database"
-    image: "mariadb:10.6.17"
+    image: "postgres:16.3"
     restart: "unless-stopped"
     env_file: ".env"
     environment:
-      MARIADB_USER: ${DATABASE_USER}
-      MARIADB_PASSWORD: ${DATABASE_PASSWORD}
-      MARIADB_ROOT_PASSWORD: ${DATABASE_PASSWORD}
-      MARIADB_DATABASE: ${DATABASE_NAME}
+      POSTGRES_USER: ${DATABASE_USER}
+      POSTGRES_PASSWORD: ${DATABASE_PASSWORD}
+      POSTGRES_DB: ${DATABASE_NAME}
     command: |
-      --innodb_buffer_pool_size=4G
-      --key-buffer-size=4G
-      --innodb_log_buffer_size=256M
-      --innodb_log_file_size=1G
-      --innodb_write_io_threads=16
-      --innodb_flush_log_at_trx_commit=0
-      --max_allowed_packet=1G
+      --max_wal_size=4GB
     ports:
-      - "${DATABASE_PORT-3306}:${DATABASE_PORT-3306}"
+      - "${DATABASE_PORT-5432}:${DATABASE_PORT-5432}"
     volumes:
-      - "wikipedia-solver-dev-mariadb-data:/var/lib/mysql"
-      # - "./sql:/docker-entrypoint-initdb.d/"
+      - "wikipedia-solver-dev-postgres-data:/var/lib/postgresql/data"
+      - "./data:/data/"
 
   wikipedia-solver-dev-adminer:
     container_name: "wikipedia-solver-dev-adminer"
@@ -38,4 +31,4 @@ services:
       - "./data/adminer/fonts/:/var/www/html/fonts"
 
 volumes:
-  wikipedia-solver-dev-mariadb-data:
+  wikipedia-solver-dev-postgres-data:
compose.yaml (25 lines changed)
@@ -27,27 +27,20 @@ services:
 
   wikipedia-solver-database:
     container_name: "wikipedia-solver-database"
-    image: "mariadb:10.6.17"
+    image: "postgres:16.3"
     restart: "unless-stopped"
     env_file: ".env"
     environment:
-      MARIADB_USER: ${DATABASE_USER}
-      MARIADB_PASSWORD: ${DATABASE_PASSWORD}
-      MARIADB_ROOT_PASSWORD: ${DATABASE_PASSWORD}
-      MARIADB_DATABASE: ${DATABASE_NAME}
+      POSTGRES_USER: ${DATABASE_USER}
+      POSTGRES_PASSWORD: ${DATABASE_PASSWORD}
+      POSTGRES_DB: ${DATABASE_NAME}
     command: |
-      --innodb_buffer_pool_size=4G
-      --key-buffer-size=4G
-      --innodb_log_buffer_size=256M
-      --innodb_log_file_size=1G
-      --innodb_write_io_threads=16
-      --innodb_flush_log_at_trx_commit=0
-      --max_allowed_packet=1G
+      --max_wal_size=4GB
     ports:
-      - "${DATABASE_PORT-3306}:${DATABASE_PORT-3306}"
+      - "${DATABASE_PORT-5432}:${DATABASE_PORT-5432}"
     volumes:
-      - "wikipedia-solver-mariadb-data:/var/lib/mysql"
-      # - "./sql:/docker-entrypoint-initdb.d/"
+      - "wikipedia-solver-postgres-data:/var/lib/postgresql/data"
+      - "./data:/data/"
 
 volumes:
-  wikipedia-solver-mariadb-data:
+  wikipedia-solver-postgres-data:
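For context (not in the diff): the DATABASE_* variables both compose files consume are typically validated in an AdonisJS 6 `start/env.ts`. A minimal sketch, assuming the project declares exactly these keys; `Env.create` and `Env.schema` are standard AdonisJS 6 APIs, but the file's actual contents are an assumption.

```ts
import { Env } from "@adonisjs/core/env"

// Sketch of the env contract implied by the compose files and database config.
export default await Env.create(new URL("../", import.meta.url), {
  DATABASE_HOST: Env.schema.string({ format: "host" }),
  DATABASE_PORT: Env.schema.number(),
  DATABASE_USER: Env.schema.string(),
  DATABASE_PASSWORD: Env.schema.string(),
  DATABASE_NAME: Env.schema.string(),
})
```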
@@ -1,3 +0,0 @@
-DATABASE_USER=wikipedia_user
-DATABASE_PASSWORD=password
-DATABASE_NAME=wikipedia
@@ -2,7 +2,11 @@
 
 ```sh
 ./download-wikipedia-dump.sh
-node --max-old-space-size=8096 database-wikipedia.js
+node --max-old-space-size=8096 generate-sql-files.js
+
+# Inside the Database container
+docker exec -it wikipedia-solver-dev-database sh
+/data/execute-sql.sh
 ```
 
 ## Utils
@@ -11,13 +15,7 @@ Show the first 10 line of sql file: `head -n 10 ./dump/page.sql`
 
 Show the first 10 characters of sql file: `head -c 10 ./dump/page.sql`
 
-To inspect volume size used by database: `docker system df -v | grep 'wikipedia-solver-mariadb-data'`
+To inspect volume size used by database: `docker system df -v`
 
-To enter in the database container: `docker exec -it wikipedia-solver-database sh`
-
-Then: `mariadb --password="${DATABASE_PASSWORD}" --user="${DATABASE_USER}"`
-
-And `use wikipedia;`, for example: `SELECT * FROM pages LIMIT 10;` or to execute a SQL script: `source /docker-entrypoint-initdb.d/3-internal-links-inserts.sql;`.
-
 ## Remove a volume
 
@@ -32,15 +30,22 @@ docker volume rm data_wikipedia-solver-mariadb-data
 docker-compose down --volumes
 ```
 
-## MySQL Related
+## PostgreSQL Related
 
-<https://stackoverflow.com/questions/43954631/issues-with-wikipedia-dump-table-pagelinks>
+<https://stackoverflow.com/questions/12206600/how-to-speed-up-insertion-performance-in-postgresql>
 
-MySQL any way to import a huge (32 GB) sql dump faster?: <https://stackoverflow.com/questions/40384864/importing-wikipedia-dump-to-mysql>
-
-Import data.sql MySQL Docker Container: <https://stackoverflow.com/questions/43880026/import-data-sql-mysql-docker-container>
-
-<https://dba.stackexchange.com/questions/83125/mysql-any-way-to-import-a-huge-32-gb-sql-dump-faster>
+```sh
+docker exec -it wikipedia-solver-dev-database sh
+
+psql --username="${DATABASE_USER}" --dbname="${DATABASE_NAME}"
+```
+
+```sql
+-- Execute script with inserts
+\i /data/sql-pages-inserts/0001-pages-inserts.sql
+
+/data/sql-internal-links-inserts/0001-internal-links.sh
+```
 
 ## Dumps Links
 
@@ -1,39 +0,0 @@
-services:
-  wikipedia-solver-database:
-    container_name: "wikipedia-solver-database"
-    image: "mariadb:10.6.17"
-    restart: "unless-stopped"
-    env_file: ".env"
-    environment:
-      MARIADB_USER: ${DATABASE_USER}
-      MARIADB_PASSWORD: ${DATABASE_PASSWORD}
-      MARIADB_ROOT_PASSWORD: ${DATABASE_PASSWORD}
-      MARIADB_DATABASE: ${DATABASE_NAME}
-    command: |
-      --innodb_buffer_pool_size=4G
-      --key-buffer-size=4G
-      --innodb_log_buffer_size=256M
-      --innodb_log_file_size=1G
-      --innodb_write_io_threads=16
-      --innodb_flush_log_at_trx_commit=0
-      --max_allowed_packet=1G
-    volumes:
-      - "wikipedia-solver-mariadb-data:/var/lib/mysql"
-      - "./sql:/docker-entrypoint-initdb.d/"
-
-  adminer:
-    container_name: "adminer"
-    image: "adminer:4.8.1"
-    restart: "unless-stopped"
-    ports:
-      - "8080:8080"
-    env_file: ".env"
-    environment:
-      ADMINER_DEFAULT_SERVER: "wikipedia-solver-database"
-    volumes:
-      - "./adminer/default-orange.css:/var/www/html/adminer.css"
-      - "./adminer/logo.png:/var/www/html/logo.png"
-      - "./adminer/fonts/:/var/www/html/fonts"
-
-volumes:
-  wikipedia-solver-mariadb-data:
data/execute-sql.sh (new executable file, 8 lines)
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+/data/sql/0000-sql-init.sh
+
+/data/sql-pages-inserts/0000-pages.sh
+/data/sql-internal-links-inserts/0000-internal-links.sh
+
+/data/sql/0999-sql-end.sh
@@ -7,7 +7,6 @@ import {
 } from "./utils.js"
 
 const SQL_DUMP_PATH = path.join(process.cwd(), "dump")
-const SQL_OUTPUT_PATH = path.join(process.cwd(), "sql")
 const SQL_FILENAME_NUMBER_PAD = 4
 
 /**
@@ -52,9 +51,42 @@ const cleanPagesSQL = async () => {
   let current = ""
   let lastPercent = 0
+
+  let pagesFileCount = 1
+
+  const INSERT_INTO_START_OUTPUT = "INSERT INTO pages (id, title) VALUES "
+
+  const BATCH_SIZE = 1_000_000
+
+  /**
+   * @type {string[]}
+   */
+  let batch = []
+
+  const flushBatch = async () => {
+    if (batch.length > 0) {
+      const batchString = batch.join(",")
+      const fileName = `${zeroPad(pagesFileCount, SQL_FILENAME_NUMBER_PAD)}-pages-inserts.sql`
+      const sqlOutputPath = path.join(
+        process.cwd(),
+        "sql-pages-inserts",
+        fileName,
+      )
+      await fs.promises.writeFile(
+        sqlOutputPath,
+        `${INSERT_INTO_START_OUTPUT}${batchString};`,
+        {
+          encoding: "utf-8",
+        },
+      )
+      console.log(`flushBatch - ${fileName}, batch.length: ${batch.length}`)
+      pagesFileCount += 1
+      batch = []
+    }
+  }
 
   return await new Promise((resolve, reject) => {
     sqlInputFileStream
-      .on("data", (dataInput) => {
+      .on("data", async (dataInput) => {
         const bytesReadRatio = sqlInputFileStream.bytesRead / sqlInputStat.size
         const bytesReadPercent = bytesReadRatio * 100
 
@@ -98,13 +130,21 @@ const cleanPagesSQL = async () => {
 
         if (namespace === "0" && !isRedirect) {
           wikipediaPagesKeyId[id] = title
+          batch.push(`(${id},E${title})`)
         }
       }
+
+      if (batch.length >= BATCH_SIZE) {
+        sqlInputFileStream.pause()
+        await flushBatch()
+        sqlInputFileStream.resume()
+      }
       })
       .on("error", (error) => {
         return reject(error)
       })
-      .on("close", () => {
+      .on("close", async () => {
+        await flushBatch()
         console.log("cleanPagesSQL - Bytes read (100%).")
         return resolve(wikipediaPagesKeyId)
       })
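A note on the pattern this hunk introduces (my reading, not stated in the commit): Node.js streams do not await an async `data` handler, so the explicit `pause()` / `await flushBatch()` / `resume()` sequence is what provides backpressure while a batch is written out. A minimal standalone sketch of the same idea, with placeholder file names and batch size:

```ts
import fs from "node:fs"

// Sketch of the pause/flush/resume backpressure pattern used above.
const stream = fs.createReadStream("input.sql", { encoding: "utf-8" })
let batch: string[] = []

stream.on("data", async (chunk) => {
  batch.push(chunk.toString())
  if (batch.length >= 1000) {
    stream.pause() // stop `data` events while the batch is flushed
    await fs.promises.appendFile("output.sql", batch.join(""))
    batch = []
    stream.resume()
  }
})
```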
@@ -113,30 +153,6 @@ const cleanPagesSQL = async () => {
 
 const wikipediaPagesKeyId = await cleanPagesSQL()
 
-const cleanPagesSQLWriteToFile = async () => {
-  console.log("cleanPagesSQLWriteToFile - Writing to file...")
-  const sqlOutputPath = path.join(
-    SQL_OUTPUT_PATH,
-    `${zeroPad(1, SQL_FILENAME_NUMBER_PAD)}-pages-inserts.sql`,
-  )
-  const INSERT_INTO_START_OUTPUT = "INSERT INTO pages (id, title) VALUES "
-
-  const wikipediaPagesString = Object.entries(wikipediaPagesKeyId)
-    .map(([id, title]) => {
-      return `(${id},${title})`
-    })
-    .join(",")
-
-  await fs.promises.writeFile(
-    sqlOutputPath,
-    `${INSERT_INTO_START_OUTPUT}${wikipediaPagesString};`,
-    { encoding: "utf-8" },
-  )
-  console.log("cleanPagesSQLWriteToFile - Done.")
-}
-
-await cleanPagesSQLWriteToFile()
-
 /**
  * Function to clean the `pagelinks.sql` file by:
  * - Removing all lines that don't start with `INSERT INTO...`.
@@ -145,7 +161,7 @@ await cleanPagesSQLWriteToFile()
  * @returns {Promise<void>}
  */
 const cleanInternalLinksSQL = async () => {
-  let internalLinksFileCount = 2
+  let internalLinksFileCount = 1
   const INSERT_INTO_START_OUTPUT =
     "INSERT INTO internal_links (from_page_id, to_page_id) VALUES "
 
@@ -174,7 +190,11 @@ const cleanInternalLinksSQL = async () => {
     if (batch.length > 0) {
       const batchString = batch.join(",")
       const fileName = `${zeroPad(internalLinksFileCount, SQL_FILENAME_NUMBER_PAD)}-internal-links-inserts.sql`
-      const sqlOutputPath = path.join(SQL_OUTPUT_PATH, fileName)
+      const sqlOutputPath = path.join(
+        process.cwd(),
+        "sql-internal-links-inserts",
+        fileName,
+      )
       await fs.promises.writeFile(
         sqlOutputPath,
         `${INSERT_INTO_START_OUTPUT}${batchString};`,
data/sql-internal-links-inserts/0000-internal-links.sh (new executable file, 6 lines)
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+for sqlInsert in /data/sql-internal-links-inserts/*.sql; do
+  echo "${sqlInsert}"
+  time psql --username="${DATABASE_USER}" --dbname="${DATABASE_NAME}" --file="${sqlInsert}"
+done

data/sql-pages-inserts/0000-pages.sh (new executable file, 6 lines)
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+for sqlInsert in /data/sql-pages-inserts/*.sql; do
+  echo "${sqlInsert}"
+  time psql --username="${DATABASE_USER}" --dbname="${DATABASE_NAME}" --file="${sqlInsert}"
+done

data/sql/0000-sql-init.sh (new executable file, 3 lines)
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+time psql --username="${DATABASE_USER}" --dbname="${DATABASE_NAME}" --file="/data/sql/0000-sql-init.sql"

data/sql/0000-sql-init.sql (new file, 2 lines)
@@ -0,0 +1,2 @@
+ALTER TABLE pages DISABLE TRIGGER ALL;
+ALTER TABLE internal_links DISABLE TRIGGER ALL;
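Worth knowing about this init/end pair (standard PostgreSQL behavior, not something the commit states): `ALTER TABLE ... DISABLE TRIGGER ALL` also disables the internal triggers that enforce foreign keys, which is what makes the bulk inserts fast, and disabling system triggers requires superuser rights; the role created via `POSTGRES_USER` in the compose files is a superuser, so these scripts can run as-is.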
@@ -1,28 +0,0 @@
-CREATE TABLE `pages` (
-  `id` INT(8) UNSIGNED NOT NULL AUTO_INCREMENT,
-  `title` VARBINARY(255) NOT NULL DEFAULT '',
-  -- `is_redirect` tinyint(1) unsigned NOT NULL DEFAULT 0,
-
-  PRIMARY KEY (`id`),
-  UNIQUE KEY (`title`)
-) ENGINE=MyISAM AUTO_INCREMENT=76684425 DEFAULT CHARSET=binary ROW_FORMAT=COMPRESSED;
-
--- VARBINARY usage instead of VARCHAR explanation: <https://stackoverflow.com/a/13397437>
--- > War on varchar. Changed all occurrences of varchar(N) and varchar(N) binary to varbinary(N). varchars cause problems ("Invalid mix of collations" errors) on MySQL databases with certain configs, most notably the default MySQL config.
-
-CREATE TABLE `internal_links` (
-  -- `id` INT(8) UNSIGNED NOT NULL AUTO_INCREMENT,
-  `from_page_id` INT(8) UNSIGNED NOT NULL,
-  `to_page_id` INT(8) UNSIGNED NOT NULL,
-
-  -- PRIMARY KEY (`id`)
-  PRIMARY KEY (`from_page_id`, `to_page_id`),
-  FOREIGN KEY (`from_page_id`) REFERENCES `pages` (`id`) ON DELETE CASCADE,
-  FOREIGN KEY (`to_page_id`) REFERENCES `pages` (`id`) ON DELETE CASCADE
-) ENGINE=MyISAM DEFAULT CHARSET=binary ROW_FORMAT=COMPRESSED;
-
-SET @@session.unique_checks = 0;
-SET @@session.foreign_key_checks = 0;
-
-SET FOREIGN_KEY_CHECKS = 0;
-SET UNIQUE_CHECKS = 0;
@@ -1,11 +0,0 @@
--- SET @@session.foreign_key_checks = 0;
--- SET FOREIGN_KEY_CHECKS = 0;
-
--- ALTER TABLE `internal_links` ADD CONSTRAINT fk_from_page_id FOREIGN KEY (`from_page_id`) REFERENCES `pages` (`id`);
--- ALTER TABLE `internal_links` ADD CONSTRAINT fk_to_page_id FOREIGN KEY (`to_page_id`) REFERENCES `pages` (`id`);
-
-SET @@session.unique_checks = 1;
-SET @@session.foreign_key_checks = 1;
-
-SET FOREIGN_KEY_CHECKS = 1;
-SET UNIQUE_CHECKS = 1;
data/sql/0999-sql-end.sh (new executable file, 3 lines)
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+time psql --username="${DATABASE_USER}" --dbname="${DATABASE_NAME}" --file="/data/sql/0999-sql-end.sql"

data/sql/0999-sql-end.sql (new file, 2 lines)
@@ -0,0 +1,2 @@
+ALTER TABLE pages ENABLE TRIGGER ALL;
+ALTER TABLE internal_links ENABLE TRIGGER ALL;
@@ -26,7 +26,7 @@
     "editorconfig-checker": "5.1.8",
     "playwright": "catalog:",
     "prettier": "3.3.3",
-    "prettier-plugin-tailwindcss": "0.6.5",
+    "prettier-plugin-tailwindcss": "0.6.6",
     "replace-in-files-cli": "3.0.0",
     "semantic-release": "23.1.1",
     "turbo": "2.0.12",
pnpm-lock.yaml (generated, 530 lines changed): diff suppressed because it is too large
@@ -21,7 +21,7 @@ catalog:
   # TypeScript
   "typescript": "5.5.4"
   "@total-typescript/ts-reset": "0.5.1"
-  "@types/node": "22.1.0"
+  "@types/node": "22.2.0"
   "tsx": "4.17.0"
 
   # AdonisJS
@@ -29,7 +29,7 @@ catalog:
   "@adonisjs/core": "6.12.1"
   "@adonisjs/cors": "2.2.1"
   "@adonisjs/lucid": "21.2.0"
-  "mysql2": "3.11.0"
+  "pg": "8.12.0"
   "@adonisjs/assembler": "7.7.0"
   "@vinejs/vine": "2.1.0"
   "luxon": "3.5.0"