wikipedia-game-solver/data/generate-sql-files.js

import fs from "node:fs"
import path from "node:path"
import {
  extractRowsFromSQLValues,
  swapKeysAndValues,
  zeroPad,
} from "./utils.js"

const SQL_DUMP_PATH = path.join(process.cwd(), "dump")
const SQL_FILENAME_NUMBER_PAD = 4

/**
 * @typedef {Record<string, number>} WikipediaPagesKeyTitle
 *
 * Object to store pages from Wikipedia:
 * - Key: page title sanitized - The real title shown is this title with underscores (_) converted to spaces ( ).
 * - Value: page id.
 */

/**
 * @typedef {Record<string, number>} WikipediaPagesKeyId
 *
 * Object to store pages from Wikipedia:
 * - Key: page id.
 * - Value: page title sanitized - The real title shown is this title with underscores (_) converted to spaces ( ).
 */

/**
 * @typedef WikipediaInternalLink
 * @property {number} fromPageId
 * @property {number} toPageId
 */

/**
 * Function to clean the `page.sql` file by:
 * - Removing all lines that don't start with `INSERT INTO...`.
 * - Filter by keeping rows where `page_namespace` (2nd column) is equal to 0, and where `page_is_redirect` (4th column) is equal to false (0).
 * - Only keep columns `page_id` (1st column) and `page_title` (3rd column).
 * @returns {Promise<WikipediaPagesKeyId>}
 */
const cleanPagesSQL = async () => {
  /** @type {WikipediaPagesKeyId} */
  const wikipediaPagesKeyId = {}

  const INSERT_INTO_START_INPUT = "INSERT INTO `page` VALUES "
  const sqlInputPath = path.join(SQL_DUMP_PATH, "page.sql")
  const sqlInputStat = await fs.promises.stat(sqlInputPath)
  const sqlInputFileStream = fs.createReadStream(sqlInputPath, "utf-8")

  let isInsideInsert = false
  let current = ""
  let lastPercent = 0

  let pagesFileCount = 1

  const INSERT_INTO_START_OUTPUT = "INSERT INTO pages (id, title) VALUES "

  const BATCH_SIZE = 1_000_000

  /**
   * @type {string[]}
   */
  let batch = []

  const flushBatch = async () => {
    if (batch.length > 0) {
      const batchString = batch.join(",")
      const fileName = `${zeroPad(pagesFileCount, SQL_FILENAME_NUMBER_PAD)}-pages-inserts.sql`
      const sqlOutputPath = path.join(
        process.cwd(),
        "sql-pages-inserts",
        fileName,
      )
      await fs.promises.writeFile(
        sqlOutputPath,
        `${INSERT_INTO_START_OUTPUT}${batchString};`,
        {
          encoding: "utf-8",
        },
      )
      console.log(`flushBatch - ${fileName}, batch.length: ${batch.length}`)
      pagesFileCount += 1
      batch = []
    }
  }

  return await new Promise((resolve, reject) => {
    sqlInputFileStream
      .on("data", async (dataInput) => {
        const bytesReadRatio = sqlInputFileStream.bytesRead / sqlInputStat.size
        const bytesReadPercent = bytesReadRatio * 100

        if (bytesReadPercent - lastPercent >= 1) {
          console.log(
            `cleanPagesSQL - Bytes read (${bytesReadPercent.toFixed(2)}%): ${sqlInputFileStream.bytesRead} / ${sqlInputStat.size}`,
          )
          lastPercent = bytesReadPercent
        }

        let data = current + dataInput

        if (!isInsideInsert) {
          const lines = data.split("\n").filter((line) => {
            return line.startsWith(INSERT_INTO_START_INPUT)
          })
          const [line] = lines
          if (line == null) {
            sqlInputFileStream.close()
            return reject(new Error(`No "${INSERT_INTO_START_INPUT}" found.`))
          }
          isInsideInsert = true
          const lineStripped = line.slice(INSERT_INTO_START_INPUT.length)
          data = lineStripped
        }

        const { rows, unCompleted } = extractRowsFromSQLValues(data)
        current = unCompleted

        for (const row of rows) {
          if (row.length !== 12) {
            sqlInputFileStream.close()
            console.error([row])
            return reject(new Error(`Invalid Row values.`))
          }

          const id = Number.parseInt(row[0] ?? "0", 10)
          const namespace = row[1] ?? ""
          const title = row[2] ?? ""
          const isRedirect = row[3] === "1"

          if (namespace === "0" && !isRedirect) {
            wikipediaPagesKeyId[id] = title
            batch.push(`(${id},E${title})`)
          }
        }

        if (batch.length >= BATCH_SIZE) {
          sqlInputFileStream.pause()
          await flushBatch()
          sqlInputFileStream.resume()
        }
      })
      .on("error", (error) => {
        return reject(error)
      })
      .on("close", async () => {
        await flushBatch()
        console.log("cleanPagesSQL - Bytes read (100%).")
        return resolve(wikipediaPagesKeyId)
      })
  })
}

const wikipediaPagesKeyId = await cleanPagesSQL()

/**
 * Function to clean the `pagelinks.sql` file by:
 * - Removing all lines that don't start with `INSERT INTO...`.
 * - Filter by keeping rows where `pl_from_namespace` (2nd column) is equal to 0.
 * - Transform the rows to internal links with fromPageId and toPageId.
 * @returns {Promise<void>}
 */
const cleanInternalLinksSQL = async () => {
  let internalLinksFileCount = 1
  const INSERT_INTO_START_OUTPUT =
    "INSERT INTO internal_links (from_page_id, to_page_id) VALUES "

  /**
   * @type {WikipediaPagesKeyTitle}
   */
  const wikipediaPagesKeyTitle = swapKeysAndValues(wikipediaPagesKeyId)

  const INSERT_INTO_START_INPUT = "INSERT INTO `pagelinks` VALUES "
  const sqlInputPath = path.join(SQL_DUMP_PATH, "pagelinks.sql")
  const sqlInputStat = await fs.promises.stat(sqlInputPath)
  const sqlInputFileStream = fs.createReadStream(sqlInputPath, "utf-8")

  let isInsideInsert = false
  let current = ""
  let lastPercent = 0

  const BATCH_SIZE = 4_000_000

  /**
   * @type {string[]}
   */
  let batch = []

  const flushBatch = async () => {
    if (batch.length > 0) {
      const batchString = batch.join(",")
      const fileName = `${zeroPad(internalLinksFileCount, SQL_FILENAME_NUMBER_PAD)}-internal-links-inserts.sql`
      const sqlOutputPath = path.join(
        process.cwd(),
        "sql-internal-links-inserts",
        fileName,
      )
      await fs.promises.writeFile(
        sqlOutputPath,
        `${INSERT_INTO_START_OUTPUT}${batchString};`,
        {
          encoding: "utf-8",
        },
      )
      console.log(`flushBatch - ${fileName}, batch.length: ${batch.length}`)
      internalLinksFileCount += 1
      batch = []
    }
  }

  return await new Promise((resolve, reject) => {
    sqlInputFileStream
      .on("data", async (dataInput) => {
        const bytesReadRatio = sqlInputFileStream.bytesRead / sqlInputStat.size
        const bytesReadPercent = bytesReadRatio * 100

        if (bytesReadPercent - lastPercent >= 0.5) {
          console.log(
            `cleanInternalLinksSQL - Bytes read (${bytesReadPercent.toFixed(2)}%): ${sqlInputFileStream.bytesRead} / ${sqlInputStat.size}`,
          )
          lastPercent = bytesReadPercent
        }

        let data = current + dataInput

        if (!isInsideInsert) {
          const lines = data.split("\n").filter((line) => {
            return line.startsWith(INSERT_INTO_START_INPUT)
          })
          const [line] = lines
          if (line == null) {
            sqlInputFileStream.close()
            return reject(new Error(`No "${INSERT_INTO_START_INPUT}" found.`))
          }
          isInsideInsert = true
          const lineStripped = line.slice(INSERT_INTO_START_INPUT.length)
          data = lineStripped
        }

        const { rows, unCompleted } = extractRowsFromSQLValues(data)
        current = unCompleted

        for (const row of rows) {
          if (row.length !== 5) {
            sqlInputFileStream.close()
            console.error([row])
            return reject(new Error(`Invalid Row values.`))
          }

          const plFromPageId = Number.parseInt(row[0] ?? "0", 10)
          const plTargetNamespace = row[1] ?? ""
          const plTargetTitle = row[2] ?? ""
          const plFromNamespace = row[3] ?? ""

          if (plFromNamespace === "0" && plTargetNamespace === "0") {
            const toPageId = wikipediaPagesKeyTitle[plTargetTitle]
            if (toPageId != null && wikipediaPagesKeyId[plFromPageId] != null) {
              /**
               * @type {WikipediaInternalLink}
               */
              const wikipediaInternalLink = {
                fromPageId: plFromPageId,
                toPageId,
              }
              batch.push(
                `(${wikipediaInternalLink.fromPageId},${wikipediaInternalLink.toPageId})`,
              )
            }
          }
        }

        if (batch.length >= BATCH_SIZE) {
          sqlInputFileStream.pause()
          await flushBatch()
          sqlInputFileStream.resume()
        }
      })
      .on("error", (error) => {
        return reject(error)
      })
      .on("close", async () => {
        await flushBatch()
        console.log("cleanInternalLinksSQL - Bytes read (100%).")
        return resolve()
      })
  })
}

await cleanInternalLinksSQL()
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`import fs from "node:fs"`
			`import path from "node:path"`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`import {`
			`extractRowsFromSQLValues,`
			`swapKeysAndValues,`
			`zeroPad,`
			`} from "./utils.js"`
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00
			`const SQL_DUMP_PATH = path.join(process.cwd(), "dump")`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`const SQL_FILENAME_NUMBER_PAD = 4`
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00
chore: clean up POC to get Wikipedia dump 2024-08-05 00:52:48 +02:00			`/**`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`* @typedef {Record<string, number>} WikipediaPagesKeyTitle`
chore: clean up POC to get Wikipedia dump 2024-08-05 00:52:48 +02:00			`*`
			`* Object to store pages from Wikipedia:`
			`* - Key: page title sanitized - The real title shown is this title with underscores (_) converted to spaces ( ).`
			`* - Value: page id.`
			`*/`

feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`/**`
			`* @typedef {Record<string, number>} WikipediaPagesKeyId`
			`*`
			`* Object to store pages from Wikipedia:`
			`* - Key: page id.`
			`* - Value: page title sanitized - The real title shown is this title with underscores (_) converted to spaces ( ).`
			`*/`

			`/**`
			`* @typedef WikipediaInternalLink`
			`* @property {number} fromPageId`
			`* @property {number} toPageId`
			`*/`

feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`/**`
			* Function to clean the `page.sql` file by:
			* - Removing all lines that don't start with `INSERT INTO...`.
			* - Filter by keeping rows where `page_namespace` (2nd column) is equal to 0, and where `page_is_redirect` (4th column) is equal to false (0).
			* - Only keep columns `page_id` (1st column) and `page_title` (3rd column).
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`* @returns {Promise<WikipediaPagesKeyId>}`
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`*/`
			`const cleanPagesSQL = async () => {`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`/** @type {WikipediaPagesKeyId} */`
			`const wikipediaPagesKeyId = {}`
chore: clean up POC to get Wikipedia dump 2024-08-05 00:52:48 +02:00
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			const INSERT_INTO_START_INPUT = "INSERT INTO `page` VALUES "
			`const sqlInputPath = path.join(SQL_DUMP_PATH, "page.sql")`
			`const sqlInputStat = await fs.promises.stat(sqlInputPath)`
			`const sqlInputFileStream = fs.createReadStream(sqlInputPath, "utf-8")`

			`let isInsideInsert = false`
			`let current = ""`
			`let lastPercent = 0`

feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB 2024-08-11 10:42:42 +02:00			`let pagesFileCount = 1`

			`const INSERT_INTO_START_OUTPUT = "INSERT INTO pages (id, title) VALUES "`

			`const BATCH_SIZE = 1_000_000`

			`/**`
			`* @type {string[]}`
			`*/`
			`let batch = []`

			`const flushBatch = async () => {`
			`if (batch.length > 0) {`
			`const batchString = batch.join(",")`
			const fileName = `${zeroPad(pagesFileCount, SQL_FILENAME_NUMBER_PAD)}-pages-inserts.sql`
			`const sqlOutputPath = path.join(`
			`process.cwd(),`
			`"sql-pages-inserts",`
			`fileName,`
			`)`
			`await fs.promises.writeFile(`
			`sqlOutputPath,`
			`${INSERT_INTO_START_OUTPUT}${batchString};`,
			`{`
			`encoding: "utf-8",`
			`},`
			`)`
			console.log(`flushBatch - ${fileName}, batch.length: ${batch.length}`)
			`pagesFileCount += 1`
			`batch = []`
			`}`
			`}`

feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`return await new Promise((resolve, reject) => {`
			`sqlInputFileStream`
feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB 2024-08-11 10:42:42 +02:00			`.on("data", async (dataInput) => {`
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`const bytesReadRatio = sqlInputFileStream.bytesRead / sqlInputStat.size`
			`const bytesReadPercent = bytesReadRatio * 100`

			`if (bytesReadPercent - lastPercent >= 1) {`
			`console.log(`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`cleanPagesSQL - Bytes read (${bytesReadPercent.toFixed(2)}%): ${sqlInputFileStream.bytesRead} / ${sqlInputStat.size}`,
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`)`
			`lastPercent = bytesReadPercent`
			`}`

			`let data = current + dataInput`

			`if (!isInsideInsert) {`
			`const lines = data.split("\n").filter((line) => {`
			`return line.startsWith(INSERT_INTO_START_INPUT)`
			`})`
			`const [line] = lines`
			`if (line == null) {`
			`sqlInputFileStream.close()`
			return reject(new Error(`No "${INSERT_INTO_START_INPUT}" found.`))
			`}`
			`isInsideInsert = true`
			`const lineStripped = line.slice(INSERT_INTO_START_INPUT.length)`
			`data = lineStripped`
			`}`

			`const { rows, unCompleted } = extractRowsFromSQLValues(data)`
			`current = unCompleted`

			`for (const row of rows) {`
			`if (row.length !== 12) {`
			`sqlInputFileStream.close()`
			`console.error([row])`
			return reject(new Error(`Invalid Row values.`))
			`}`

			`const id = Number.parseInt(row[0] ?? "0", 10)`
			`const namespace = row[1] ?? ""`
			`const title = row[2] ?? ""`
			`const isRedirect = row[3] === "1"`

			`if (namespace === "0" && !isRedirect) {`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`wikipediaPagesKeyId[id] = title`
feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB 2024-08-11 10:42:42 +02:00			batch.push(`(${id},E${title})`)
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`}`
			`}`
feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB 2024-08-11 10:42:42 +02:00
			`if (batch.length >= BATCH_SIZE) {`
			`sqlInputFileStream.pause()`
			`await flushBatch()`
			`sqlInputFileStream.resume()`
			`}`
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`})`
chore: clean up POC to get Wikipedia dump 2024-08-05 00:52:48 +02:00			`.on("error", (error) => {`
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`return reject(error)`
			`})`
feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB 2024-08-11 10:42:42 +02:00			`.on("close", async () => {`
			`await flushBatch()`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`console.log("cleanPagesSQL - Bytes read (100%).")`
			`return resolve(wikipediaPagesKeyId)`
feat: start implementation to get Wikipedia dump and customize it 2024-08-05 00:37:06 +02:00			`})`
			`})`
			`}`

feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`const wikipediaPagesKeyId = await cleanPagesSQL()`
chore: clean up POC to get Wikipedia dump 2024-08-05 00:52:48 +02:00
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`/**`
			* Function to clean the `pagelinks.sql` file by:
			* - Removing all lines that don't start with `INSERT INTO...`.
			* - Filter by keeping rows where `pl_from_namespace` (2nd column) is equal to 0.
			`* - Transform the rows to internal links with fromPageId and toPageId.`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`* @returns {Promise<void>}`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`*/`
			`const cleanInternalLinksSQL = async () => {`
feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB 2024-08-11 10:42:42 +02:00			`let internalLinksFileCount = 1`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`const INSERT_INTO_START_OUTPUT =`
			`"INSERT INTO internal_links (from_page_id, to_page_id) VALUES "`

feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`/**`
			`* @type {WikipediaPagesKeyTitle}`
			`*/`
			`const wikipediaPagesKeyTitle = swapKeysAndValues(wikipediaPagesKeyId)`

			const INSERT_INTO_START_INPUT = "INSERT INTO `pagelinks` VALUES "
			`const sqlInputPath = path.join(SQL_DUMP_PATH, "pagelinks.sql")`
			`const sqlInputStat = await fs.promises.stat(sqlInputPath)`
			`const sqlInputFileStream = fs.createReadStream(sqlInputPath, "utf-8")`

			`let isInsideInsert = false`
			`let current = ""`
			`let lastPercent = 0`

fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`const BATCH_SIZE = 4_000_000`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00
			`/**`
			`* @type {string[]}`
			`*/`
			`let batch = []`

fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`const flushBatch = async () => {`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`if (batch.length > 0) {`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`const batchString = batch.join(",")`
			const fileName = `${zeroPad(internalLinksFileCount, SQL_FILENAME_NUMBER_PAD)}-internal-links-inserts.sql`
feat(api): create Lucid models and migrations for Wikipedia database dump + usage of PostgreSQL instead of MariaDB 2024-08-11 10:42:42 +02:00			`const sqlOutputPath = path.join(`
			`process.cwd(),`
			`"sql-internal-links-inserts",`
			`fileName,`
			`)`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`await fs.promises.writeFile(`
			`sqlOutputPath,`
			`${INSERT_INTO_START_OUTPUT}${batchString};`,
			`{`
			`encoding: "utf-8",`
			`},`
			`)`
			console.log(`flushBatch - ${fileName}, batch.length: ${batch.length}`)
			`internalLinksFileCount += 1`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`batch = []`
			`}`
			`}`

feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`return await new Promise((resolve, reject) => {`
			`sqlInputFileStream`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`.on("data", async (dataInput) => {`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`const bytesReadRatio = sqlInputFileStream.bytesRead / sqlInputStat.size`
			`const bytesReadPercent = bytesReadRatio * 100`

perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`if (bytesReadPercent - lastPercent >= 0.5) {`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`console.log(`
			`cleanInternalLinksSQL - Bytes read (${bytesReadPercent.toFixed(2)}%): ${sqlInputFileStream.bytesRead} / ${sqlInputStat.size}`,
			`)`
			`lastPercent = bytesReadPercent`
			`}`

			`let data = current + dataInput`

			`if (!isInsideInsert) {`
			`const lines = data.split("\n").filter((line) => {`
			`return line.startsWith(INSERT_INTO_START_INPUT)`
			`})`
			`const [line] = lines`
			`if (line == null) {`
			`sqlInputFileStream.close()`
			return reject(new Error(`No "${INSERT_INTO_START_INPUT}" found.`))
			`}`
			`isInsideInsert = true`
			`const lineStripped = line.slice(INSERT_INTO_START_INPUT.length)`
			`data = lineStripped`
			`}`

			`const { rows, unCompleted } = extractRowsFromSQLValues(data)`
			`current = unCompleted`

			`for (const row of rows) {`
			`if (row.length !== 5) {`
			`sqlInputFileStream.close()`
			`console.error([row])`
			return reject(new Error(`Invalid Row values.`))
			`}`

			`const plFromPageId = Number.parseInt(row[0] ?? "0", 10)`
			`const plTargetNamespace = row[1] ?? ""`
			`const plTargetTitle = row[2] ?? ""`
			`const plFromNamespace = row[3] ?? ""`

			`if (plFromNamespace === "0" && plTargetNamespace === "0") {`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`const toPageId = wikipediaPagesKeyTitle[plTargetTitle]`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`if (toPageId != null && wikipediaPagesKeyId[plFromPageId] != null) {`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`/**`
			`* @type {WikipediaInternalLink}`
			`*/`
			`const wikipediaInternalLink = {`
			`fromPageId: plFromPageId,`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`toPageId,`
			`}`
			`batch.push(`
			`(${wikipediaInternalLink.fromPageId},${wikipediaInternalLink.toPageId})`,
			`)`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`}`
			`}`
			`}`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00
			`if (batch.length >= BATCH_SIZE) {`
			`sqlInputFileStream.pause()`
			`await flushBatch()`
			`sqlInputFileStream.resume()`
			`}`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`})`
			`.on("error", (error) => {`
			`return reject(error)`
			`})`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`.on("close", async () => {`
fix: wikipedia data dump POC improvements 2024-08-07 01:21:08 +02:00			`await flushBatch()`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`console.log("cleanInternalLinksSQL - Bytes read (100%).")`
perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`return resolve()`
feat: adapt internal_links SQL file POC 2024-08-05 14:04:28 +02:00			`})`
			`})`
			`}`

perf: improve memory usage for POC to get wikipedia dump 2024-08-05 17:36:19 +02:00			`await cleanInternalLinksSQL()`