
Wikipedia data

Utils

Show the first 10 lines of the SQL file: head -n 10 ./dump/page.sql

To inspect the volume size used by the database: docker system df -v | grep 'wikipedia-solver-postgres-data'

Import a SQL file into the PostgreSQL Docker container

In compose.yaml, we can mount SQL scripts that the container executes automatically the first time it starts (i.e. while the PostgreSQL data volume is still empty).

volumes:
  - "./sql:/docker-entrypoint-initdb.d/"

Remove a volume

# List all volumes
docker volume ls

# Remove a volume
docker volume rm data_wikipedia-solver-postgres-data

References

Issues with Wikipedia dump table pagelinks: https://stackoverflow.com/questions/43954631/issues-with-wikipedia-dump-table-pagelinks

Importing Wikipedia dump to MySQL: https://stackoverflow.com/questions/40384864/importing-wikipedia-dump-to-mysql

Import data.sql MySQL Docker Container: https://stackoverflow.com/questions/43880026/import-data-sql-mysql-docker-container

MySQL any way to import a huge (32 GB) sql dump faster?: https://dba.stackexchange.com/questions/83125/mysql-any-way-to-import-a-huge-32-gb-sql-dump-faster

page.sql.gz
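
The page table dump can be downloaded from the Wikimedia dumps server and decompressed into ./dump/page.sql (the exact URL and the ./dump target directory are assumptions matching the head command above):

# Example only: fetch the latest enwiki page table dump and decompress it.
mkdir -p ./dump
wget --directory-prefix=./dump "https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-page.sql.gz"
gzip --decompress --stdout ./dump/enwiki-latest-page.sql.gz > ./dump/page.sql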

MySQL full version

-- MariaDB dump 10.19  Distrib 10.5.23-MariaDB, for debian-linux-gnu (x86_64)
--
-- Host: db1206    Database: enwiki
-- ------------------------------------------------------
-- Server version	10.6.17-MariaDB-log

/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8mb4 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;

--
-- Table structure for table `page`
--

DROP TABLE IF EXISTS `page`;
/*!40101 SET @saved_cs_client     = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `page` (
  `page_id` int(8) unsigned NOT NULL AUTO_INCREMENT,
  `page_namespace` int(11) NOT NULL DEFAULT 0,
  `page_title` varbinary(255) NOT NULL DEFAULT '',
  `page_is_redirect` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `page_is_new` tinyint(1) unsigned NOT NULL DEFAULT 0,
  `page_random` double unsigned NOT NULL DEFAULT 0,
  `page_touched` binary(14) NOT NULL,
  `page_links_updated` varbinary(14) DEFAULT NULL,
  `page_latest` int(8) unsigned NOT NULL DEFAULT 0,
  `page_len` int(8) unsigned NOT NULL DEFAULT 0,
  `page_content_model` varbinary(32) DEFAULT NULL,
  `page_lang` varbinary(35) DEFAULT NULL,
  PRIMARY KEY (`page_id`),
  UNIQUE KEY `page_name_title` (`page_namespace`,`page_title`),
  KEY `page_random` (`page_random`),
  KEY `page_len` (`page_len`),
  KEY `page_redirect_namespace_len` (`page_is_redirect`,`page_namespace`,`page_len`)
) ENGINE=InnoDB AUTO_INCREMENT=77490241 DEFAULT CHARSET=binary ROW_FORMAT=COMPRESSED;
/*!40101 SET character_set_client = @saved_cs_client */;

--
-- Dumping data for table `page`
--

/*!40000 ALTER TABLE `page` DISABLE KEYS */;
INSERT INTO `page` VALUES (10,0,'AccessibleComputing',1,0,0.856935107283,'20240722211426','20240722220435',1219062925,111,'wikitext',NULL);

INSERT INTO `page` VALUES (10,0,'AccessibleComputing',1,0,0.856935107283,'20240722211426','20240722220435',1219062925,111,'wikitext',NULL),(12,0,'Anarchism',0,0,0.786172332974311,'20240731234111','20240731234202',1234495258,110759,'wikitext',NULL),(12281,0,'Gottfried_Wilhelm_Leibniz',0,0,0.79151204115852,'20240731234133','20240731234228',1237687724,155319,'wikitext',NULL),(13,0,'AfghanistanHistory',1,0,0.154661929211,'20240729123940','20240722220436',783865149,90,'wikitext',NULL),(14,0,'AfghanistanGeography',1,0,0.952234464653055,'20240722211426','20240722220436',783865160,92,'wikitext',NULL),(15,0,'AfghanistanPeople',1,0,0.047716566551,'20240722211426','20240722220436',783865293,95,'wikitext',NULL),(12473,1,'Gnosticism',0,0,0.00653186720472934,'20240801075011','20240731232236',1233717868,6579,'wikitext',NULL);
-- Expected output after customization, keeping only the id and title of namespace-0, non-redirect rows: INSERT INTO `page` VALUES (12,'Anarchism'),(12281,'Gottfried_Wilhelm_Leibniz');
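
As an illustration only (this is not the repository's actual database-wikipedia.js; the file paths, the target table name pages and the filtering rule are assumptions inferred from the expected-output comment above), a rough Node.js sketch of the conversion could look like this:

const fs = require("node:fs")
const readline = require("node:readline")

// First four columns of every tuple in a dump line:
// (page_id, page_namespace, 'page_title', page_is_redirect, ...)
const TUPLE_REGEX = /\((\d+),(-?\d+),'((?:[^'\\]|\\.)*)',([01]),/g

const convertPageDump = async (inputPath, outputPath) => {
  const output = fs.createWriteStream(outputPath)
  const lines = readline.createInterface({
    input: fs.createReadStream(inputPath),
    crlfDelay: Infinity,
  })
  for await (const line of lines) {
    if (!line.startsWith("INSERT INTO `page` VALUES")) {
      continue
    }
    const rows = []
    for (const [, id, namespace, title, isRedirect] of line.matchAll(TUPLE_REGEX)) {
      // Keep regular articles only: namespace 0 and not a redirect.
      if (namespace === "0" && isRedirect === "0") {
        // Note: the title keeps its MySQL backslash escapes; a real
        // implementation would convert them to PostgreSQL escaping.
        rows.push(`(${id},'${title}')`)
      }
    }
    if (rows.length > 0) {
      output.write(`INSERT INTO pages VALUES ${rows.join(",")};\n`)
    }
  }
  output.end()
}

convertPageDump("./dump/page.sql", "./sql/pages-inserts.sql")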

PostgreSQL short version

CREATE TABLE IF NOT EXISTS pages (
  id BIGSERIAL PRIMARY KEY,
  title VARCHAR(255) UNIQUE NOT NULL

  -- is_redirect BOOLEAN NOT NULL DEFAULT FALSE
);

-- Example inserts
INSERT INTO pages VALUES (10, 'AccessibleComputing'); -- a redirect page (is_redirect = true)
INSERT INTO pages VALUES (10474, 'Eight_queens_puzzle'); -- a regular article (is_redirect = false)

INSERT INTO pages VALUES
(10,'AccessibleComputing'),
(12,'Anarchism'),
(13,'AfghanistanHistory'),
(14,'AfghanistanGeography'),
(15,'AfghanistanPeople');
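
Once the init scripts have run, a quick way to check the import (the service, user and database names below are placeholders, adjust them to the actual compose.yaml and .env):

docker compose exec wikipedia-solver-database psql --username postgres --dbname wikipedia --command "SELECT COUNT(*) FROM pages;"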