Mirror of https://github.com/iptv-org/database.git (synced 2024-12-23 08:20:23 -05:00)

Merge branch 'iptv-org:master' into master

Commit 2e0980da55
12 changed files with 30990 additions and 30145 deletions
.github/workflows/check.yml (vendored, 2 changes)

@@ -13,7 +13,7 @@ jobs:
       - id: files
         uses: tj-actions/changed-files@v12.2
         with:
-          files: \.csv$
+          files: 'data'
       - name: validate
         if: steps.files.outputs.any_changed == 'true'
         run: |

BIN .readme/preview.png (new file, 228 KiB; binary file not shown)

README.md (34 changes)

@@ -1,5 +1,7 @@
 # Database [![check](https://github.com/iptv-org/database/actions/workflows/check.yml/badge.svg)](https://github.com/iptv-org/database/actions/workflows/check.yml)
 
+![channels.csv](https://github.com/iptv-org/database/raw/master/.readme/preview.png)
+
 All data is stored in the [/data](data) folder as [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) (comma-separated values) files. Any of the files can be edited either with a basic text editor or through any spreadsheet editor (such as [Google Sheets](https://www.google.com/sheets/about/), [LibreOffice](https://www.libreoffice.org/discover/libreoffice/), ...).
 
 ## Data Scheme
@@ -14,20 +16,24 @@ All data is stored in the [/data](data) folder as [CSV](https://en.wikipedia.org
 
 ### channels
 
-| Field | Description | Required | Example |
-| ----- | ----------- | -------- | ------- |
-| id | Unique channel ID. Should be derived from the name of the channel and the country code, separated by a dot. May only contain Latin letters, numbers and dots. | Required | `KSTSDT3.us` |
-| name | Official channel name. May include: `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `!`, `:`, `&`, `.`, `+`, `'`, `/`, `»`, `#`, `%`, `°`, `$`, `@`, `?`, `(`, `)`. | Required | `KSTS-DT3` |
-| network | Name of the network operating the channel. | Optional | `NBC` |
-| country | Country code from which the channel is transmitted. A list of all supported countries and their codes can be found in [data/countries.csv](data/countries.csv). | Required | `US` |
-| subdivision | Code of the subdivision (e.g., province or state) from which the broadcast is transmitted. A list of all supported subdivisions and their codes can be found in [data/subdivisions.csv](data/subdivisions.csv). | Optional | `US-CA` |
-| city | Name of the city from which the channel is transmitted. May only contain `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `'`. | Optional | `San Francisco` |
-| broadcast_area | List of codes describing the broadcasting area of the channel. Any combination of `r/<region_code>`, `c/<country_code>`, `s/<subdivision_code>`. | Required | `s/US-CA` |
-| languages | List of languages in which the channel is broadcast, separated by `;`. A list of all supported languages and their codes can be found in [data/languages.csv](data/languages.csv). | Required | `eng;spa` |
-| categories | List of categories to which this channel belongs, separated by `;`. A list of all supported categories can be found in [data/categories.csv](data/categories.csv). | Optional | `news;weather` |
-| is_nsfw | Indicates whether the channel broadcasts adult content (`TRUE` or `FALSE`). | Required | `FALSE` |
-| website | Official website URL. | Optional | `https://nbc.com/` |
-| logo | Logo URL. Only URLs with the HTTPS protocol are allowed. | Optional | `https://example.com/nbc.png` |
+| Field | Description | Required | Example |
+| ----- | ----------- | -------- | ------- |
+| id | Unique channel ID. Should be derived from the name of the channel and the country code, separated by a dot. May only contain Latin letters, numbers and dots. | Required | `AnhuiTV.cn` |
+| name | Official channel name in English. May include: `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `!`, `:`, `&`, `.`, `+`, `'`, `/`, `»`, `#`, `%`, `°`, `$`, `@`, `?`, `(`, `)`. | Required | `Anhui TV` |
+| native_name | Channel name in the original language. May contain any characters except `,` and `"`. | Optional | `安徽卫视` |
+| network | Network of which this channel is a part. | Optional | `Anhui` |
+| country | Country code from which the channel is transmitted. A list of all supported countries and their codes can be found in [data/countries.csv](data/countries.csv). | Required | `CN` |
+| subdivision | Code of the subdivision (e.g., province or state) from which the broadcast is transmitted. A list of all supported subdivisions and their codes can be found in [data/subdivisions.csv](data/subdivisions.csv). | Optional | `CN-AH` |
+| city | Name of the city from which the channel is transmitted. May only contain `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `'`. | Optional | `Hefei` |
+| broadcast_area | List of codes describing the broadcasting area of the channel. Any combination of `r/<region_code>`, `c/<country_code>`, `s/<subdivision_code>`. | Required | `s/CN-AH` |
+| languages | List of languages in which the channel is broadcast, separated by `;`. A list of all supported languages and their codes can be found in [data/languages.csv](data/languages.csv). | Required | `zho` |
+| categories | List of categories to which this channel belongs, separated by `;`. A list of all supported categories can be found in [data/categories.csv](data/categories.csv). | Optional | `general` |
+| is_nsfw | Indicates whether the channel broadcasts adult content (`TRUE` or `FALSE`). | Required | `FALSE` |
+| launched | Launch date of the channel (`YYYY-MM-DD`). | Optional | `2016-07-28` |
+| closed | Date on which the channel closed (`YYYY-MM-DD`). | Optional | `2020-05-31` |
+| replaced_by | The ID of the channel that replaced this channel. | Optional | `CCTV1.cn` |
+| website | Official website URL. | Optional | `http://www.ahtv.cn/` |
+| logo | Logo URL. Only URLs with the HTTPS protocol are allowed. Supported image types: `PNG`, `JPEG`. | Optional | `https://example.com/logo.png` |
 
 ### categories
 
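Put together, a parsed channels.csv row under the new scheme would look like the sketch below, assembled from the Example column above (a hypothetical row, not one taken from the database; `;`-separated list fields become arrays and empty cells become `null`):

```js
// Hypothetical parsed row built from the Example column of the new table.
const channel = {
  id: 'AnhuiTV.cn',
  name: 'Anhui TV',
  native_name: '安徽卫视',
  network: 'Anhui',
  country: 'CN',
  subdivision: 'CN-AH',
  city: 'Hefei',
  broadcast_area: ['s/CN-AH'], // s/ = subdivision, c/ = country, r/ = region
  languages: ['zho'],
  categories: ['general'],
  is_nsfw: false,
  launched: '2016-07-28',
  closed: null, // only set once a channel has shut down
  replaced_by: null,
  website: 'http://www.ahtv.cn/',
  logo: 'https://example.com/logo.png'
}
```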

data/channels.csv (58665 changes)

File diff suppressed because it is too large.

data/subdivisions.csv

@@ -865,10 +865,6 @@ GA,Haut-Ogooue,GA-2
 GA,Ngounie,GA-4
 GA,Ogooue-Maritime,GA-8
 GA,Woleu-Ntem,GA-9
-GB,England,GB-ENG
-GB,Northern Ireland,GB-NIR
-GB,Scotland,GB-SCT
-GB,Wales,GB-WLS
 GD,Saint Andrew,GD-01
 GD,Saint David,GD-02
 GD,Saint George,GD-03

@@ -3195,6 +3191,10 @@ UG,Tororo,UG-212
 UG,Wakiso,UG-113
 UG,Yumbe,UG-313
 UG,Zombo,UG-330
+UK,England,GB-ENG
+UK,Northern Ireland,GB-NIR
+UK,Scotland,GB-SCT
+UK,Wales,GB-WLS
 UM,Palmyra Atoll,UM-95
 US,Alabama,US-AL
 US,Alaska,US-AK

package-lock.json (generated, 1338 changes)

File diff suppressed because it is too large.

package.json

@@ -12,20 +12,26 @@
   "private": true,
   "author": "Arhey",
   "dependencies": {
+    "@joi/date": "^2.1.0",
     "axios": "^0.25.0",
     "chalk": "^4.1.2",
     "cheerio": "^1.0.0-rc.10",
     "commander": "^9.0.0",
     "crlf": "^1.1.1",
     "csvtojson": "^2.0.10",
     "dayjs": "^1.11.0",
     "glob": "^7.2.0",
     "iso-639-2": "^3.0.1",
     "joi": "^17.6.0",
     "json2csv": "^6.0.0-alpha.0",
     "lodash": "^4.17.21",
     "mz": "^2.7.0",
     "node-cleanup": "^2.1.2",
     "pre-commit": "^1.2.2",
     "signale": "^1.4.0",
     "slugify": "^1.6.5",
-    "transliteration": "^2.2.0"
+    "transliteration": "^2.2.0",
+    "wikijs": "^6.3.3",
+    "wtf_wikipedia": "^10.0.0"
   }
 }

scripts/.gitignore (vendored, 3 changes)

@@ -1 +1,2 @@
-/bot.js
+/__data__/
+/bots/

@@ -23,7 +23,11 @@ const csv2jsonOptions = {
     subdivision: nullable,
     city: nullable,
     network: nullable,
-    website: nullable
+    launched: nullable,
+    closed: nullable,
+    website: nullable,
+    native_name: nullable,
+    replaced_by: nullable
   }
 }
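The `nullable` parser itself is not shown in this diff; it is presumably wired into csvtojson's `colParser` option. A minimal sketch of how that could fit together, where the `nullable` definition is an assumption:

```js
const csv = require('csvtojson')

// Assumed definition: the diff only shows which columns are mapped through it.
const nullable = value => (value === '' ? null : value)

csv({ colParser: { launched: nullable, closed: nullable, replaced_by: nullable } })
  .fromString('id,launched,closed,replaced_by\r\nAnhuiTV.cn,2016-07-28,,\r\n')
  .then(rows => console.log(rows[0]))
// { id: 'AnhuiTV.cn', launched: '2016-07-28', closed: null, replaced_by: null }
```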

@@ -1,12 +1,16 @@
-const Joi = require('joi')
+const Joi = require('joi').extend(require('@joi/date'))
+const path = require('path')
 
 module.exports = {
   id: Joi.string()
     .regex(/^[A-Za-z0-9]+\.[a-z]{2}$/)
     .required(),
   name: Joi.string()
-    .regex(/^[\sa-zA-Z\u00C0-\u00FF0-9-!:&.+'/»#%°$@?()]+$/)
+    .regex(/^[\sa-zA-Z\u00C0-\u00FF0-9-!:&.+'/»#%°$@?()¡]+$/)
     .required(),
+  native_name: Joi.string()
+    .regex(/^[^",]+$/)
+    .allow(null),
   network: Joi.string().allow(null),
   country: Joi.string()
     .regex(/^[A-Z]{2}$/)

@@ -29,6 +33,11 @@ module.exports = {
   ),
   categories: Joi.array().items(Joi.string().regex(/^[a-z]+$/)),
   is_nsfw: Joi.boolean().strict().required(),
+  launched: Joi.date().format('YYYY-MM-DD').raw().allow(null),
+  closed: Joi.date().format('YYYY-MM-DD').raw().allow(null),
+  replaced_by: Joi.string()
+    .regex(/^[A-Za-z0-9]+\.[a-z]{2}$/)
+    .allow(null),
   website: Joi.string()
     .uri({
       scheme: ['http', 'https']

@@ -38,5 +47,13 @@ module.exports = {
     .uri({
       scheme: ['https']
     })
+    .custom((value, helper) => {
+      const ext = path.extname(value)
+      if (!ext || /(\.png|\.jpeg|\.jpg)/i.test(ext)) {
+        return true
+      } else {
+        return helper.message(`"logo" has an invalid file extension "${ext}"`)
+      }
+    })
     .allow(null)
 }
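A quick sketch (not part of the commit) of what the two new date rules accept: the `@joi/date` extension enforces the `YYYY-MM-DD` format, and `.raw()` keeps the validated value as the original string rather than converting it to a `Date`:

```js
const Joi = require('joi').extend(require('@joi/date'))

// The same rule the scheme adds for `launched` and `closed`.
const launched = Joi.date().format('YYYY-MM-DD').raw().allow(null)

console.log(launched.validate('2016-07-28').value)          // '2016-07-28' (kept as a string)
console.log(launched.validate(null).error)                  // undefined: null is allowed
console.log(Boolean(launched.validate('28/07/2016').error)) // true: wrong format is rejected
```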

@@ -3,60 +3,105 @@ const { program } = require('commander')
 const schemes = require('./schemes')
 const chalk = require('chalk')
 const Joi = require('joi')
+const _ = require('lodash')
 
 program.argument('[filepath]', 'Path to file to validate').parse(process.argv)
 
+const allFiles = [
+  'data/blocklist.csv',
+  'data/categories.csv',
+  'data/channels.csv',
+  'data/countries.csv',
+  'data/languages.csv',
+  'data/regions.csv',
+  'data/subdivisions.csv'
+]
+
+let db = {}
+let files = {}
+
 async function main() {
-  let errors = []
-  const files = program.args.length
-    ? program.args
-    : [
-        'data/blocklist.csv',
-        'data/categories.csv',
-        'data/channels.csv',
-        'data/countries.csv',
-        'data/languages.csv',
-        'data/regions.csv',
-        'data/subdivisions.csv'
-      ]
-  for (const filepath of files) {
+  let globalErrors = []
+
+  for (let filepath of allFiles) {
     if (!filepath.endsWith('.csv')) continue
 
     const eol = await file.eol(filepath)
-    if (eol !== 'CRLF') {
-      logger.error(chalk.red(`\nError: file must have line endings with CRLF (${filepath})`))
-      process.exit(1)
-    }
+    if (eol !== 'CRLF')
+      return handleError(`Error: file must have line endings with CRLF (${filepath})`)
 
     const csvString = await file.read(filepath)
-    if (/\s+$/.test(csvString)) {
-      logger.error(chalk.red(`\nError: empty lines at the end of file not allowed (${filepath})`))
-      process.exit(1)
-    }
+    if (/\s+$/.test(csvString))
+      return handleError(`Error: empty lines at the end of file not allowed (${filepath})`)
+
+    const rows = csvString.split('\r\n')
+    const headers = rows[0].split(',')
+    for (let [i, line] of rows.entries()) {
+      if (line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/).length !== headers.length)
+        return handleError(`Error: row ${i + 1} has the wrong number of columns (${filepath})`)
+    }
 
     const filename = file.getFilename(filepath)
-    if (!schemes[filename]) {
-      logger.error(chalk.red(`\nError: "${filename}" scheme is missing`))
-      process.exit(1)
-    }
-
-    const data = await csv.fromString(csvString).catch(err => {
-      logger.error(chalk.red(`\n${err.message} (${filepath})`))
-      process.exit(1)
-    })
+    let data = await csv
+      .fromString(csvString)
+      .catch(err => handleError(`${err.message} (${filepath})`))
+
+    let grouped
+    switch (filename) {
+      case 'blocklist':
+        grouped = _.keyBy(data, 'channel')
+        break
+      case 'categories':
+      case 'channels':
+        grouped = _.keyBy(data, 'id')
+        break
+      default:
+        grouped = _.keyBy(data, 'code')
+        break
+    }
+
+    db[filename] = grouped
+    files[filename] = data
+  }
+
+  const toCheck = program.args.length ? program.args : allFiles
+  for (const filepath of toCheck) {
+    const filename = file.getFilename(filepath)
+    if (!schemes[filename]) return handleError(`Error: "${filename}" scheme is missing`)
+
+    const rows = files[filename]
 
     let fileErrors = []
     if (filename === 'channels') {
-      if (/\"/.test(csvString)) {
-        logger.error(chalk.red(`\nError: \" character is not allowed (${filepath})`))
-        process.exit(1)
-      }
-
-      fileErrors = fileErrors.concat(findDuplicatesById(data))
+      fileErrors = fileErrors.concat(findDuplicatesById(rows))
+      for (const [i, row] of rows.entries()) {
+        fileErrors = fileErrors.concat(validateChannelBroadcastArea(row, i))
+        fileErrors = fileErrors.concat(validateChannelSubdivision(row, i))
+        fileErrors = fileErrors.concat(validateChannelCategories(row, i))
+        fileErrors = fileErrors.concat(validateChannelReplacedBy(row, i))
+        fileErrors = fileErrors.concat(validateChannelLanguages(row, i))
+        fileErrors = fileErrors.concat(validateChannelCountry(row, i))
+      }
+    } else if (filename === 'blocklist') {
+      for (const [i, row] of rows.entries()) {
+        fileErrors = fileErrors.concat(validateChannelId(row, i))
+      }
+    } else if (filename === 'countries') {
+      for (const [i, row] of rows.entries()) {
+        fileErrors = fileErrors.concat(validateCountryLanguage(row, i))
+      }
+    } else if (filename === 'subdivisions') {
+      for (const [i, row] of rows.entries()) {
+        fileErrors = fileErrors.concat(validateSubdivisionCountry(row, i))
+      }
+    } else if (filename === 'regions') {
+      for (const [i, row] of rows.entries()) {
+        fileErrors = fileErrors.concat(validateRegionCountries(row, i))
+      }
     }
 
     const schema = Joi.object(schemes[filename])
-    data.forEach((row, i) => {
+    rows.forEach((row, i) => {
       const { error } = schema.validate(row, { abortEarly: false })
       if (error) {
         error.details.forEach(detail => {
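An aside on the new column-count check above: the lookahead in the split regex matches a comma only when it is followed by an even number of `"` characters, i.e. when the comma sits outside a quoted field. A small demonstration with a made-up line (the diff resumes below):

```js
// Commas inside quoted fields do not split; commas between fields do.
const line = 'AnhuiTV.cn,"Hefei, Anhui",CN'
console.log(line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/))
// [ 'AnhuiTV.cn', '"Hefei, Anhui"', 'CN' ]  -> 3 columns, quoted comma preserved
```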

@@ -69,37 +114,173 @@ async function main() {
       logger.info(`\n${chalk.underline(filepath)}`)
       fileErrors.forEach(err => {
         const position = err.line.toString().padEnd(6, ' ')
-        logger.error(`  ${chalk.gray(position)} ${err.message}`)
+        logger.info(`  ${chalk.gray(position)} ${err.message}`)
       })
-      errors = errors.concat(fileErrors)
+      globalErrors = globalErrors.concat(fileErrors)
     }
   }
 
-  if (errors.length) {
-    logger.error(chalk.red(`\n${errors.length} error(s)`))
-    process.exit(1)
-  }
+  if (globalErrors.length) return handleError(`${globalErrors.length} error(s)`)
 }
 
 main()
 
-function findDuplicatesById(data) {
-  data = data.map(i => {
-    i.id = i.id.toLowerCase()
-    return i
+function findDuplicatesById(rows) {
+  rows = rows.map(row => {
+    row.id = row.id.toLowerCase()
+
+    return row
   })
 
   const errors = []
   const schema = Joi.array().unique((a, b) => a.id === b.id)
-  const { error } = schema.validate(data, { abortEarly: false })
+  const { error } = schema.validate(rows, { abortEarly: false })
   if (error) {
     error.details.forEach(detail => {
       errors.push({
         line: detail.context.pos + 2,
-        message: `Entry with the id "${detail.context.value.id}" already exists`
+        message: `entry with the id "${detail.context.value.id}" already exists`
       })
     })
   }
 
   return errors
 }
 
+function validateChannelCategories(row, i) {
+  const errors = []
+  row.categories.forEach(category => {
+    if (!db.categories[category]) {
+      errors.push({
+        line: i + 2,
+        message: `"${row.id}" has the wrong category "${category}"`
+      })
+    }
+  })
+
+  return errors
+}
+
+function validateChannelCountry(row, i) {
+  const errors = []
+  if (!db.countries[row.country]) {
+    errors.push({
+      line: i + 2,
+      message: `"${row.id}" has the wrong country "${row.country}"`
+    })
+  }
+
+  return errors
+}
+
+function validateChannelReplacedBy(row, i) {
+  const errors = []
+  if (row.replaced_by && !db.channels[row.replaced_by]) {
+    errors.push({
+      line: i + 2,
+      message: `"${row.id}" has the wrong replaced_by "${row.replaced_by}"`
+    })
+  }
+
+  return errors
+}
+
+function validateChannelSubdivision(row, i) {
+  const errors = []
+  if (row.subdivision && !db.subdivisions[row.subdivision]) {
+    errors.push({
+      line: i + 2,
+      message: `"${row.id}" has the wrong subdivision "${row.subdivision}"`
+    })
+  }
+
+  return errors
+}
+
+function validateChannelBroadcastArea(row, i) {
+  const errors = []
+  row.broadcast_area.forEach(area => {
+    const [type, code] = area.split('/')
+    if (
+      (type === 'r' && !db.regions[code]) ||
+      (type === 'c' && !db.countries[code]) ||
+      (type === 's' && !db.subdivisions[code])
+    ) {
+      errors.push({
+        line: i + 2,
+        message: `"${row.id}" has the wrong broadcast_area "${area}"`
+      })
+    }
+  })
+
+  return errors
+}
+
+function validateChannelLanguages(row, i) {
+  const errors = []
+  row.languages.forEach(language => {
+    if (!db.languages[language]) {
+      errors.push({
+        line: i + 2,
+        message: `"${row.id}" has the wrong language "${language}"`
+      })
+    }
+  })
+
+  return errors
+}
+
+function validateChannelId(row, i) {
+  const errors = []
+  if (!db.channels[row.channel]) {
+    errors.push({
+      line: i + 2,
+      message: `"${row.channel}" is missing in the channels.csv`
+    })
+  }
+
+  return errors
+}
+
+function validateCountryLanguage(row, i) {
+  const errors = []
+  if (!db.languages[row.lang]) {
+    errors.push({
+      line: i + 2,
+      message: `"${row.code}" has the wrong language "${row.lang}"`
+    })
+  }
+
+  return errors
+}
+
+function validateSubdivisionCountry(row, i) {
+  const errors = []
+  if (!db.countries[row.country]) {
+    errors.push({
+      line: i + 2,
+      message: `"${row.code}" has the wrong country "${row.country}"`
+    })
+  }
+
+  return errors
+}
+
+function validateRegionCountries(row, i) {
+  const errors = []
+  row.countries.forEach(country => {
+    if (!db.countries[country]) {
+      errors.push({
+        line: i + 2,
+        message: `"${row.code}" has the wrong country "${country}"`
+      })
+    }
+  })
+
+  return errors
+}
+
+function handleError(message) {
+  logger.error(chalk.red(`\n${message}`))
+  process.exit(1)
+}
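For context on the new lookup tables these validators probe: `_.keyBy` re-indexes the parsed rows by a key column, so each existence check above is a constant-time object lookup rather than a scan. A minimal sketch with made-up rows:

```js
const _ = require('lodash')

// Made-up rows standing in for the parsed subdivisions.csv.
const subdivisions = [
  { country: 'CN', name: 'Anhui', code: 'CN-AH' },
  { country: 'US', name: 'California', code: 'US-CA' }
]
const db = { subdivisions: _.keyBy(subdivisions, 'code') }

console.log(Boolean(db.subdivisions['CN-AH'])) // true  -> "s/CN-AH" passes the broadcast_area check
console.log(Boolean(db.subdivisions['XX-XX'])) // false -> reported as a wrong broadcast_area
```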