Merge branch 'iptv-org:master' into master

2024-11-22 19:04:10 -05:00 · 2022-04-25 07:39:19 +07:00 · 2022-04-25 07:39:19 +07:00 · 2e0980da55
commit 2e0980da55
parent 6ba55ffa24 985c45af66
12 changed files with 30990 additions and 30145 deletions
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@ -13,7 +13,7 @@ jobs:
      - id: files
        uses: tj-actions/changed-files@v12.2
        with:
-          files: \.csv$
+          files: 'data'
      - name: validate
        if: steps.files.outputs.any_changed == 'true'
        run: |
--- a/.readme/preview.png
+++ b/.readme/preview.png
--- a/README.md
+++ b/README.md
@ -1,5 +1,7 @@
 # Database [![check](https://github.com/iptv-org/database/actions/workflows/check.yml/badge.svg)](https://github.com/iptv-org/database/actions/workflows/check.yml)
 ![channels.csv](https://github.com/iptv-org/database/raw/master/.readme/preview.png)
 All data is stored in the [/data](data) folder as [CSV](https://en.wikipedia.org/wiki/Comma-separated_values) (Comma-separated values) files. Any of the files can be edited either with a basic text editor or through any spreadsheet editor (such as [Google Sheets](https://www.google.com/sheets/about/), [LibreOffice](https://www.libreoffice.org/discover/libreoffice/), ...).
 ## Data Scheme
@ -15,19 +17,23 @@ All data is stored in the [/data](data) folder as [CSV](https://en.wikipedia.org
 ### channels
 | Field          | Description                                                                                                                                                                                                      | Required | Example                        |
-| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ----------------------------- |
+| -------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -------- | ------------------------------ |
-| id             | Unique channel ID. Should be derived from the name of the channel and country code separated by dot. May only contain Latin letters, numbers and dot.                                                            | Required | `KSTSDT3.us`                  |
+| id             | Unique channel ID. Should be derived from the name of the channel and country code separated by dot. May only contain Latin letters, numbers and dot.                                                            | Required | `AnhuiTV.cn`                   |
-| name           | Official channel name. May include: `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `!`, `:`, `&`, `.`, `+`, `'`, `/`, `»`, `#`, `%`, `°`, `$`, `@`, `?`, `(`, `)`.                                                           | Required | `KSTS-DT3`                    |
+| name           | Official channel name in English. May include: `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `!`, `:`, `&`, `.`, `+`, `'`, `/`, `»`, `#`, `%`, `°`, `$`, `@`, `?`, `(`, `)`.                                                | Required | `Anhui TV`                     |
-| network        | Name of the network operating the channel.                                                                                                                                                                       | Optional | `NBC`                         |
+| native_name    | Channel name in the original language. May contain any characters except `,` and `"`.                                                                                                                            | Optional | `安徽卫视`                     |
-| country        | Country code from which the channel is transmitted. A list of all supported countries and their codes can be found in [data/countries.csv](data/countries.csv)                                                   | Required | `US`                          |
+| network        | Network of which this channel is a part.                                                                                                                                                                         | Optional | `Anhui`                        |
-| subdivision    | Code of the subdivision (e.g., provinces or states) from which the broadcast is transmitted. A list of all supported subdivisions and their codes can be found in [data/subdivisions.csv](data/subdivisions.csv) | Optional | `US-CA`                       |
+| country        | Country code from which the channel is transmitted. A list of all supported countries and their codes can be found in [data/countries.csv](data/countries.csv)                                                   | Required | `CN`                           |
-| city           | Name of the city from which the channel is transmitted. May only contain `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `'`.                                                                                                 | Optional | `San Francisco`               |
+| subdivision    | Code of the subdivision (e.g., provinces or states) from which the broadcast is transmitted. A list of all supported subdivisions and their codes can be found in [data/subdivisions.csv](data/subdivisions.csv) | Optional | `CN-AH`                        |
-| broadcast_area | List of codes describing the broadcasting area of the channel. Any combination of `r/<region_code>`, `c/<country_code>`, `s/<subdivision_code>`                                                                  | Required | `s/US-CA`                     |
+| city           | Name of the city from which the channel is transmitted. May only contain `a-z`, `à-ÿ`, `0-9`, `space`, `-`, `'`.                                                                                                 | Optional | `Hefei`                        |
-| languages      | List of languages in which the channel is broadcast separated by `;`. A list of all supported languages and their codes can be found in [data/languages.csv](data/languages.csv)                                 | Required | `eng;spa`                     |
+| broadcast_area | List of codes describing the broadcasting area of the channel. Any combination of `r/<region_code>`, `c/<country_code>`, `s/<subdivision_code>`                                                                  | Required | `s/CN-AH`                      |
-| categories     | List of categories to which this channel belongs separated by `;`. A list of all supported categories can be found in [data/categories.csv](data/categories.csv)                                                 | Optional | `news;weather`                |
+| languages      | List of languages in which the channel is broadcast separated by `;`. A list of all supported languages and their codes can be found in [data/languages.csv](data/languages.csv)                                 | Required | `zho`                          |
 | categories     | List of categories to which this channel belongs separated by `;`. A list of all supported categories can be found in [data/categories.csv](data/categories.csv)                                                 | Optional | `general`                      |
 | is_nsfw        | Indicates whether the channel broadcasts adult content (`TRUE` or `FALSE`)                                                                                                                                       | Required | `FALSE`                        |
-| website        | Official website URL.                                                                                                                                                                                            | Optional | `https://nbc.com/`            |
+| launched       | Launch date of the channel (`YYYY-MM-DD`)                                                                                                                                                                        | Optional | `2016-07-28`                   |
-| logo           | Logo URL. Only URL with HTTPS protocol are allowed.                                                                                                                                                              | Optional | `https://example.com/nbc.png` |
+| closed         | Date on which the channel closed (`YYYY-MM-DD`)                                                                                                                                                                  | Optional | `2020-05-31`                   |
 | replaced_by    | The ID of the channel that this channel was replaced by.                                                                                                                                                         | Optional | `CCTV1.cn`                     |
 | website        | Official website URL.                                                                                                                                                                                            | Optional | `http://www.ahtv.cn/`          |
 | logo           | Logo URL. Only URLs with the HTTPS protocol are allowed. Supported image types: `PNG`, `JPEG`.                                                                                                                   | Optional | `https://example.com/logo.png` |
 ### categories
--- a/data/blocklist.csv
+++ b/data/blocklist.csv
--- a/data/channels.csv
+++ b/data/channels.csv
--- a/data/subdivisions.csv
+++ b/data/subdivisions.csv
@ -865,10 +865,6 @@ GA,Haut-Ogooue,GA-2
 GA,Ngounie,GA-4
 GA,Ogooue-Maritime,GA-8
 GA,Woleu-Ntem,GA-9
 GB,England,GB-ENG
 GB,Northern Ireland,GB-NIR
 GB,Scotland,GB-SCT
 GB,Wales,GB-WLS
 GD,Saint Andrew,GD-01
 GD,Saint David,GD-02
 GD,Saint George,GD-03
@ -3195,6 +3191,10 @@ UG,Tororo,UG-212
 UG,Wakiso,UG-113
 UG,Yumbe,UG-313
 UG,Zombo,UG-330
 UK,England,GB-ENG
 UK,Northern Ireland,GB-NIR
 UK,Scotland,GB-SCT
 UK,Wales,GB-WLS
 UM,Palmyra Atoll,UM-95
 US,Alabama,US-AL
 US,Alaska,US-AK
--- a/package-lock.json
+++ b/package-lock.json
--- a/package.json
+++ b/package.json
@ -12,20 +12,26 @@
  "private": true,
  "author": "Arhey",
  "dependencies": {
    "@joi/date": "^2.1.0",
    "axios": "^0.25.0",
    "chalk": "^4.1.2",
    "cheerio": "^1.0.0-rc.10",
    "commander": "^9.0.0",
    "crlf": "^1.1.1",
    "csvtojson": "^2.0.10",
    "dayjs": "^1.11.0",
    "glob": "^7.2.0",
    "iso-639-2": "^3.0.1",
    "joi": "^17.6.0",
    "json2csv": "^6.0.0-alpha.0",
    "lodash": "^4.17.21",
    "mz": "^2.7.0",
    "node-cleanup": "^2.1.2",
    "pre-commit": "^1.2.2",
    "signale": "^1.4.0",
    "slugify": "^1.6.5",
-    "transliteration": "^2.2.0"
+    "transliteration": "^2.2.0",
    "wikijs": "^6.3.3",
    "wtf_wikipedia": "^10.0.0"
  }
 }
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@ -1 +1,2 @@
-/bot.js
+/__data__/
 /bots/
--- a/scripts/core/csv.js
+++ b/scripts/core/csv.js
@ -23,7 +23,11 @@ const csv2jsonOptions = {
 		subdivision: nullable,
 		city: nullable,
 		network: nullable,
-		website: nullable
+		launched: nullable,
 		closed: nullable,
 		website: nullable,
 		native_name: nullable,
 		replaced_by: nullable
 	}
 }
--- a/scripts/db/schemes/channels.js
+++ b/scripts/db/schemes/channels.js
@ -1,12 +1,16 @@
-const Joi = require('joi')
+const Joi = require('joi').extend(require('@joi/date'))
 const path = require('path')
 module.exports = {
 	id: Joi.string()
 		.regex(/^[A-Za-z0-9]+\.[a-z]{2}$/)
 		.required(),
 	name: Joi.string()
-		.regex(/^[\sa-zA-Z\u00C0-\u00FF0-9-!:&.+'/»#%°$@?()]+$/)
+		.regex(/^[\sa-zA-Z\u00C0-\u00FF0-9-!:&.+'/»#%°$@?()¡]+$/)
 		.required(),
 	native_name: Joi.string()
 		.regex(/^[^",]+$/)
 		.allow(null),
 	network: Joi.string().allow(null),
 	country: Joi.string()
 		.regex(/^[A-Z]{2}$/)
@ -29,6 +33,11 @@ module.exports = {
 	),
 	categories: Joi.array().items(Joi.string().regex(/^[a-z]+$/)),
 	is_nsfw: Joi.boolean().strict().required(),
 	launched: Joi.date().format('YYYY-MM-DD').raw().allow(null),
 	closed: Joi.date().format('YYYY-MM-DD').raw().allow(null),
 	replaced_by: Joi.string()
 		.regex(/^[A-Za-z0-9]+\.[a-z]{2}$/)
 		.allow(null),
 	website: Joi.string()
 		.uri({
 			scheme: ['http', 'https']
@ -38,5 +47,13 @@ module.exports = {
 		.uri({
 			scheme: ['https']
 		})
 		.custom((value, helper) => {
 			const ext = path.extname(value)
 			if (!ext || /(\.png|\.jpeg|\.jpg)/i.test(ext)) {
 				return true
 			} else {
 				return helper.message(`"logo" has an invalid file extension "${ext}"`)
 			}
 		})
 		.allow(null)
 }
--- a/scripts/db/validate.js
+++ b/scripts/db/validate.js
@ -3,14 +3,11 @@ const { program } = require('commander')
 const schemes = require('./schemes')
 const chalk = require('chalk')
 const Joi = require('joi')
 const _ = require('lodash')
 program.argument('[filepath]', 'Path to file to validate').parse(process.argv)
-async function main() {
+const allFiles = [
 	let errors = []
 	const files = program.args.length
 		? program.args
 		: [
 	'data/blocklist.csv',
 	'data/categories.csv',
 	'data/channels.csv',
@ -18,45 +15,93 @@ async function main() {
 	'data/languages.csv',
 	'data/regions.csv',
 	'data/subdivisions.csv'
-		  ]
+]
-	for (const filepath of files) {
+
 let db = {}
 let files = {}
 async function main() {
 	let globalErrors = []
 	for (let filepath of allFiles) {
 		if (!filepath.endsWith('.csv')) continue
 		const eol = await file.eol(filepath)
-		if (eol !== 'CRLF') {
+		if (eol !== 'CRLF')
-			logger.error(chalk.red(`\nError: file must have line endings with CRLF (${filepath})`))
+			return handleError(`Error: file must have line endings with CRLF (${filepath})`)
 			process.exit(1)
 		}
 		const csvString = await file.read(filepath)
-		if (/\s+$/.test(csvString)) {
+		if (/\s+$/.test(csvString))
-			logger.error(chalk.red(`\nError: empty lines at the end of file not allowed (${filepath})`))
+			return handleError(`Error: empty lines at the end of file not allowed (${filepath})`)
-			process.exit(1)
+
 		const rows = csvString.split('\r\n')
 		const headers = rows[0].split(',')
 		for (let [i, line] of rows.entries()) {
 			if (line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/).length !== headers.length)
 				return handleError(`Error: row ${i + 1} has the wrong number of columns (${filepath})`)
 		}
 		const filename = file.getFilename(filepath)
-		if (!schemes[filename]) {
+		let data = await csv
-			logger.error(chalk.red(`\nError: "${filename}" scheme is missing`))
+			.fromString(csvString)
-			process.exit(1)
+			.catch(err => handleError(`${err.message} (${filepath})`))
 		let grouped
 		switch (filename) {
 			case 'blocklist':
 				grouped = _.keyBy(data, 'channel')
 				break
 			case 'categories':
 			case 'channels':
 				grouped = _.keyBy(data, 'id')
 				break
 			default:
 				grouped = _.keyBy(data, 'code')
 				break
 		}
-		const data = await csv.fromString(csvString).catch(err => {
+		db[filename] = grouped
-			logger.error(chalk.red(`\n${err.message} (${filepath})`))
+		files[filename] = data
-			process.exit(1)
+	}
-		})
+
 	const toCheck = program.args.length ? program.args : allFiles
 	for (const filepath of toCheck) {
 		const filename = file.getFilename(filepath)
 		if (!schemes[filename]) return handleError(`Error: "${filename}" scheme is missing`)
 		const rows = files[filename]
 		let fileErrors = []
 		if (filename === 'channels') {
-			if (/\"/.test(csvString)) {
+			fileErrors = fileErrors.concat(findDuplicatesById(rows))
-				logger.error(chalk.red(`\nError: \" character is not allowed (${filepath})`))
+			for (const [i, row] of rows.entries()) {
-				process.exit(1)
+				fileErrors = fileErrors.concat(validateChannelBroadcastArea(row, i))
 				fileErrors = fileErrors.concat(validateChannelSubdivision(row, i))
 				fileErrors = fileErrors.concat(validateChannelCategories(row, i))
 				fileErrors = fileErrors.concat(validateChannelReplacedBy(row, i))
 				fileErrors = fileErrors.concat(validateChannelLanguages(row, i))
 				fileErrors = fileErrors.concat(validateChannelCountry(row, i))
 			}
 		} else if (filename === 'blocklist') {
 			for (const [i, row] of rows.entries()) {
 				fileErrors = fileErrors.concat(validateChannelId(row, i))
 			}
 		} else if (filename === 'countries') {
 			for (const [i, row] of rows.entries()) {
 				fileErrors = fileErrors.concat(validateCountryLanguage(row, i))
 			}
 		} else if (filename === 'subdivisions') {
 			for (const [i, row] of rows.entries()) {
 				fileErrors = fileErrors.concat(validateSubdivisionCountry(row, i))
 			}
 		} else if (filename === 'regions') {
 			for (const [i, row] of rows.entries()) {
 				fileErrors = fileErrors.concat(validateRegionCountries(row, i))
 			}
 			fileErrors = fileErrors.concat(findDuplicatesById(data))
 		}
 		const schema = Joi.object(schemes[filename])
-		data.forEach((row, i) => {
+		rows.forEach((row, i) => {
 			const { error } = schema.validate(row, { abortEarly: false })
 			if (error) {
 				error.details.forEach(detail => {
@ -69,37 +114,173 @@ async function main() {
 			logger.info(`\n${chalk.underline(filepath)}`)
 			fileErrors.forEach(err => {
 				const position = err.line.toString().padEnd(6, ' ')
-				logger.error(` ${chalk.gray(position)} ${err.message}`)
+				logger.info(` ${chalk.gray(position)} ${err.message}`)
 			})
-			errors = errors.concat(fileErrors)
+			globalErrors = globalErrors.concat(fileErrors)
 		}
 	}
-	if (errors.length) {
+	if (globalErrors.length) return handleError(`${globalErrors.length} error(s)`)
 		logger.error(chalk.red(`\n${errors.length} error(s)`))
 		process.exit(1)
 	}
 }
 main()
-function findDuplicatesById(data) {
+function findDuplicatesById(rows) {
-	data = data.map(i => {
+	rows = rows.map(row => {
-		i.id = i.id.toLowerCase()
+		row.id = row.id.toLowerCase()
-		return i
+
 		return row
 	})
 	const errors = []
 	const schema = Joi.array().unique((a, b) => a.id === b.id)
-	const { error } = schema.validate(data, { abortEarly: false })
+	const { error } = schema.validate(rows, { abortEarly: false })
 	if (error) {
 		error.details.forEach(detail => {
 			errors.push({
 				line: detail.context.pos + 2,
-				message: `Entry with the id "${detail.context.value.id}" already exists`
+				message: `entry with the id "${detail.context.value.id}" already exists`
 			})
 		})
 	}
 	return errors
 }
 /**
  * Checks that every category listed on a channel row exists in `db.categories`.
  *
  * @param {Object} row - Parsed channel row; `row.categories` is iterated and `row.id` is used in messages.
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} One entry per unknown category (empty when all are known).
  */
 function validateChannelCategories(row, i) {
 	const errors = []
 	// Own-property check: a plain-object lookup alone would accept inherited keys
 	// such as "constructor" or "valueOf" as if they were real categories.
 	const isKnown = category =>
 		Object.prototype.hasOwnProperty.call(db.categories, category) &&
 		Boolean(db.categories[category])
 	row.categories.forEach(category => {
 		if (!isKnown(category)) {
 			errors.push({
 				// presumably +2 maps the zero-based index to a 1-based line after the header row
 				line: i + 2,
 				message: `"${row.id}" has the wrong category "${category}"`
 			})
 		}
 	})
 	return errors
 }
 /**
  * Verifies that a channel row's `country` is present in `db.countries`.
  *
  * @param {Object} row - Parsed channel row (`id` and `country` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} Empty when the country is known, otherwise a single error.
  */
 function validateChannelCountry(row, i) {
 	// Guard clause: a known country produces no errors
 	if (db.countries[row.country]) return []
 	return [
 		{
 			line: i + 2,
 			message: `"${row.id}" has the wrong country "${row.country}"`
 		}
 	]
 }
 /**
  * Verifies that a non-empty `replaced_by` value refers to an entry in `db.channels`.
  *
  * @param {Object} row - Parsed channel row (`id` and `replaced_by` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} Empty when `replaced_by` is unset or known, otherwise a single error.
  */
 function validateChannelReplacedBy(row, i) {
 	const successor = row.replaced_by
 	// Rows without a replacement are skipped; only a set-but-unknown id is an error
 	const isDangling = Boolean(successor) && !db.channels[successor]
 	return isDangling
 		? [
 				{
 					line: i + 2,
 					message: `"${row.id}" has the wrong replaced_by "${successor}"`
 				}
 		  ]
 		: []
 }
 /**
  * Verifies that a non-empty `subdivision` value is present in `db.subdivisions`.
  *
  * @param {Object} row - Parsed channel row (`id` and `subdivision` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} Empty when `subdivision` is unset or known, otherwise a single error.
  */
 function validateChannelSubdivision(row, i) {
 	const code = row.subdivision
 	// An unset subdivision is not checked
 	if (!code) return []
 	if (db.subdivisions[code]) return []
 	return [
 		{
 			line: i + 2,
 			message: `"${row.id}" has the wrong subdivision "${code}"`
 		}
 	]
 }
 /**
  * Checks each `broadcast_area` entry of a channel row against the matching lookup:
  * `r/<code>` against `db.regions`, `c/<code>` against `db.countries`,
  * `s/<code>` against `db.subdivisions`. Entries with any other prefix are not reported here.
  *
  * @param {Object} row - Parsed channel row (`id` and `broadcast_area` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} One entry per area whose code is not found.
  */
 function validateChannelBroadcastArea(row, i) {
 	const isUnknownArea = area => {
 		const [type, code] = area.split('/')
 		switch (type) {
 			case 'r':
 				return !db.regions[code]
 			case 'c':
 				return !db.countries[code]
 			case 's':
 				return !db.subdivisions[code]
 			default:
 				// Unrecognised prefixes are left to other checks
 				return false
 		}
 	}
 	return row.broadcast_area.filter(isUnknownArea).map(area => ({
 		line: i + 2,
 		message: `"${row.id}" has the wrong broadcast_area "${area}"`
 	}))
 }
 /**
  * Checks that every language listed on a channel row exists in `db.languages`.
  *
  * @param {Object} row - Parsed channel row (`id` and `languages` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} One entry per unknown language.
  */
 function validateChannelLanguages(row, i) {
 	const lineNumber = i + 2
 	return row.languages
 		.filter(language => !db.languages[language])
 		.map(language => ({
 			line: lineNumber,
 			message: `"${row.id}" has the wrong language "${language}"`
 		}))
 }
 /**
  * Verifies that the `channel` value of a row refers to an entry in `db.channels`.
  *
  * @param {Object} row - Parsed row whose `channel` field is looked up.
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} Empty when the channel exists, otherwise a single error.
  */
 function validateChannelId(row, i) {
 	const exists = Boolean(db.channels[row.channel])
 	return exists
 		? []
 		: [
 				{
 					line: i + 2,
 					message: `"${row.channel}" is missing in the channels.csv`
 				}
 		  ]
 }
 /**
  * Verifies that a country row's `lang` value is present in `db.languages`.
  *
  * @param {Object} row - Parsed country row (`code` and `lang` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} Empty when the language is known, otherwise a single error.
  */
 function validateCountryLanguage(row, i) {
 	const { code, lang } = row
 	if (db.languages[lang]) return []
 	return [
 		{
 			line: i + 2,
 			message: `"${code}" has the wrong language "${lang}"`
 		}
 	]
 }
 /**
  * Verifies that a subdivision row's `country` value is present in `db.countries`.
  *
  * @param {Object} row - Parsed subdivision row (`code` and `country` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} Empty when the country is known, otherwise a single error.
  */
 function validateSubdivisionCountry(row, i) {
 	const isKnownCountry = Boolean(db.countries[row.country])
 	if (isKnownCountry) {
 		return []
 	}
 	const problem = {
 		line: i + 2,
 		message: `"${row.code}" has the wrong country "${row.country}"`
 	}
 	return [problem]
 }
 /**
  * Checks that every country listed on a region row exists in `db.countries`.
  *
  * @param {Object} row - Parsed region row (`code` and `countries` are read).
  * @param {number} i - Zero-based index of the row in the parsed file.
  * @returns {Array<{line: number, message: string}>} One entry per unknown country.
  */
 function validateRegionCountries(row, i) {
 	const lineNumber = i + 2
 	return row.countries.reduce((problems, country) => {
 		if (!db.countries[country]) {
 			problems.push({
 				line: lineNumber,
 				message: `"${row.code}" has the wrong country "${country}"`
 			})
 		}
 		return problems
 	}, [])
 }
 /**
  * Logs a message through `logger.error`, wrapped in `chalk.red` and preceded
  * by a newline, then terminates the process with exit code 1.
  *
  * @param {string} message - Text to report before exiting.
  */
 function handleError(message) {
 	const formatted = chalk.red(`\n${message}`)
 	logger.error(formatted)
 	process.exit(1)
 }