// iptv-database/scripts/db/validate.js
const { transliterate } = require('transliteration')
const { logger, file, csv } = require('../core')
const { program } = require('commander')
const schemes = require('./schemes')
const chalk = require('chalk')
const Joi = require('joi')
const _ = require('lodash')

// CLI: an optional positional path; main() reads the parsed positionals from
// program.args and falls back to every file in allFiles when none is given.
program.argument('[filepath]', 'Path to file to validate').parse(process.argv)

// Every data file that is loaded before validation starts, so that rows can
// be cross-checked against the other files.
const allFiles = [
  'data/blocklist.csv',
  'data/categories.csv',
  'data/channels.csv',
  'data/countries.csv',
  'data/languages.csv',
  'data/regions.csv',
  'data/subdivisions.csv'
]

// filename -> rows keyed by their identifier column (filled in by main()).
const db = {}
// filename -> rows in file order (filled in by main()).
const files = {}
/**
 * Loads and validates the database CSV files.
 *
 * Phase 1 reads every file in `allFiles`, checks it at the text level (no
 * trailing whitespace, CRLF line endings, same number of columns in every
 * row), parses it, and stores the rows in `db` (keyed by identifier column)
 * and in `files` (file order). Any failure here ends the process through
 * `handleError`.
 *
 * Phase 2 validates each file named on the command line, or every file when
 * none is named: cross-reference checks first, then the Joi scheme. Row-level
 * errors are collected, printed per file, and their total is reported through
 * `handleError` at the end.
 *
 * @returns {Promise<void>}
 */
async function main() {
  // Cross-reference checks applied to each row of a file, in reporting order.
  const rowValidators = new Map([
    [
      'channels',
      [
        validateChannelId,
        validateChannelBroadcastArea,
        validateChannelSubdivision,
        validateChannelCategories,
        validateChannelReplacedBy,
        validateChannelLanguages,
        validateChannelCountry
      ]
    ],
    ['blocklist', [validateChannel]],
    ['countries', [validateCountryLanguage]],
    ['subdivisions', [validateSubdivisionCountry]],
    ['regions', [validateRegionCountries]]
  ])

  let globalErrors = []

  // Phase 1: load everything up front so any file can be checked against the others.
  for (const filepath of allFiles) {
    if (!filepath.endsWith('.csv')) continue

    const csvString = await file.read(filepath)
    if (/\s+$/.test(csvString))
      return handleError(`Error: empty lines at the end of file not allowed (${filepath})`)

    const lines = csvString.split(/\r\n/)
    const headers = lines[0].split(',')
    for (const [i, line] of lines.entries()) {
      // A "\n" left inside a line after splitting on CRLF means a bare LF was used.
      if (line.indexOf('\n') > -1)
        return handleError(
          `Error: row ${i + 1} has the wrong line ending character, should be CRLF (${filepath})`
        )
      // Split only on commas that are followed by an even number of double quotes,
      // i.e. commas that are not inside a quoted field.
      if (line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/).length !== headers.length)
        return handleError(`Error: row ${i + 1} has the wrong number of columns (${filepath})`)
    }

    const filename = file.getFilename(filepath)

    let data
    try {
      data = await csv.fromString(csvString)
    } catch (err) {
      return handleError(`${err.message} (${filepath})`)
    }

    // Column that identifies a row in each file.
    let keyColumn
    switch (filename) {
      case 'blocklist':
        keyColumn = 'channel'
        break
      case 'categories':
      case 'channels':
        keyColumn = 'id'
        break
      default:
        keyColumn = 'code'
        break
    }

    db[filename] = _.keyBy(data, keyColumn)
    files[filename] = data
  }

  // Phase 2: validate the requested files.
  const toCheck = program.args.length ? program.args : allFiles
  for (const filepath of toCheck) {
    const filename = file.getFilename(filepath)
    if (!schemes[filename]) return handleError(`Error: "${filename}" scheme is missing`)

    const rows = files[filename]
    // Without this guard a file that was never loaded would fail inside JSON.parse below.
    if (!rows) return handleError(`Error: "${filepath}" is not one of the known data files`)

    // The cross-reference checks work on a deep copy, presumably so that the
    // extra field findDuplicatesById adds to each row never reaches the scheme check.
    const rowsCopy = JSON.parse(JSON.stringify(rows))

    // The duplicate check runs first: it stores `normId` on each row, which
    // later per-row checks may read.
    let fileErrors = filename === 'channels' ? findDuplicatesById(rowsCopy) : []

    const validators = rowValidators.get(filename) || []
    if (validators.length) {
      for (const [i, row] of rowsCopy.entries()) {
        for (const validate of validators) {
          fileErrors = fileErrors.concat(validate(row, i))
        }
      }
    }

    const schema = Joi.object(schemes[filename])
    rows.forEach((row, i) => {
      const { error } = schema.validate(row, { abortEarly: false })
      if (error) {
        error.details.forEach(detail => {
          fileErrors.push({ line: i + 2, message: detail.message })
        })
      }
    })

    if (fileErrors.length) {
      logger.info(`\n${chalk.underline(filepath)}`)
      fileErrors.forEach(err => {
        const position = err.line.toString().padEnd(6, ' ')
        logger.info(` ${chalk.gray(position)} ${err.message}`)
      })
      globalErrors = globalErrors.concat(fileErrors)
    }
  }

  if (globalErrors.length) return handleError(`${globalErrors.length} error(s)`)
}

main()
/**
 * Finds rows whose id repeats the id of an earlier row, ignoring case.
 *
 * Side effect: the lowercase id is stored on every row as `normId`, as
 * before, so any code that reads `row.normId` after this call keeps working.
 *
 * @param {Array<Object>} rows - Rows to check; `row.id` must be a string.
 * @returns {Array<{line: number, message: string}>} One error for each row
 *   whose id was already seen on an earlier row.
 */
function findDuplicatesById(rows) {
  const errors = []
  const seenIds = new Set()

  rows.forEach((row, pos) => {
    row.normId = row.id.toLowerCase()

    if (seenIds.has(row.normId)) {
      errors.push({
        // pos + 2: presumably the 1-based file line, with the header on line 1.
        line: pos + 2,
        message: `entry with the id "${row.id}" already exists`
      })
      return
    }

    seenIds.add(row.normId)
  })

  return errors
}
// function findDuplicatesByName(rows) {
// rows = rows.map(row => {
// row.name = row.name.toLowerCase()
// return row
// })
// const errors = []
// const schema = Joi.array().unique((a, b) => a.name === b.name)
// const { error } = schema.validate(rows, { abortEarly: false })
// if (error) {
// error.details.forEach(detail => {
// errors.push({
// line: detail.context.pos + 2,
// message: `entry with the name "${detail.context.value.name}" already exists`
// })
// })
// }
// return errors
// }
/**
 * Checks that a channel id equals "<normalized name>.<country code>",
 * compared case-insensitively.
 *
 * The lowercase id is derived from `row.id` here, so the check does not
 * depend on another function having stored `normId` on the row beforehand.
 *
 * @param {Object} row - Channel row; reads `row.id`, `row.name` and `row.country`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} Empty when the id matches.
 */
function validateChannelId(row, i) {
  // Transliterates the name, spells out a leading "@" or "&", every "+" and
  // every whitespace-then-"-" pair, then drops everything that is not an
  // ASCII letter or digit.
  const normalize = name =>
    transliterate(name)
      .replace(/^@/, 'At')
      .replace(/^&/, 'And')
      .replace(/\+/g, 'Plus')
      .replace(/\s-/g, ' Minus')
      .replace(/[^a-z\d]+/gi, '')

  const expected = `${normalize(row.name)}.${row.country}`.toLowerCase()
  const actual = row.id.toLowerCase()
  if (expected === actual) return []

  return [
    {
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.id}" must be derived from the channel name "${row.name}" and the country code "${row.country}"`
    }
  ]
}
/**
 * Checks that every category listed on a channel is an id in `db.categories`.
 *
 * @param {Object} row - Channel row; reads `row.id` and iterates `row.categories`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} One error per unknown category.
 */
function validateChannelCategories(row, i) {
  // Own-property test: a plain lookup would accept inherited keys such as "constructor".
  const isKnown = category => Object.prototype.hasOwnProperty.call(db.categories, category)

  return row.categories
    .filter(category => !isKnown(category))
    .map(category => ({
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.id}" has the wrong category "${category}"`
    }))
}
/**
 * Checks that a channel's country is a code in `db.countries`.
 *
 * @param {Object} row - Channel row; reads `row.id` and `row.country`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} Empty when the country is known.
 */
function validateChannelCountry(row, i) {
  // Own-property test: a plain lookup would accept inherited keys such as "toString".
  if (Object.prototype.hasOwnProperty.call(db.countries, row.country)) return []

  return [
    {
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.id}" has the wrong country "${row.country}"`
    }
  ]
}
/**
 * Checks that a non-empty `replaced_by` value is an id in `db.channels`.
 *
 * @param {Object} row - Channel row; reads `row.id` and `row.replaced_by`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} Empty when `replaced_by`
 *   is falsy or names a known channel.
 */
function validateChannelReplacedBy(row, i) {
  // A falsy value means the channel has no replacement, so there is nothing to check.
  if (!row.replaced_by) return []
  // Own-property test: a plain lookup would accept inherited keys such as "constructor".
  if (Object.prototype.hasOwnProperty.call(db.channels, row.replaced_by)) return []

  return [
    {
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.id}" has the wrong replaced_by "${row.replaced_by}"`
    }
  ]
}
/**
 * Checks that a non-empty `subdivision` value is a code in `db.subdivisions`.
 *
 * @param {Object} row - Channel row; reads `row.id` and `row.subdivision`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} Empty when `subdivision`
 *   is falsy or names a known subdivision.
 */
function validateChannelSubdivision(row, i) {
  // A falsy value means no subdivision is set, so there is nothing to check.
  if (!row.subdivision) return []
  // Own-property test: a plain lookup would accept inherited keys such as "constructor".
  if (Object.prototype.hasOwnProperty.call(db.subdivisions, row.subdivision)) return []

  return [
    {
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.id}" has the wrong subdivision "${row.subdivision}"`
    }
  ]
}
/**
 * Checks every "<type>/<code>" entry of a channel's broadcast area: type "r"
 * must name a code in `db.regions`, "c" a code in `db.countries`, and "s" a
 * code in `db.subdivisions`. Entries with any other type are not checked here.
 *
 * @param {Object} row - Channel row; reads `row.id` and iterates `row.broadcast_area`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} One error per unknown area.
 */
function validateChannelBroadcastArea(row, i) {
  // A Map keeps the type lookup free of inherited object keys.
  const indexByType = new Map([
    ['r', db.regions],
    ['c', db.countries],
    ['s', db.subdivisions]
  ])

  const errors = []
  row.broadcast_area.forEach(area => {
    const [type, code] = area.split('/')
    const index = indexByType.get(type)
    if (!index) return

    // Own-property test: a plain lookup would accept inherited keys such as "toString".
    if (!Object.prototype.hasOwnProperty.call(index, code)) {
      errors.push({
        // i + 2: presumably the 1-based file line, with the header on line 1.
        line: i + 2,
        message: `"${row.id}" has the wrong broadcast_area "${area}"`
      })
    }
  })

  return errors
}
/**
 * Checks that every language listed on a channel is a code in `db.languages`.
 *
 * @param {Object} row - Channel row; reads `row.id` and iterates `row.languages`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} One error per unknown language.
 */
function validateChannelLanguages(row, i) {
  // Own-property test: a plain lookup would accept inherited keys such as "constructor".
  const isKnown = language => Object.prototype.hasOwnProperty.call(db.languages, language)

  return row.languages
    .filter(language => !isKnown(language))
    .map(language => ({
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.id}" has the wrong language "${language}"`
    }))
}
/**
 * Checks that the channel referenced by a row is an id in `db.channels`.
 * main() applies this check to the rows of the blocklist file.
 *
 * @param {Object} row - Row to check; reads `row.channel`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} Empty when the channel is known.
 */
function validateChannel(row, i) {
  // Own-property test: a plain lookup would accept inherited keys such as "constructor".
  if (Object.prototype.hasOwnProperty.call(db.channels, row.channel)) return []

  return [
    {
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.channel}" is missing in the channels.csv`
    }
  ]
}
/**
 * Checks that a country's `lang` value is a code in `db.languages`.
 *
 * @param {Object} row - Country row; reads `row.code` and `row.lang`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} Empty when the language is known.
 */
function validateCountryLanguage(row, i) {
  // Own-property test: a plain lookup would accept inherited keys such as "constructor".
  if (Object.prototype.hasOwnProperty.call(db.languages, row.lang)) return []

  return [
    {
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.code}" has the wrong language "${row.lang}"`
    }
  ]
}
/**
 * Checks that a subdivision's country is a code in `db.countries`.
 *
 * @param {Object} row - Subdivision row; reads `row.code` and `row.country`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} Empty when the country is known.
 */
function validateSubdivisionCountry(row, i) {
  // Own-property test: a plain lookup would accept inherited keys such as "toString".
  if (Object.prototype.hasOwnProperty.call(db.countries, row.country)) return []

  return [
    {
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.code}" has the wrong country "${row.country}"`
    }
  ]
}
/**
 * Checks that every country listed for a region is a code in `db.countries`.
 *
 * @param {Object} row - Region row; reads `row.code` and iterates `row.countries`.
 * @param {number} i - Zero-based index of the row in the parsed data.
 * @returns {Array<{line: number, message: string}>} One error per unknown country.
 */
function validateRegionCountries(row, i) {
  // Own-property test: a plain lookup would accept inherited keys such as "toString".
  const isKnown = country => Object.prototype.hasOwnProperty.call(db.countries, country)

  return row.countries
    .filter(country => !isKnown(country))
    .map(country => ({
      // i + 2: presumably the 1-based file line, with the header on line 1.
      line: i + 2,
      message: `"${row.code}" has the wrong country "${country}"`
    }))
}
/**
 * Logs a message in red through the project logger, preceded by a blank
 * line, then ends the process with exit code 1.
 *
 * @param {string} message - Text to log.
 */
function handleError(message) {
  logger.error(chalk.red(`\n${message}`))
  process.exit(1)
}