2022-09-23 05:47:57 +03:00
|
|
|
const { transliterate } = require('transliteration')
|
2022-02-12 05:55:50 +03:00
|
|
|
const { logger, file, csv } = require('../core')
|
|
|
|
const { program } = require('commander')
|
|
|
|
const schemes = require('./schemes')
|
|
|
|
const chalk = require('chalk')
|
|
|
|
const Joi = require('joi')
|
2022-04-09 14:42:41 +03:00
|
|
|
const _ = require('lodash')
|
2022-02-12 05:55:50 +03:00
|
|
|
|
|
|
|
// Declare the optional positional [filepath] argument and parse the command line;
// main() later reads the parsed positionals from program.args.
program.argument('[filepath]', 'Path to file to validate').parse(process.argv)
|
|
|
|
|
2022-04-09 15:55:35 +03:00
|
|
|
// Every CSV file that main() loads before validation starts.
const allFiles = [
  'data/blocklist.csv',
  'data/categories.csv',
  'data/channels.csv',
  'data/countries.csv',
  'data/languages.csv',
  'data/regions.csv',
  'data/subdivisions.csv'
]

// Filled by main(): filename -> rows of that file keyed by their identifying column.
// Only mutated, never reassigned, hence const.
const db = {}

// Filled by main(): filename -> parsed rows of that file.
const files = {}
|
2022-04-09 15:55:35 +03:00
|
|
|
|
2022-02-12 05:55:50 +03:00
|
|
|
/**
 * Entry point: loads every CSV listed in `allFiles`, then validates either the
 * files named on the command line or, when none are given, all of them.
 *
 * Pass 1 reads each file, rejects trailing whitespace, non-CRLF line endings and
 * rows whose column count differs from the header row, parses the CSV and stores
 * the result in `files` (row list) and `db` (rows keyed by channel/id/code).
 * Pass 2 runs the cross-reference checks and the Joi scheme for each requested
 * file, prints the errors per file and, when any were found, ends through
 * `handleError` (which exits the process with code 1).
 *
 * @returns {Promise<void>}
 */
async function main() {
  let globalErrors = []

  // Pass 1: load everything first, because the checks in pass 2 look rows up in `db`.
  for (const filepath of allFiles) {
    if (!filepath.endsWith('.csv')) continue

    const csvString = await file.read(filepath)
    if (/\s+$/.test(csvString)) {
      return handleError(`Error: empty lines at the end of file not allowed (${filepath})`)
    }

    const lines = csvString.split(/\r\n/)
    const headers = lines[0].split(',')
    for (const [i, line] of lines.entries()) {
      // The text was split on CRLF, so a "\n" still present is a bare LF line ending.
      if (line.includes('\n')) {
        return handleError(
          `Error: row ${i + 1} has the wrong line ending character, should be CRLF (${filepath})`
        )
      }
      // Split only on commas followed by an even number of double quotes,
      // i.e. commas that are outside quoted fields.
      if (line.split(/,(?=(?:(?:[^"]*"){2})*[^"]*$)/).length !== headers.length) {
        return handleError(`Error: row ${i + 1} has the wrong number of columns (${filepath})`)
      }
    }

    const filename = file.getFilename(filepath)
    const data = await csv
      .fromString(csvString)
      .catch(err => handleError(`${err.message} (${filepath})`))

    // Column that identifies a row in this file.
    let keyField
    switch (filename) {
      case 'blocklist':
        keyField = 'channel'
        break
      case 'categories':
      case 'channels':
        keyField = 'id'
        break
      default:
        keyField = 'code'
        break
    }

    db[filename] = _.keyBy(data, keyField)
    files[filename] = data
  }

  // Per-row cross-reference checks for each file, in the order they are applied.
  const rowValidators = new Map([
    [
      'channels',
      [
        validateChannelId,
        validateChannelBroadcastArea,
        validateChannelSubdivision,
        validateChannelCategories,
        validateChannelReplacedBy,
        validateChannelLanguages,
        validateChannelCountry
      ]
    ],
    ['blocklist', [validateChannel]],
    ['countries', [validateCountryLanguages]],
    ['subdivisions', [validateSubdivisionCountry]],
    ['regions', [validateRegionCountries]]
  ])

  // Pass 2: validate the requested files (all of them when no argument was given).
  const toCheck = program.args.length ? program.args : allFiles
  for (const filepath of toCheck) {
    const filename = file.getFilename(filepath)
    if (!schemes[filename]) return handleError(`Error: "${filename}" scheme is missing`)

    const rows = files[filename]
    // Without this guard the JSON round-trip below would throw a SyntaxError
    // for a file that has a scheme but was not loaded in pass 1.
    if (!rows) return handleError(`Error: "${filename}" data is missing`)

    // The custom checks get a deep copy; the Joi scheme below validates the original rows.
    const rowsCopy = JSON.parse(JSON.stringify(rows))

    let fileErrors = []
    if (filename === 'channels') {
      // Only duplicate ids are checked; the duplicate-name check
      // (findDuplicatesByName) is currently disabled.
      fileErrors = fileErrors.concat(findDuplicatesById(rowsCopy))
    }

    const validators = rowValidators.get(filename) || []
    if (validators.length) {
      for (const [i, row] of rowsCopy.entries()) {
        for (const validate of validators) {
          fileErrors = fileErrors.concat(validate(row, i))
        }
      }
    }

    const schema = Joi.object(schemes[filename])
    rows.forEach((row, i) => {
      const { error } = schema.validate(row, { abortEarly: false })
      if (error) {
        error.details.forEach(detail => {
          fileErrors.push({ line: i + 2, message: detail.message })
        })
      }
    })

    if (fileErrors.length) {
      logger.info(`\n${chalk.underline(filepath)}`)
      fileErrors.forEach(err => {
        const position = err.line.toString().padEnd(6, ' ')
        logger.info(` ${chalk.gray(position)} ${err.message}`)
      })
      globalErrors = globalErrors.concat(fileErrors)
    }
  }

  if (globalErrors.length) return handleError(`${globalErrors.length} error(s)`)
}
|
|
|
|
|
|
|
|
// Start the validation. An unexpected rejection is reported through handleError
// (which exits with code 1) instead of being left as a floating promise.
main().catch(err => handleError((err && err.stack) || String(err)))
|
|
|
|
|
2022-04-16 14:28:06 +03:00
|
|
|
/**
 * Finds rows whose `id` repeats the `id` of an earlier row, compared
 * case-insensitively.
 *
 * @param {Array<{id: string}>} rows - Rows to scan, in file order.
 * @returns {Array<{line: number, message: string}>} One error per repeated
 *   occurrence (the first occurrence is not reported); `line` is the row index
 *   plus 2.
 */
function findDuplicatesById(rows) {
  const errors = []
  // A Set rather than a plain object, so ids such as "constructor" or
  // "__proto__" are not mistaken for already-seen entries via Object.prototype.
  const seen = new Set()

  rows.forEach((row, i) => {
    const normId = row.id.toLowerCase()
    if (seen.has(normId)) {
      errors.push({
        line: i + 2,
        message: `entry with the id "${row.id}" already exists`
      })
    }

    seen.add(normId)
  })

  return errors
}
|
|
|
|
|
|
|
|
/**
 * Finds rows whose `name` repeats the `name` of an earlier row, compared
 * case-insensitively.
 *
 * @param {Array<{name: string}>} rows - Rows to scan, in file order.
 * @returns {Array<{line: number, message: string}>} One error per repeated
 *   occurrence (the first occurrence is not reported); `line` is the row index
 *   plus 2.
 */
function findDuplicatesByName(rows) {
  const errors = []
  // A Set rather than a plain object, so a name like "Constructor" (lower-cased
  // to "constructor") is not reported as a duplicate via Object.prototype.
  const seen = new Set()

  rows.forEach((row, i) => {
    const normName = row.name.toLowerCase()
    if (seen.has(normName)) {
      errors.push({
        line: i + 2,
        message: `entry with the name "${row.name}" already exists`
      })
    }

    seen.add(normName)
  })

  return errors
}
|
2022-04-09 04:02:02 +03:00
|
|
|
|
2022-09-23 05:47:57 +03:00
|
|
|
/**
 * Checks that a channel's `id` equals `<normalized name>.<lower-cased country>`.
 *
 * @param {object} row - Channel row; `id`, `name` and `country` are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} Empty when the id matches,
 *   otherwise a single error.
 */
function validateChannelId(row, i) {
  // Transliterates the name, then rewrites a leading "@" to "At", a leading "&"
  // to "And", every "+" to "Plus", a whitespace followed by "-<digit>" to
  // " Minus<digit>", and finally drops everything that is not a letter or digit.
  const normalize = name =>
    transliterate(name)
      .replace(/^@/i, 'At')
      .replace(/^&/i, 'And')
      .replace(/\+/gi, 'Plus')
      .replace(/\s\-(\d)/gi, ' Minus$1')
      .replace(/[^a-z\d]+/gi, '')

  const errors = []
  const name = normalize(row.name)
  const code = row.country.toLowerCase()
  const expected = `${name}.${code}`

  if (expected !== row.id) {
    errors.push({
      line: i + 2,
      message: `"${row.id}" must be derived from the channel name "${row.name}" and the country code "${row.country}"`
    })
  }

  return errors
}
|
|
|
|
|
2022-04-09 16:10:08 +03:00
|
|
|
/**
 * Checks that every entry of a channel's `categories` is a key of `db.categories`.
 *
 * @param {object} row - Channel row; `id` and `categories` (iterated with
 *   forEach) are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} One error per unknown category.
 */
function validateChannelCategories(row, i) {
  const errors = []

  row.categories.forEach(category => {
    // Own-property check: a plain `db.categories[category]` lookup would accept
    // inherited keys such as "constructor" as existing categories.
    if (!Object.prototype.hasOwnProperty.call(db.categories, category)) {
      errors.push({
        line: i + 2,
        message: `"${row.id}" has the wrong category "${category}"`
      })
    }
  })

  return errors
}
|
|
|
|
|
2022-04-09 16:10:08 +03:00
|
|
|
/**
 * Checks that a channel's `country` is a key of `db.countries`.
 *
 * @param {object} row - Channel row; `id` and `country` are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} Empty when the country is
 *   known, otherwise a single error.
 */
function validateChannelCountry(row, i) {
  // Own-property check so inherited keys such as "constructor" do not count
  // as known countries.
  const known = Object.prototype.hasOwnProperty.call(db.countries, row.country)
  if (known) return []

  return [
    {
      line: i + 2,
      message: `"${row.id}" has the wrong country "${row.country}"`
    }
  ]
}
|
|
|
|
|
2022-04-21 21:47:14 +03:00
|
|
|
/**
 * Checks that a channel's `replaced_by`, when set, is a key of `db.channels`.
 *
 * @param {object} row - Channel row; `id` and `replaced_by` are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} Empty when `replaced_by` is
 *   falsy or refers to a known channel, otherwise a single error.
 */
function validateChannelReplacedBy(row, i) {
  const errors = []

  // Own-property check so inherited keys such as "constructor" are not
  // accepted as existing channel ids.
  if (row.replaced_by && !Object.prototype.hasOwnProperty.call(db.channels, row.replaced_by)) {
    errors.push({
      line: i + 2,
      message: `"${row.id}" has the wrong replaced_by "${row.replaced_by}"`
    })
  }

  return errors
}
|
|
|
|
|
2022-04-09 16:10:08 +03:00
|
|
|
/**
 * Checks that a channel's `subdivision`, when set, is a key of `db.subdivisions`.
 *
 * @param {object} row - Channel row; `id` and `subdivision` are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} Empty when `subdivision` is
 *   falsy or known, otherwise a single error.
 */
function validateChannelSubdivision(row, i) {
  const errors = []
  const { subdivision } = row

  // Own-property check so inherited keys such as "constructor" are not
  // accepted as existing subdivision codes.
  if (subdivision && !Object.prototype.hasOwnProperty.call(db.subdivisions, subdivision)) {
    errors.push({
      line: i + 2,
      message: `"${row.id}" has the wrong subdivision "${row.subdivision}"`
    })
  }

  return errors
}
|
|
|
|
|
2022-04-09 16:10:08 +03:00
|
|
|
/**
 * Checks every `<type>/<code>` entry of a channel's `broadcast_area`: for type
 * "r" the code must be a key of `db.regions`, for "c" of `db.countries` and for
 * "s" of `db.subdivisions`. Entries with any other type are not checked here.
 *
 * @param {object} row - Channel row; `id` and `broadcast_area` (iterated with
 *   forEach) are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} One error per entry whose
 *   code is unknown.
 */
function validateChannelBroadcastArea(row, i) {
  const errors = []
  // Own-property check so inherited keys such as "constructor" are not
  // accepted as existing codes.
  const has = (table, key) => Object.prototype.hasOwnProperty.call(table, key)

  row.broadcast_area.forEach(area => {
    const [type, code] = area.split('/')
    const unknown =
      (type === 'r' && !has(db.regions, code)) ||
      (type === 'c' && !has(db.countries, code)) ||
      (type === 's' && !has(db.subdivisions, code))

    if (unknown) {
      errors.push({
        line: i + 2,
        message: `"${row.id}" has the wrong broadcast_area "${area}"`
      })
    }
  })

  return errors
}
|
|
|
|
|
2022-04-09 16:10:08 +03:00
|
|
|
/**
 * Checks that every entry of a channel's `languages` is a key of `db.languages`.
 *
 * @param {object} row - Channel row; `id` and `languages` (iterated with
 *   forEach) are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} One error per unknown language.
 */
function validateChannelLanguages(row, i) {
  const errors = []

  row.languages.forEach(language => {
    // Own-property check so inherited keys such as "constructor" are not
    // accepted as existing language codes.
    if (!Object.prototype.hasOwnProperty.call(db.languages, language)) {
      errors.push({
        line: i + 2,
        message: `"${row.id}" has the wrong language "${language}"`
      })
    }
  })

  return errors
}
|
2022-04-09 15:11:13 +03:00
|
|
|
|
2022-09-23 05:47:57 +03:00
|
|
|
/**
 * Checks that a row's `channel` is a key of `db.channels`.
 *
 * @param {object} row - Row whose `channel` is read (main() applies this to
 *   blocklist rows).
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} Empty when the channel is
 *   known, otherwise a single error.
 */
function validateChannel(row, i) {
  // Own-property check so inherited keys such as "constructor" are not
  // accepted as existing channel ids.
  if (Object.prototype.hasOwnProperty.call(db.channels, row.channel)) return []

  return [
    {
      line: i + 2,
      message: `"${row.channel}" is missing in the channels.csv`
    }
  ]
}
|
|
|
|
|
2022-10-24 03:43:57 +03:00
|
|
|
/**
 * Checks that every entry of a country's `languages` is a key of `db.languages`.
 *
 * @param {object} row - Country row; `code` and `languages` (iterated with
 *   for...of) are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} One error per unknown language.
 */
function validateCountryLanguages(row, i) {
  const errors = []

  for (const lang of row.languages) {
    // Own-property check so inherited keys such as "constructor" are not
    // accepted as existing language codes.
    if (!Object.prototype.hasOwnProperty.call(db.languages, lang)) {
      errors.push({
        line: i + 2,
        message: `"${row.code}" has the wrong language "${lang}"`
      })
    }
  }

  return errors
}
|
|
|
|
|
2022-04-09 16:18:40 +03:00
|
|
|
/**
 * Checks that a subdivision's `country` is a key of `db.countries`.
 *
 * @param {object} row - Subdivision row; `code` and `country` are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} Empty when the country is
 *   known, otherwise a single error.
 */
function validateSubdivisionCountry(row, i) {
  const errors = []

  // Own-property check so inherited keys such as "constructor" are not
  // accepted as existing country codes.
  if (!Object.prototype.hasOwnProperty.call(db.countries, row.country)) {
    errors.push({
      line: i + 2,
      message: `"${row.code}" has the wrong country "${row.country}"`
    })
  }

  return errors
}
|
|
|
|
|
2022-04-09 16:22:05 +03:00
|
|
|
/**
 * Checks that every entry of a region's `countries` is a key of `db.countries`.
 *
 * @param {object} row - Region row; `code` and `countries` (iterated with
 *   forEach) are read.
 * @param {number} i - Index of the row; the reported line is `i + 2`.
 * @returns {Array<{line: number, message: string}>} One error per unknown country.
 */
function validateRegionCountries(row, i) {
  const errors = []

  row.countries.forEach(country => {
    // Own-property check so inherited keys such as "constructor" are not
    // accepted as existing country codes.
    if (!Object.prototype.hasOwnProperty.call(db.countries, country)) {
      errors.push({
        line: i + 2,
        message: `"${row.code}" has the wrong country "${country}"`
      })
    }
  })

  return errors
}
|
|
|
|
|
2022-04-09 15:11:13 +03:00
|
|
|
/**
 * Reports a fatal error: logs the message (preceded by a newline) in red via
 * `logger.error`, then terminates the process with exit code 1.
 *
 * @param {string} message - Text to report.
 */
function handleError(message) {
  const text = chalk.red(`\n${message}`)
  logger.error(text)
  process.exit(1)
}
|