init
This commit is contained in:
49
parser/.env.example
Normal file
49
parser/.env.example
Normal file
@@ -0,0 +1,49 @@
|
||||
# Base page to open after authentication
|
||||
ALPINBET_BASE_URL=https://alpinbet.com/
|
||||
ALPINBET_TARGET_URL=https://alpinbet.com/dispatch/antigol/raketafon
|
||||
|
||||
# Browser mode
|
||||
HEADLESS=true
|
||||
AUTH_HEADLESS=false
|
||||
AUTH_ENABLED=true
|
||||
AUTH_LOGIN_URL=https://alpinbet.com/site/login
|
||||
AUTH_USERNAME=
|
||||
AUTH_PASSWORD=
|
||||
AUTH_USERNAME_SELECTOR=input[name="LoginForm[username]"]
|
||||
AUTH_PASSWORD_SELECTOR=input[name="LoginForm[password]"]
|
||||
AUTH_SUBMIT_SELECTOR=button[type="submit"]
|
||||
AUTH_SUCCESS_SELECTOR=#currentUser, .user-card, .currentUser
|
||||
AUTH_LOGIN_DETECT_URL_PART=/site/login
|
||||
POLL_INTERVAL_MS=10000
|
||||
REQUEST_TIMEOUT_MS=45000
|
||||
|
||||
# Session storage
|
||||
SESSION_FILE=./data/alpinbet-session.json
|
||||
STATE_FILE=./data/last-state.json
|
||||
HEARTBEAT_FILE=./data/heartbeat.json
|
||||
HTML_SNAPSHOT_DIR=./data/html
|
||||
|
||||
# Enable only for parser debugging to save raw HTML when rows are detected but filtered out
|
||||
DEBUG_SAVE_FILTERED_HTML=false
|
||||
|
||||
# Selectors
|
||||
ITEM_SELECTOR=.table-link
|
||||
ITEM_ID_SELECTOR=[data-id]
|
||||
TITLE_SELECTOR=.signal-title
|
||||
STATUS_SELECTOR=.signal-status
|
||||
COEF_SELECTOR=.signal-coef
|
||||
DATE_SELECTOR=.signal-date
|
||||
|
||||
# Notifications
|
||||
WEBHOOK_URL=
|
||||
WEBHOOK_SECRET=
|
||||
REDIS_URL=redis://127.0.0.1:6379
|
||||
BACKEND_INTERNAL_URL=
|
||||
PARSER_INTERNAL_SECRET=
|
||||
NOTIFY_ON_STARTUP=false
|
||||
|
||||
# Forecast pagination
|
||||
FORECAST_ACTIVE_TABS=1,2
|
||||
FORECAST_PER_PAGE=40
|
||||
FORECAST_START_PAGE=1
|
||||
FORECAST_MAX_PAGES=10
|
||||
6
parser/.gitignore
vendored
Normal file
6
parser/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
node_modules
|
||||
/data/*.json
|
||||
/data/html
|
||||
/data/screenshots
|
||||
/logs/*.log
|
||||
playwright-report
|
||||
12
parser/Dockerfile
Normal file
12
parser/Dockerfile
Normal file
@@ -0,0 +1,12 @@
|
||||
FROM mcr.microsoft.com/playwright:v1.58.2-noble
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY parser/package.json parser/package-lock.json ./parser/
|
||||
WORKDIR /app/parser
|
||||
RUN npm install
|
||||
COPY parser/ ./
|
||||
|
||||
WORKDIR /app/parser
|
||||
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 CMD ["node", "scripts/healthcheck.js"]
|
||||
CMD ["npm", "run", "start"]
|
||||
161
parser/README.md
Normal file
161
parser/README.md
Normal file
@@ -0,0 +1,161 @@
|
||||
# Alpinbet parser
|
||||
|
||||
Готовый каркас проекта для парсинга HTML-страниц Alpinbet через Playwright.
|
||||
|
||||
Что уже есть:
|
||||
- ручная авторизация с сохранением browser session;
|
||||
- повторное использование cookies/localStorage через `storageState`;
|
||||
- polling с интервалом `POLL_INTERVAL_MS` (по умолчанию раз в 20 секунд; в `.env.example` задано 10 секунд);
|
||||
- вытаскивание данных из DOM по CSS-селекторам;
|
||||
- сравнение текущего состояния с прошлым;
|
||||
- отправка webhook при изменениях;
|
||||
- сохранение HTML-снимка;
|
||||
- скриншот при ошибке;
|
||||
- простой логгер.
|
||||
|
||||
## 1. Установка
|
||||
|
||||
```bash
|
||||
npm install
|
||||
npx playwright install chromium
|
||||
```
|
||||
|
||||
## 2. Настройка
|
||||
|
||||
Скопируй пример env:
|
||||
|
||||
```bash
|
||||
cp .env.example .env
|
||||
```
|
||||
|
||||
Минимум проверь:
|
||||
- `ALPINBET_TARGET_URL`
|
||||
- `SESSION_FILE`
|
||||
- `ITEM_SELECTOR`
|
||||
- селекторы полей строки
|
||||
- `WEBHOOK_URL` при необходимости
|
||||
- `HTTP_USER_AGENT` при необходимости
|
||||
- `FORECAST_ACTIVE_TABS` (обычно `1,2`, где `1` — активные, `2` — неактивные)
|
||||
- `FORECAST_PER_PAGE` и `FORECAST_MAX_PAGES`
|
||||
|
||||
## 3. Первичная авторизация
|
||||
|
||||
```bash
|
||||
npm run auth
|
||||
```
|
||||
|
||||
Что будет:
|
||||
1. откроется Chromium;
|
||||
2. ты вручную логинишься в Alpinbet;
|
||||
3. после входа возвращаешься в консоль и жмёшь Enter;
|
||||
4. сессия сохранится в файл, заданный в `SESSION_FILE` (по умолчанию `data/alpinbet-session.json`).
|
||||
|
||||
## 4. Проверка сессии
|
||||
|
||||
```bash
|
||||
npm run check-session
|
||||
```
|
||||
|
||||
Скрипт попытается открыть целевую страницу и выведет:
|
||||
- HTTP-статус и текущий URL (после редиректов);
|
||||
- заголовок страницы;
|
||||
- признак необходимости авторизации (`Auth required`) и первые 3000 символов HTML.
|
||||
|
||||
Если тебя редиректит на логин, значит сессия протухла.
|
||||
|
||||
## 5. Запуск парсера
|
||||
|
||||
```bash
|
||||
npm start
|
||||
```
|
||||
|
||||
## Как работает парсер
|
||||
|
||||
Каждый цикл:
|
||||
1. получает HTML через HTTP-запрос с cookies из Playwright-сессии;
|
||||
2. если сессия истекла, переавторизуется через Playwright и повторяет HTTP-запрос;
|
||||
3. парсит HTML через Cheerio;
|
||||
4. собирает массив объектов;
|
||||
5. нормализует и сортирует его;
|
||||
6. сравнивает с прошлым состоянием;
|
||||
7. если есть изменения — сохраняет state и шлёт webhook.
|
||||
|
||||
### Получение HTML
|
||||
|
||||
- Парсер работает через HTTP-запросы с cookies из Playwright-сессии
|
||||
|
||||
Дополнительно:
|
||||
- `FORECAST_ACTIVE_TABS=1,2` — какие вкладки забирать
|
||||
- `FORECAST_PER_PAGE=40` — размер страницы запроса
|
||||
- `FORECAST_START_PAGE=1` — стартовая страница
|
||||
- `FORECAST_MAX_PAGES=10` — максимум страниц на вкладку за цикл
|
||||
|
||||
Парсер автоматически подставляет в URL параметры:
|
||||
- `ForecastSearch[activeTab]`
|
||||
- `ForecastSearch[perPage]`
|
||||
- `page`
|
||||
|
||||
Пример `ALPINBET_TARGET_URL`:
|
||||
|
||||
```text
|
||||
https://alpinbet.com/dispatch/diamondwither/dfadfdfaadfadf?ForecastSearch%5BisFavorite%5D=0&ForecastSearch%5Bsort%5D=date&ForecastSearch%5Buser_id%5D=7891&ForecastSearch%5Bg_id%5D=&ForecastSearch%5Bdispatch_id%5D=63652&_pjax=%23pjax-forecast-list
|
||||
```
|
||||
|
||||
## Важный момент
|
||||
|
||||
Селекторы в `.env.example` — это заглушки. Их почти наверняка надо поменять под реальный DOM Alpinbet.
|
||||
|
||||
Самый быстрый путь:
|
||||
1. зайди на страницу;
|
||||
2. открой DevTools;
|
||||
3. найди контейнер одной строки/сигнала;
|
||||
4. подставь реальные селекторы в `.env`.
|
||||
|
||||
## Формат webhook
|
||||
|
||||
По умолчанию отправляется JSON:
|
||||
|
||||
```json
|
||||
{
|
||||
"event": "alpinbet_changes_detected",
|
||||
"timestamp": "2026-03-18T00:00:00.000Z",
|
||||
"count": 2,
|
||||
"changes": [
|
||||
{
|
||||
"type": "added",
|
||||
"item": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
Если нужен Telegram/email/max, обычно проще принимать этот webhook на своём backend и уже там делать доставку.
|
||||
|
||||
## Структура
|
||||
|
||||
```text
|
||||
alpinbet-parser/
|
||||
data/
|
||||
logs/
|
||||
scripts/
|
||||
save-session.js
|
||||
check-session.js
|
||||
src/
|
||||
config.js
|
||||
index.js
|
||||
logger.js
|
||||
notifier.js
|
||||
parser.js
|
||||
session.js
|
||||
state.js
|
||||
utils.js
|
||||
```
|
||||
|
||||
## Что доработать под прод
|
||||
|
||||
- health endpoint;
|
||||
- Dockerfile;
|
||||
- очередь уведомлений;
|
||||
- админку для переавторизации;
|
||||
- хранение state в Redis/PostgreSQL;
|
||||
- multiple bot profiles / multiple pages.
|
||||
1130
parser/package-lock.json
generated
Normal file
1130
parser/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
25
parser/package.json
Normal file
25
parser/package.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"name": "alpinbet-parser",
|
||||
"version": "0.1.0",
|
||||
"description": "Alpinbet HTML parser with HTTP fetching, Playwright-backed session auth, polling, change detection and webhook notifications.",
|
||||
"main": "src/index.js",
|
||||
"type": "commonjs",
|
||||
"scripts": {
|
||||
"auth": "node scripts/save-session.js",
|
||||
"start": "node src/index.js",
|
||||
"start:with-db-url": "node scripts/with-db-url.cjs node src/index.js",
|
||||
"dev": "node --watch src/index.js",
|
||||
"check-session": "node scripts/check-session.js",
|
||||
"debug-fetch": "node scripts/debug-fetch.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"axios": "^1.8.4",
|
||||
"betting-signals-platform": "file:..",
|
||||
"bullmq": "^5.71.1",
|
||||
"cheerio": "^1.0.0",
|
||||
"dotenv": "^16.4.7",
|
||||
"ioredis": "^5.10.1",
|
||||
"pg": "^8.16.3",
|
||||
"playwright": "^1.58.2"
|
||||
}
|
||||
}
|
||||
21
parser/scripts/check-session.js
Normal file
21
parser/scripts/check-session.js
Normal file
@@ -0,0 +1,21 @@
|
||||
const config = require('../src/config');
|
||||
const { launchBrowser, createContext } = require('../src/session');
|
||||
const { fetchTargetHtmlWithSession } = require('../src/fetcher');
|
||||
|
||||
/**
 * Session smoke test: fetches the configured target page with the stored
 * session and prints the HTTP status, final URL, page title, whether the
 * response looks like a login page, and the first 3000 characters of HTML.
 *
 * The browser is closed in a `finally` block so a failed context creation or
 * fetch does not leave it running.
 */
(async () => {
  const browser = await launchBrowser({ headless: config.headless });

  try {
    const context = await createContext({ browser, sessionFile: config.sessionFile });
    const result = await fetchTargetHtmlWithSession({ context, config, targetUrl: config.targetUrl });

    console.log('HTTP status:', result.status);
    console.log('Current URL:', result.finalUrl);
    console.log('Title:', result.title);
    console.log('Auth required:', result.authRequired);
    console.log('\nHTML preview:\n');
    console.log(result.html.slice(0, 3000));
  } finally {
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exit(1);
});
|
||||
71
parser/scripts/debug-fetch.js
Normal file
71
parser/scripts/debug-fetch.js
Normal file
@@ -0,0 +1,71 @@
|
||||
const fs = require('fs/promises');
|
||||
const path = require('path');
|
||||
const cheerio = require('cheerio');
|
||||
const config = require('../src/config');
|
||||
const { launchBrowser, createContext } = require('../src/session');
|
||||
const { fetchTargetHtmlWithSession, buildForecastUrl } = require('../src/fetcher');
|
||||
const { parseItemsFromHtml, countForecastRows } = require('../src/parser');
|
||||
const { sanitizeFilePart } = require('../src/utils');
|
||||
|
||||
/**
 * Parses an optional numeric CLI argument.
 *
 * @param {string|undefined} rawValue - Raw value taken from process.argv.
 * @param {number} fallback - Value used when the argument is missing or empty.
 * @param {string} label - Argument name used in the error message.
 * @returns {number} A non-negative integer.
 * @throws {Error} When the argument is present but is not a non-negative integer.
 */
function parseCountArg(rawValue, fallback, label) {
  if (rawValue == null || rawValue === '') return fallback;

  const parsed = Number(rawValue);
  if (!Number.isInteger(parsed) || parsed < 0) {
    throw new Error(`Invalid ${label} argument: "${rawValue}" (expected a non-negative integer)`);
  }
  return parsed;
}

/**
 * Manual debugging entry point.
 *
 * Usage: node scripts/debug-fetch.js [bot-slug-or-url] [activeTab] [page]
 *
 * Fetches one forecast page with the stored session, saves the raw HTML into
 * the snapshot directory and prints parsing diagnostics: status, title, final
 * URL, detected row count, parsed item count, the first five `.table-link`
 * candidates and the first parsed item.
 */
async function main() {
  const botArg = process.argv[2] || 'raketafon';
  // Validate before any network work so a typo does not put "NaN" into the URL.
  const activeTab = parseCountArg(process.argv[3], 1, 'activeTab');
  const currentPage = parseCountArg(process.argv[4], 1, 'page');
  const botUrl = botArg.startsWith('http') ? botArg : `https://alpinbet.com/dispatch/antigol/${botArg}`;
  const targetUrl = buildForecastUrl(botUrl, {
    activeTab,
    perPage: config.forecastPerPage,
    page: currentPage
  });

  const browser = await launchBrowser({ headless: true });

  try {
    // Created inside the try so a failure here still closes the browser.
    const context = await createContext({ browser, sessionFile: config.sessionFile });
    const result = await fetchTargetHtmlWithSession({ context, config, targetUrl });
    const items = parseItemsFromHtml(result.html, config.selectors);
    const rows = countForecastRows(result.html, config.selectors);
    const $ = cheerio.load(result.html);
    const htmlFile = path.join(
      config.htmlSnapshotDir,
      `${sanitizeFilePart(botArg)}_tab-${activeTab}_page-${currentPage}_manual-debug.html`
    );

    await fs.mkdir(config.htmlSnapshotDir, { recursive: true });
    await fs.writeFile(htmlFile, result.html, 'utf8');

    console.log('status:', result.status);
    console.log('title:', result.title);
    console.log('url:', result.finalUrl);
    console.log('rows:', rows);
    console.log('parsedItems:', items.length);
    console.log('htmlFile:', htmlFile);

    // Summarise the first few raw row candidates, independent of the configured selectors.
    const rowCandidates = $('.table-link')
      .slice(0, 5)
      .toArray()
      .map((element, index) => {
        const $row = $(element);
        return {
          index,
          classes: $row.attr('class') || '',
          text: $row.text().replace(/\s+/g, ' ').trim().slice(0, 200),
          hasLink: $row.find('.cell-team-title a').length > 0,
          hasFavorite: $row.find('.rating-mailings__favorite').length > 0,
          hasCoefficient: $row.find('.cell-coefficient__total, .cell-coefficient').length > 0
        };
      });

    console.log('tableLinkCandidates:', JSON.stringify(rowCandidates, null, 2));
    if (items[0]) {
      console.log('firstItem:', JSON.stringify(items[0], null, 2));
    }
  } finally {
    await browser.close();
  }
}

main().catch((error) => {
  console.error(error);
  process.exit(1);
});
|
||||
25
parser/scripts/healthcheck.js
Normal file
25
parser/scripts/healthcheck.js
Normal file
@@ -0,0 +1,25 @@
|
||||
const fs = require('fs');
|
||||
const config = require('../src/config');
|
||||
|
||||
/**
 * Container healthcheck: exits 0 when the heartbeat file is fresh and the
 * parser is not in the `fatal` state, otherwise prints the reason and exits 1.
 */
function failHealthcheck(message) {
  console.error(message);
  process.exit(1);
}

try {
  const heartbeat = JSON.parse(fs.readFileSync(config.heartbeatFile, 'utf8'));
  const ageMs = Date.now() - new Date(heartbeat.timestamp).getTime();
  // A heartbeat may be up to three poll intervals old, and never less than two minutes.
  const staleAfterMs = Math.max(config.pollIntervalMs * 3, 120000);
  const isStale = !heartbeat.timestamp || Number.isNaN(ageMs) || ageMs > staleAfterMs;

  if (isStale) {
    failHealthcheck('Heartbeat is stale');
  }

  if (heartbeat.status === 'fatal') {
    failHealthcheck('Parser is in fatal state');
  }

  process.exit(0);
} catch (error) {
  // Missing file, unreadable JSON or a non-object payload all count as unhealthy.
  failHealthcheck(error.message);
}
|
||||
|
||||
33
parser/scripts/save-session.js
Normal file
33
parser/scripts/save-session.js
Normal file
@@ -0,0 +1,33 @@
|
||||
const readline = require('readline');
|
||||
const config = require('../src/config');
|
||||
const logger = require('../src/logger');
|
||||
const { launchBrowser } = require('../src/session');
|
||||
|
||||
/**
 * Prints the login prompt and waits until the operator presses Enter.
 *
 * @returns {Promise<void>} Resolves once a line has been read from stdin.
 */
function waitForEnter() {
  const terminal = readline.createInterface({ input: process.stdin, output: process.stdout });

  return new Promise((done) => {
    const onAnswer = () => {
      // Release stdin so the process can exit after the session is saved.
      terminal.close();
      done();
    };

    terminal.question('Залогинься в браузере, потом нажми Enter здесь...\n', onAnswer);
  });
}
|
||||
|
||||
/**
 * Interactive session capture: opens the base URL in a browser, waits for the
 * operator to log in manually and press Enter, then writes the browser storage
 * state to the configured session file.
 *
 * The browser is closed in a `finally` block so a navigation or save failure
 * does not leave it running.
 */
(async () => {
  const browser = await launchBrowser({ headless: config.authHeadless });

  try {
    const context = await browser.newContext({ viewport: { width: 1440, height: 900 } });
    const page = await context.newPage();

    logger.info(`Opening base URL: ${config.baseUrl}`);
    await page.goto(config.baseUrl, { waitUntil: 'domcontentloaded' });

    await waitForEnter();

    await context.storageState({ path: config.sessionFile });
    logger.info(`Session saved to ${config.sessionFile}`);
  } finally {
    await browser.close();
  }
})().catch((error) => {
  console.error(error);
  process.exit(1);
});
|
||||
51
parser/scripts/with-db-url.cjs
Normal file
51
parser/scripts/with-db-url.cjs
Normal file
@@ -0,0 +1,51 @@
|
||||
const { spawn } = require('child_process');
|
||||
|
||||
// Everything after the script name is the command (and its arguments) to run.
const args = process.argv.slice(2);

// Without a command there is nothing to wrap: print usage and fail.
if (args.length < 1) {
  console.error('Usage: node scripts/with-db-url.cjs <command> [...args]');
  process.exit(1);
}
|
||||
|
||||
/**
 * Builds the PostgreSQL connection URL for the child process.
 *
 * When both POSTGRES_DB and POSTGRES_USER are set, the URL is assembled from
 * the POSTGRES_* variables (host defaults to `localhost`, port to `5432`,
 * schema to `public`) and takes precedence over DATABASE_URL. Otherwise an
 * explicit DATABASE_URL is used as is.
 *
 * User, password, database name and schema are percent-encoded so that
 * reserved characters cannot break the URL.
 *
 * @returns {string} Connection URL.
 * @throws {Error} When neither source is fully configured.
 */
function buildDatabaseUrl() {
  const host = (process.env.POSTGRES_HOST || 'localhost').trim();
  const port = (process.env.POSTGRES_PORT || '5432').trim();
  const database = process.env.POSTGRES_DB && process.env.POSTGRES_DB.trim();
  const user = process.env.POSTGRES_USER && process.env.POSTGRES_USER.trim();
  // The password is intentionally not trimmed: surrounding whitespace may be part of it.
  const password = process.env.POSTGRES_PASSWORD || '';
  const schema = (process.env.POSTGRES_SCHEMA || 'public').trim();

  if (database && user) {
    const credentials = `${encodeURIComponent(user)}:${encodeURIComponent(password)}@`;
    const databasePath = encodeURIComponent(database);
    return `postgresql://${credentials}${host}:${port}/${databasePath}?schema=${encodeURIComponent(schema)}`;
  }

  const explicitUrl = process.env.DATABASE_URL && process.env.DATABASE_URL.trim();
  if (explicitUrl) {
    return explicitUrl;
  }

  throw new Error('DATABASE_URL is missing and POSTGRES_DB/POSTGRES_USER are not fully configured');
}
|
||||
|
||||
// Resolve the connection URL first; a configuration error aborts before anything is spawned.
let resolvedDatabaseUrl;
try {
  resolvedDatabaseUrl = buildDatabaseUrl();
} catch (error) {
  console.error(error instanceof Error ? error.message : String(error));
  process.exit(1);
}
process.env.DATABASE_URL = resolvedDatabaseUrl;

// Run the wrapped command through the shell with the augmented environment.
const [command, ...commandArgs] = args;
const child = spawn(command, commandArgs, {
  stdio: 'inherit',
  shell: true,
  env: process.env
});

// Mirror the child's outcome: re-raise its terminating signal or reuse its exit code.
child.on('exit', (code, signal) => {
  if (!signal) {
    process.exit(code ?? 1);
  } else {
    process.kill(process.pid, signal);
  }
});
|
||||
164
parser/src/config.js
Normal file
164
parser/src/config.js
Normal file
@@ -0,0 +1,164 @@
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
require('dotenv').config();
|
||||
|
||||
/**
 * Reads a boolean flag from the environment.
 *
 * `1`, `true`, `yes` and `on` (case-insensitive, surrounding whitespace
 * ignored) are true; any other non-empty value is false.
 *
 * @param {string} name - Environment variable name.
 * @param {boolean} [fallback=false] - Returned when the variable is unset, empty or whitespace-only.
 * @returns {boolean}
 */
function bool(name, fallback = false) {
  const raw = process.env[name];
  if (raw == null) return fallback;

  // Trim first: values injected by shells or compose files may carry padding.
  const normalized = String(raw).trim().toLowerCase();
  if (normalized === '') return fallback;

  return ['1', 'true', 'yes', 'on'].includes(normalized);
}
|
||||
|
||||
/**
 * Reads a numeric setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Returned when the variable is unset, blank or not a finite number.
 * @returns {number}
 */
function int(name, fallback) {
  const raw = process.env[name];
  // Number('') and Number('  ') are 0, so a blank value must be treated as
  // "not set"; otherwise `POLL_INTERVAL_MS=` would silently become 0.
  if (raw == null || String(raw).trim() === '') return fallback;

  const value = Number(raw);
  return Number.isFinite(value) ? value : fallback;
}
|
||||
|
||||
/**
 * Reads a string setting from the environment.
 *
 * @param {string} name - Environment variable name.
 * @param {string} [fallback=''] - Returned when the variable is unset or empty.
 * @returns {string}
 */
function str(name, fallback = '') {
  const value = process.env[name];
  return value ? value : fallback;
}
|
||||
|
||||
/**
 * Reads a comma-separated list of integers from the environment.
 *
 * Blank entries (as in `1,,2` or a trailing comma) and non-integer entries are
 * ignored. Blank entries are dropped explicitly because Number('') is 0 and
 * would otherwise appear in the list as a spurious 0.
 *
 * @param {string} name - Environment variable name.
 * @param {number[]} fallback - Returned when the variable is unset or yields no integers.
 * @returns {number[]}
 */
function intList(name, fallback) {
  const value = str(name, '');
  if (!value) return fallback;

  const parsed = value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry !== '')
    .map((entry) => Number(entry))
    .filter((entry) => Number.isInteger(entry));

  return parsed.length > 0 ? parsed : fallback;
}
|
||||
|
||||
/**
 * Converts a value into a lowercase slug made of `a-z`, `0-9` and single
 * dashes, with no leading or trailing dash.
 *
 * @param {*} value - Value to slugify; falsy values produce an empty string.
 * @returns {string}
 */
function slugify(value) {
  const lowered = String(value || '').trim().toLowerCase();
  // Every run of characters outside a-z/0-9 becomes one dash.
  const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
  return dashed.replace(/^-+|-+$/g, '');
}
|
||||
|
||||
/**
 * Removes bots with duplicate keys, keeping the first occurrence.
 *
 * The key is the bot's own `key`, or else a slug of its name, or else a slug
 * of its URL. Bots for which no key can be derived are dropped. A bot that
 * lacked a key is returned as a copy with the derived key filled in, so the
 * caller's objects are never mutated.
 *
 * @param {Array<{key?: string, name?: string, url?: string}>} bots
 * @returns {Array<{key: string, name?: string, url?: string}>}
 */
function uniqueBots(bots) {
  const seen = new Set();
  const result = [];

  for (const bot of bots) {
    const key = bot.key || slugify(bot.name) || slugify(bot.url);
    if (!key || seen.has(key)) continue;

    seen.add(key);
    result.push(bot.key === key ? bot : { ...bot, key });
  }

  return result;
}
|
||||
|
||||
/**
 * Splits one bot entry into `[name, url]`.
 *
 * Only the first `=` is a separator, and only when the text before it is not
 * itself part of a URL (contains no `://`). This keeps query strings such as
 * `?ForecastSearch%5Bsort%5D=date&page=2` intact both for `name=url` entries
 * and for bare URLs.
 *
 * @param {string} entry - Trimmed, non-empty entry.
 * @param {number} index - Zero-based position of the entry, used for the default name.
 * @returns {[string, string]}
 */
function splitBotEntry(entry, index) {
  const separatorIndex = entry.indexOf('=');
  const label = separatorIndex === -1 ? '' : entry.slice(0, separatorIndex);

  if (separatorIndex === -1 || label.includes('://')) {
    return [`bot-${index + 1}`, entry];
  }
  return [label, entry.slice(separatorIndex + 1)];
}

/**
 * Reads the bot list from the environment.
 *
 * Entries are separated by newlines or `|`; each entry is either `name=url`
 * or a bare URL. Entries without a URL are dropped and duplicates are removed.
 *
 * @param {string} name - Environment variable name.
 * @param {Array<object>} fallback - Returned when the variable is unset or yields no bots.
 * @returns {Array<{key: string, name: string, url: string}>}
 */
function parseBots(name, fallback) {
  const value = str(name, '');
  if (!value) return fallback;

  const bots = value
    .split(/\r?\n|\|/)
    .map((entry) => entry.trim())
    .filter(Boolean)
    .map((entry, index) => {
      const [rawName, rawUrl] = splitBotEntry(entry, index);
      const nameValue = String(rawName || '').trim();
      const urlValue = String(rawUrl || '').trim();
      return {
        key: slugify(nameValue) || `bot-${index + 1}`,
        name: nameValue || `Bot ${index + 1}`,
        url: urlValue
      };
    })
    .filter((bot) => bot.url);

  return bots.length > 0 ? uniqueBots(bots) : fallback;
}
|
||||
|
||||
// Parser package root; relative paths from the environment are resolved against it.
const rootDir = path.resolve(__dirname, '..');

// Resolves a path-valued setting (or its default) to an absolute path under the package root.
const resolveFromRoot = (envName, defaultPath) => path.resolve(rootDir, str(envName, defaultPath));

const sessionFile = resolveFromRoot('SESSION_FILE', './data/alpinbet-session.json');
const stateFile = resolveFromRoot('STATE_FILE', './data/last-state.json');
const heartbeatFile = resolveFromRoot('HEARTBEAT_FILE', './data/heartbeat.json');
const htmlSnapshotDir = resolveFromRoot('HTML_SNAPSHOT_DIR', './data/html');

// Built-in bot list used when ALPINBET_BOTS is not provided: [key, display name].
// Each bot's URL is the dispatch page named after its key.
const defaultBots = uniqueBots(
  [
    ['raketafon', 'Raketafon'],
    ['pobeda-1-comand', 'Pobeda 1 Comand'],
    ['raketabas', 'Raketabas'],
    ['sol-1www', 'Sol 1www'],
    ['fon-stb', 'Fon Stb'],
    ['fonat', 'Fonat']
  ].map(([key, name]) => ({
    key,
    name,
    url: `https://alpinbet.com/dispatch/antigol/${key}`
  }))
);

// Create every directory the parser writes into as soon as the config is loaded.
const requiredDirs = [
  path.dirname(sessionFile),
  path.dirname(stateFile),
  path.dirname(heartbeatFile),
  htmlSnapshotDir
];
requiredDirs.forEach((dir) => {
  fs.mkdirSync(dir, { recursive: true });
});
|
||||
|
||||
module.exports = {
|
||||
providerId: str('PROVIDER_ID', 'alpinbet'),
|
||||
baseUrl: str('ALPINBET_BASE_URL', 'https://alpinbet.com/'),
|
||||
targetUrl: str('ALPINBET_TARGET_URL', defaultBots[0]?.url || 'https://alpinbet.com/bot/my-live'),
|
||||
bots: parseBots('ALPINBET_BOTS', defaultBots),
|
||||
headless: bool('HEADLESS', true),
|
||||
authHeadless: bool('AUTH_HEADLESS', false),
|
||||
httpUserAgent: str(
|
||||
'HTTP_USER_AGENT',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36'
|
||||
),
|
||||
authEnabled: bool('AUTH_ENABLED', false),
|
||||
authLoginUrl: str('AUTH_LOGIN_URL', 'https://alpinbet.com/site/login'),
|
||||
authUsername: str('AUTH_USERNAME', ''),
|
||||
authPassword: str('AUTH_PASSWORD', ''),
|
||||
authUsernameSelector: str('AUTH_USERNAME_SELECTOR', 'input[name=\"LoginForm[login]\"], input[name=\"LoginForm[email]\"], input[name=\"email\"], input[type=\"email\"], input[name=\"login\"], input[name=\"username\"]'),
|
||||
authPasswordSelector: str('AUTH_PASSWORD_SELECTOR', 'input[name=\"LoginForm[password]\"], input[name=\"password\"], input[type=\"password\"]'),
|
||||
authSubmitSelector: str('AUTH_SUBMIT_SELECTOR', 'button[type=\"submit\"], input[type=\"submit\"], .login button, .auth button'),
|
||||
authSuccessSelector: str('AUTH_SUCCESS_SELECTOR', '#currentUser, .user-card, .currentUser'),
|
||||
authLoginDetectUrlPart: str('AUTH_LOGIN_DETECT_URL_PART', '/site/login'),
|
||||
pollIntervalMs: int('POLL_INTERVAL_MS', 20000),
|
||||
requestTimeoutMs: int('REQUEST_TIMEOUT_MS', 45000),
|
||||
sessionFile,
|
||||
stateFile,
|
||||
heartbeatFile,
|
||||
htmlSnapshotDir,
|
||||
debugSaveFilteredHtml: bool('DEBUG_SAVE_FILTERED_HTML', false),
|
||||
logLevel: str('LOG_LEVEL', 'info'),
|
||||
selectors: {
|
||||
item: str('ITEM_SELECTOR', '.signal-row'),
|
||||
itemId: str('ITEM_ID_SELECTOR', '[data-id]'),
|
||||
title: str('TITLE_SELECTOR', '.signal-title'),
|
||||
status: str('STATUS_SELECTOR', '.signal-status'),
|
||||
coef: str('COEF_SELECTOR', '.signal-coef'),
|
||||
date: str('DATE_SELECTOR', '.signal-date')
|
||||
},
|
||||
webhookUrl: str('WEBHOOK_URL', ''),
|
||||
webhookSecret: str('WEBHOOK_SECRET', ''),
|
||||
redisUrl: str('REDIS_URL', 'redis://127.0.0.1:6379'),
|
||||
backendInternalUrl: str('BACKEND_INTERNAL_URL', ''),
|
||||
parserInternalSecret: str('PARSER_INTERNAL_SECRET', ''),
|
||||
forecastOcrUrl: str('FORECAST_OCR_URL', ''),
|
||||
forecastOcrTimeoutMs: int('FORECAST_OCR_TIMEOUT_MS', 30000),
|
||||
forecastOcrEnabled: bool('FORECAST_OCR_ENABLED', true),
|
||||
notifyOnStartup: bool('NOTIFY_ON_STARTUP', false),
|
||||
forecastActiveTabs: intList('FORECAST_ACTIVE_TABS', [1, 2]).filter((tab) => tab === 1 || tab === 2),
|
||||
forecastPerPage: int('FORECAST_PER_PAGE', 40),
|
||||
forecastStartPage: int('FORECAST_START_PAGE', 1),
|
||||
forecastMaxPages: int('FORECAST_MAX_PAGES', 10)
|
||||
};
|
||||
102
parser/src/fetcher.js
Normal file
102
parser/src/fetcher.js
Normal file
@@ -0,0 +1,102 @@
|
||||
const axios = require('axios');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
/**
 * Checks whether a cookie's domain applies to a request host.
 *
 * The comparison is case-insensitive and ignores one leading dot on the cookie
 * domain; the host matches when it equals the domain or is a subdomain of it.
 *
 * @param {string} hostname - Request host.
 * @param {string} cookieDomain - Cookie `domain` attribute.
 * @returns {boolean}
 */
function domainMatches(hostname, cookieDomain) {
  const host = String(hostname || '').toLowerCase();
  const domain = String(cookieDomain || '').replace(/^\./, '').toLowerCase();

  if (host === domain) return true;
  return host.endsWith(`.${domain}`);
}
|
||||
|
||||
/**
 * Checks whether a cookie's path applies to a request path, following the
 * RFC 6265 path-match rule.
 *
 * The cookie path matches when it equals the request path, or when it is a
 * prefix of the request path that ends at a `/` boundary. A plain prefix test
 * is not enough: it would send a cookie scoped to `/admin` along with a
 * request for `/administrator`.
 *
 * @param {string} pathname - Request path; empty or missing means `/`.
 * @param {string} cookiePath - Cookie `path` attribute; empty or missing means `/`.
 * @returns {boolean}
 */
function pathMatches(pathname, cookiePath) {
  const normalizedPath = pathname || '/';
  const normalizedCookiePath = cookiePath || '/';

  if (normalizedPath === normalizedCookiePath) return true;
  if (!normalizedPath.startsWith(normalizedCookiePath)) return false;

  // The prefix must end on a segment boundary.
  return (
    normalizedCookiePath.endsWith('/') ||
    normalizedPath.charAt(normalizedCookiePath.length) === '/'
  );
}
|
||||
|
||||
/**
 * Builds the `Cookie` request header value for a URL from stored cookies.
 *
 * A cookie is included when its domain and path match the URL, it has not
 * expired (an `expires` of 0 or below is treated as "no expiry") and, if
 * marked secure, the URL uses HTTPS.
 *
 * @param {Array<object>} cookies - Cookies with name, value, domain, path, expires and secure fields.
 * @param {string} targetUrl - Absolute URL of the request.
 * @returns {string} `name=value` pairs joined with `; `, or an empty string.
 */
function buildCookieHeader(cookies, targetUrl) {
  const target = new URL(targetUrl);
  const nowSeconds = Date.now() / 1000;
  const overHttps = target.protocol === 'https:';
  const pairs = [];

  for (const cookie of cookies) {
    if (!domainMatches(target.hostname, cookie.domain)) continue;
    if (!pathMatches(target.pathname, cookie.path)) continue;

    const expired = cookie.expires && cookie.expires > 0 && cookie.expires < nowSeconds;
    if (expired) continue;

    if (cookie.secure && !overHttps) continue;

    pairs.push(`${cookie.name}=${cookie.value}`);
  }

  return pairs.join('; ');
}
|
||||
|
||||
/**
 * Detects whether an HTML document looks like the login page.
 *
 * The page counts as a login page when both the configured username and
 * password inputs are present and no marker of a signed-in user is found.
 * Any failure while inspecting the HTML is treated as "not a login page".
 *
 * @param {string} html - HTML document; falsy values are treated as empty.
 * @param {object} config - Provides authUsernameSelector and authPasswordSelector.
 * @returns {boolean}
 */
function hasLoginInputsInHtml(html, config) {
  // Elements whose presence indicates an authenticated session.
  const signedInMarkers = ['#currentUser', '.currentUser', '[data-target="#logout"], [href*="logout"]'];

  try {
    const $ = cheerio.load(html || '');
    const isPresent = (selector) => (selector ? $(selector).length > 0 : false);

    const showsCredentialInputs =
      isPresent(config.authUsernameSelector) && isPresent(config.authPasswordSelector);
    if (!showsCredentialInputs) return false;

    return !signedInMarkers.some((selector) => $(selector).length > 0);
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Returns the trimmed text of the first `<title>` element.
 *
 * @param {string} html - HTML document; falsy values are treated as empty.
 * @returns {string} The title, or an empty string when it is absent or the HTML cannot be processed.
 */
function extractTitle(html) {
  let title = '';

  try {
    const $ = cheerio.load(html || '');
    title = $('title').first().text().trim();
  } catch {
    title = '';
  }

  return title;
}
|
||||
|
||||
/**
 * Returns `baseUrl` with the forecast tab, page size and page number set as
 * query parameters; existing values of those parameters are replaced and all
 * other parameters are kept.
 *
 * @param {string} baseUrl - Absolute URL of the bot page.
 * @param {{activeTab: *, perPage: *, page: *}} options - Values to place in the query string.
 * @returns {string}
 */
function buildForecastUrl(baseUrl, { activeTab, perPage, page }) {
  const url = new URL(baseUrl);
  const overrides = [
    ['ForecastSearch[activeTab]', activeTab],
    ['ForecastSearch[perPage]', perPage],
    ['page', page]
  ];

  for (const [param, value] of overrides) {
    url.searchParams.set(param, String(value));
  }

  return url.toString();
}
|
||||
|
||||
/**
 * Fetches a page over plain HTTP using the cookies of a browser context.
 *
 * Any HTTP status is accepted (no exception for 4xx/5xx) and up to 10
 * redirects are followed. The response is flagged as requiring
 * authentication when the final URL contains the configured login URL part or
 * the HTML looks like the login form.
 *
 * @param {object} params
 * @param {object} params.context - Browser context whose `storageState()` supplies the cookies.
 * @param {object} params.config - Parser configuration.
 * @param {string} [params.targetUrl] - URL to fetch; defaults to `config.targetUrl`.
 * @returns {Promise<{status: number, html: string, finalUrl: string, title: string, authRequired: boolean}>}
 */
async function fetchTargetHtmlWithSession({ context, config, targetUrl }) {
  const requestUrl = targetUrl || config.targetUrl;
  const sessionSnapshot = await context.storageState();

  const headers = {
    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'ru,en;q=0.9',
    'User-Agent': config.httpUserAgent,
    Referer: config.baseUrl,
    Cookie: buildCookieHeader(sessionSnapshot.cookies || [], requestUrl)
  };

  const response = await axios.get(requestUrl, {
    headers,
    timeout: config.requestTimeoutMs,
    maxRedirects: 10,
    // Never throw on status: the caller inspects the status and HTML itself.
    validateStatus: () => true,
    responseType: 'text'
  });

  const body = response.data;
  const html = typeof body === 'string' ? body : String(body || '');
  // URL after redirects as exposed on the underlying response; falls back to the requested URL.
  const finalUrl = response?.request?.res?.responseUrl || requestUrl;

  const loginUrlPart = config.authLoginDetectUrlPart;
  const redirectedToLogin = Boolean(loginUrlPart && String(finalUrl).includes(loginUrlPart));
  const showsLoginForm = hasLoginInputsInHtml(html, config);

  return {
    status: response.status,
    html,
    finalUrl,
    title: extractTitle(html),
    authRequired: redirectedToLogin || showsLoginForm
  };
}
|
||||
|
||||
module.exports = {
|
||||
fetchTargetHtmlWithSession,
|
||||
buildForecastUrl
|
||||
};
|
||||
111
parser/src/forecast-ocr.js
Normal file
111
parser/src/forecast-ocr.js
Normal file
@@ -0,0 +1,111 @@
|
||||
const axios = require('axios');
|
||||
const config = require('./config');
|
||||
const logger = require('./logger');
|
||||
|
||||
const forecastCache = new Map();
|
||||
|
||||
/**
 * Resolves an image reference against the site base URL.
 *
 * @param {string} imageUrl - Absolute or relative image URL.
 * @returns {string|null} Absolute URL, or null when the input is empty or cannot be parsed.
 */
function normalizeImageUrl(imageUrl) {
  if (!imageUrl) return null;

  let absoluteUrl = null;
  try {
    absoluteUrl = new URL(imageUrl, config.baseUrl).toString();
  } catch {
    absoluteUrl = null;
  }

  return absoluteUrl;
}
|
||||
|
||||
/**
 * Returns the first image of an item that resolves to a usable absolute URL.
 *
 * @param {{images?: Array<string>}} item - Parsed item.
 * @returns {string|null} Absolute image URL, or null when the item has no usable image.
 */
function pickForecastImage(item) {
  const candidates = item?.images;
  if (!Array.isArray(candidates)) return null;

  let picked = null;
  // `some` stops at the first candidate that normalizes successfully.
  const found = candidates.some((candidate) => {
    picked = normalizeImageUrl(candidate);
    return Boolean(picked);
  });

  return found ? picked : null;
}
|
||||
|
||||
/**
 * Cleans up recognized forecast text: collapses whitespace runs to a single
 * space, removes whitespace before `)`, `,`, `.` and `:`, removes whitespace
 * after `(`, and trims the result.
 *
 * @param {*} value - Raw text.
 * @returns {string|null} Cleaned text, or null when the input is falsy or nothing is left.
 */
function normalizeForecastText(value) {
  if (!value) return null;

  const collapsed = String(value).replace(/\s+/g, ' ');
  const tightenedBefore = collapsed.replace(/\s+([),.:])/g, '$1');
  const tightenedAfter = tightenedBefore.replace(/([(])\s+/g, '$1');
  const cleaned = tightenedAfter.trim();

  return cleaned === '' ? null : cleaned;
}
|
||||
|
||||
// Upper bound for the in-memory OCR cache; the parser runs as a long-lived
// poller, so the cache must not grow without limit.
const FORECAST_CACHE_MAX_ENTRIES = 5000;

/**
 * Stores an OCR result for an image URL, evicting the oldest entry when the
 * cache is full (a Map iterates in insertion order).
 */
function rememberForecast(imageUrl, entry) {
  if (!forecastCache.has(imageUrl) && forecastCache.size >= FORECAST_CACHE_MAX_ENTRIES) {
    const oldestKey = forecastCache.keys().next().value;
    forecastCache.delete(oldestKey);
  }
  forecastCache.set(imageUrl, entry);
}

/**
 * Recognizes the forecast text on an item's image through the OCR service.
 *
 * The image is downloaded, posted as multipart form data to
 * `<forecastOcrUrl>/ocr/forecast`, and the normalized result is cached per
 * image URL. Failures are logged and reported with `source: 'error'`; this
 * function does not throw for network or service errors.
 *
 * A cache hit returns both `forecast` and `rawForecast`, so the same image
 * produces the same texts on every poll.
 *
 * @param {{images?: Array<string>}} item - Parsed item.
 * @returns {Promise<{forecast: string|null, rawForecast?: string|null, imageUrl: string|null, source: string}>}
 *   `source` is one of `disabled`, `no-image`, `cache`, `service-cache`, `service`, `error`.
 */
async function recognizeForecastText(item) {
  if (!config.forecastOcrEnabled || !config.forecastOcrUrl) {
    return { forecast: null, imageUrl: null, source: 'disabled' };
  }

  const imageUrl = pickForecastImage(item);
  if (!imageUrl) {
    return { forecast: null, imageUrl: null, source: 'no-image' };
  }

  if (forecastCache.has(imageUrl)) {
    const cached = forecastCache.get(imageUrl);
    return {
      forecast: cached.forecast,
      rawForecast: cached.rawForecast,
      imageUrl,
      source: 'cache'
    };
  }

  try {
    const imageResponse = await axios.get(imageUrl, {
      responseType: 'arraybuffer',
      timeout: config.requestTimeoutMs,
      headers: {
        'User-Agent': config.httpUserAgent,
        Referer: config.baseUrl
      }
    });
    const imageBuffer = Buffer.from(imageResponse.data);

    // Use the last path segment as the upload file name, with a generic fallback.
    const imageFileName = (() => {
      try {
        const pathname = new URL(imageUrl).pathname;
        const parts = pathname.split('/').filter(Boolean);
        return parts[parts.length - 1] || 'forecast-image';
      } catch {
        return 'forecast-image';
      }
    })();

    const formData = new FormData();
    formData.append('image', new Blob([imageBuffer]), imageFileName);

    const response = await axios.post(
      `${config.forecastOcrUrl.replace(/\/$/, '')}/ocr/forecast`,
      formData,
      {
        timeout: config.forecastOcrTimeoutMs,
        headers: {
          Accept: 'application/json'
        }
      }
    );

    const forecast = normalizeForecastText(response.data?.forecast);
    const rawForecast = normalizeForecastText(response.data?.rawForecast);
    rememberForecast(imageUrl, { forecast, rawForecast });

    return {
      forecast,
      rawForecast,
      imageUrl,
      source: response.data?.cached ? 'service-cache' : 'service'
    };
  } catch (error) {
    // Errors are not cached, so the next poll retries the image.
    logger.warn(`Forecast OCR failed for ${imageUrl}: ${error.message}`);
    return { forecast: null, rawForecast: null, imageUrl, source: 'error' };
  }
}
|
||||
|
||||
module.exports = {
|
||||
recognizeForecastText
|
||||
};
|
||||
415
parser/src/index.js
Normal file
415
parser/src/index.js
Normal file
@@ -0,0 +1,415 @@
|
||||
const fs = require('fs/promises');
|
||||
const config = require('./config');
|
||||
const logger = require('./logger');
|
||||
const { launchBrowser, createContext, authenticateContext, hasSessionFile } = require('./session');
|
||||
const { parseItemsFromHtml, countForecastRows } = require('./parser');
|
||||
const { fetchTargetHtmlWithSession, buildForecastUrl } = require('./fetcher');
|
||||
const { loadState, saveState, diffItems } = require('./state');
|
||||
const { sendWebhook } = require('./notifier');
|
||||
const { syncSignalsSnapshotToDb, logParserError, closeStorage } = require('./storage');
|
||||
const { nowIso, writeTextFile, sanitizeFilePart } = require('./utils');
|
||||
const { recognizeForecastText } = require('./forecast-ocr');
|
||||
|
||||
let runtimeBrowser = null;
|
||||
|
||||
/**
 * Returns a promise that resolves after the given delay.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
||||
|
||||
/**
 * Tells whether a page title indicates a "not found" page: it contains `404`
 * or the Russian phrase for "page not found" (case-insensitive).
 *
 * @param {string} [title=''] - Page title.
 * @returns {boolean}
 */
function isNotFoundTitle(title = '') {
  const haystack = String(title).toLowerCase();
  const markers = ['404', 'страница не найдена'];
  return markers.some((marker) => haystack.includes(marker));
}
|
||||
|
||||
/**
 * Returns the forecast tabs to poll: the configured tabs (or `[1, 2]` when
 * none are configured), without duplicates and limited to tabs 1 and 2.
 *
 * @returns {number[]}
 */
function normalizeTabs() {
  const configured = config.forecastActiveTabs;
  const source = configured.length > 0 ? configured : [1, 2];

  const accepted = new Set();
  for (const tab of source) {
    if (tab === 1 || tab === 2) {
      accepted.add(tab);
    }
  }

  return [...accepted];
}
|
||||
|
||||
/**
 * Tags parsed items with the tab they were fetched from.
 *
 * For every item:
 *  - `parserDetectedInactive` records whether the parser itself flagged the
 *    forecast as inactive;
 *  - `activeTab` keeps the parser-detected tab when it is a usable integer,
 *    otherwise falls back to the tab that was requested;
 *  - `forecastInactive` is true when the parser flagged it or when the
 *    requested tab is 2.
 *
 * @param {object[]} items - Parsed items.
 * @param {number} activeTab - Tab number the items were fetched from.
 * @returns {object[]} New item objects; the inputs are not mutated.
 */
function annotateItemsWithTab(items, activeTab) {
  return items.map((item) => {
    const detectedTab = item.activeTab;
    // `Number(null)` and `Number('')` are both 0, which passes
    // `Number.isInteger`. Accept only real numbers or non-blank strings so a
    // missing parser tab falls back to the requested tab instead of tab 0.
    const isUsableValue =
      typeof detectedTab === 'number' ||
      (typeof detectedTab === 'string' && detectedTab.trim() !== '');
    const hasDetectedTab = isUsableValue && Number.isInteger(Number(detectedTab));

    return {
      ...item,
      parserDetectedInactive: item.forecastInactive === true,
      activeTab: hasDetectedTab ? Number(detectedTab) : activeTab,
      forecastInactive: item.forecastInactive === true || activeTab === 2
    };
  });
}
|
||||
|
||||
/**
 * Removes items that share a `__key`, keeping the first occurrence of each.
 *
 * @param {Iterable<object>} items - Items carrying a `__key` property.
 * @returns {object[]} Unique items in first-seen order.
 */
function dedupeItems(items) {
  const seenKeys = new Set();
  const unique = [];

  for (const candidate of items) {
    if (seenKeys.has(candidate.__key)) {
      continue;
    }
    seenKeys.add(candidate.__key);
    unique.push(candidate);
  }

  return unique;
}
|
||||
|
||||
/**
 * Returns the bots that should be polled.
 *
 * When `config.bots` is a non-empty array it is returned as is; otherwise a
 * single default bot pointing at `config.targetUrl` is synthesized.
 *
 * @returns {Array<{key: string, name: string, url: string}>} Bot descriptors.
 */
function getActiveBots() {
  const hasConfiguredBots = Array.isArray(config.bots) && config.bots.length > 0;

  if (!hasConfiguredBots) {
    const fallbackBot = {
      key: 'default',
      name: 'Default bot',
      url: config.targetUrl
    };
    return [fallbackBot];
  }

  return config.bots;
}
|
||||
|
||||
/**
 * Attaches bot identity to every item and namespaces its key by bot.
 *
 * @param {object[]} items - Items carrying a `__key` property.
 * @param {{key: string, name: string, url: string}} bot - Bot the items belong to.
 * @returns {object[]} New item objects with `botKey`, `botName`, `botUrl` and a
 *   `__key` rewritten as `<bot.key>:<original key>`.
 */
function annotateItemsWithBot(items, bot) {
  return items.map((item) => {
    const botFields = {
      botKey: bot.key,
      botName: bot.name,
      botUrl: bot.url,
      __key: `${bot.key}:${item.__key}`
    };
    return Object.assign({}, item, botFields);
  });
}
|
||||
|
||||
/**
 * Runs forecast recognition for every item with a fixed concurrency of 4.
 *
 * Work is split into interleaved lanes: lane `k` handles indexes
 * `k, k + 4, k + 8, ...`. Results are written back by index, so the output
 * keeps the input order.
 *
 * @param {object[]} items - Items to enrich.
 * @returns {Promise<object[]>} Items extended with `forecast`, `forecastRaw`,
 *   `forecastImageUrl` and `forecastSource`.
 */
async function enrichItemsWithForecast(items) {
  const concurrency = 4;
  const enriched = new Array(items.length);

  const processLane = async (laneStart) => {
    let index = laneStart;
    while (index < items.length) {
      const item = items[index];
      const { forecast, rawForecast, imageUrl, source } = await recognizeForecastText(item);
      enriched[index] = {
        ...item,
        forecast,
        forecastRaw: rawForecast,
        forecastImageUrl: imageUrl,
        forecastSource: source
      };
      index += concurrency;
    }
  };

  const lanes = [];
  const laneCount = Math.min(concurrency, items.length);
  for (let lane = 0; lane < laneCount; lane += 1) {
    lanes.push(processLane(lane));
  }
  await Promise.all(lanes);

  return enriched.filter(Boolean);
}
|
||||
|
||||
/**
 * Looks up per-tab statistics inside a snapshot.
 *
 * @param {object|null|undefined} snapshot - Snapshot that may carry `stats.tabs`.
 * @param {number|string} tab - Tab identifier; stringified before lookup.
 * @returns {object|null} Stats for the tab, or null when absent or falsy.
 */
function getTabStats(snapshot, tab) {
  const tabsStats = snapshot?.stats?.tabs;
  if (!tabsStats) {
    return null;
  }
  return tabsStats[String(tab)] || null;
}
|
||||
|
||||
/**
 * Fetches a page over HTTP with the current session, re-authenticating once
 * when the response says the session has expired.
 *
 * The re-login only happens when `config.authEnabled` is set. The second
 * fetch result is returned as is, even if it still reports `authRequired`.
 *
 * @param {object} params
 * @param {object} params.context - Browser context holding the session.
 * @param {string} params.targetUrl - URL to fetch.
 * @returns {Promise<object>} Result of `fetchTargetHtmlWithSession`.
 */
async function fetchHttpPageWithSession({ context, targetUrl }) {
  const fetchOnce = () => fetchTargetHtmlWithSession({ context, config, targetUrl });

  const firstAttempt = await fetchOnce();
  const shouldReauthenticate = firstAttempt.authRequired && config.authEnabled;
  if (!shouldReauthenticate) {
    return firstAttempt;
  }

  logger.warn('HTTP fetch indicates expired session, refreshing authentication in Playwright');
  await authenticateContext({ context, sessionFile: config.sessionFile, config });

  return fetchOnce();
}
|
||||
|
||||
/**
 * Writes the raw HTML of a page to the snapshot directory for debugging.
 *
 * Does nothing unless `config.debugSaveFilteredHtml` is enabled. The file name
 * combines the sanitized bot key, tab, page and a timestamp in which `:` and
 * `.` are replaced by `-`.
 *
 * @param {object} params
 * @param {string} params.botKey - Bot identifier used in the file name.
 * @param {number} params.activeTab - Tab the page belongs to.
 * @param {number} params.currentPage - Page number.
 * @param {string} params.html - Raw HTML to store.
 * @returns {Promise<void>}
 */
async function saveDebugHtmlSnapshot({ botKey, activeTab, currentPage, html }) {
  if (config.debugSaveFilteredHtml) {
    const stamp = nowIso().replace(/[:.]/g, '-');
    const snapshotName = `${sanitizeFilePart(botKey)}_tab-${activeTab}_page-${currentPage}_${stamp}`;
    const savedPath = await writeTextFile(config.htmlSnapshotDir, snapshotName, html, 'html');
    logger.info(`Saved debug HTML snapshot: ${savedPath}`);
  }
}
|
||||
|
||||
/**
 * Fetches and parses every configured tab and page of one bot over HTTP.
 *
 * For each tab, pages are requested starting at `config.forecastStartPage`,
 * up to `config.forecastMaxPages` pages. Pagination for a tab stops when a
 * page yields no parsed items or fewer than `config.forecastPerPage` items.
 *
 * @param {object} params
 * @param {object} params.context - Browser context holding the session.
 * @param {string} params.targetUrl - Base URL of the bot page.
 * @returns {Promise<{items: object[], currentUrl: string, title: string, source: string, stats: object}>}
 *   Deduplicated items plus the last URL/title seen and per-tab statistics.
 * @throws {Error} When a page requires authentication, responds with a status
 *   of 400 or above, or has a "not found" title.
 */
async function loadFromHttp({ context, targetUrl }) {
  const tabs = normalizeTabs();
  const collected = [];
  const byTab = {};
  let lastUrl = targetUrl;
  let lastTitle = '';

  logger.info(
    `Fetching target pages via HTTP with tabs=${tabs.join(',')}, perPage=${config.forecastPerPage}, maxPages=${config.forecastMaxPages}`
  );

  for (const activeTab of tabs) {
    const tabStats = { pagesFetched: 0, itemsParsed: 0, rowsDetected: 0 };
    byTab[String(activeTab)] = tabStats;

    for (let pageOffset = 0; pageOffset < config.forecastMaxPages; pageOffset += 1) {
      const currentPage = config.forecastStartPage + pageOffset;
      const pageUrl = buildForecastUrl(targetUrl, {
        activeTab,
        perPage: config.forecastPerPage,
        page: currentPage
      });

      const page = await fetchHttpPageWithSession({ context, targetUrl: pageUrl });
      lastUrl = page.finalUrl;
      lastTitle = page.title || lastTitle;

      if (page.authRequired) {
        throw new Error(`Authentication required for target page: ${page.finalUrl}`);
      }
      if (page.status >= 400) {
        throw new Error(`Target page returned status ${page.status}: ${page.finalUrl}`);
      }
      if (isNotFoundTitle(page.title)) {
        throw new Error(`Target page returned not found: ${page.finalUrl}`);
      }

      const parsedItems = parseItemsFromHtml(page.html, config.selectors, activeTab);
      const detectedRows = countForecastRows(page.html, config.selectors, activeTab);
      tabStats.rowsDetected += detectedRows;
      logger.info(`Parsed items for tab=${activeTab} page=${currentPage}: ${parsedItems.length}`);

      const pageIsEmpty = parsedItems.length === 0;
      if (pageIsEmpty) {
        // Rows exist but none survived filtering: keep the HTML for debugging
        // and still count the page as fetched.
        if (detectedRows > 0) {
          logger.info(
            `Detected ${detectedRows} forecast rows for tab=${activeTab} page=${currentPage}, but all were filtered out as hidden or incomplete`
          );
          await saveDebugHtmlSnapshot({
            botKey: new URL(targetUrl).pathname.split('/').filter(Boolean).pop() || 'unknown-bot',
            activeTab,
            currentPage,
            html: page.html
          });
          tabStats.pagesFetched += 1;
        }
        break;
      }

      tabStats.pagesFetched += 1;
      tabStats.itemsParsed += parsedItems.length;
      for (const annotated of annotateItemsWithTab(parsedItems, activeTab)) {
        collected.push(annotated);
      }

      // A short page is treated as the last page of this tab.
      const isLastPage = parsedItems.length < config.forecastPerPage;
      if (isLastPage) {
        break;
      }
    }
  }

  const uniqueItems = dedupeItems(collected);

  let totalDetectedRows = 0;
  for (const perTab of Object.values(byTab)) {
    totalDetectedRows += perTab.rowsDetected || 0;
  }

  return {
    items: uniqueItems,
    currentUrl: lastUrl,
    title: lastTitle,
    source: 'http',
    stats: {
      rawItems: collected.length,
      dedupedItems: uniqueItems.length,
      detectedRows: totalDetectedRows,
      tabs: byTab
    }
  };
}
|
||||
|
||||
/**
 * Persists the current parser status to the heartbeat file as pretty JSON.
 *
 * @param {string} status - Status label written to the file.
 * @param {object} [extra={}] - Extra fields merged into the payload; they can
 *   override `status` and `timestamp` because they are spread last.
 * @returns {Promise<void>}
 */
async function writeHeartbeat(status, extra = {}) {
  const heartbeat = { status, timestamp: nowIso(), ...extra };
  const serialized = JSON.stringify(heartbeat, null, 2);
  await fs.writeFile(config.heartbeatFile, serialized, 'utf8');
}
|
||||
|
||||
/**
 * Main parser loop: launches the browser, then polls every configured bot
 * forever, syncing the merged snapshot to storage and sending notifications.
 *
 * Each cycle:
 *  1. writes a "running" heartbeat and authenticates if the session file is
 *     missing and authentication is enabled;
 *  2. loads every bot over HTTP, enriches its items with forecast text and
 *     records whether the bot is eligible for sync;
 *  3. diffs the merged snapshot against the saved state and saves the state
 *     when it is new or changed;
 *  4. queues the snapshot for storage and, when appropriate, sends the
 *     changes webhook;
 *  5. sleeps for `config.pollIntervalMs`.
 *
 * Errors inside a cycle are logged, persisted and reported by webhook on a
 * best-effort basis; the loop then continues with the next cycle.
 *
 * @returns {Promise<never>} Never resolves under normal operation; rejects
 *   only when startup (browser launch or context creation) fails.
 */
async function run() {
  const browser = await launchBrowser({ headless: config.headless });
  runtimeBrowser = browser;
  const context = await createContext({ browser, sessionFile: config.sessionFile });

  logger.info('Parser fetch mode: http');
  logger.info(
    `Configured bots: ${getActiveBots()
      .map((bot) => `${bot.key} -> ${bot.url}`)
      .join(', ')}`
  );
  let firstRun = true;

  while (true) {
    try {
      await writeHeartbeat('running');

      if (!hasSessionFile(config.sessionFile) && config.authEnabled) {
        logger.warn('Session file is missing, performing background authentication before HTTP fetch');
        await authenticateContext({ context, sessionFile: config.sessionFile, config });
      }

      const botSnapshots = [];
      const aggregatedItems = [];

      for (const bot of getActiveBots()) {
        logger.info(`Loading bot "${bot.name}" (${bot.url})`);

        const botSnapshot = await loadFromHttp({ context, targetUrl: bot.url });
        let syncEligible = true;
        let syncWarning = null;
        const activeTabStats = getTabStats(botSnapshot, 1);
        const activeListIsEmpty = Boolean(activeTabStats)
          && (activeTabStats.itemsParsed || 0) === 0
          && (activeTabStats.rowsDetected || 0) === 0;

        if (botSnapshot.items.length === 0) {
          if (activeListIsEmpty) {
            // The active tab was fetched and is genuinely empty.
            syncWarning = 'empty_active_list';
            logger.warn(
              `Bot "${bot.name}" returned an empty active list. All current matches for this bot will be marked inactive`
            );
          } else if ((botSnapshot.stats?.detectedRows || 0) > 0) {
            // Rows exist but every one of them was filtered out by the parser.
            syncWarning = 'filtered_hidden_or_incomplete_rows';
            logger.info(
              `Bot "${bot.name}" returned only hidden or incomplete rows. Sync will continue with an empty valid snapshot`
            );
          } else {
            // Nothing was detected at all: do not trust this as "no signals".
            syncEligible = false;
            syncWarning = 'empty_snapshot';
            logger.warn(
              `Bot "${bot.name}" returned 0 items. Skipping automatic removals for this bot in the current sync cycle`
            );
          }
        }

        const botItems = await enrichItemsWithForecast(annotateItemsWithBot(botSnapshot.items, bot));
        aggregatedItems.push(...botItems);
        botSnapshots.push({
          key: bot.key,
          name: bot.name,
          url: bot.url,
          currentUrl: botSnapshot.currentUrl,
          title: botSnapshot.title,
          source: botSnapshot.source,
          stats: botSnapshot.stats,
          items: botItems.length,
          syncEligible,
          syncWarning
        });
      }

      const snapshot = {
        items: dedupeItems(aggregatedItems),
        currentUrl: botSnapshots[0]?.currentUrl || config.targetUrl,
        title: botSnapshots.map((bot) => bot.title).filter(Boolean).join(' | '),
        source: botSnapshots.map((bot) => `${bot.key}:${bot.source}`).join(','),
        stats: {
          bots: botSnapshots
        }
      };

      logger.info('Loaded snapshot', {
        source: snapshot.source,
        currentUrl: snapshot.currentUrl,
        title: snapshot.title,
        items: snapshot.items.length
      });
      logger.info('Snapshot stats', snapshot.stats);

      const hasSyncEligibleBots = botSnapshots.some((bot) => bot.syncEligible);

      if (snapshot.items.length === 0 && !hasSyncEligibleBots) {
        throw new Error(`No parsed items found on page: ${snapshot.currentUrl}`);
      }

      const previousState = await loadState(config.stateFile);
      const previousItems = previousState?.items || [];
      const changes = diffItems(previousItems, snapshot.items);

      // On the very first cycle, notifications are opt-in via config.
      const shouldNotify = firstRun ? config.notifyOnStartup && changes.length > 0 : changes.length > 0;

      if (!previousState) {
        logger.info('No previous state found, saving initial state');
        await saveState(config.stateFile, snapshot.items);
      } else if (changes.length > 0) {
        logger.info(`Detected changes: ${changes.length}`);
        await saveState(config.stateFile, snapshot.items);
      } else {
        logger.info('No changes detected');
      }

      const dbResult = await syncSignalsSnapshotToDb(snapshot.items, {
        currentUrl: snapshot.currentUrl,
        title: snapshot.title,
        source: snapshot.source,
        stats: snapshot.stats,
        changesCount: changes.length
      });
      logger.info('Signals snapshot queued', dbResult);

      if (shouldNotify) {
        await sendWebhook(config.webhookUrl, config.webhookSecret, {
          event: 'alpinbet_changes_detected',
          timestamp: nowIso(),
          currentUrl: snapshot.currentUrl,
          title: snapshot.title,
          source: snapshot.source,
          bots: snapshot.stats?.bots,
          count: changes.length,
          changes
        });
      }

      firstRun = false;
      await writeHeartbeat('ok', { changesCount: changes.length });
    } catch (error) {
      logger.error(`Parser loop failed: ${error.message}`);
      await logParserError(error, { phase: 'loop' }).catch((logError) => {
        logger.warn(`Failed to persist parser error: ${logError.message}`);
      });
      // Error reporting is best-effort. An unguarded rejection here would
      // escape the while-loop and terminate the whole parser, e.g. when the
      // heartbeat directory is missing or the disk is full.
      await writeHeartbeat('error', { message: error.message }).catch((heartbeatError) => {
        logger.warn(`Failed to write error heartbeat: ${heartbeatError.message}`);
      });

      await sendWebhook(config.webhookUrl, config.webhookSecret, {
        event: 'alpinbet_parser_error',
        timestamp: nowIso(),
        message: error.message,
        stack: error.stack
      }).catch((notifyError) => {
        logger.warn(`Failed to send error webhook: ${notifyError.message}`);
      });
    }

    logger.info(`Sleeping for ${config.pollIntervalMs} ms`);
    await sleep(config.pollIntervalMs);
  }
}
|
||||
|
||||
// Entry point. A rejection here means startup failed (the polling loop itself
// never rejects), so report the failure, release resources and exit non-zero.
run().catch(async (error) => {
  logger.error(`Fatal startup error: ${error.message}`);

  // `process.exit()` abandons pending async work, so the reporting and cleanup
  // below must be awaited. The timer bounds the wait in case a dependency
  // never settles.
  const SHUTDOWN_TIMEOUT_MS = 5000;
  const forcedExit = setTimeout(() => process.exit(1), SHUTDOWN_TIMEOUT_MS);

  // Each task is wrapped so that a synchronous throw is captured too.
  await Promise.allSettled([
    (async () => logParserError(error, { phase: 'startup' }))(),
    (async () =>
      fs.writeFile(
        config.heartbeatFile,
        JSON.stringify({ status: 'fatal', timestamp: nowIso(), message: error.message }, null, 2),
        'utf8'
      ))(),
    (async () => runtimeBrowser?.close())()
  ]);

  // Close storage only after the error has had a chance to be persisted.
  await Promise.allSettled([(async () => closeStorage())()]);

  clearTimeout(forcedExit);
  process.exit(1);
});
|
||||
|
||||
// Graceful shutdown on Ctrl+C: close the browser (when one was launched),
// then the storage connections, ignoring failures, and exit with code 0.
process.on('SIGINT', async () => {
  const ignoreFailure = () => undefined;

  if (runtimeBrowser != null) {
    await runtimeBrowser.close().catch(ignoreFailure);
  }
  await closeStorage().catch(ignoreFailure);

  process.exit(0);
});
|
||||
|
||||
// Graceful shutdown on SIGTERM: close the browser (when one was launched),
// then the storage connections, ignoring failures, and exit with code 0.
process.on('SIGTERM', async () => {
  const ignoreFailure = () => undefined;

  if (runtimeBrowser != null) {
    await runtimeBrowser.close().catch(ignoreFailure);
  }
  await closeStorage().catch(ignoreFailure);

  process.exit(0);
});
|
||||
19
parser/src/logger.js
Normal file
19
parser/src/logger.js
Normal file
@@ -0,0 +1,19 @@
|
||||
// Numeric severity per level; higher means more severe.
const levels = { debug: 10, info: 20, warn: 30, error: 40 };

// Minimum severity that gets printed. Taken from LOG_LEVEL (case-insensitive);
// unknown or missing values fall back to "info".
const requestedLevel = (process.env.LOG_LEVEL || 'info').toLowerCase();
const currentLevel = levels[requestedLevel] || levels.info;

/**
 * Prints one log line to stdout when `level` passes the configured threshold.
 *
 * @param {string} level - Level name; unknown names are treated as severity 100.
 * @param {string} message - Text placed after the timestamp and level tag.
 * @param {*} [meta] - Optional extra value passed to `console.log` as a second
 *   argument; omitted entirely when undefined.
 * @returns {void}
 */
function log(level, message, meta) {
  const severity = levels[level] || 100;
  if (severity < currentLevel) return;

  const line = `[${new Date().toISOString()}] [${level.toUpperCase()}] ${message}`;
  const args = meta !== undefined ? [line, meta] : [line];
  console.log(...args);
}
|
||||
|
||||
module.exports = {
|
||||
debug: (msg, meta) => log('debug', msg, meta),
|
||||
info: (msg, meta) => log('info', msg, meta),
|
||||
warn: (msg, meta) => log('warn', msg, meta),
|
||||
error: (msg, meta) => log('error', msg, meta)
|
||||
};
|
||||
63
parser/src/notifier.js
Normal file
63
parser/src/notifier.js
Normal file
@@ -0,0 +1,63 @@
|
||||
const axios = require('axios');
|
||||
const logger = require('./logger');
|
||||
|
||||
/**
 * Posts a JSON payload to the notification webhook.
 *
 * Non-2xx responses do not throw; they are logged as warnings and returned.
 * Transport-level failures from axios still propagate to the caller.
 *
 * @param {string} url - Webhook URL; when falsy the call is skipped.
 * @param {string} secret - Optional secret sent as `X-Webhook-Secret`.
 * @param {object} payload - Body to send.
 * @returns {Promise<{skipped: true}|{status: number, data: *}>} Skip marker or
 *   the response status and body.
 */
async function sendWebhook(url, secret, payload) {
  if (!url) {
    logger.info('WEBHOOK_URL not set, skipping notification');
    return { skipped: true };
  }

  const headers = secret
    ? { 'Content-Type': 'application/json', 'X-Webhook-Secret': secret }
    : { 'Content-Type': 'application/json' };

  const { status, data } = await axios.post(url, payload, {
    headers,
    timeout: 10000,
    // Accept every status so non-2xx answers are reported instead of thrown.
    validateStatus: () => true
  });

  const delivered = status >= 200 && status < 300;
  if (delivered) {
    logger.info(`Webhook sent successfully: ${status}`);
  } else {
    logger.warn(`Webhook responded with non-2xx status: ${status}`, data);
  }

  return { status, data };
}
|
||||
|
||||
/**
 * Posts a JSON payload to the internal backend endpoint.
 *
 * The `X-Parser-Secret` header is always sent (empty string when no secret is
 * given). Non-2xx responses do not throw; they are logged as warnings and
 * returned. Transport-level failures from axios still propagate.
 *
 * @param {string} url - Backend URL; when falsy the call is skipped.
 * @param {string} secret - Shared secret for the backend.
 * @param {object} payload - Body to send.
 * @returns {Promise<{skipped: true}|{status: number, data: *}>} Skip marker or
 *   the response status and body.
 */
async function notifyBackend(url, secret, payload) {
  if (!url) {
    logger.info('BACKEND_INTERNAL_URL not set, skipping backend notification');
    return { skipped: true };
  }

  const requestOptions = {
    headers: {
      'Content-Type': 'application/json',
      'X-Parser-Secret': secret || ''
    },
    timeout: 10000,
    // Accept every status so non-2xx answers are reported instead of thrown.
    validateStatus: () => true
  };
  const { status, data } = await axios.post(url, payload, requestOptions);

  const accepted = status >= 200 && status < 300;
  if (accepted) {
    logger.info(`Backend notification sent successfully: ${status}`);
  } else {
    logger.warn(`Backend notification responded with non-2xx status: ${status}`, data);
  }

  return { status, data };
}
|
||||
|
||||
module.exports = {
|
||||
sendWebhook,
|
||||
notifyBackend
|
||||
};
|
||||
280
parser/src/parser.js
Normal file
280
parser/src/parser.js
Normal file
@@ -0,0 +1,280 @@
|
||||
const cheerio = require('cheerio');
|
||||
const { hashObject } = require('./utils');
|
||||
|
||||
/**
 * Reads the trimmed text of the first element matching `selector`.
 *
 * @param {object} $root - Cheerio selection to search within.
 * @param {string} selector - CSS selector.
 * @returns {string|null} Trimmed text, or null when it is empty.
 */
function text($root, selector) {
  const content = $root.find(selector).first().text().trim();
  return content === '' ? null : content;
}
|
||||
|
||||
/**
 * Returns the text of the first selector that yields non-empty text.
 *
 * @param {object} $root - Cheerio selection to search within.
 * @param {string[]} selectors - Selectors tried in order.
 * @returns {string|null} First non-empty text, or null when none match.
 */
function firstText($root, selectors) {
  let found = null;

  for (let index = 0; index < selectors.length && !found; index += 1) {
    found = text($root, selectors[index]);
  }

  return found || null;
}
|
||||
|
||||
/**
 * Collects the whitespace-normalized text of every element matching `selector`.
 *
 * Each match is read through the selection it came from (`.eq(index)`) rather
 * than through `cheerio.load(element)`: loading an existing node re-parents it
 * into a new document, which detaches it from the row being parsed and
 * corrupts later reads of that row.
 *
 * @param {object} $root - Cheerio selection to search within.
 * @param {string} selector - CSS selector.
 * @returns {string[]} Non-empty texts in document order.
 */
function texts($root, selector) {
  const $matches = $root.find(selector);

  return $matches
    .toArray()
    .map((_, index) => $matches.eq(index).text().replace(/\s+/g, ' ').trim())
    .filter(Boolean);
}
|
||||
|
||||
/**
 * Collects the cleaned text of the direct children of the first element
 * matching `selector`.
 *
 * Children are read through their own selection (`.eq(index)`) instead of
 * `cheerio.load(element)`, because loading an existing node re-parents it into
 * a new document and silently removes it from the row being parsed.
 *
 * @param {object} $root - Cheerio selection to search within.
 * @param {string} selector - Selector of the parent element.
 * @param {string} childSelector - Filter applied to the parent's children.
 * @returns {string[]} Non-empty cleaned texts; empty when the parent is missing.
 */
function childTexts($root, selector, childSelector) {
  const $node = $root.find(selector).first();
  if (!$node.length) return [];

  const $children = $node.children(childSelector);

  return $children
    .toArray()
    .map((_, index) => cleanText($children.eq(index).text()))
    .filter(Boolean);
}
|
||||
|
||||
/**
 * Checks whether a table row looks like a forecast row, i.e. contains at
 * least one of the known forecast cell markers.
 *
 * @param {object} $row - Cheerio selection for the row.
 * @returns {boolean} True when any marker element is present.
 */
function isForecastRow($row) {
  const markerSelectors = [
    '.cell-team-title a',
    '.rating-mailings__favorite',
    '.cell-coefficient__total',
    '.cell-match',
    '.cell-team-command'
  ];

  return markerSelectors.some((selector) => Boolean($row.find(selector).length));
}
|
||||
|
||||
/**
 * Keeps only the rows that look like forecast rows.
 *
 * Rows are inspected in place via `rows.eq(index)`. Wrapping each element with
 * `cheerio.load(element)` would move it into a new document, detaching it from
 * its tab container so that later ancestor lookups can no longer find it.
 *
 * @param {object} rows - Cheerio selection of candidate rows.
 * @returns {object} Cheerio selection containing only forecast rows.
 */
function filterForecastRows(rows) {
  return rows.filter((index) => isForecastRow(rows.eq(index)));
}
|
||||
|
||||
/**
 * Finds the forecast rows to parse, trying progressively broader selectors.
 *
 * Lookup order, stopping at the first non-empty result:
 *  1. rows of the requested tab (1 = active, 2 = archive), when a tab is given;
 *  2. rows of both tab containers combined;
 *  3. any `.rTableBody .table-link` rows;
 *  4. rows matching the configured `selectors.item`.
 *
 * @param {object} $scope - Cheerio selection to search within.
 * @param {{item: string}} selectors - Configured selectors.
 * @param {number|null} [activeTab=null] - Requested tab, if any.
 * @returns {object} Cheerio selection of forecast rows (possibly empty).
 */
function resolveRows($scope, selectors, activeTab = null) {
  const activeTabRows = '#tab-forecast-active .rTableBody .table-link';
  const archiveTabRows = '#tab-forecast-zip .rTableBody .table-link';

  let requestedTabRows = null;
  if (activeTab === 1) {
    requestedTabRows = activeTabRows;
  } else if (activeTab === 2) {
    requestedTabRows = archiveTabRows;
  }

  if (requestedTabRows !== null) {
    const tabRows = filterForecastRows($scope.find(requestedTabRows));
    if (tabRows.length > 0) {
      return tabRows;
    }
  }

  const bothTabs = $scope.find(activeTabRows).add($scope.find(archiveTabRows));
  const bothTabsRows = filterForecastRows(bothTabs);
  if (bothTabsRows.length > 0) {
    return bothTabsRows;
  }

  const anyTableRows = filterForecastRows($scope.find('.rTableBody .table-link'));
  if (anyTableRows.length > 0) {
    return anyTableRows;
  }

  return filterForecastRows($scope.find(selectors.item));
}
|
||||
|
||||
/**
 * Extracts an identifier from the first element matching `selector`.
 *
 * Tries `data-id`, then `id`, then the trimmed text content, returning the
 * first non-empty value.
 *
 * @param {object} $root - Cheerio selection to search within.
 * @param {string} selector - CSS selector.
 * @returns {string|null} Identifier, or null when nothing usable is found.
 */
function attrOrText($root, selector) {
  const $node = $root.find(selector).first();
  if (!$node.length) return null;

  // Evaluated lazily, in priority order.
  const readers = [
    () => $node.attr('data-id'),
    () => $node.attr('id'),
    () => $node.text().trim()
  ];

  for (const read of readers) {
    const value = read();
    if (value) return value;
  }

  return null;
}
|
||||
|
||||
/**
 * Collapses whitespace runs to single spaces and trims the result.
 *
 * @param {*} value - Value to clean; stringified unless null/undefined.
 * @returns {string|null} Cleaned text, or null for null/undefined/blank input.
 */
function cleanText(value) {
  if (value === null || value === undefined) return null;

  const collapsed = String(value).split(/\s+/).filter(Boolean).join(' ');
  return collapsed.length > 0 ? collapsed : null;
}
|
||||
|
||||
/**
 * Reads an attribute from the first element matching `selector`.
 *
 * @param {object} $root - Cheerio selection to search within.
 * @param {string} selector - CSS selector.
 * @param {string} name - Attribute name.
 * @returns {string|null} Attribute value, or null when the element is missing
 *   or the attribute is absent/empty.
 */
function attr($root, selector, name) {
  const $node = $root.find(selector).first();
  const value = $node.length ? $node.attr(name) : null;
  return value || null;
}
|
||||
|
||||
/**
 * Collects a cleaned attribute value from every element matching `selector`.
 *
 * @param {object} $root - Cheerio selection to search within.
 * @param {string} selector - CSS selector.
 * @param {string} name - Attribute name.
 * @returns {string[]} Non-empty cleaned values in document order.
 */
function attrs($root, selector, name) {
  const values = [];

  for (const element of $root.find(selector).toArray()) {
    const value = cleanText(element.attribs?.[name]);
    if (value) {
      values.push(value);
    }
  }

  return values;
}
|
||||
|
||||
/**
 * Walks from a row up through its ancestors to find the forecast tab
 * container it lives in.
 *
 * A container is recognized by `id` or by class name: `tab-forecast-active`
 * (tab 1, active forecasts) or `tab-forecast-zip` (tab 2, inactive forecasts).
 *
 * @param {object} $row - Cheerio selection for the row.
 * @returns {{container: string|null, activeTab: number|null, forecastInactive: boolean|null}}
 *   Container info, with every field null when no container is found.
 */
function resolveForecastContainer($row) {
  // Checked in this order for every node on the way up.
  const knownContainers = [
    { name: 'tab-forecast-active', activeTab: 1, forecastInactive: false },
    { name: 'tab-forecast-zip', activeTab: 2, forecastInactive: true }
  ];

  for (let $node = $row; $node && $node.length; $node = $node.parent()) {
    const nodeId = $node.attr('id') || '';
    const nodeClasses = ($node.attr('class') || '').split(/\s+/).filter(Boolean);

    const match = knownContainers.find(
      ({ name }) => nodeId === name || nodeClasses.includes(name)
    );
    if (match) {
      return {
        container: match.name,
        activeTab: match.activeTab,
        forecastInactive: match.forecastInactive
      };
    }
  }

  return { container: null, activeTab: null, forecastInactive: null };
}
|
||||
|
||||
/**
 * Extracts every known field of a forecast from one table row.
 *
 * Site-specific selectors are tried first; the configured `selectors` act as
 * fallbacks for id, title, status, coefficient and date. The key order of the
 * returned object is kept stable because downstream code serializes items.
 *
 * @param {object} $row - Cheerio selection for the row.
 * @param {{itemId: string, title: string, status: string, coef: string, date: string}} selectors
 *   Configured fallback selectors.
 * @returns {object} Raw (not yet normalized) item.
 */
function extractStructuredItem($row, selectors) {
  // Multi-element reads go first, in the same order as before.
  const teamNames = texts($row, '.cell-team-command');
  const rateParts = childTexts($row, '.cell-rate', 'span');
  const timeEvent = cleanText($row.find('.time-event').first().text());

  // Title text without the help tooltip, taken from a clone so the row itself
  // stays untouched.
  const $titleCopy = $row.find('.cell-team-title').clone();
  $titleCopy.find('.info-help').remove();
  const titleText = cleanText($titleCopy.text());

  const score = cleanText(text($row, '.cell-team-score'));
  const publicationType = cleanText(
    text($row, '.cell-type .type-live, .cell-type .type-prematch, .cell-type [class*="type-"]')
  );
  const publicationAge = cleanText(text($row, '.cell-type .time-since'));
  const publicationTimer = cleanText(
    attr($row, '.cell-type .js-published-live-date, .cell-type .time-since', 'data-timer')
  );
  const buyButtonText = cleanText($row.find('.btn_buy, .popupTarget').first().text());
  const coefficientText = cleanText(text($row, '.cell-coefficient'));
  const parentContext = resolveForecastContainer($row);

  // Tooltip shown on forecasts whose event is no longer offered by the bookmaker.
  const tooltips = attrs($row, '[data-tippy-content]', 'data-tippy-content');
  const inactiveHint = tooltips.find(
    (value) => value.includes('Прогноз неактивен') && value.includes('у букмекера больше нет события')
  );
  const inactiveByParent = parentContext.forecastInactive === true;

  const id = attr($row, '.rating-mailings__favorite', 'data-model-id') || attrOrText($row, selectors.itemId);
  const title = teamNames.length > 0 ? teamNames.join(' vs ') : titleText || text($row, selectors.title);
  const status = timeEvent || text($row, selectors.status);
  const coef = cleanText(
    firstText($row, [
      '.cell-coefficient__total',
      '.completed_rate_desc .rate',
      '.cell-prognos .rate',
      selectors.coef
    ])
  );
  const date = cleanText(text($row, '.time-event:not(.time-event_live)')) || text($row, selectors.date);
  const sport = attr($row, '.cell-icon.js-sport-tooltip', 'data-tippy-content');
  const tournament = cleanText(text($row, '.cell-team-tnm'));
  const eventUrl = attr($row, '.cell-team-title a', 'href');
  const selectionText = cleanText(firstText($row, ['.rate-description', '.cell-prognos .rate-description']));
  const images = attrs($row, '.cell-prognos img[data-src]', 'data-src');
  const rawText = cleanText($row.text());

  return {
    id,
    title,
    status,
    coef,
    date,
    sport,
    eventTime: timeEvent,
    score,
    homeTeam: teamNames[0] || null,
    awayTeam: teamNames[1] || null,
    tournament,
    eventUrl,
    publicationType,
    publicationAge,
    publicationTimer,
    buyButtonText,
    coefficientText,
    forecastInactive: inactiveByParent || Boolean(inactiveHint),
    activeTab: parentContext.activeTab,
    forecastContainer: parentContext.container,
    stake: rateParts[0] || null,
    stakePercent: rateParts[1] || null,
    selectionText,
    images,
    rawText
  };
}
|
||||
|
||||
/**
 * Decides whether a row is a hidden (paywalled) placeholder to drop.
 *
 * A row is skipped only when it carries a "hidden" marker (title contains
 * "скрыто", coefficient text contains "скрыт", or the buy button contains
 * "купить") AND it has no real content: no team pair, no event URL, no
 * selection text and no odds.
 *
 * @param {object} item - Raw item from `extractStructuredItem`.
 * @returns {boolean} True when the item should be skipped.
 */
function shouldSkipStructuredItem(item) {
  const lower = (value) => String(value || '').toLowerCase();

  const looksHidden =
    lower(item.title).includes('скрыто') ||
    lower(item.coefficientText).includes('скрыт') ||
    lower(item.buyButtonText).includes('купить');
  if (!looksHidden) {
    return false;
  }

  const hasRealContent =
    Boolean(item.homeTeam && item.awayTeam) ||
    Boolean(item.eventUrl) ||
    Boolean(item.selectionText) ||
    Boolean(item.coef);

  return !hasRealContent;
}
|
||||
|
||||
/**
 * Produces a normalized copy of an item and assigns its stable `__key`.
 *
 * String fields (and string entries inside array fields) get their whitespace
 * collapsed and trimmed; all other values are copied as is. The key is a hash
 * of the non-empty id/title/status/coef/date/score joined with `|`, or of the
 * whole cleaned item when all of those are empty.
 *
 * @param {object} item - Raw item.
 * @returns {object} Normalized item with a `__key` property.
 */
function normalizeItem(item) {
  const squash = (value) => value.replace(/\s+/g, ' ').trim();
  const cleaned = {};

  Object.entries(item).forEach(([field, value]) => {
    if (typeof value === 'string') {
      cleaned[field] = squash(value);
    } else if (Array.isArray(value)) {
      cleaned[field] = value.map((entry) => (typeof entry === 'string' ? squash(entry) : entry));
    } else {
      cleaned[field] = value;
    }
  });

  const identityParts = [
    cleaned.id,
    cleaned.title,
    cleaned.status,
    cleaned.coef,
    cleaned.date,
    cleaned.score
  ].filter(Boolean);
  const keyBase = identityParts.join('|');

  cleaned.__key = hashObject(keyBase || cleaned);
  return cleaned;
}
|
||||
|
||||
/**
 * Parses forecast items out of a page's HTML.
 *
 * Rows are looked up inside `#pjax-forecast-list` when that element exists,
 * otherwise in the whole document. Hidden placeholders and rows without any
 * meaningful content are dropped. The result is sorted by `__key`.
 *
 * @param {string} html - Page HTML.
 * @param {object} selectors - Configured selectors.
 * @param {number|null} [activeTab=null] - Requested tab, if any.
 * @returns {object[]} Normalized items sorted by key.
 */
function parseItemsFromHtml(html, selectors, activeTab = null) {
  const $ = cheerio.load(html);
  const $list = $('#pjax-forecast-list');
  const $scope = $list.length ? $list : $.root();
  const rowElements = resolveRows($scope, selectors, activeTab).toArray();

  const items = [];
  for (const element of rowElements) {
    const candidate = extractStructuredItem($(element), selectors);
    if (shouldSkipStructuredItem(candidate)) {
      continue;
    }

    const hasMeaningfulContent = [
      candidate.homeTeam,
      candidate.awayTeam,
      candidate.title,
      candidate.coef,
      candidate.eventUrl,
      candidate.stake,
      candidate.publicationType
    ].some(Boolean);
    if (!hasMeaningfulContent) {
      continue;
    }

    items.push(normalizeItem(candidate));
  }

  return items.sort((left, right) => left.__key.localeCompare(right.__key));
}
|
||||
|
||||
/**
 * Counts the forecast rows present in a page, before item-level filtering.
 *
 * Uses the same scope and row lookup as `parseItemsFromHtml`.
 *
 * @param {string} html - Page HTML.
 * @param {object} selectors - Configured selectors.
 * @param {number|null} [activeTab=null] - Requested tab, if any.
 * @returns {number} Number of forecast rows detected.
 */
function countForecastRows(html, selectors, activeTab = null) {
  const $ = cheerio.load(html);
  const $list = $('#pjax-forecast-list');
  const $scope = $list.length ? $list : $.root();

  return resolveRows($scope, selectors, activeTab).length;
}
|
||||
|
||||
module.exports = {
|
||||
parseItemsFromHtml,
|
||||
countForecastRows
|
||||
};
|
||||
97
parser/src/session.js
Normal file
97
parser/src/session.js
Normal file
@@ -0,0 +1,97 @@
|
||||
const fs = require('fs');
|
||||
const { chromium } = require('playwright');
|
||||
const logger = require('./logger');
|
||||
|
||||
/**
 * Launches a Chromium instance with the parser's fixed launch flags.
 *
 * @param {object} params
 * @param {boolean} params.headless - Whether to run without a visible window.
 * @returns {Promise<object>} Playwright browser instance.
 */
async function launchBrowser({ headless }) {
  const launchArgs = [
    '--disable-blink-features=AutomationControlled',
    '--no-sandbox',
    '--disable-dev-shm-usage'
  ];

  return chromium.launch({ headless, args: launchArgs });
}
|
||||
|
||||
/**
 * Creates a browser context, restoring the saved session when its file exists.
 *
 * @param {object} params
 * @param {object} params.browser - Playwright browser instance.
 * @param {string} params.sessionFile - Path to the stored session state.
 * @returns {Promise<object>} Playwright browser context with a 1440x900 viewport.
 */
async function createContext({ browser, sessionFile }) {
  const hasState = fs.existsSync(sessionFile);
  logger.info(`Using session file: ${sessionFile} (${hasState ? 'found' : 'missing'})`);

  const contextOptions = {
    ...(hasState ? { storageState: sessionFile } : {}),
    viewport: { width: 1440, height: 900 }
  };

  return browser.newContext(contextOptions);
}
|
||||
|
||||
/**
 * Reports whether the stored session file is present on disk.
 *
 * @param {string} sessionFile - Path to the session state file.
 * @returns {boolean} True when the path exists.
 */
function hasSessionFile(sessionFile) {
  const isPresent = fs.existsSync(sessionFile);
  return isPresent;
}
|
||||
|
||||
/**
 * Persists the context's storage state to the session file.
 *
 * @param {object} context - Playwright browser context.
 * @param {string} sessionFile - Destination path.
 * @returns {Promise<void>}
 */
async function saveSessionState(context, sessionFile) {
  const target = { path: sessionFile };
  await context.storageState(target);
  logger.info(`Session saved to ${sessionFile}`);
}
|
||||
|
||||
/**
 * Logs in through the site's login form using the configured credentials.
 *
 * Steps: open the login page, wait for and fill the username and password
 * inputs, click submit while waiting for the network to go idle, then (when a
 * success selector is configured) wait for it to become visible.
 *
 * @param {object} page - Playwright page to drive.
 * @param {object} config - Parser configuration (auth* fields and requestTimeoutMs).
 * @returns {Promise<void>}
 * @throws {Error} When authentication is disabled or credentials are missing;
 *   Playwright errors (e.g. timeouts) also propagate.
 */
async function performBackgroundLogin(page, config) {
  if (!config.authEnabled) {
    throw new Error('Authentication is required, but AUTH_ENABLED=false');
  }

  const hasCredentials = config.authUsername && config.authPassword;
  if (!hasCredentials) {
    throw new Error('Authentication is enabled, but AUTH_USERNAME or AUTH_PASSWORD is empty');
  }

  const waitUntilVisible = (locator) =>
    locator.waitFor({ state: 'visible', timeout: config.requestTimeoutMs });

  logger.info(`Opening login page: ${config.authLoginUrl}`);
  await page.goto(config.authLoginUrl, { waitUntil: 'domcontentloaded' });

  const usernameInput = page.locator(config.authUsernameSelector).first();
  const passwordInput = page.locator(config.authPasswordSelector).first();

  await waitUntilVisible(usernameInput);
  await waitUntilVisible(passwordInput);

  await usernameInput.fill(config.authUsername);
  await passwordInput.fill(config.authPassword);

  const submitButton = page.locator(config.authSubmitSelector).first();
  await waitUntilVisible(submitButton);

  // The network-idle wait is best-effort; a failure there must not fail login.
  const networkIdle = page.waitForLoadState('networkidle').catch(() => null);
  await Promise.all([networkIdle, submitButton.click()]);

  if (config.authSuccessSelector) {
    await waitUntilVisible(page.locator(config.authSuccessSelector).first());
  }

  logger.info('Background authentication completed');
}
|
||||
|
||||
/**
 * Authenticates a browser context and stores the resulting session.
 *
 * A temporary page is opened for the login and always closed afterwards,
 * whether or not the login succeeds. Errors from closing it are ignored.
 *
 * @param {object} params
 * @param {object} params.context - Playwright browser context.
 * @param {string} params.sessionFile - Where to save the session state.
 * @param {object} params.config - Parser configuration.
 * @returns {Promise<void>}
 */
async function authenticateContext({ context, sessionFile, config }) {
  const loginPage = await context.newPage();
  const closeLoginPage = () => loginPage.close().catch(() => undefined);

  try {
    await performBackgroundLogin(loginPage, config);
    await saveSessionState(context, sessionFile);
  } finally {
    await closeLoginPage();
  }
}
|
||||
|
||||
module.exports = {
|
||||
launchBrowser,
|
||||
createContext,
|
||||
hasSessionFile,
|
||||
authenticateContext,
|
||||
performBackgroundLogin
|
||||
};
|
||||
67
parser/src/state.js
Normal file
67
parser/src/state.js
Normal file
@@ -0,0 +1,67 @@
|
||||
const fs = require('fs/promises');
|
||||
const { hashObject, nowIso } = require('./utils');
|
||||
|
||||
/**
 * Loads the previously saved state from disk.
 *
 * A missing file, a blank file and a file with invalid JSON are all treated
 * as "no previous state".
 *
 * @param {string} filePath - Path to the state file.
 * @returns {Promise<object|null>} Parsed state, or null when unavailable.
 * @throws {Error} Any other read error (for example, permission problems).
 */
async function loadState(filePath) {
  try {
    const raw = await fs.readFile(filePath, 'utf8');
    return raw.trim() ? JSON.parse(raw) : null;
  } catch (error) {
    const isRecoverable = error.code === 'ENOENT' || error instanceof SyntaxError;
    if (!isRecoverable) {
      throw error;
    }
    return null;
  }
}
|
||||
|
||||
/**
 * Writes the current items to the state file as pretty JSON, together with a
 * save timestamp and a hash of the items.
 *
 * @param {string} filePath - Path to the state file.
 * @param {object[]} items - Items to persist.
 * @returns {Promise<{savedAt: string, hash: string, items: object[]}>} The
 *   payload that was written.
 */
async function saveState(filePath, items) {
  const savedAt = nowIso();
  const hash = hashObject(items);
  const payload = { savedAt, hash, items };

  const serialized = JSON.stringify(payload, null, 2);
  await fs.writeFile(filePath, serialized, 'utf8');

  return payload;
}
|
||||
|
||||
/**
 * Computes the changes between two item lists, matching items by `__key`.
 *
 * Items are compared through their JSON serialization. The result lists
 * additions and updates in the order of `currentItems`, followed by removals
 * in the order of `previousItems`.
 *
 * @param {object[]} [previousItems=[]] - Items from the saved state.
 * @param {object[]} [currentItems=[]] - Items from the fresh snapshot.
 * @returns {Array<{type: 'added'|'removed', item: object}|{type: 'updated', before: object, after: object}>}
 *   Detected changes.
 */
function diffItems(previousItems = [], currentItems = []) {
  const indexByKey = (list) => new Map(list.map((entry) => [entry.__key, entry]));
  const previousByKey = indexByKey(previousItems);
  const currentByKey = indexByKey(currentItems);

  const changes = [];

  currentByKey.forEach((current, key) => {
    if (!previousByKey.has(key)) {
      changes.push({ type: 'added', item: current });
      return;
    }

    const previous = previousByKey.get(key);
    const isSame = JSON.stringify(previous) === JSON.stringify(current);
    if (!isSame) {
      changes.push({ type: 'updated', before: previous, after: current });
    }
  });

  previousByKey.forEach((previous, key) => {
    if (!currentByKey.has(key)) {
      changes.push({ type: 'removed', item: previous });
    }
  });

  return changes;
}
|
||||
|
||||
module.exports = {
|
||||
loadState,
|
||||
saveState,
|
||||
diffItems
|
||||
};
|
||||
329
parser/src/storage.js
Normal file
329
parser/src/storage.js
Normal file
@@ -0,0 +1,329 @@
|
||||
const { Queue } = require('bullmq');
|
||||
const IORedis = require('ioredis');
|
||||
const config = require('./config');
|
||||
const logger = require('./logger');
|
||||
const { hashObject, nowIso } = require('./utils');
|
||||
|
||||
// Single Redis connection shared by this module; it is handed to the queue
// below and closed in closeStorage().
// NOTE(review): `maxRetriesPerRequest: null` is presumably set because BullMQ
// expects it on connections it manages — confirm against the BullMQ docs.
const redis = new IORedis(config.redisUrl, { maxRetriesPerRequest: null });

// Queue named "signals" that receives parser snapshots.
// Default job options: 5 attempts with exponential backoff (base delay 5000),
// and retention limits of 200 completed / 500 failed jobs.
const signalsQueue = new Queue('signals', {
  connection: redis,
  defaultJobOptions: {
    attempts: 5,
    backoff: { type: 'exponential', delay: 5000 },
    removeOnComplete: 200,
    removeOnFail: 500
  }
});
|
||||
|
||||
/**
 * Extracts decimal odds from a raw value.
 *
 * The first comma is treated as a decimal separator and the first numeric
 * token in the text is used. Anything missing, non-numeric, zero or negative
 * falls back to `1`.
 *
 * @param {*} value - Raw odds value (string or number).
 * @returns {number} Positive odds, or `1` as the fallback.
 */
function parseOdds(value) {
  const FALLBACK_ODDS = 1;
  if (value === null || value === undefined) {
    return FALLBACK_ODDS;
  }

  const text = String(value).replace(',', '.');
  const found = text.match(/-?\d+(\.\d+)?/);
  if (!found) {
    return FALLBACK_ODDS;
  }

  const odds = Number(found[0]);
  if (Number.isFinite(odds) && odds > 0) {
    return odds;
  }
  return FALLBACK_ODDS;
}
|
||||
|
||||
/**
 * Extracts a numeric line value from a raw value.
 *
 * The first comma is treated as a decimal separator and the first numeric
 * token (optionally negative) in the text is used.
 *
 * @param {*} value - Raw value (string or number).
 * @returns {number|null} The parsed number, or `null` when none is present.
 */
function parseLineValue(value) {
  if (value === null || value === undefined) {
    return null;
  }

  const text = String(value).replace(',', '.');
  const found = text.match(/-?\d+(\.\d+)?/);
  if (!found) {
    return null;
  }

  const line = Number(found[0]);
  if (!Number.isFinite(line)) {
    return null;
  }
  return line;
}
|
||||
|
||||
/**
 * Derives the event start time from `item.date`, falling back to `item.eventTime`.
 *
 * For each candidate the following interpretations are tried in order:
 *   1. day-first `DD.MM[.YY|YYYY] … HH:MM` (also with `/`), in local time —
 *      only for strings that do not start with a 4-digit year;
 *   2. whatever the native `Date` constructor accepts (ISO strings, timestamps,
 *      `Date` instances, …);
 *   3. the day-first pattern again, for values skipped in step 1;
 *   4. a bare `HH:MM`, interpreted as today in local time.
 *
 * The day-first pattern is deliberately tried before native parsing: native
 * parsing of non-ISO strings is implementation-defined, and engines that accept
 * dotted/slashed dates read them month-first, which would silently swap day
 * and month for values such as "12.05.2024 18:30".
 *
 * @param {{date?: *, eventTime?: *}} item - Parsed item.
 * @returns {Date} The resolved start time, or today's local midnight when
 *   nothing could be parsed.
 */
function parseEventStartTime(item) {
  const now = new Date();
  const DAY_FIRST = /(\d{1,2})[./](\d{1,2})(?:[./](\d{2,4}))?.*?(\d{1,2}):(\d{2})/;
  const TIME_ONLY = /(?:^|\s)(\d{1,2}):(\d{2})(?:\s|$)/;
  const YEAR_FIRST = /^\s*\d{4}\D/;

  // Builds a local-time Date from a day-first match; a two-digit year is
  // expanded to 20YY and a missing year defaults to the current one.
  const fromDayFirst = (text) => {
    const parts = text.match(DAY_FIRST);
    if (!parts) return null;

    const [, day, month, yearText, hours, minutes] = parts;
    const year = yearText
      ? Number(yearText.length === 2 ? `20${yearText}` : yearText)
      : now.getFullYear();
    const parsed = new Date(year, Number(month) - 1, Number(day), Number(hours), Number(minutes));
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  };

  for (const candidate of [item.date, item.eventTime]) {
    if (!candidate) continue;

    const text = String(candidate);

    // Year-first strings (ISO and similar) are left to the native parser so
    // that e.g. "2024/05/12 18:30" is not misread as a day-first date.
    if (typeof candidate === 'string' && !YEAR_FIRST.test(text)) {
      const dayFirst = fromDayFirst(text);
      if (dayFirst) return dayFirst;
    }

    const native = new Date(candidate);
    if (!Number.isNaN(native.getTime())) {
      return native;
    }

    const lateDayFirst = fromDayFirst(text);
    if (lateDayFirst) return lateDayFirst;

    const timeOnly = text.match(TIME_ONLY);
    if (timeOnly) {
      const parsed = new Date(
        now.getFullYear(),
        now.getMonth(),
        now.getDate(),
        Number(timeOnly[1]),
        Number(timeOnly[2]),
        0,
        0
      );
      if (!Number.isNaN(parsed.getTime())) {
        return parsed;
      }
    }
  }

  return new Date(now.getFullYear(), now.getMonth(), now.getDate(), 0, 0, 0, 0);
}
|
||||
|
||||
/**
 * Converts a relative age label ("5 min", "2 часа", "3 дня", "just now", …)
 * into an absolute timestamp by subtracting the age from `reference`.
 *
 * Recognised units (English and Russian): seconds, minutes, hours, days, weeks.
 *
 * @param {*} value - Relative age text.
 * @param {Date} [reference=new Date()] - Moment the age is relative to.
 * @returns {Date|null} The computed timestamp, or `null` when the text is
 *   empty or does not contain a recognisable "<number> <unit>" pair.
 */
function parseRelativeSignalAge(value, reference = new Date()) {
  if (!value) return null;

  const normalized = String(value).trim().toLowerCase();
  if (!normalized) return null;

  if (/^(just now|now|только что|сейчас)$/i.test(normalized)) {
    return new Date(reference.getTime());
  }

  const match = normalized.match(
    /(\d+)\s*(сек(?:унда|унды|унд)?|sec(?:ond)?s?|с|min(?:ute)?s?|мин(?:ута|уты|ут)?|m|час(?:а|ов)?|ч|hour(?:s)?|hr|h|д(?:ень|ня|ней)?|дн(?:я|ей)?|day(?:s)?|нед(?:еля|ели|ель)?|week(?:s)?|w)/i
  );
  if (!match) {
    return null;
  }

  const amount = Number(match[1]);
  if (!Number.isFinite(amount) || amount < 0) {
    return null;
  }

  const SECOND_MS = 1000;
  const MINUTE_MS = 60 * SECOND_MS;
  const HOUR_MS = 60 * MINUTE_MS;
  const DAY_MS = 24 * HOUR_MS;
  const WEEK_MS = 7 * DAY_MS;

  // Unit prefixes are checked in this order. The Cyrillic prefixes must be real
  // UTF-8 text: the minute and day entries were previously mis-encoded, which
  // made Russian minute/day labels fall through to `null`.
  const unitTable = [
    [/^(сек|sec|с)/i, SECOND_MS],
    [/^(min|мин|m)/i, MINUTE_MS],
    [/^(час|ч|hour|hr|h)/i, HOUR_MS],
    [/^(д|дн|day)/i, DAY_MS],
    [/^(нед|week|w)/i, WEEK_MS]
  ];

  const unit = match[2].toLowerCase();
  const entry = unitTable.find(([pattern]) => pattern.test(unit));
  if (!entry) {
    return null;
  }

  return new Date(reference.getTime() - amount * entry[1]);
}
|
||||
|
||||
/**
 * Determines when a signal was published.
 *
 * Sources are consulted in this order:
 *   1. `item.publicationTimer` — age in seconds, subtracted from the current time;
 *   2. `item.signalTime`, `item.publishedAt`, `item.publicationTime` — absolute
 *      values accepted by the `Date` constructor (or `Date` instances);
 *   3. `item.publicationAge` — relative text handled by `parseRelativeSignalAge`.
 * If none yields a valid date, the current time is returned.
 *
 * @param {object} item - Parsed item.
 * @returns {Date} Publication time.
 */
function parseSignalTime(item) {
  // Only real numbers and non-blank strings count as a timer value. Without
  // this guard `Number(null)` / `Number('')` evaluate to 0, which would make a
  // missing timer look like "published right now" and hide the fields below.
  const rawTimer = item.publicationTimer;
  const hasTimer =
    typeof rawTimer === 'number' ||
    (typeof rawTimer === 'string' && rawTimer.trim() !== '');
  const publicationTimer = hasTimer ? Number(rawTimer) : NaN;
  if (Number.isFinite(publicationTimer) && publicationTimer >= 0) {
    return new Date(Date.now() - publicationTimer * 1000);
  }

  const explicitCandidates = [
    item.signalTime,
    item.publishedAt,
    item.publicationTime
  ];
  for (const candidate of explicitCandidates) {
    if (!candidate) continue;

    const parsed = candidate instanceof Date ? candidate : new Date(candidate);
    if (!Number.isNaN(parsed.getTime())) {
      return parsed;
    }
  }

  const relativeCandidates = [item.publicationAge];
  for (const candidate of relativeCandidates) {
    const parsed = parseRelativeSignalAge(candidate);
    if (parsed && !Number.isNaN(parsed.getTime())) {
      return parsed;
    }
  }

  return new Date();
}
|
||||
|
||||
/**
 * Builds an event identifier for a parsed item.
 *
 * Preference order: `item.id`, then `item.eventUrl`, then the first 24
 * characters of a hash over the teams, tournament and title. When
 * `item.botKey` is set, the result is prefixed with `"<botKey>:"`.
 *
 * @param {object} item - Parsed item.
 * @returns {string} Event identifier.
 */
function createEventId(item) {
  const prefix = item.botKey ? `${item.botKey}:` : '';

  if (item.id) {
    return `${prefix}${item.id}`;
  }
  if (item.eventUrl) {
    return `${prefix}${item.eventUrl}`;
  }

  const fingerprint = hashObject({
    homeTeam: item.homeTeam,
    awayTeam: item.awayTeam,
    tournament: item.tournament,
    title: item.title
  });
  return `${prefix}${fingerprint.slice(0, 24)}`;
}
|
||||
|
||||
/**
 * Picks the selection label for an item: the first truthy value among
 * `selectionText`, `title` and `stake`, or `'unknown_selection'`.
 *
 * @param {object} item - Parsed item.
 * @returns {*} Selection label.
 */
function createSelection(item) {
  const candidates = [item.selectionText, item.title, item.stake];
  const chosen = candidates.find(Boolean);
  return chosen || 'unknown_selection';
}
|
||||
|
||||
/**
 * Returns the market type for an item: its `publicationType` when truthy,
 * otherwise `'parser_market'`.
 *
 * @param {object} item - Parsed item.
 * @returns {*} Market type.
 */
function createMarketType(item) {
  const { publicationType } = item;
  if (publicationType) {
    return publicationType;
  }
  return 'parser_market';
}
|
||||
|
||||
/**
 * Builds the colon-separated de-duplication key of a mapped signal from:
 * provider id, bot key (or `'default'`), event id, market type, selection
 * (the last three trimmed and lower-cased) and line value (or `'na'`).
 *
 * @param {object} mapped - Mapped signal without its `dedupeKey`.
 * @returns {string} De-duplication key.
 */
function createDedupeKey(mapped) {
  const normalize = (text) => text.trim().toLowerCase();

  const botKey = mapped.rawPayload?.botKey || 'default';
  const eventId = normalize(mapped.eventId);
  const marketType = normalize(mapped.marketType);
  const selection = normalize(mapped.selection);
  const line = mapped.lineValue ?? 'na';

  const parts = [mapped.providerId, botKey, eventId, marketType, selection, line];
  return parts.join(':');
}
|
||||
|
||||
/**
 * Normalises an "active tab" value to an integer.
 *
 * @param {*} value - Raw tab value.
 * @returns {number|null} The integer tab number, or `null` when the value is
 *   missing, an empty string, or does not convert to an integer.
 */
function parseActiveTab(value) {
  const isMissing = value === null || value === undefined || value === '';
  if (isMissing) {
    return null;
  }

  const tab = Number(value);
  if (!Number.isInteger(tab)) {
    return null;
  }
  return tab;
}
|
||||
|
||||
/**
 * Ranks a signal by the tab recorded in its raw payload: tab 1 ranks highest
 * (2), tab 2 next (1), anything else lowest (0).
 *
 * @param {object} [signal] - Mapped signal; `rawPayload.activeTab` is read if present.
 * @returns {number} Priority rank (higher wins).
 */
function getSignalPriority(signal) {
  switch (parseActiveTab(signal?.rawPayload?.activeTab)) {
    case 1:
      return 2;
    case 2:
      return 1;
    default:
      return 0;
  }
}
|
||||
|
||||
/**
 * Chooses between two candidates that share a de-duplication key.
 *
 * The new candidate replaces the existing one only when its signal has a
 * strictly higher priority; on a tie the existing candidate is kept.
 *
 * @param {{signal: object}|undefined} existing - Candidate selected so far, if any.
 * @param {{signal: object}} candidate - Newly seen candidate.
 * @returns {{signal: object}} The candidate to keep.
 */
function pickBestCandidate(existing, candidate) {
  if (!existing) {
    return candidate;
  }

  const incumbentRank = getSignalPriority(existing.signal);
  const challengerRank = getSignalPriority(candidate.signal);
  return challengerRank > incumbentRank ? candidate : existing;
}
|
||||
|
||||
/**
 * Maps a raw parser item onto the signal structure that is put on the queue.
 *
 * Missing descriptive fields are replaced with placeholder text, times and
 * numbers are derived by the parse helpers of this module, and the original
 * item is kept under `rawPayload`. The de-duplication key is computed from the
 * finished mapping and stored as the last property, `dedupeKey`.
 *
 * @param {object} item - Raw parser item.
 * @returns {object} Mapped signal including `dedupeKey`.
 */
function mapParserItemToSignal(item) {
  const signal = {
    providerId: config.providerId,
    eventId: createEventId(item),
    sportType: item.sport || 'unknown',
    leagueName: item.tournament || 'Unknown league',
    homeTeam: item.homeTeam || item.title || 'Unknown home',
    awayTeam: item.awayTeam || 'Unknown away',
    eventStartTime: parseEventStartTime(item),
    marketType: createMarketType(item),
    selection: createSelection(item),
    forecast: item.forecast || null,
    lineValue: parseLineValue(item.stakePercent),
    odds: parseOdds(item.coef),
    signalTime: parseSignalTime(item),
    status: 'pending',
    sourceType: 'provider',
    comment: item.rawText || null,
    published: true,
    rawPayload: item
  };

  // Added after the literal so the key is derived from the fully built mapping.
  signal.dedupeKey = createDedupeKey(signal);
  return signal;
}
|
||||
|
||||
/**
 * Produces a queue-friendly copy of a mapped signal.
 *
 * `eventStartTime` and `signalTime` are converted to ISO-8601 strings, and
 * `rawPayload` is kept only when it is a non-array object (otherwise `null`).
 *
 * @param {object} signal - Mapped signal whose time fields are `Date` objects.
 * @returns {object} Shallow copy with serialised time fields.
 */
function serializeSignal(signal) {
  const { rawPayload } = signal;
  const isObjectPayload =
    Boolean(rawPayload) && typeof rawPayload === 'object' && !Array.isArray(rawPayload);

  const serialized = { ...signal };
  serialized.eventStartTime = signal.eventStartTime.toISOString();
  serialized.signalTime = signal.signalTime.toISOString();
  serialized.rawPayload = isObjectPayload ? rawPayload : null;
  return serialized;
}
|
||||
|
||||
/**
 * Lists the keys of bots from `meta.stats.bots` that may be synchronised.
 *
 * A bot is included unless it is falsy, has no truthy `key`, or has
 * `syncEligible` set to exactly `false`. Keys are returned as strings.
 *
 * @param {object} [meta={}] - Snapshot metadata.
 * @returns {string[]} Eligible bot keys, in their original order.
 */
function getSyncEligibleBotKeys(meta = {}) {
  const bots = meta?.stats?.bots;
  if (!Array.isArray(bots)) {
    return [];
  }

  const keys = [];
  for (const bot of bots) {
    if (bot && bot.syncEligible !== false && bot.key) {
      keys.push(String(bot.key));
    }
  }
  return keys;
}
|
||||
|
||||
/**
 * Maps parser items to signals, collapses duplicates and enqueues the result
 * as one `signals.snapshot` job.
 *
 * Items that produce the same de-duplication key compete via
 * `pickBestCandidate`; only the winner of each key is sent.
 *
 * @param {Array<object>} items - Raw parser items.
 * @param {object} [meta={}] - Extra metadata; merged over a generated `timestamp`.
 * @returns {Promise<object>} Summary: `queued`, `jobId`, number of `items`,
 *   `syncEligibleBotKeys` and an (always empty) `changedSignalIds` list.
 */
async function enqueueSnapshot(items, meta = {}) {
  const bestByKey = new Map();
  for (const item of items) {
    const signal = mapParserItemToSignal(item);
    const key = signal.dedupeKey;
    const winner = pickBestCandidate(bestByKey.get(key), { item, signal });
    bestByKey.set(key, winner);
  }

  const serializedItems = [...bestByKey.values()].map(({ signal }) => serializeSignal(signal));
  const payload = {
    providerId: config.providerId,
    items: serializedItems,
    // Caller-supplied metadata is spread last, so it can override `timestamp`.
    meta: { timestamp: nowIso(), ...meta },
    syncEligibleBotKeys: getSyncEligibleBotKeys(meta)
  };

  const job = await signalsQueue.add('signals.snapshot', payload);

  return {
    queued: true,
    jobId: job.id,
    items: serializedItems.length,
    syncEligibleBotKeys: payload.syncEligibleBotKeys,
    changedSignalIds: []
  };
}
|
||||
|
||||
/**
 * Sends a full snapshot of parser items to the signals queue.
 *
 * @param {Array<object>} items - Raw parser items.
 * @param {object} [meta={}] - Metadata forwarded unchanged.
 * @returns {Promise<object>} The summary produced by `enqueueSnapshot`.
 */
async function syncSignalsSnapshotToDb(items, meta = {}) {
  const summary = await enqueueSnapshot(items, meta);
  return summary;
}
|
||||
|
||||
/**
 * Enqueues only the items touched by a list of change records.
 *
 * For each change the `after` item is used when truthy, otherwise `item`;
 * changes carrying neither are skipped. Nothing is enqueued when no items remain.
 *
 * @param {Array<object>} changes - Change records (see `diffItems`-style shape).
 * @param {object} [meta={}] - Metadata; extended with `changesCount` and
 *   `mode: 'incremental'`.
 * @returns {Promise<object>} `{queued: false, items: 0, changedSignalIds: []}`
 *   when there is nothing to send, otherwise the `enqueueSnapshot` summary.
 */
async function saveChangesToDb(changes, meta = {}) {
  const touchedItems = [];
  for (const change of changes) {
    const touched = change.after || change.item;
    if (touched) {
      touchedItems.push(touched);
    }
  }

  if (touchedItems.length === 0) {
    return { queued: false, items: 0, changedSignalIds: [] };
  }

  const incrementalMeta = {
    ...meta,
    changesCount: changes.length,
    mode: 'incremental'
  };
  return enqueueSnapshot(touchedItems, incrementalMeta);
}
|
||||
|
||||
/**
 * Writes a parser failure to the application logger at error level.
 *
 * @param {*} error - The failure; non-Error values are stringified for the message.
 * @param {object} [context={}] - Additional details logged alongside the error.
 * @returns {Promise<void>}
 */
async function logParserError(error, context = {}) {
  const details = {
    timestamp: nowIso(),
    context,
    message: error?.message || String(error),
    stack: error?.stack || null
  };

  logger.error('Parser failure queued for external reporting', details);
}
|
||||
|
||||
/**
 * Shuts down the storage layer: closes the signals queue, then the shared
 * Redis connection.
 *
 * The Redis connection is released even when closing the queue fails, so a
 * failed queue shutdown cannot leave an open connection behind. The queue
 * error (if any) still propagates to the caller.
 *
 * @returns {Promise<void>}
 */
async function closeStorage() {
  try {
    await signalsQueue.close();
  } finally {
    await redis.quit();
  }
}
|
||||
|
||||
// Public API of the storage module; the mapping/parsing helpers stay private.
module.exports = { saveChangesToDb, syncSignalsSnapshotToDb, logParserError, closeStorage };
|
||||
45
parser/src/utils.js
Normal file
45
parser/src/utils.js
Normal file
@@ -0,0 +1,45 @@
|
||||
const crypto = require('crypto');
|
||||
const fs = require('fs/promises');
|
||||
const path = require('path');
|
||||
|
||||
/**
 * Serialises a value to a JSON-like string whose object keys are sorted, so
 * that objects differing only in key order produce the same text.
 *
 * Arrays keep their element order; nested arrays and objects are handled
 * recursively; everything else is delegated to `JSON.stringify`.
 *
 * @param {*} value - Value to serialise.
 * @returns {string|undefined} Serialised text (`undefined` for inputs that
 *   `JSON.stringify` itself maps to `undefined`).
 */
function stableStringify(value) {
  if (Array.isArray(value)) {
    const elements = value.map((element) => stableStringify(element));
    return `[${elements.join(',')}]`;
  }

  if (value === null || typeof value !== 'object') {
    return JSON.stringify(value);
  }

  const members = Object.keys(value)
    .sort()
    .map((key) => `${JSON.stringify(key)}:${stableStringify(value[key])}`);
  return `{${members.join(',')}}`;
}
|
||||
|
||||
/**
 * Computes the SHA-256 digest of the input.
 *
 * @param {string|Buffer} input - Data to hash.
 * @returns {string} Digest as a hexadecimal string.
 */
function sha256(input) {
  const hasher = crypto.createHash('sha256');
  hasher.update(input);
  return hasher.digest('hex');
}
|
||||
|
||||
/**
 * Hashes a value by taking the SHA-256 of its key-order-independent
 * serialisation.
 *
 * @param {*} value - Value to hash.
 * @returns {string} Hexadecimal digest.
 */
function hashObject(value) {
  const canonical = stableStringify(value);
  return sha256(canonical);
}
|
||||
|
||||
/**
 * Returns the current time as an ISO-8601 string.
 *
 * @returns {string} Current timestamp, e.g. "2024-01-01T12:00:00.000Z".
 */
function nowIso() {
  const current = new Date();
  return current.toISOString();
}
|
||||
|
||||
/**
 * Makes a value safe for use as part of a file name: every run of characters
 * other than ASCII letters, digits, `_`, `.` and `-` becomes a single `_`.
 *
 * @param {*} value - Value to sanitise (converted to a string first).
 * @returns {string} Sanitised text.
 */
function sanitizeFilePart(value) {
  const unsafeRun = /[^a-zA-Z0-9_.-]+/g;
  const text = String(value);
  return text.replace(unsafeRun, '_');
}
|
||||
|
||||
/**
 * Writes text to `<dir>/<sanitised baseName>.<ext>`, creating the directory
 * (and any missing parents) first.
 *
 * @param {string} dir - Target directory.
 * @param {*} baseName - File name without extension; sanitised before use.
 * @param {string} content - Text to write (UTF-8).
 * @param {string} [ext='txt'] - File extension without the leading dot.
 * @returns {Promise<string>} Path of the written file.
 */
async function writeTextFile(dir, baseName, content, ext = 'txt') {
  await fs.mkdir(dir, { recursive: true });

  const fileName = `${sanitizeFilePart(baseName)}.${ext}`;
  const target = path.join(dir, fileName);
  await fs.writeFile(target, content, 'utf8');

  return target;
}
|
||||
|
||||
// Public API of the utils module (sha256 is intentionally kept private).
module.exports = { stableStringify, hashObject, nowIso, writeTextFile, sanitizeFilePart };
|
||||
Reference in New Issue
Block a user