f78f35fb3f
Парсер лидов МБ РФ: Яндекс.Карты + HH.ru + обогащение DaData/ЕГРЮЛ/Rusprofile + Streamlit CRM. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
330 lines
13 KiB
Python
330 lines
13 KiB
Python
"""DB-слой Streamlit-приложения.
|
||
|
||
Все запросы к leads.db инкапсулированы здесь. UI-код в app.py не делает
|
||
SQL напрямую — только через эти функции.
|
||
|
||
Стандарт: каждая функция сама открывает/закрывает соединение.
|
||
Streamlit перезапускает скрипт на каждое действие — глобальный коннект
|
||
держать не имеет смысла.
|
||
"""
|
||
import json
|
||
import sqlite3
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Any
|
||
|
||
import pandas as pd
|
||
|
||
|
||
def _conn(db_path: Path | str) -> sqlite3.Connection:
|
||
conn = sqlite3.connect(str(db_path))
|
||
conn.row_factory = sqlite3.Row
|
||
return conn
|
||
|
||
|
||
# ─── Опции для фильтров (что вообще есть в БД) ──────────────────────
|
||
def get_all_sources(db_path) -> list[str]:
    """Return the sorted, de-duplicated list of individual lead sources.

    A lead's ``source`` column may hold several comma-separated sources
    (for example ``'yandex_maps,hh'`` after a merge), so every value is split
    on commas and each non-empty, stripped part is reported once.
    """
    conn = _conn(db_path)
    try:
        rows = conn.execute(
            "SELECT DISTINCT source FROM leads WHERE source IS NOT NULL ORDER BY source"
        ).fetchall()
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()

    return sorted(
        {
            cleaned
            for row in rows
            for part in (row["source"] or "").split(",")
            if (cleaned := part.strip())
        }
    )
|
||
|
||
|
||
def get_all_regions(db_path) -> list[str]:
    """Return the distinct non-empty ``region`` values of all leads, sorted by SQLite."""
    conn = _conn(db_path)
    try:
        rows = conn.execute(
            "SELECT DISTINCT region FROM leads WHERE region IS NOT NULL AND region != '' ORDER BY region"
        ).fetchall()
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
    return [row["region"] for row in rows]
|
||
|
||
|
||
def get_all_categories(db_path) -> list[str]:
    """Return the distinct non-empty ``category`` values of all leads, sorted by SQLite."""
    conn = _conn(db_path)
    try:
        rows = conn.execute(
            "SELECT DISTINCT category FROM leads WHERE category IS NOT NULL AND category != '' ORDER BY category"
        ).fetchall()
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
    return [row["category"] for row in rows]
|
||
|
||
|
||
# ─── Загрузка лидов с фильтрами ──────────────────────────────────────
|
||
def _build_leads_where(filters: dict) -> tuple[str, list]:
    """Translate the ``filters`` dict into a SQL WHERE expression plus its bound parameters."""
    where: list[str] = []
    params: list = []

    sources = filters.get("sources")
    if sources:
        # `source` may hold several comma-separated values, hence a substring match.
        where.append("(" + " OR ".join("source LIKE ?" for _ in sources) + ")")
        params.extend(f"%{s}%" for s in sources)

    regions = filters.get("regions")
    if regions:
        placeholders = ", ".join("?" for _ in regions)
        where.append(f"region IN ({placeholders})")
        params.extend(regions)

    district_search = filters.get("district_search")
    if district_search:
        where.append("district LIKE ?")
        params.append(f"%{district_search}%")

    categories = filters.get("categories")
    if categories:
        placeholders = ", ".join("?" for _ in categories)
        where.append(f"category IN ({placeholders})")
        params.extend(categories)

    statuses = filters.get("statuses")
    if statuses:
        # 'inbox' also covers 'new': leads created before the migration
        # carried the default status 'new'.
        normalized: list[str] = []
        for status in statuses:
            normalized.append(status)
            if status == "inbox":
                normalized.append("new")
        placeholders = ", ".join("?" for _ in normalized)
        where.append(f"COALESCE(outreach_status, 'new') IN ({placeholders})")
        params.extend(normalized)

    # COALESCE(score, 0): a NULL score (e.g. a manually added lead) is treated
    # as 0, because in SQL `NULL >= 0` is not true and the row would vanish.
    # A bound of None is treated as "no bound": binding NULL would make the
    # comparison unknown for every row and silently return nothing.
    min_score = filters.get("min_score")
    if min_score is not None:
        where.append("COALESCE(score, 0) >= ?")
        params.append(min_score)

    max_score = filters.get("max_score")
    if max_score is not None:
        where.append("COALESCE(score, 0) <= ?")
        params.append(max_score)

    name_search = filters.get("name_search")
    if name_search:
        where.append("name LIKE ?")
        params.append(f"%{name_search}%")

    # "Has a pain for product" filter: pain_products stores JSON such as
    # {"P4": 3.0, ...}. The code is matched together with its quotes so that
    # P1 does not also match P10.
    pain_products = filters.get("pain_products")
    if pain_products:
        where.append("(" + " OR ".join("pain_products LIKE ?" for _ in pain_products) + ")")
        params.extend(f'%"{p}"%' for p in pain_products)

    return (" AND ".join(where) if where else "1=1"), params


def get_leads(db_path, filters: dict) -> pd.DataFrame:
    """Load leads matching ``filters`` as a DataFrame.

    Recognised ``filters`` keys (all optional; empty/None values are ignored):
        sources, regions, categories, statuses, pain_products -- lists of values;
        district_search, name_search -- substrings matched with LIKE;
        min_score, max_score -- inclusive bounds; a NULL score counts as 0.

    Rows are ordered by ``score`` descending, then ``id``. In the returned
    frame, ``outreach_status`` values NULL and 'new' are shown as 'inbox'.
    """
    where_sql, params = _build_leads_where(filters or {})

    columns = """
        id, name, inn, director_name, phone_primary, email_primary, phones, emails,
        website, vk_url, telegram_url, instagram_url, youtube_url,
        address, city, region, district, category,
        reviews_count, reviews_avg, score, score_breakdown,
        pain_products, diagnostic_coverage, band,
        outreach_status, comments, last_action, last_reaction, last_touched_at,
        source, parsed_at
    """

    query = f"""
        SELECT {columns}
        FROM leads
        WHERE {where_sql}
        ORDER BY score DESC, id
    """

    conn = _conn(db_path)
    try:
        df = pd.read_sql_query(query, conn, params=params)
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()

    # Normalise outreach_status for display: NULL / 'new' -> 'inbox'.
    if "outreach_status" in df.columns:
        df["outreach_status"] = df["outreach_status"].fillna("inbox").replace({"new": "inbox"})

    return df
|
||
|
||
|
||
# ─── Один лид ────────────────────────────────────────────────────────
|
||
def get_lead_detail(db_path, lead_id: int) -> dict | None:
    """Return one lead as a dict, or ``None`` when no row has ``id == lead_id``.

    The columns ``phones``, ``phones_extra``, ``emails``, ``score_breakdown``
    and ``pain_products`` are decoded from JSON when they hold a non-empty
    value; a value that fails to decode is left exactly as stored.
    """
    conn = _conn(db_path)
    try:
        row = conn.execute("SELECT * FROM leads WHERE id = ?", (lead_id,)).fetchone()
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()

    if row is None:
        return None

    lead = dict(row)
    for field in ("phones", "phones_extra", "emails", "score_breakdown", "pain_products"):
        raw = lead.get(field)
        if not raw:
            continue
        try:
            lead[field] = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # Deliberate best effort: keep the raw stored value if it is not valid JSON.
            pass
    return lead
|
||
|
||
|
||
# ─── История касаний ────────────────────────────────────────────────
|
||
def get_outreach_history(db_path, lead_id: int) -> list[dict]:
    """Return all ``outreach_events`` rows of a lead as dicts, newest first.

    Rows are ordered by ``sent_at`` descending (a NULL ``sent_at`` sorts as
    '0000', i.e. last), with ties broken by ``id`` descending.
    """
    conn = _conn(db_path)
    try:
        rows = conn.execute(
            """
            SELECT * FROM outreach_events
            WHERE lead_id = ?
            ORDER BY COALESCE(sent_at, '0000') DESC, id DESC
            """,
            (lead_id,),
        ).fetchall()
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
    return [dict(row) for row in rows]
|
||
|
||
|
||
# ─── Запись нового касания ──────────────────────────────────────────
|
||
def record_touch(
    db_path,
    lead_id: int,
    channel: str,
    reaction: str | None = None,
    notes: str | None = None,
    new_status: str | None = None,
    message_text: str | None = None,
) -> int:
    """Record one outreach touch for a lead.

    - Inserts a row into ``outreach_events`` stamped with the current local time.
    - Updates ``last_action`` / ``last_reaction`` / ``last_touched_at`` on the lead.
    - Sets ``outreach_status`` as well when ``new_status`` is truthy.

    Both statements run in a single transaction: either both are committed or,
    if one of them raises, neither is.

    Returns the id of the new ``outreach_events`` row.
    """
    now = datetime.now().isoformat(timespec="seconds")

    updates = ["last_action = ?", "last_reaction = ?", "last_touched_at = ?"]
    values: list[Any] = [channel, reaction, now]
    if new_status:
        updates.append("outreach_status = ?")
        values.append(new_status)
    values.append(lead_id)  # bound to the trailing `WHERE id = ?`

    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back if a statement raises;
        # it does not close the connection, hence the surrounding finally.
        with conn:
            cursor = conn.execute(
                """
                INSERT INTO outreach_events
                (lead_id, channel, message_text, sent_at, reaction, notes)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (lead_id, channel, message_text, now, reaction, notes),
            )
            event_id = cursor.lastrowid
            conn.execute(f"UPDATE leads SET {', '.join(updates)} WHERE id = ?", values)
    finally:
        conn.close()
    return event_id
|
||
|
||
|
||
# ─── Обновление полей лида ──────────────────────────────────────────
|
||
def update_lead_status(db_path, lead_id: int, status: str) -> None:
    """Set ``outreach_status`` of the lead with the given id and commit."""
    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back if the statement raises.
        with conn:
            conn.execute(
                "UPDATE leads SET outreach_status = ? WHERE id = ?", (status, lead_id)
            )
    finally:
        # Close even on failure so the connection is never leaked.
        conn.close()
|
||
|
||
|
||
def update_lead_comments(db_path, lead_id: int, comments: str) -> None:
    """Replace the ``comments`` text of the lead with the given id and commit."""
    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back if the statement raises.
        with conn:
            conn.execute(
                "UPDATE leads SET comments = ? WHERE id = ?", (comments, lead_id)
            )
    finally:
        # Close even on failure so the connection is never leaked.
        conn.close()
|
||
|
||
|
||
# ─── Метрики для дашборда ───────────────────────────────────────────
|
||
def count_inbox(db_path) -> int:
    """Count leads whose ``outreach_status`` is 'inbox', 'new' or NULL."""
    conn = _conn(db_path)
    try:
        return conn.execute(
            "SELECT COUNT(*) FROM leads WHERE COALESCE(outreach_status, 'new') IN ('inbox', 'new')"
        ).fetchone()[0]
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
|
||
|
||
|
||
def count_in_work(db_path) -> int:
    """Count leads whose ``outreach_status`` is 'in_work' or 'triaged'."""
    conn = _conn(db_path)
    try:
        return conn.execute(
            "SELECT COUNT(*) FROM leads WHERE outreach_status IN ('in_work', 'triaged')"
        ).fetchone()[0]
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
|
||
|
||
|
||
def count_done(db_path) -> int:
    """Count leads whose ``outreach_status`` is 'done'."""
    conn = _conn(db_path)
    try:
        return conn.execute(
            "SELECT COUNT(*) FROM leads WHERE outreach_status = 'done'"
        ).fetchone()[0]
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
|
||
|
||
|
||
def count_total(db_path) -> int:
    """Count all rows in the ``leads`` table."""
    conn = _conn(db_path)
    try:
        return conn.execute("SELECT COUNT(*) FROM leads").fetchone()[0]
    finally:
        # Close even when the query raises so the connection is never leaked.
        conn.close()
|
||
|
||
|
||
# ─── Ручное добавление / удаление компаний (из CRM) ──────────────────
|
||
def add_lead_manual(db_path, data: dict) -> int:
    """Insert a company entered by hand in the CRM into the ``leads`` table.

    Field preparation is delegated to ``database._prepare_lead``. According to
    the original author's note this keeps dedup keys, normalisation,
    has_website and parsed_at consistent with the parser -- NOTE(review): that
    helper is not visible from this module, confirm against database.py.

    Returns the id of the new lead.

    Raises:
        ValueError: when SQLite reports an integrity violation (e.g. a
            duplicate INN or phone dedup key); the message is meant for the UI.
    """
    # Project module parser_v1/database.py; app.py is expected to have put its
    # parent directory on sys.path already (per the original note).
    import database

    prepared = database._prepare_lead(data)
    if prepared.get("score") is None:
        # Store an explicit 0 so the row never carries a NULL score.
        prepared["score"] = 0

    fields = list(database.WRITABLE_FIELDS) + ["parsed_at"]
    if prepared.get("outreach_status"):
        # Per the original note, outreach_status is not part of WRITABLE_FIELDS,
        # so it is appended explicitly when provided.
        fields.append("outreach_status")

    cols = ", ".join(fields)
    placeholders = ", ".join("?" for _ in fields)
    values = [prepared.get(f) for f in fields]

    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back if the INSERT raises.
        with conn:
            cur = conn.execute(
                f"INSERT INTO leads ({cols}) VALUES ({placeholders})", values
            )
        return cur.lastrowid
    except sqlite3.IntegrityError as e:
        # Map the violated constraint (named in SQLite's message) to a UI message.
        msg = str(e).lower()
        if "inn" in msg:
            raise ValueError(f"Компания с таким ИНН уже есть в базе ({data.get('inn')}).") from e
        if "phone" in msg:
            raise ValueError("Компания с таким телефоном уже есть в базе.") from e
        raise ValueError(f"Не удалось добавить (дубль): {e}") from e
    finally:
        conn.close()
|
||
|
||
|
||
def delete_lead(db_path, lead_id: int) -> None:
    """Delete a lead together with its outreach events and its ``lead_in_run`` rows.

    The three deletes run in one transaction: they are committed together, or
    rolled back together if any of them raises.
    """
    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back if a statement raises.
        with conn:
            # Dependent rows first, the lead itself last.
            conn.execute("DELETE FROM outreach_events WHERE lead_id = ?", (lead_id,))
            conn.execute("DELETE FROM lead_in_run WHERE lead_id = ?", (lead_id,))
            conn.execute("DELETE FROM leads WHERE id = ?", (lead_id,))
    finally:
        # Close even on failure so the connection is never leaked.
        conn.close()
|