init: Parser v1 — Lead Generation Engine
Парсер лидов МБ РФ: Яндекс.Карты + HH.ru + обогащение DaData/ЕГРЮЛ/Rusprofile + Streamlit CRM. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+329
@@ -0,0 +1,329 @@
|
||||
"""DB-слой Streamlit-приложения.
|
||||
|
||||
Все запросы к leads.db инкапсулированы здесь. UI-код в app.py не делает
|
||||
SQL напрямую — только через эти функции.
|
||||
|
||||
Стандарт: каждая функция сама открывает/закрывает соединение.
|
||||
Streamlit перезапускает скрипт на каждое действие — глобальный коннект
|
||||
держать не имеет смысла.
|
||||
"""
|
||||
import json
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _conn(db_path: Path | str) -> sqlite3.Connection:
|
||||
conn = sqlite3.connect(str(db_path))
|
||||
conn.row_factory = sqlite3.Row
|
||||
return conn
|
||||
|
||||
|
||||
# ─── Опции для фильтров (что вообще есть в БД) ──────────────────────
|
||||
def get_all_sources(db_path) -> list[str]:
    """Return the sorted, de-duplicated source names present in ``leads``.

    A lead's ``source`` column may hold several comma-separated names
    (e.g. ``'yandex_maps,hh'`` after merging different sources), so every
    value is split on commas and each part is stripped; empty parts are
    dropped.
    """
    conn = _conn(db_path)
    try:
        rows = conn.execute(
            "SELECT DISTINCT source FROM leads WHERE source IS NOT NULL ORDER BY source"
        ).fetchall()
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()

    names = {
        name
        for row in rows
        for part in (row["source"] or "").split(",")
        if (name := part.strip())
    }
    return sorted(names)
|
||||
|
||||
|
||||
def get_all_regions(db_path) -> list[str]:
    """Return the distinct non-empty ``region`` values of ``leads``, sorted by SQL."""
    conn = _conn(db_path)
    try:
        rows = conn.execute(
            "SELECT DISTINCT region FROM leads WHERE region IS NOT NULL AND region != '' ORDER BY region"
        ).fetchall()
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()
    return [row["region"] for row in rows]
|
||||
|
||||
|
||||
def get_all_categories(db_path) -> list[str]:
    """Return the distinct non-empty ``category`` values of ``leads``, sorted by SQL."""
    conn = _conn(db_path)
    try:
        rows = conn.execute(
            "SELECT DISTINCT category FROM leads WHERE category IS NOT NULL AND category != '' ORDER BY category"
        ).fetchall()
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()
    return [row["category"] for row in rows]
|
||||
|
||||
|
||||
# ─── Загрузка лидов с фильтрами ──────────────────────────────────────
|
||||
def _build_leads_filter(filters: dict) -> tuple[str, list]:
    """Translate the ``get_leads`` *filters* dict into a WHERE expression.

    Returns ``(where_sql, params)``: the boolean SQL expression (``"1=1"``
    when no filter applies) and the positional values for its ``?``
    placeholders, in placeholder order.
    """
    where: list[str] = []
    params: list = []

    def add_any_like(column: str, patterns: list[str]) -> None:
        # One "column LIKE ?" per pattern, OR-ed together.
        where.append("(" + " OR ".join(f"{column} LIKE ?" for _ in patterns) + ")")
        params.extend(patterns)

    def add_in(expression: str, values: list) -> None:
        placeholders = ", ".join("?" for _ in values)
        where.append(f"{expression} IN ({placeholders})")
        params.extend(values)

    if filters.get("sources"):
        # Substring match, because ``source`` may hold several comma-separated
        # names in one value.
        add_any_like("source", [f"%{s}%" for s in filters["sources"]])

    if filters.get("regions"):
        add_in("region", list(filters["regions"]))

    if filters.get("district_search"):
        where.append("district LIKE ?")
        params.append(f"%{filters['district_search']}%")

    if filters.get("categories"):
        add_in("category", list(filters["categories"]))

    if filters.get("statuses"):
        # 'inbox' also covers 'new': leads created before the migration had
        # the default status 'new'.
        statuses: list[str] = []
        for status in filters["statuses"]:
            statuses.append(status)
            if status == "inbox":
                statuses.append("new")
        add_in("COALESCE(outreach_status, 'new')", statuses)

    # COALESCE(score, 0): leads with score=NULL (e.g. added manually) would
    # otherwise be dropped, because `NULL >= 0` is not true in SQL. NULL is
    # treated as 0.
    # A bound given as None is skipped: binding NULL would make the
    # comparison never true and silently return no rows.
    if filters.get("min_score") is not None:
        where.append("COALESCE(score, 0) >= ?")
        params.append(filters["min_score"])

    if filters.get("max_score") is not None:
        where.append("COALESCE(score, 0) <= ?")
        params.append(filters["max_score"])

    if filters.get("name_search"):
        where.append("name LIKE ?")
        params.append(f"%{filters['name_search']}%")

    # "Has a pain matching the product" filter: pain_products stores JSON
    # like {"P4":3.0,...}. Match the quoted key ("P4") as a substring so
    # that P1 does not also match P10.
    if filters.get("pain_products"):
        add_any_like("pain_products", [f'%"{p}"%' for p in filters["pain_products"]])

    return (" AND ".join(where) if where else "1=1"), params


def get_leads(db_path, filters: dict) -> pd.DataFrame:
    """Load leads matching *filters* as a DataFrame.

    filters: {
        sources, regions, district_search, categories, statuses,
        min_score, max_score, name_search, pain_products
    }
    All keys are optional. Empty/falsy values (and ``None`` score bounds)
    mean "no restriction". ``district_search`` and ``name_search`` are
    substring matches via SQL ``LIKE``.

    Rows are ordered by ``score DESC, id``. In the returned frame a NULL or
    ``'new'`` ``outreach_status`` is shown as ``'inbox'``.
    """
    where_sql, params = _build_leads_filter(filters)

    cols = """
        id, name, inn, director_name, phone_primary, email_primary, phones, emails,
        website, vk_url, telegram_url, instagram_url, youtube_url,
        address, city, region, district, category,
        reviews_count, reviews_avg, score, score_breakdown,
        pain_products, diagnostic_coverage, band,
        outreach_status, comments, last_action, last_reaction, last_touched_at,
        source, parsed_at
    """

    query = f"""
        SELECT {cols}
        FROM leads
        WHERE {where_sql}
        ORDER BY score DESC, id
    """

    conn = _conn(db_path)
    try:
        df = pd.read_sql_query(query, conn, params=params)
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()

    # Normalize outreach_status for display: NULL/'new' -> 'inbox'.
    if "outreach_status" in df.columns:
        df["outreach_status"] = df["outreach_status"].fillna("inbox").replace({"new": "inbox"})

    return df
|
||||
|
||||
|
||||
# ─── Один лид ────────────────────────────────────────────────────────
|
||||
def get_lead_detail(db_path, lead_id: int) -> dict | None:
    """Return one lead as a dict, or ``None`` when no row has id *lead_id*.

    The JSON-encoded columns ``phones``, ``phones_extra``, ``emails``,
    ``score_breakdown`` and ``pain_products`` are decoded when they hold a
    non-empty value; a value that fails to decode is returned as stored.
    """
    conn = _conn(db_path)
    try:
        row = conn.execute("SELECT * FROM leads WHERE id = ?", (lead_id,)).fetchone()
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()

    if row is None:
        return None

    lead = dict(row)
    for field in ("phones", "phones_extra", "emails", "score_breakdown", "pain_products"):
        raw = lead.get(field)
        if not raw:
            continue
        try:
            lead[field] = json.loads(raw)
        except (json.JSONDecodeError, TypeError):
            # Deliberate best effort: keep the raw stored value when it is
            # not valid JSON rather than failing the whole lookup.
            continue
    return lead
|
||||
|
||||
|
||||
# ─── История касаний ────────────────────────────────────────────────
|
||||
def get_outreach_history(db_path, lead_id: int) -> list[dict]:
    """Return all ``outreach_events`` rows of a lead as dicts.

    Rows are ordered by ``sent_at`` descending, then ``id`` descending. A
    NULL ``sent_at`` is compared as the string ``'0000'``.
    """
    conn = _conn(db_path)
    try:
        rows = conn.execute("""
            SELECT * FROM outreach_events
            WHERE lead_id = ?
            ORDER BY COALESCE(sent_at, '0000') DESC, id DESC
        """, (lead_id,)).fetchall()
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()
    return [dict(row) for row in rows]
|
||||
|
||||
|
||||
# ─── Запись нового касания ──────────────────────────────────────────
|
||||
def record_touch(
    db_path,
    lead_id: int,
    channel: str,
    reaction: str | None = None,
    notes: str | None = None,
    new_status: str | None = None,
    message_text: str | None = None,
) -> int:
    """Record one outreach touch for a lead.

    - Inserts a row into ``outreach_events`` (``sent_at`` is the current
      local time, ISO format, second precision).
    - Updates ``last_action`` / ``last_reaction`` / ``last_touched_at`` on
      the lead.
    - Sets ``outreach_status`` only when *new_status* is non-empty.

    Both writes happen in one transaction: either both are committed or, if
    any statement raises, both are rolled back.

    Returns the id of the new ``outreach_events`` row.
    """
    now = datetime.now().isoformat(timespec="seconds")

    updates = ["last_action = ?", "last_reaction = ?", "last_touched_at = ?"]
    values: list[Any] = [channel, reaction, now]
    if new_status:
        updates.append("outreach_status = ?")
        values.append(new_status)
    values.append(lead_id)

    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back on an exception; it
        # does not close the connection, hence the outer finally.
        with conn:
            cursor = conn.execute("""
                INSERT INTO outreach_events
                    (lead_id, channel, message_text, sent_at, reaction, notes)
                VALUES (?, ?, ?, ?, ?, ?)
            """, (lead_id, channel, message_text, now, reaction, notes))
            conn.execute(f"UPDATE leads SET {', '.join(updates)} WHERE id = ?", values)
        return cursor.lastrowid
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ─── Обновление полей лида ──────────────────────────────────────────
|
||||
def update_lead_status(db_path, lead_id: int, status: str) -> None:
    """Set ``outreach_status`` of the lead with id *lead_id* to *status*."""
    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back on an exception; it
        # does not close the connection, hence the outer finally.
        with conn:
            conn.execute("UPDATE leads SET outreach_status = ? WHERE id = ?", (status, lead_id))
    finally:
        conn.close()
|
||||
|
||||
|
||||
def update_lead_comments(db_path, lead_id: int, comments: str) -> None:
    """Replace the ``comments`` text of the lead with id *lead_id*."""
    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back on an exception; it
        # does not close the connection, hence the outer finally.
        with conn:
            conn.execute("UPDATE leads SET comments = ? WHERE id = ?", (comments, lead_id))
    finally:
        conn.close()
|
||||
|
||||
|
||||
# ─── Метрики для дашборда ───────────────────────────────────────────
|
||||
def count_inbox(db_path) -> int:
    """Count leads whose ``outreach_status`` is ``'inbox'``, ``'new'`` or NULL."""
    conn = _conn(db_path)
    try:
        return conn.execute(
            "SELECT COUNT(*) FROM leads WHERE COALESCE(outreach_status, 'new') IN ('inbox', 'new')"
        ).fetchone()[0]
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()
|
||||
|
||||
|
||||
def count_in_work(db_path) -> int:
    """Count leads whose ``outreach_status`` is ``'in_work'`` or ``'triaged'``."""
    conn = _conn(db_path)
    try:
        return conn.execute(
            "SELECT COUNT(*) FROM leads WHERE outreach_status IN ('in_work', 'triaged')"
        ).fetchone()[0]
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()
|
||||
|
||||
|
||||
def count_done(db_path) -> int:
    """Count leads whose ``outreach_status`` is ``'done'``."""
    conn = _conn(db_path)
    try:
        return conn.execute(
            "SELECT COUNT(*) FROM leads WHERE outreach_status = 'done'"
        ).fetchone()[0]
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()
|
||||
|
||||
|
||||
def count_total(db_path) -> int:
    """Count all rows in ``leads``."""
    conn = _conn(db_path)
    try:
        return conn.execute("SELECT COUNT(*) FROM leads").fetchone()[0]
    finally:
        # Close even when the query raises, so a failed call does not leak
        # the connection.
        conn.close()
|
||||
|
||||
|
||||
# ─── Ручное добавление / удаление компаний (из CRM) ──────────────────
|
||||
def add_lead_manual(db_path, data: dict) -> int:
    """Insert a company entered by hand in the CRM into ``leads``.

    The input is passed through ``database._prepare_lead`` first; per the
    original author's note this gives manual leads the same dedup keys,
    normalization, ``has_website`` and ``parsed_at`` as parsed ones.

    Returns the id of the new lead.
    Raises ``ValueError`` when the INSERT fails with
    ``sqlite3.IntegrityError`` (expected cause per the original note: the
    UNIQUE constraints on inn / phone_dedup_key), so the UI can show it.
    """
    # parser_v1/database.py; app.py is expected to have put its directory
    # (PARENT) on sys.path already.
    import database

    lead = database._prepare_lead(data)
    if lead.get("score") is None:
        # Otherwise a NULL-score lead is invisible in the table (score filter).
        lead["score"] = 0

    columns = [*database.WRITABLE_FIELDS, "parsed_at"]
    if lead.get("outreach_status"):
        # Not part of WRITABLE_FIELDS, so it is added explicitly.
        columns.append("outreach_status")

    column_sql = ", ".join(columns)
    marks = ", ".join("?" for _ in columns)
    row_values = [lead.get(column) for column in columns]

    conn = _conn(db_path)
    try:
        cursor = conn.execute(
            f"INSERT INTO leads ({column_sql}) VALUES ({marks})", row_values
        )
        conn.commit()
    except sqlite3.IntegrityError as exc:
        conn.rollback()
        # Pick the user-facing message from the text of the SQLite error.
        detail = str(exc).lower()
        if "inn" in detail:
            message = f"Компания с таким ИНН уже есть в базе ({data.get('inn')})."
        elif "phone" in detail:
            message = "Компания с таким телефоном уже есть в базе."
        else:
            message = f"Не удалось добавить (дубль): {exc}"
        raise ValueError(message) from exc
    else:
        return cursor.lastrowid
    finally:
        conn.close()
|
||||
|
||||
|
||||
def delete_lead(db_path, lead_id: int) -> None:
    """Delete a company from the CRM with its touch history and run links.

    Removes the lead's rows from ``outreach_events`` and ``lead_in_run``
    and then the ``leads`` row itself, all in one transaction: if any
    statement raises, nothing is deleted.
    """
    conn = _conn(db_path)
    try:
        # `with conn` commits on success and rolls back on an exception; it
        # does not close the connection, hence the outer finally.
        with conn:
            conn.execute("DELETE FROM outreach_events WHERE lead_id = ?", (lead_id,))
            conn.execute("DELETE FROM lead_in_run WHERE lead_id = ?", (lead_id,))
            conn.execute("DELETE FROM leads WHERE id = ?", (lead_id,))
    finally:
        conn.close()
|
||||
Reference in New Issue
Block a user