#!/usr/bin/env python3
"""Pipeline that proofreads PT-BR articles, generates a cover image and publishes WP drafts.

For every ``.docx`` under ``SRC`` (except the ones skipped in ``main``) the
script:

1. parses the document into title, body paragraphs and WordPress metadata;
2. asks GPT-4o to proofread the text and return clean HTML;
3. generates a cover image with ``gpt-image-1`` and uploads it to WordPress;
4. resolves (or creates) tags and a category;
5. creates the post as a *draft* and records the outcome in a JSON report.
"""
import base64
import glob
import json
import os
import re
import sys
import time
from io import BytesIO
from pathlib import Path

import requests
from docx import Document
from PIL import Image
from requests.auth import HTTPBasicAuth

# === Config ===
ENV_FILE = "/opt/mia/.env"


def _load_env(path: str) -> dict:
    """Parse a simple ``KEY=VALUE`` file into a dict.

    Lines without ``=`` and lines starting with ``#`` are ignored. Whitespace
    around keys/values and one pair of matching surrounding quotes are removed.
    """
    env = {}
    with open(path, encoding="utf-8") as fh:
        for raw in fh:
            line = raw.strip()
            if "=" not in line or line.startswith("#"):
                continue
            key, _, value = line.partition("=")
            value = value.strip()
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                value = value[1:-1]
            env[key.strip()] = value
    return env


ENV = _load_env(ENV_FILE)

OPENAI_KEY = ENV["OPENAI_API_KEY"]
WP_BASE = "https://franciscoborrello.com.br"
WP_USER = "remoraes09@gmail.com"
# SECURITY: this WordPress application password is committed in source.
# It should be rotated and supplied only through the .env file; the literal
# is kept solely as a fallback so existing deployments keep working.
WP_PASS = ENV.get("BORRELLO_WP_APP_PASSWORD", "pZSE dY02 sxgC N9KA qEza ElOx")
WP_UA = "WordPress/6.5; https://franciscoborrello.com.br"
auth = HTTPBasicAuth(WP_USER, WP_PASS)

ROOT = Path("/opt/mia/workspace/clientes/francisco_borrello/blog")
SRC = ROOT / "artigos-corrigidos" / "ARTIGOS DE BLOG"
IMG_DIR = ROOT / "imagens"
HTML_DIR = ROOT / "publicados"
IMG_DIR.mkdir(parents=True, exist_ok=True)
HTML_DIR.mkdir(parents=True, exist_ok=True)

# Errors tolerated by the best-effort WordPress term lookup/creation.
_TERM_ERRORS = (
    requests.RequestException,
    ValueError,
    KeyError,
    TypeError,
    AttributeError,
)


def parse_docx(path: Path):
    """Extract title, body paragraphs (with style), tags, briefing and excerpt.

    The first non-empty paragraph is the title. Everything up to a paragraph
    containing "metadados para wordpress" is body; after that marker the
    paragraphs are interpreted as metadata lines.

    Returns:
        dict with keys ``title`` (str), ``body`` (list of ``(style, text)``),
        ``tags`` (list of str), ``briefing`` (str) and ``excerpt`` (str).

    Raises:
        ValueError: if the document has no non-empty paragraph.
    """
    doc = Document(path)
    paras = [
        (p.style.name if p.style else "Normal", p.text)
        for p in doc.paragraphs
        if p.text.strip()
    ]
    if not paras:
        raise ValueError(f"{path}: documento sem parágrafos de texto")

    title = paras[0][1].strip()
    body = []
    tags = []
    briefing = ""
    excerpt = ""
    in_metadata = False

    for style, text in paras[1:]:
        low = text.lower()
        if "metadados para wordpress" in low:
            in_metadata = True
            continue
        if not in_metadata:
            body.append((style, text))
            continue

        if low.startswith("tags sugeridas"):
            tags_str = text.split(":", 1)[1] if ":" in text else ""
            tags = [t.strip() for t in tags_str.split(",") if t.strip()]
        elif "imagem de capa" in low or "briefing" in low:
            briefing = text.split(":", 1)[1].strip() if ":" in text else text
        elif low.startswith("excerpt"):
            excerpt = text.split(":", 1)[1].strip() if ":" in text else text
            # Strip a leading "(50-60 palavras)"-style parenthetical leftover.
            excerpt = re.sub(r"^\s*\([^)]+\)\s*", "", excerpt).strip()

    return {
        "title": title,
        "body": body,
        "tags": tags,
        "briefing": briefing,
        "excerpt": excerpt,
    }


def revise_html(article: dict) -> str:
    """Ask GPT-4o to proofread the PT-BR article and return clean body HTML.

    Args:
        article: dict as returned by ``parse_docx`` (uses ``title`` and ``body``).

    Returns:
        The HTML returned by the model, with surrounding markdown fences removed.

    Raises:
        requests.HTTPError: if the OpenAI API answers with an error status.
    """
    title = article["title"]
    body_str = "\n\n".join(f"[{style}] {text}" for style, text in article["body"])

    # NOTE(review): the HTML tag names in rules 5-9 were missing from the
    # source (apparently stripped by an HTML-aware tool) and were reconstructed
    # here. The CTA markup in rule 8 in particular is a guess (including the
    # placeholder href) — confirm against the theme's real button markup.
    prompt = f"""Você é um copydesk experiente do mercado editorial brasileiro. Recebe um artigo de blog em PT-BR e devolve HTML limpo pronto pra WordPress.

REGRAS:
1. Não altere o conteúdo, ideias ou estrutura. Não acrescente nada novo.
2. Faça SOMENTE revisão técnica: acentuação, ortografia, vírgulas, concordância, pontuação. Corrija erros óbvios.
3. Onde houver palavras ou trechos em CAIXA ALTA dentro de frases normais (ex: "OU DE VOCE", "[PEQUENO]", "MUDEI A cama"), entenda como ênfase ou marca de revisão do autor. Faça assim:
   - Se for claramente uma marca de revisão (texto entre colchetes, palavra fora de lugar como "[PEQUENO]"), normalize a capitalização e remova os colchetes.
   - Se for ênfase intencional (ex: "EMBAIXO DE VOCE" no meio de uma frase), converta pra texto em ênfase com capitalização normal.
4. Não remova frases. Não cole conteúdo. Mantenha a voz do autor.
5. Cabeçalhos: o título principal NÃO entra no body (o WP renderiza separado). Os subtítulos viram <h2>. Texto solto vira <p>.
6. Listas: se aparecerem como linhas curtas em sequência precedidas por contexto tipo "...padrões:" ou "...caminhos:", converta em <ul> ou <ol>.
7. Itálico, negrito: use <em> e <strong> pra ênfase quando fizer sentido (siglas, nomes próprios de obras, etc).
8. CTA final: se houver algo como [QUERO CONHECER X] ou similar (texto entre colchetes em caps no final), renderize como botão CTA:
   <p class="cta"><a class="cta-button" href="#">TEXTO →</a></p>
9. Output: SOMENTE o HTML do corpo do artigo, SEM o título (já vai no campo title separado), SEM ```html``` envolvendo, SEM comentários, SEM <html> ou <body>. Comece direto pelo primeiro <p>.

ARTIGO (cada linha vem com [estilo] do .docx original):

TÍTULO (não incluir no output): {title}

CORPO:
{body_str}
"""
    r = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENAI_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": "gpt-4o",
            "messages": [
                {
                    "role": "system",
                    "content": "Você é um copydesk profissional. Devolve apenas o HTML pedido, sem nada antes ou depois.",
                },
                {"role": "user", "content": prompt},
            ],
            "temperature": 0.2,
        },
        timeout=180,
    )
    r.raise_for_status()
    html = r.json()["choices"][0]["message"]["content"].strip()

    # Strip markdown fences if the model wrapped the answer anyway.
    if html.startswith("```"):
        html = re.sub(r"^```(?:html)?\s*", "", html)
        html = re.sub(r"\s*```$", "", html)
    return html.strip()


def gen_image(briefing: str, out_path: Path) -> Path:
    """Generate a cover image with gpt-image-1 and save it as a 1200x800 JPEG.

    The raw PNG returned by the API is written next to the JPEG (same stem,
    ``.png`` suffix).

    Args:
        briefing: free-text description of the image; truncated to 1500 chars.
        out_path: destination path; its suffix is replaced by ``.png``/``.jpg``.

    Returns:
        Path of the JPEG file.

    Raises:
        requests.HTTPError: if the OpenAI API answers with an error status.
    """
    prompt = (
        "Editorial illustration, magazine cover style, painterly and elegant. "
        "Theme: " + briefing[:1500] + " "
        "No text overlays. Cinematic 16:9 framing. Sophisticated, never kitsch or generic-mystical."
    )
    r = requests.post(
        "https://api.openai.com/v1/images/generations",
        headers={
            "Authorization": f"Bearer {OPENAI_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "model": "gpt-image-1",
            "prompt": prompt,
            "size": "1536x1024",
            "quality": "high",
            "n": 1,
        },
        timeout=240,
    )
    r.raise_for_status()
    b64 = r.json()["data"][0]["b64_json"]

    raw_png = out_path.with_suffix(".png")
    raw_png.write_bytes(base64.b64decode(b64))

    # Convert/resize to JPEG; the context manager closes the PNG file handle.
    jpg = out_path.with_suffix(".jpg")
    with Image.open(raw_png) as src:
        img = src.convert("RGB").resize((1200, 800), Image.LANCZOS)
    img.save(jpg, "JPEG", quality=88, optimize=True)
    return jpg


def wp_upload_media(jpg: Path, alt: str, slug: str) -> int:
    """Upload a JPEG to the WordPress media library and set its alt text/title.

    The alt/title update is best-effort: if it fails, a warning is printed and
    the id of the already-uploaded media is still returned, so the caller can
    use it as featured image.

    Args:
        jpg: path of the JPEG file to upload.
        alt: text used as alt text (first 200 chars) and title (first 80 chars).
        slug: used as the uploaded file name (``<slug>.jpg``).

    Returns:
        The WordPress media id.

    Raises:
        requests.HTTPError: if the upload itself fails.
    """
    img_bytes = jpg.read_bytes()
    r = requests.post(
        f"{WP_BASE}/wp-json/wp/v2/media",
        auth=auth,
        headers={
            "User-Agent": WP_UA,
            "Content-Disposition": f'attachment; filename="{slug}.jpg"',
            "Content-Type": "image/jpeg",
        },
        data=img_bytes,
        timeout=180,
    )
    r.raise_for_status()
    media_id = r.json()["id"]

    # Set alt text and title (best-effort).
    try:
        ra = requests.post(
            f"{WP_BASE}/wp-json/wp/v2/media/{media_id}",
            auth=auth,
            headers={"User-Agent": WP_UA},
            json={"alt_text": alt[:200], "title": alt[:80]},
            timeout=30,
        )
        if not ra.ok:
            print(f" AVISO alt_text: HTTP {ra.status_code}", flush=True)
    except requests.RequestException as e:
        print(f" AVISO alt_text: {e}", flush=True)
    return media_id


def wp_get_or_create_term(kind: str, name: str, cache: dict) -> int | None:
    """Return the id of a WordPress term, creating the term when it is missing.

    Args:
        kind: REST collection name, e.g. ``"tags"`` or ``"categories"``.
        name: term name; matched case-insensitively against search results.
        cache: dict keyed by ``(kind, name.lower())``; updated on success.

    Returns:
        The term id, or ``None`` if it could be neither found nor created.
        Failures are reported with a printed warning and never raised.
    """
    key = (kind, name.lower())
    if key in cache:
        return cache[key]

    wanted = name.strip().lower()

    # 1) Look for an existing term with exactly this name.
    try:
        rs = requests.get(
            f"{WP_BASE}/wp-json/wp/v2/{kind}",
            auth=auth,
            headers={"User-Agent": WP_UA},
            params={"search": name, "per_page": 30},
            timeout=20,
        )
        if rs.status_code == 200:
            for t in rs.json():
                if t.get("name", "").strip().lower() == wanted:
                    cache[key] = t["id"]
                    return t["id"]
    except _TERM_ERRORS as e:
        print(f" AVISO busca {kind} '{name}': {e}", flush=True)

    # 2) Not found: try to create it.
    try:
        rc = requests.post(
            f"{WP_BASE}/wp-json/wp/v2/{kind}",
            auth=auth,
            headers={"User-Agent": WP_UA},
            json={"name": name},
            timeout=20,
        )
        if rc.status_code in (200, 201):
            tid = rc.json()["id"]
            cache[key] = tid
            return tid
        if rc.status_code == 400:
            err = rc.json()
            if err.get("code") == "term_exists":
                # WordPress reports the id of the already existing term.
                tid = err.get("data", {}).get("term_id")
                if tid is not None:
                    cache[key] = tid
                return tid
        print(f" AVISO criar {kind} '{name}': HTTP {rc.status_code}", flush=True)
    except _TERM_ERRORS as e:
        print(f" AVISO criar {kind} '{name}': {e}", flush=True)
    return None


# (substring, category) pairs checked in order against "slug + title"
# lower-cased; the first match wins.
CATEGORY_RULES = [
    ("rede de hartmann", "Geobiologia"),
    ("hartmann", "Geobiologia"),
    ("geobiologia", "Geobiologia"),
    ("geoacupuntura", "Geobiologia"),
    ("zonas geopatic", "Geobiologia"),
    ("feng shui", "Feng Shui"),
    ("estrelas voadoras", "Feng Shui"),
    ("home office", "Feng Shui"),
    ("cozinha", "Feng Shui"),
    ("plantas", "Feng Shui"),
    ("mitos feng shui", "Feng Shui"),
    ("mesa radionica", "Radiônica"),
    ("mesa radiônica", "Radiônica"),
    ("radionica", "Radiônica"),
    ("radiônica", "Radiônica"),
    ("pêndulo", "Radiestesia"),
    ("pendulo", "Radiestesia"),
    ("radiestesia", "Radiestesia"),
    ("energia ruim", "Harmonização"),
    ("sinais", "Harmonização"),
]


def pick_category(slug: str, title: str) -> str:
    """Pick the category name for an article from its slug and title.

    Returns the category of the first ``CATEGORY_RULES`` entry whose substring
    occurs in the lower-cased ``slug + " " + title``; ``"Geobiologia"`` when
    no rule matches.
    """
    blob = (slug + " " + title).lower()
    for needle, cat in CATEGORY_RULES:
        if needle in blob:
            return cat
    return "Geobiologia"


def wp_create_draft(
    title: str,
    content: str,
    excerpt: str,
    media_id: int | None,
    tag_ids: list,
    cat_id: int | None,
) -> dict:
    """Create a WordPress post with status ``draft``.

    ``featured_media`` and ``categories`` are only sent when ``media_id`` /
    ``cat_id`` are truthy.

    Returns:
        The decoded JSON body of the REST response.

    Raises:
        requests.HTTPError: if WordPress answers with an error status.
    """
    payload = {
        "title": title,
        "content": content,
        "excerpt": excerpt,
        "status": "draft",
        "tags": tag_ids,
    }
    if media_id:
        payload["featured_media"] = media_id
    if cat_id:
        payload["categories"] = [cat_id]
    r = requests.post(
        f"{WP_BASE}/wp-json/wp/v2/posts",
        auth=auth,
        headers={"User-Agent": WP_UA},
        json=payload,
        timeout=60,
    )
    r.raise_for_status()
    return r.json()


def process_one(docx_path: Path, skip_image: bool, term_cache: dict, report: list):
    """Run the full pipeline for one .docx and append the outcome to ``report``.

    Parsing, proofreading and post creation failures are recorded in ``report``
    (status ``ERR-parse`` / ``ERR-revisao`` / ``ERR-post``) instead of raised.
    An image failure is only printed; the draft is then created without a
    featured image.

    Args:
        docx_path: article to process; its stem is used as slug.
        skip_image: when true, no image is generated or uploaded.
        term_cache: cache shared across calls for ``wp_get_or_create_term``.
        report: list that receives one dict describing the outcome.
    """
    slug = docx_path.stem  # e.g. "02-cama-cruzamento-hartmann"
    print(f"\n=== {slug} ===", flush=True)

    try:
        art = parse_docx(docx_path)
    except Exception as e:  # one unreadable document must not abort the batch
        print(f" ERRO parse: {e}", flush=True)
        report.append({"slug": slug, "status": "ERR-parse", "error": str(e)})
        return
    print(f" title: {art['title'][:80]}", flush=True)

    # 1) proofreading
    try:
        html = revise_html(art)
    except Exception as e:
        print(f" ERRO revisão: {e}", flush=True)
        report.append({"slug": slug, "status": "ERR-revisao", "error": str(e)})
        return
    # Explicit UTF-8: the content is PT-BR and must not depend on the locale.
    (HTML_DIR / f"{slug}.html").write_text(html, encoding="utf-8")
    print(f" html len: {len(html)}", flush=True)

    # 2) image
    media_id = None
    if not skip_image and art["briefing"]:
        try:
            out_jpg = gen_image(art["briefing"], IMG_DIR / slug)
            print(f" img: {out_jpg}", flush=True)
            media_id = wp_upload_media(out_jpg, art["title"], slug)
            print(f" media_id: {media_id}", flush=True)
        except Exception as e:
            print(f" ERRO imagem: {e}", flush=True)

    # 3) tags
    tag_ids = []
    for t in art["tags"]:
        if not t.strip():
            continue
        tid = wp_get_or_create_term("tags", t.strip(), term_cache)
        if tid:
            tag_ids.append(tid)

    # 4) category
    cat_name = pick_category(slug, art["title"])
    cat_id = wp_get_or_create_term("categories", cat_name, term_cache)

    # 5) post
    try:
        post = wp_create_draft(
            art["title"], html, art["excerpt"], media_id, tag_ids, cat_id
        )
        print(f" post_id: {post['id']}", flush=True)
        report.append({
            "slug": slug,
            "status": "OK",
            "post_id": post["id"],
            "title": art["title"],
            "category": cat_name,
            "media_id": media_id,
            "edit_link": f"{WP_BASE}/wp-admin/post.php?post={post['id']}&action=edit",
            "preview_link": f"{WP_BASE}/?p={post['id']}&preview=true",
        })
    except Exception as e:
        print(f" ERRO post: {e}", flush=True)
        report.append({"slug": slug, "status": "ERR-post", "error": str(e)})


def main():
    """Process every .docx in ``SRC`` except ``01-*`` and write the JSON report.

    The report is written even if the loop is interrupted, so the posts that
    were already created stay traceable.
    """
    # 01 was already published manually; process 02-12.
    docs = sorted(SRC.glob("*.docx"))
    targets = [d for d in docs if not d.name.startswith("01-")]
    term_cache = {}
    report = []
    out = ROOT / "publicacao-report.json"
    try:
        for d in targets:
            # Borrello will provide real photos for article 03.
            skip_image = d.name.startswith("03-")
            process_one(d, skip_image, term_cache, report)
            time.sleep(2)
    finally:
        # Save the report.
        dumped = json.dumps(report, indent=2, ensure_ascii=False)
        out.write_text(dumped, encoding="utf-8")
        print(f"\nREPORT: {out}")
        print(dumped)


if __name__ == "__main__":
    main()