#!/usr/bin/env python3
"""
Song Link Tester

This program calls the Zamar REST API to get songs with links and tests
each link to ensure it is working correctly.
"""

import requests
import socket
import json
from urllib.parse import urlparse
import time
import re
from typing import Dict, List, Any, Optional, Tuple
import smtplib
import ssl
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email import encoders
import argparse
import asyncio
from contextlib import asynccontextmanager
from datetime import datetime
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError


@asynccontextmanager
async def _anghami_browser():
    """
    Async context manager that yields a headless Chromium browser.

    Starts Playwright, launches Chromium in headless mode and hands the
    browser object to the caller. The browser is closed when the ``async
    with`` block exits, including when the body raised.
    """
    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch(headless=True)
        try:
            yield chromium
        finally:
            # Always release the browser process, even on error
            await chromium.close()


async def _check_anghami_async(url: str, timeout_ms: int = 15000) -> bool:
    """
    Decide whether an Anghami link looks broken.

    Loads ``url`` in headless Chromium, waiting for the network to go idle
    for at most ``timeout_ms`` milliseconds.

    Returns True (BAD) when navigation times out or fails for any reason,
    or when the page title contains 'anghami' (case-insensitive).
    Returns False (OK) otherwise; a title that cannot be read counts as
    empty and therefore OK.
    """
    async with _anghami_browser() as browser:
        tab = await browser.new_page()

        try:
            await tab.goto(url, wait_until="networkidle", timeout=timeout_ms)
        except (PlaywrightTimeoutError, Exception):
            # Timeout, navigation, DNS or TLS failure → bad link
            return True

        try:
            page_title = await tab.title()
        except Exception:
            page_title = ""

        # Title heuristic: any mention of "anghami" marks the link as bad
        # (presumably dead links land on a generic Anghami-branded page —
        # TODO confirm).
        return "anghami" in (page_title or "").lower()


def is_bad_anghami_headless(url: str, timeout_ms: int = 15000) -> bool:
    """
    Synchronous wrapper around the async Anghami checker.

    Args:
        url: Anghami link to check.
        timeout_ms: Navigation timeout in milliseconds, forwarded to the
            async checker.

    Returns:
        True if the link looks BAD, False if it looks OK.

    When no event loop is running in the calling thread the check runs via
    ``asyncio.run``. When a loop IS already running, ``asyncio.run`` and
    ``loop.run_until_complete`` both refuse to start in that thread, so
    the check is executed in a short-lived worker thread that owns its own
    event loop, and this call blocks until it finishes.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop running here: asyncio.run may create and own one.
        return asyncio.run(_check_anghami_async(url, timeout_ms=timeout_ms))

    # A loop is already running in this thread. Run the coroutine on a
    # separate thread instead of touching (or replacing) the caller's loop.
    from concurrent.futures import ThreadPoolExecutor

    def _run_in_own_loop() -> bool:
        return asyncio.run(_check_anghami_async(url, timeout_ms=timeout_ms))

    with ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(_run_in_own_loop).result()


class SongLinkTester:
    counter = 0
    def __init__(
        self,
        api_url: str = "https://zamarapp.com/zamar/backend/web/song/get",
        smtp_host: str = "send.one.com",
        smtp_port: int = 587,
        smtp_username: str = "thomas@dilts.se",
        smtp_password: str = "YourPasswordHere",
        smtp_from: str = "thomas@dilts.se",
        smtp_to: str = "thomas@dilts.se,malek_bakary@hotmail.com",
        smtp_use_tls: bool = True,
    ):
        """Store the API/SMTP configuration and create the two HTTP sessions.

        ``api_session`` sends JSON headers for the song API; ``web_session``
        sends browser-like headers for fetching the linked pages.
        ``smtp_to`` may hold several comma-separated addresses.
        """
        # Email settings
        self.smtp_host = smtp_host
        self.smtp_port = smtp_port
        self.smtp_username = smtp_username
        self.smtp_password = smtp_password
        self.smtp_from = smtp_from
        self.smtp_to = smtp_to
        self.smtp_use_tls = smtp_use_tls

        # API settings; songs with an id below start_id are skipped
        self.api_url = api_url
        self.start_id = 0

        # Session used only for the JSON API
        api_headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            'User-Agent': 'SongLinkTester/1.0'
        }
        self.api_session = requests.Session()
        self.api_session.headers.update(api_headers)

        # Session used for the linked pages. A realistic browser UA avoids
        # atypical pages (e.g., consent/interstitials).
        browser_headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'en-US,en;q=0.9'
        }
        self.web_session = requests.Session()
        self.web_session.headers.update(browser_headers)
        
    def get_songs_page(self, page_number: int, per_page: int = 20) -> Optional[Dict[str, Any]]:
        """Fetch one page of songs (id, title and link fields) from the API.

        The request filters on ``iso_name`` like "ar" and sorts by id
        ascending.

        Args:
            page_number: Page index sent as "page-number".
            per_page: Page size sent as "how-many-per-page".

        Returns:
            The decoded JSON body, or None when the request failed, the
            server answered with an HTTP error status, or the body was not
            valid JSON. Failures are printed rather than raised.
        """
        payload = {
            "how-many-per-page": per_page,
            "page-number": page_number,
            "returnfields": [
                "id", "title", "youtube_link", "spotify_link",
                "apple_music_link", "anghami_link", "sound_cloud_link"
            ],
            "filters": [
                {
                    "field": "iso_name",
                    "like": "ar"
                }
            ],
            "sort-by":
            {
                "field": "id", "direction": "ASC"
            }
        }

        # Stays None when the POST itself fails, so the diagnostics below
        # know whether there is a body worth showing.
        response = None
        try:
            # Use API session with JSON headers
            response = self.api_session.post(self.api_url, json=payload, timeout=30)
            response.raise_for_status()
            return response.json()
        except ValueError as e:
            # Body was not valid JSON. Depending on the requests version the
            # decode error is a plain ValueError and not a RequestException,
            # so it needs its own clause to avoid aborting the whole run.
            failure = e
        except requests.exceptions.RequestException as e:
            failure = e

        print(f"Error fetching page {page_number}: {failure}")
        if response is not None:
            try:
                print(f"ReturnedContents: {response.text[:500]}")
            except Exception:
                # Deliberately best-effort: showing the body is only a
                # debugging aid and must never mask the original failure.
                pass
        return None
    def test_link(self, url: str, field_name: str) -> Tuple[bool, str]:
        """Test if a link is working by making a GET request and checking content."""
        try:
            # Special handling for Anghami: use headless browser and title heuristic
            if "anghami.com" in url:
                if is_bad_anghami_headless(url):
                    return False, "Anghami (headless): Bad link (title contains 'anghami')"
                return True, "Anghami (headless): OK"

            # Prefer provider oEmbed APIs for stable existence checks
            if "youtube.com" in url or "youtu.be" in url:
                is_valid, note = self._check_youtube_oembed(url)
                if is_valid is not None:
                    return is_valid, note

            # Use GET request to check content for error messages
            response = self.web_session.get(url, timeout=10, allow_redirects=True)
            
            # Check if status code indicates success
            if response.status_code >= 400:
                return False, f"HTTP Error {response.status_code}"
            
            # Keep original content for meta tag checks (case-sensitive)
            original_content = response.text
            #if(field_name == "anghami_link"):
            #    write_to_file("content" + str(SongLinkTester.counter) + ".html", original_content)
            #    SongLinkTester.counter += 1
            #    exit(0);
            # Lowercase content for phrase matching
            content = original_content.lower()
            
            # Common error phrases that indicate broken/deleted content
            error_phrases = [
                "this video is not available",
                "video unavailable",
                "this video has been removed",
                "video has been deleted",
                "content not available",
                "this content is not available",
                "video is private",
                "this video is private",
                "video has been made private",
                "this track was not found",
                "track not found",
                "song not found",
                "this song is not available",
                "track unavailable",
                "song unavailable",
                "this track is not available",
                "track has been removed",
                "song has been removed",
                "track has been deleted",
                "song has been deleted",
                "this track is private",
                "track is private",
                "this song is private",
                "song is private",
                "track has been made private",
                "song has been made private",
                "page not found",
                "not found",
                "does not exist",
                "no longer exists",
                "has been removed",
                "has been deleted",
                "content removed",
                "content deleted",
                "access denied",
                "forbidden",
                "unauthorized",
                "account suspended",
                "channel terminated",
                "user not found",
                "profile not found",
                "playlist not found",
                "album not found",
                "This track was not found"
            ]
            
            # Check if any error phrase is in the content
            for phrase in error_phrases:
                if phrase in content:
                    return False, f"Content Error: '{phrase}' detected"
            
            # Additional checks for specific platforms
            if "youtube.com" in url or "youtu.be" in url:
                # After oEmbed, avoid HTML heuristics that vary by region/consent.
                # If we got here, oEmbed was inconclusive; assume OK to prevent false negatives.
                return True, "YouTube: oEmbed inconclusive; assuming OK"
            
            elif "spotify.com" in url:
                if any(phrase in content for phrase in [
                    "track not found", "song not found", "track unavailable"
                ]):
                    return False, "Spotify: Track not found or unavailable"
            
            elif "soundcloud.com" in url:
                if any(phrase in content for phrase in [
                    "track not found", "this track was not found", 
                    "track has been removed", "track is private","<title>soundcloud - hear the world"
                ]):
                    return False, "SoundCloud: Track not found or private"
            
            elif "music.apple.com" in url or "itunes.apple.com" in url:
                if any(phrase in content for phrase in [
                    "song not found", "track not found", "not available"
                ]):
                    return False, "Apple Music: Track not found or unavailable"
            
            # If we get here, the link appears to be working
            return True, f"OK (Status: {response.status_code})"
                
        except requests.exceptions.Timeout:
            return False, "Timeout"
        except requests.exceptions.ConnectionError:
            return False, "Connection Error"
        except requests.exceptions.RequestException as e:
            return False, f"Request Error: {str(e)}"
        except Exception as e:
            return False, f"Unexpected Error: {str(e)}"
    
    def get_link_fields(self, song: Dict[str, Any]) -> List[Tuple[str, str]]:
        """Return ``(field_name, url)`` pairs for every non-empty ``*_link`` field.

        A field qualifies when its key ends with ``_link`` and its value is
        neither None nor blank once converted to ``str`` and stripped. The
        returned URL is that stripped string; dict order is preserved.
        """
        return [
            (field, str(raw).strip())
            for field, raw in song.items()
            if field.endswith('_link') and raw is not None and str(raw).strip()
        ]

    def _normalize_youtube_watch_url(self, url: str) -> str:
        """Rewrite youtu.be and /shorts/ links as standard watch URLs.

        ``youtu.be/<id>`` and ``youtube.com/shorts/<id>`` become
        ``https://www.youtube.com/watch?v=<id>``. Every other URL, and any
        URL from which no video id can be extracted, is returned unchanged.
        """
        try:
            from urllib.parse import urlparse

            parts = urlparse(url)
            host = parts.netloc.lower()
            path = parts.path

            video_id = ''
            if 'youtu.be' in host:
                # Short links carry the video id as the first path segment
                video_id = path.strip('/').split('/')[0]
            if not video_id and 'youtube.com' in host and path.startswith('/shorts/'):
                # /shorts/<id>: the id is the segment after "shorts"
                segments = path.split('/')
                video_id = segments[2] if len(segments) > 2 else ''

            if video_id:
                return f"https://www.youtube.com/watch?v={video_id}"
        except Exception:
            # Unparseable input: fall through and hand the URL back untouched
            pass
        return url

    def _check_youtube_oembed(self, url: str) -> Tuple[Optional[bool], str]:
        """Ask YouTube's oEmbed endpoint whether the video exists.

        Returns ``(True, msg)`` on HTTP 200, ``(False, msg)`` on HTTP 404,
        and ``(None, reason)`` for any other status or a request error, so
        the caller can fall back to other checks.
        """
        lookup_url = self._normalize_youtube_watch_url(url)
        try:
            reply = self.web_session.get(
                "https://www.youtube.com/oembed",
                params={"url": lookup_url, "format": "json"},
                timeout=10,
                allow_redirects=True,
            )
        except requests.exceptions.RequestException as exc:
            return None, f"YouTube oEmbed request error: {exc}"

        # Only 200 and 404 are conclusive; consent/interstitial responses
        # and everything else are left for the HTML checks.
        conclusive = {
            200: (True, "YouTube oEmbed OK"),
            404: (False, "YouTube oEmbed: Not found"),
        }
        status = reply.status_code
        return conclusive.get(status, (None, f"YouTube oEmbed inconclusive ({status})"))
    
    def test_song_links(self, song: Dict[str, Any]) -> List[Tuple[str, str, bool, str]]:
        """Test all links in a song and return results."""
        results = []
        link_fields = self.get_link_fields(song)
        
        for field_name, url in link_fields:
            is_valid, error_msg = self.test_link(url,field_name)
            results.append((field_name, url, is_valid, error_msg))
            
        return results
    
    def run_tests(self):
        """Page through the API, test every song link and report the outcome.

        Pages are fetched starting at page 0 until a fetch fails, the API
        reports a status other than 200, a page has no songs, or the
        1000-page safety limit is passed. Songs whose id is below
        ``self.start_id`` are skipped. Each broken link is appended to the
        error file as soon as it is found, and a summary email is attempted
        at the end whether or not errors were found.

        Sets ``run_start_time``, ``run_end_time``, ``run_duration`` and
        ``total_links_tested`` on the instance.
        """
        start_time = datetime.now()
        self.run_start_time = start_time

        print("Starting song link tests...")
        print("=" * 60)

        page_number = 0
        total_tested = 0
        total_errors = 0

        # Initialize error file
        self.init_error_file()

        while True:
            print(f"\nFetching page {page_number}...")
            api_response = self.get_songs_page(page_number)

            if not api_response:
                print(f"Failed to fetch page {page_number}, stopping.")
                break

            if api_response.get('status') != 200:
                print(f"API returned error status: {api_response.get('status')}")
                break

            songs = api_response.get('data', [])
            if not songs:
                print(f"No more songs found on page {page_number}, stopping.")
                break

            print(f"Found {len(songs)} songs on page {page_number}")

            # Test links for each song
            for song in songs:
                song_id = song.get('id')
                # The id may be missing or arrive as a string; comparing
                # that directly with an int raises TypeError. Coerce when
                # possible, and test the song when the id cannot be
                # compared at all.
                try:
                    before_start = int(song_id) < self.start_id
                except (TypeError, ValueError):
                    before_start = False
                if before_start:
                    print(f"\nSkipping id {song_id}.")
                    continue
                song_title = song.get('title', 'Unknown Title')

                print(f"\nTesting song ID {song_id}: '{song_title}'")

                link_results = self.test_song_links(song)

                for field_name, url, is_valid, error_msg in link_results:
                    total_tested += 1
                    if is_valid:
                        print(f"  ✓ {field_name}: {url} - {error_msg}")
                    else:
                        total_errors += 1
                        # Write error immediately to file
                        self.log_error_to_file(song_id, song_title, field_name, url, error_msg, total_errors)
                        print(f"  ✗ {field_name}: {url} - ERROR: {error_msg}")
                        print(f"    Song ID: {song_id}, Title: '{song_title}'")

                # Small delay to be respectful to servers
                time.sleep(0.5)
            page_number += 1
            # Safety check to prevent infinite loops
            if page_number > 1000:
                print("Safety limit reached (1000 pages), stopping.")
                break

        end_time = datetime.now()
        self.run_end_time = end_time
        duration = end_time - start_time
        self.run_duration = duration
        self.total_links_tested = total_tested

        print("\n" + "=" * 60)
        print("Testing complete!")
        print(f"Total links tested: {total_tested}")
        print(f"Total errors found: {total_errors}")
        print(f"Start time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"End time:   {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"Total time: {duration}")

        if total_errors > 0:
            # total_tested is at least total_errors here, so never zero
            print(f"Error rate: {(total_errors/total_tested)*100:.1f}%")
            print(f"Error details written to: {self.error_file_path}")
        else:
            print("All links are working correctly!")

        # Email the report in both cases. A mail failure is reported but
        # must not turn a completed run into a crash.
        try:
            self.send_error_email(total_errors)
        except Exception as e:
            print(f"Failed to send error email: {e}")
    
    def init_error_file(self) -> None:
        """Create a timestamped error log next to this script and write its header.

        The file is named ``link_errors_<YYYYmmdd_HHMMSS>.txt`` and its
        full path is stored in ``self.error_file_path``.
        """
        import os
        from datetime import datetime

        # The log lives in the same directory as this script
        here = os.path.dirname(os.path.abspath(__file__))
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.error_file_path = os.path.join(here, f"link_errors_{stamp}.txt")

        header_lines = [
            "SONG LINK TESTING ERRORS\n",
            "=" * 50 + "\n",
            f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n",
            "Errors logged as they are found:\n\n",
        ]
        with open(self.error_file_path, 'w', encoding='utf-8') as log:
            log.writelines(header_lines)
    
    def log_error_to_file(self, song_id: int, song_title: str, field_name: str, url: str, error_msg: str, error_number: int) -> None:
        """Append one broken-link record to the error file.

        The record lists the running error number, the song id and title,
        the link field, the URL and the error message, followed by a dashed
        separator and a blank line.
        """
        record = "".join([
            f"ERROR #{error_number}\n",
            f"Song ID: {song_id}\n",
            f"Song Title: {song_title}\n",
            f"Field: {field_name}\n",
            f"URL: {url}\n",
            f"Error: {error_msg}\n",
            "-" * 30 + "\n\n",
        ])
        # Append mode: records accumulate under the header as the run goes on
        with open(self.error_file_path, 'a', encoding='utf-8') as log:
            log.write(record)

    def send_error_email(self, total_errors: int) -> None:
        """Email the run summary with the error log attached.

        Returns without sending when no error file has been initialised,
        when ``smtp_host``, ``smtp_from`` or ``smtp_to`` is empty, or when
        ``smtp_to`` contains no usable address.

        Args:
            total_errors: Number of broken links found; shown in the
                subject and body.

        Raises:
            OSError: If the error file cannot be read or the SMTP
                connection fails.
            smtplib.SMTPException: If the SMTP conversation fails.
        """
        import os  # local import: this file has no module-level ``import os``

        error_file_path = getattr(self, 'error_file_path', None)
        if not error_file_path:
            return
        if not (self.smtp_host and self.smtp_from and self.smtp_to):
            # Missing SMTP configuration; skip emailing
            return

        # Support multiple recipients via comma-separated smtp_to. Parsed
        # first so a recipient-less config exits before any other work.
        to_list = [addr.strip() for addr in str(self.smtp_to).split(",") if addr.strip()]
        if not to_list:
            return

        subject = f"Zamar Link Tester: {total_errors} errors found"
        fqdn = socket.getfqdn()

        # Timing and totals (may be absent if run_tests didn't set them).
        # Compared with None rather than by truthiness: a zero-length
        # duration is falsy but is still a value worth reporting.
        start_time = getattr(self, "run_start_time", None)
        end_time = getattr(self, "run_end_time", None)
        duration = getattr(self, "run_duration", None)
        total_tested = getattr(self, "total_links_tested", None)

        timing_lines = []
        if start_time is not None:
            timing_lines.append(f"Start time: {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        if end_time is not None:
            timing_lines.append(f"End time:   {end_time.strftime('%Y-%m-%d %H:%M:%S')}")
        if duration is not None:
            timing_lines.append(f"Total time: {duration}")
        if total_tested is not None:
            timing_lines.append(f"Total links tested: {total_tested}")
        timing_block = "\n".join(timing_lines) + ("\n" if timing_lines else "")

        body = (
            f"Computer the link tester was run on: {fqdn}\n\n"
            f"The link tester has completed with {total_errors} errors.\n\n"
            f"Please find the attached error log: {error_file_path}\n\n"
            f"{timing_block}"
        )

        # Create the email
        message = MIMEMultipart()
        message['From'] = self.smtp_from
        message['To'] = ", ".join(to_list)
        message['Subject'] = subject
        message.attach(MIMEText(body, 'plain'))

        # Attach the error log as a base64-encoded binary part
        with open(error_file_path, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # Use safe basename and structured header to avoid quoting issues
        part.add_header('Content-Disposition', 'attachment',
                        filename=os.path.basename(error_file_path))
        message.attach(part)

        # Send the email; STARTTLS is negotiated first when TLS is enabled
        with smtplib.SMTP(self.smtp_host, self.smtp_port) as server:
            if self.smtp_use_tls:
                server.ehlo()
                server.starttls(context=ssl.create_default_context())
                server.ehlo()
            if self.smtp_username and self.smtp_password:
                server.login(self.smtp_username, self.smtp_password)
            server.sendmail(self.smtp_from, to_list, message.as_string())

def write_to_file(filename: str, content: str) -> None:
    """Write ``content`` (UTF-8) to ``filename`` beside this script and print the path.

    An existing file with the same name is overwritten.
    """
    import os

    # Resolve the target relative to this script's own directory
    target = os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)

    with open(target, 'w', encoding='utf-8') as out:
        out.write(content)

    print(f"Content written to: {target}")

def main():
    """Parse the command line, build a SongLinkTester and run it."""
    parser = argparse.ArgumentParser(description='Test song links from Zamar API')

    # (flag, add_argument keyword arguments), registered in display order
    option_specs = [
        # API settings
        ('--api-url', dict(type=str,
                           default='https://zamarapp.com/zamar/backend/web/song/get',
                           help='API endpoint URL')),
        ('--start-id', dict(type=int, default=0,
                            help='Start testing from this song ID')),
        # SMTP settings
        ('--smtp-host', dict(type=str, default='send.one.com',
                             help='SMTP server hostname')),
        ('--smtp-port', dict(type=int, default=587,
                             help='SMTP server port')),
        ('--smtp-username', dict(type=str, default='thomas@dilts.se',
                                 help='SMTP username')),
        ('--smtp-password', dict(type=str, default='ThePasswordHere',
                                 help='SMTP password')),
        ('--smtp-from', dict(type=str, default='thomas@dilts.se',
                             help='Email sender address')),
        ('--smtp-to', dict(type=str, default='thomas@dilts.se,malek_bakary@hotmail.com',
                           help='Email recipient address')),
        ('--smtp-no-tls', dict(action='store_true',
                               help='Disable TLS for SMTP (default: TLS enabled)')),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)

    args = parser.parse_args()

    tester = SongLinkTester(
        api_url=args.api_url,
        smtp_host=args.smtp_host,
        smtp_port=args.smtp_port,
        smtp_username=args.smtp_username,
        smtp_password=args.smtp_password,
        smtp_from=args.smtp_from,
        smtp_to=args.smtp_to,
        # The flag switches TLS off; the tester wants "use TLS"
        smtp_use_tls=not args.smtp_no_tls,
    )
    # start_id is not a constructor parameter, so it is set afterwards
    tester.start_id = args.start_id
    tester.run_tests()


if __name__ == "__main__":
    main()