# TODO: Should I do something similar for SlidesLive? Ask Matt.

import os
import subprocess
import xml.etree.ElementTree as ET

# Path prefix that is prepended to every selected volume file name.
anthology_data = "/home/anthologizer/acl-anthology/data/xml/"

# Volume files to process; uncomment an entry to include it in the run.
selected_volumes = [
    # "D15.xml",
    # "D16.xml",
    # "D17.xml",
    # "D18.xml",
    # "N15.xml",
    # "N18.xml",
    "N19.xml",
    # "P17.xml",
    # "P18.xml",
    # "P19.xml",
    # "Q14.xml",
    # "Q15.xml",
    # "Q16.xml",
    # "Q17.xml",
    # "Q18.xml",
    # "Q19.xml",
    # "W18.xml",
]

# Full paths of the volume files, in the order listed above.
xml_files = [f"{anthology_data}{volume}" for volume in selected_volumes]

# For every paper in the selected volumes that has a Vimeo <video href=...>,
# download the video with youtube-dl into videos/<url text>.mp4.
for xml_file in xml_files:
    root = ET.parse(xml_file).getroot()

    # Loop over each paper element
    for paper in root.findall('.//paper'):
        # Only the first <video> child of the paper is inspected.
        video_elem = paper.find('video')
        if video_elem is None:
            continue
        video_href = video_elem.get('href')
        if video_href is None:
            continue
        print(xml_file, video_href)

        url_elem = paper.find('url')
        if url_elem is None:
            continue
        # The <url> text names the output file. Skip papers where it is
        # missing or blank rather than writing to "videos/None.mp4", and strip
        # surrounding whitespace so it cannot leak into the file name.
        paper_url = (url_elem.text or "").strip()
        if not paper_url:
            continue

        if "vimeo" not in video_href:
            continue

        # Pass the arguments as a list (no shell) so that characters such as
        # "&", "?", quotes or spaces in the XML-supplied values cannot change
        # the command that is executed.
        cmd = [
            "youtube-dl",
            "-f", "http-1080p/http-720p/http-540p/http-360p/http-240p",
            "--output", f"videos/{paper_url}.mp4",
            video_href,
        ]
        print(" ".join(cmd))

        # Best effort: report a failed download and move on to the next paper.
        try:
            result = subprocess.run(cmd, check=False)
        except OSError as err:
            print(f"could not run youtube-dl: {err}")
        else:
            if result.returncode != 0:
                print(f"youtube-dl exited with status {result.returncode}")
        print("\n\n\n")