import sys
import os
from bs4 import BeautifulSoup

# Name of the docs build currently running, read from the environment (None
# when DOCS_BUILD_CONTEXT is unset). When it is set, files whose
# 'docs-build-context' meta tag names a different context are skipped.
BUILD_CONTEXT = os.getenv('DOCS_BUILD_CONTEXT')

def _rows_of_table(table):
    """Return the ``<tr>`` rows owned by *table*, excluding rows of nested tables."""
    return [row for row in table.find_all('tr') if row.find_parent('table') is table]


def _cells_of_row(row):
    """Return the ``<td>``/``<th>`` cells that are direct children of *row*."""
    return row.find_all(['td', 'th'], recursive=False)


def _find_column_index(rows, column_name):
    """Return the in-row position of the first ``<th>`` whose text is *column_name*.

    The position is counted among all cells (``<td>`` and ``<th>``) of the row
    that contains the header, because that is the index later used to pick the
    cell to delete in every row. Returns None when no header matches.
    """
    for row in rows:
        for index, cell in enumerate(_cells_of_row(row)):
            if cell.name == 'th' and cell.get_text(strip=True) == column_name:
                return index
    return None


def _remove_column(table, column_name, drop_empty_rows):
    """Delete the column headed *column_name* from *table*, if it has one.

    Cells are matched purely by position within their row; ``colspan`` and
    ``rowspan`` attributes are not taken into account. When *drop_empty_rows*
    is true, rows left without any cell are removed as well.
    """
    rows = _rows_of_table(table)
    column_index = _find_column_index(rows, column_name)
    if column_index is None:
        return

    for row in rows:
        cells = _cells_of_row(row)
        if column_index < len(cells):
            cells[column_index].decompose()

    if drop_empty_rows:
        for row in rows:
            if not _cells_of_row(row):
                row.decompose()


def remove_column_from_tables(file_path: str) -> None:
    """Remove the columns named in an HTML file's meta tags from its tables.

    The file is parsed with the ``lxml`` parser and rewritten in place. It is
    driven by ``<meta>`` tags found in the document:

    * ``docs-build-context``: if the module-level ``BUILD_CONTEXT`` is set and
      this tag exists with a different ``content``, the file is skipped.
    * ``remove-column-from-html-table``: comma-separated header names of the
      columns to delete. Names are stripped of surrounding whitespace and
      empty entries are ignored.
    * ``remove-column-emptied-row``: if present, rows that end up with no
      cells after the removal are deleted too.

    Args:
        file_path: Path of the UTF-8 encoded HTML file to process.

    Raises:
        SystemExit: With status 1 when the file does not name any column to
            remove.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file, 'lxml')

    if BUILD_CONTEXT:
        context_tag = soup.find('meta', attrs={'name': 'docs-build-context'})
        if context_tag is not None and context_tag.get('content') != BUILD_CONTEXT:
            print("Not in", context_tag.get('content'), "- skipping", file_path)
            return

    # Find the columns to delete. A meta tag without a content attribute is
    # treated the same as a missing tag instead of crashing on None.split().
    column_tag = soup.find('meta', attrs={'name': 'remove-column-from-html-table'})
    raw_names = (column_tag.get('content') or '') if column_tag is not None else ''
    # Header text is compared after stripping, so the requested names must be
    # stripped too or "A, B" would never match column "B".
    column_names = [name.strip() for name in raw_names.split(',') if name.strip()]
    if not column_names:
        print(f"No column to remove specified in '{file_path}' header")
        sys.exit(1)

    # Remove rows emptied by the column removal?
    row_tag = soup.find('meta', attrs={'name': 'remove-column-emptied-row'})
    drop_empty_rows = row_tag is not None

    tables = soup.find_all('table')
    for column_name in column_names:
        print(f"Removing column '{column_name}' from '{file_path}'")
        for table in tables:
            _remove_column(table, column_name, drop_empty_rows)

    # Write once, after every column has been handled. As before, a file that
    # contains no table at all is left untouched on disk.
    if tables:
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(str(soup))

    if drop_empty_rows:
        print("... removed rows made empty by column removal")

# Script entry point: every command-line argument is the path of an HTML file
# to process in place.
if __name__ == '__main__':
    html_files = sys.argv[1:]

    # Nothing to do without at least one file: show the usage and exit cleanly.
    if not html_files:
        print("Usage: python remove_column.py <html_file> <html_file> ...")
        sys.exit(0)

    for path in html_files:
        remove_column_from_tables(path)