"""Remove named columns from the HTML tables of one or more HTML files.

Each file declares what to remove through ``<meta>`` tags in its own markup:

* ``remove-column-from-html-table`` -- comma-separated header texts of the
  columns to delete (required).
* ``remove-column-emptied-row`` -- when present, rows left without any cell
  after the removal are deleted too.
* ``docs-build-context`` -- when present and the ``DOCS_BUILD_CONTEXT``
  environment variable is set to a different value, the file is skipped.

Files are modified in place.
"""
import os
import sys

from bs4 import BeautifulSoup

# Value of the DOCS_BUILD_CONTEXT environment variable (None when unset).
# When set, files whose 'docs-build-context' meta tag names a different
# context are left untouched.
BUILD_CONTEXT = os.environ.get('DOCS_BUILD_CONTEXT')


def _parse_column_names(raw):
    """Split a comma-separated meta value into trimmed, non-empty names.

    Trimming is required because header cells are compared with their
    whitespace stripped, so an untrimmed name such as " B" could never match.
    Empty entries (e.g. from a trailing comma) are dropped because an empty
    name would match every blank header cell.
    """
    if not raw:
        return []
    stripped = (name.strip() for name in raw.split(","))
    return [name for name in stripped if name]


def _remove_column(table, column_name, remove_empty_rows):
    """Delete the column headed by *column_name* from a single table.

    The column index is the position of the first matching ``<th>`` among all
    ``<th>`` elements of the table; the cell at that index is then removed
    from every row. Tables without a matching header are left unchanged.
    """
    # NOTE(review): the index counts every <th> in the table and ignores
    # colspan/rowspan, so tables with row-header <th> cells or spanning cells
    # may not line up -- confirm such tables are not processed by this script.
    column_index = None
    for index, header in enumerate(table.find_all('th')):
        if header.get_text(strip=True) == column_name:
            column_index = index
            break
    if column_index is None:
        return

    rows = table.find_all('tr')
    for row in rows:
        cells = row.find_all(['td', 'th'])
        if column_index < len(cells):
            cells[column_index].extract()

    # Clean up rows that have become empty.
    if remove_empty_rows:
        for row in rows:
            if not row.find_all(['td', 'th']):
                row.decompose()


def remove_column_from_tables(file_path: str) -> None:
    """Remove the columns requested by *file_path*'s meta tags, in place.

    The file is parsed once, every requested column is removed from every
    table that has a matching header, and the document is written back once
    (even if no table matched).

    Args:
        file_path: Path of the UTF-8 encoded HTML file to rewrite.

    Raises:
        SystemExit: With status 1 when the file does not name any column to
            remove in a 'remove-column-from-html-table' meta tag.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        soup = BeautifulSoup(file, 'lxml')

    # Skip files that belong to a different build context.
    if BUILD_CONTEXT:
        context_tag = soup.find('meta', attrs={'name': 'docs-build-context'})
        if context_tag is not None:
            file_context = context_tag.get('content')
            if file_context != BUILD_CONTEXT:
                print("Not in", file_context, "- skipping", file_path)
                return

    # Find the columns to delete. A tag without a usable 'content' value is
    # treated the same as a missing tag instead of failing later on.
    column_tag = soup.find(
        'meta', attrs={'name': 'remove-column-from-html-table'})
    column_names = []
    if column_tag is not None:
        column_names = _parse_column_names(column_tag.get('content'))
    if not column_names:
        print(f"No column to remove specified in '{file_path}' header")
        sys.exit(1)

    # The mere presence of this tag requests removal of emptied rows.
    row_tag = soup.find('meta', attrs={'name': 'remove-column-emptied-row'})
    remove_empty_rows = row_tag is not None

    tables = soup.find_all('table')
    for column_name in column_names:
        print(f"Removing column '{column_name}' from '{file_path}'")
        for table in tables:
            _remove_column(table, column_name, remove_empty_rows)

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(str(soup))

    if remove_empty_rows:
        print("... removed rows made empty by column removal")


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print("Usage: python remove_column.py ...")
        sys.exit(0)

    for html_file in sys.argv[1:]:
        remove_column_from_tables(html_file)