37a6f5c996
Use bs4 to remove empty table rows from tagged html output files. This replaces shell script that had a column count limitation. - Remove arg count check - not needed Change-Id: I09bd4cfd4360dbb04638ed3d6a16899bc1f4f62d Signed-off-by: Ron Stone <ronald.stone@windriver.com>
28 lines
842 B
Python
28 lines
842 B
Python
import sys
|
|
from bs4 import BeautifulSoup
|
|
|
|
def remove_empty_rows_from_tables(file_path):
    """Strip text-less rows from every table of an HTML file, in place.

    The file at ``file_path`` is read as UTF-8 and parsed with
    BeautifulSoup's ``lxml`` parser. Each ``<tr>`` found under a ``<table>``
    whose text is empty after whitespace stripping is deleted from the tree.
    The serialized tree is then written back over the same path; this write
    is unconditional, so the file is rewritten even when no row was removed.

    Note that emptiness is judged on text only: a row that holds nothing but
    non-text markup yields no text and is removed as well.

    Args:
        file_path: Path of the HTML file to clean. It is overwritten.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        document = BeautifulSoup(source, 'lxml')

    for table in document.find_all('table'):
        for table_row in table.find_all('tr'):
            # Rows that still carry text after stripping are kept untouched.
            if table_row.get_text(strip=True):
                continue
            # decompose() detaches the row from the tree and destroys it.
            table_row.decompose()

    # Overwrite the input file with the cleaned document.
    with open(file_path, 'w', encoding='utf-8') as target:
        target.write(str(document))
|
|
|
|
if __name__ == '__main__':
    # argv[0] is the script itself; every remaining argument is taken as the
    # path of an HTML file to clean in place. With no arguments the loop body
    # never runs and the script exits silently.
    html_files = sys.argv[1:]
    for html_file in html_files:
        remove_empty_rows_from_tables(html_file)
        print(f"Processed {html_file}")
|
|
|