67 lines
2.1 KiB
Python
67 lines
2.1 KiB
Python
import sys
|
|
import re
|
|
|
|
# Matches an opening ``<div`` only when the tag name really ends there (next
# character is whitespace, ``/`` or ``>``), so tags such as ``<divider>`` are
# not miscounted, and a closing ``</div>`` with optional whitespace before ``>``.
_DIV_TAG_RE = re.compile(r'<div(?=[\s/>])|</div\s*>')


def get_balanced_div(html: str, start_idx: int):
    """Find where the ``<div>`` element starting at ``start_idx`` ends.

    Scans ``html`` from ``start_idx`` onwards, counting opening and closing
    ``div`` tags, and stops at the closing tag that brings the nesting depth
    back to zero.

    Args:
        html: The markup to scan.
        start_idx: Index at which scanning begins; callers pass the index of
            the opening ``<div`` of the element they want.

    Returns:
        A ``(start_idx, end)`` tuple. ``end`` is the index just past the
        matching ``</div>``, so ``html[start_idx:end]`` is the whole element.
        ``end`` is ``-1`` when no balanced closing tag exists (including when
        a closing tag is encountered before any opening tag).
    """
    depth = 0
    for tag in _DIV_TAG_RE.finditer(html, start_idx):
        if tag.group().startswith('</'):
            depth -= 1
            if depth == 0:
                return start_idx, tag.end()
            if depth < 0:
                # A closing tag with nothing open: there is no element to
                # balance from this starting point.
                break
        else:
            depth += 1
    return start_idx, -1
|
|
|
|
# Load the template that the extraction helpers below operate on.
# The context manager closes the file handle deterministically, and the
# explicit encoding keeps the read independent of the platform's default locale.
with open('app/modules/sag/templates/detail.html', encoding='utf-8') as template_file:
    html = template_file.read()
|
|
|
|
def extract_widget(html, data_module_name):
    """Cut the first ``<div>`` carrying ``data-module="<name>"`` out of ``html``.

    Args:
        html: The markup to search.
        data_module_name: Literal value of the ``data-module`` attribute. It
            is escaped before being placed in the regex, so characters such
            as ``.`` match only themselves. Only double-quoted attribute
            values are recognised.

    Returns:
        A ``(widget, remaining_html)`` tuple. ``widget`` is the full element
        including nested divs, and ``remaining_html`` is ``html`` with that
        element removed. When no such div exists, or its closing tag cannot
        be balanced, the result is ``("", html)`` with ``html`` unchanged.
    """
    pattern = f'<div[^>]*data-module="{re.escape(data_module_name)}"[^>]*>'
    match = re.search(pattern, html)
    if not match:
        return "", html
    start, end = get_balanced_div(html, match.start())
    if end == -1:
        # get_balanced_div signals "no balanced close" with -1; slicing with
        # it would silently cut almost the whole rest of the document.
        return "", html
    return html[start:end], html[:start] + html[end:]
|
|
|
|
# Extract the assignment card.
# It has no data-module attribute, so it is located through the
# `<!-- Assignment Card -->` comment that precedes it.
|
|
def extract_by_comment(html, comment_str):
    """Cut a marker comment and the first ``<div>`` after it out of ``html``.

    Args:
        html: The markup to search.
        comment_str: Exact text of the marker (for example an HTML comment)
            that precedes the wanted element.

    Returns:
        A ``(widget, remaining_html)`` tuple. ``widget`` runs from the start
        of ``comment_str`` to the end of the first ``<div>`` element found
        after it, and ``remaining_html`` is ``html`` with that span removed.
        When the marker is absent, no ``<div`` follows it, or the div's
        closing tag cannot be balanced, the result is ``("", html)`` with
        ``html`` unchanged.
    """
    c_start = html.find(comment_str)
    if c_start == -1:
        return "", html
    div_start = html.find('<div', c_start)
    if div_start == -1:
        return "", html
    _, end = get_balanced_div(html, div_start)
    if end == -1:
        # get_balanced_div signals "no balanced close" with -1; slicing with
        # it would silently cut almost the whole rest of the document.
        return "", html
    # Slice from the comment, not the div, so the marker leaves with the widget.
    return html[c_start:end], html[:c_start] + html[end:]
|
|
|
|
def extract_block_by_id(html, id_name):
    """Cut the first ``<div>`` carrying ``id="<id_name>"`` out of ``html``.

    Args:
        html: The markup to search.
        id_name: Literal value of the ``id`` attribute. It is escaped before
            being placed in the regex, so characters such as ``.`` match only
            themselves. Only double-quoted attribute values are recognised.

    Returns:
        A ``(widget, remaining_html)`` tuple. ``widget`` is the full element
        including nested divs, and ``remaining_html`` is ``html`` with that
        element removed. When no such div exists, or its closing tag cannot
        be balanced, the result is ``("", html)`` with ``html`` unchanged.
    """
    pattern = f'<div[^>]*id="{re.escape(id_name)}"[^>]*>'
    match = re.search(pattern, html)
    if not match:
        return "", html
    start, end = get_balanced_div(html, match.start())
    if end == -1:
        # get_balanced_div signals "no balanced close" with -1; slicing with
        # it would silently cut almost the whole rest of the document.
        return "", html
    return html[start:end], html[:start] + html[end:]
|
|
|
|
# Smoke-test the extractors against the loaded template.
# Every call is given the original ``html``; the stripped remainder that each
# extractor returns is discarded, so the extractions do not affect one another.

# Assignment card: located through its marker comment.
ass, _ = extract_by_comment(html, comment_str='<!-- Assignment Card -->')
print(f"Assignment widget len: {len(ass)}")

# Customers widget: located through its data-module attribute.
cust, _ = extract_widget(html, data_module_name="customers")
print(f"Customer widget len: {len(cust)}")

# Reminders widget: located through its data-module attribute.
rem, _ = extract_widget(html, data_module_name="reminders")
print(f"Reminders widget len: {len(rem)}")
|