', i)
if next_open == -1 and next_close == -1:
break
if next_open != -1 and (next_open < next_close or next_close == -1):
tag_count += 1
i = next_open + 4
else:
tag_count -= 1
i = next_close + 6
if tag_count == 0:
return start_idx, i
return start_idx, -1
# Load the template text once. The context manager closes the file handle
# deterministically, and the explicit encoding keeps decoding independent of
# the platform's locale default.
with open('app/modules/sag/templates/detail.html', encoding='utf-8') as template_file:
    html = template_file.read()
def extract_widget(html, data_module_name):
    """Cut the ``<div>`` whose ``data-module`` attribute equals a name out of *html*.

    Args:
        html: Markup to search.
        data_module_name: Value of the ``data-module`` attribute to look for;
            it is matched literally, not as a regular expression.

    Returns:
        A ``(widget, remaining_html)`` tuple. ``widget`` is the text of the
        matched element and ``remaining_html`` is *html* with that text
        removed. If no opening tag matches, or ``get_balanced_div`` cannot
        find the element's end, the result is ``("", html)`` with *html*
        unchanged.
    """
    # Escape the name so characters such as "." or "+" cannot alter the regex.
    pattern = f'<div[^>]*data-module="{re.escape(data_module_name)}"[^>]*>'
    match = re.search(pattern, html)
    if not match:
        return "", html

    start, end = get_balanced_div(html, match.start())
    # get_balanced_div reports an unbalanced element as end == -1. Using -1 as
    # a slice bound would silently remove almost the whole document.
    if end == -1:
        return "", html

    return html[start:end], html[:start] + html[end:]
# Extract the assignment card.
# It has no data-module attribute, so it is located via the HTML comment that
# precedes it (the marker text itself is missing from this note).
# NOTE(review): in the source text the end of extract_by_comment and the
# header of extract_by_id were lost (the '<div' literals were stripped). The
# statements between the '<div' lookup and the id pattern are restored here to
# mirror extract_widget -- confirm against the original script.
def extract_by_comment(html, comment_str):
    """Cut out the first ``<div>`` element that follows *comment_str* in *html*.

    Args:
        html: Markup to search.
        comment_str: Literal marker text (e.g. an HTML comment) that precedes
            the wanted element.

    Returns:
        A ``(widget, remaining_html)`` tuple. ``widget`` is the text of the
        element and ``remaining_html`` is *html* with that text removed. If
        the marker is absent, no ``<div`` follows it, or ``get_balanced_div``
        cannot find the element's end, the result is ``("", html)`` with
        *html* unchanged.
    """
    c_start = html.find(comment_str)
    if c_start == -1:
        return "", html

    div_start = html.find('<div', c_start)
    if div_start == -1:
        return "", html

    start, end = get_balanced_div(html, div_start)
    # end == -1 means the element was never closed; slicing with it would
    # remove almost the whole document.
    if end == -1:
        return "", html

    return html[start:end], html[:start] + html[end:]


def extract_by_id(html, id_name):
    """Cut the ``<div>`` whose ``id`` attribute equals *id_name* out of *html*.

    Args:
        html: Markup to search.
        id_name: Value of the ``id`` attribute to look for; it is matched
            literally, not as a regular expression.

    Returns:
        A ``(widget, remaining_html)`` tuple with the same conventions as
        ``extract_by_comment``; ``("", html)`` when nothing can be extracted.
    """
    # Escape the id so regex metacharacters in it are matched literally.
    pattern = f'<div[^>]*id="{re.escape(id_name)}"[^>]*>'
    match = re.search(pattern, html)
    if not match:
        return "", html

    start, end = get_balanced_div(html, match.start())
    # Same unbalanced-element guard as in extract_by_comment.
    if end == -1:
        return "", html

    return html[start:end], html[:start] + html[end:]
# Smoke-test the extractors against the loaded template. Each call also
# returns the template with the widget removed; that value is discarded here,
# so every extraction runs against the same unmodified `html`.
# NOTE(review): the marker passed to extract_by_comment is an empty string,
# which str.find() locates at index 0 -- confirm the intended comment text.
ass, _ = extract_by_comment(html, '')
print("Assignment widget len: " + str(len(ass)))

cust, _ = extract_widget(html, "customers")
print("Customer widget len: " + str(len(cust)))

rem, _ = extract_widget(html, "reminders")
print("Reminders widget len: " + str(len(rem)))