import sys
import re


def get_balanced_div(html, start_idx):
    """Return ``(start_idx, end)`` for the ``<div>`` element opening at *start_idx*.

    Delegates to :func:`get_balanced_tag`; ``end`` is ``-1`` when no matching
    ``</div>`` is found.
    """
    return get_balanced_tag(html, start_idx, 'div')


def get_balanced_ul(html, start_idx):
    """Return ``(start_idx, end)`` for the ``<ul>`` element opening at *start_idx*.

    Delegates to :func:`get_balanced_tag`; ``end`` is ``-1`` when no matching
    ``</ul>`` is found.
    """
    return get_balanced_tag(html, start_idx, 'ul')


def get_balanced_tag(html, start_idx, tag_name):
    """Find the end of the element whose opening tag starts at *start_idx*.

    Scans forward from *start_idx*, counting ``<tag_name`` openings and
    ``</tag_name>`` closings, and stops at the closing tag that brings the
    nesting depth back to zero.

    Args:
        html: The markup to scan.
        start_idx: Index at which scanning starts; expected to be the index
            of the opening tag of the element of interest.
        tag_name: Element name without angle brackets, e.g. ``'div'``.

    Returns:
        ``(start_idx, end)`` where ``end`` is the index just past the
        matching closing tag, or ``-1`` if the element is never balanced.

    Note:
        Matching is plain substring search, so ``<div`` also matches any
        longer tag name that begins with ``div``, and closing tags written
        with inner whitespace (``</div >``) are not recognised.
    """
    open_token = f'<{tag_name}'
    close_token = f'</{tag_name}>'
    depth = 0
    i = start_idx
    while i < len(html):
        next_open = html.find(open_token, i)
        next_close = html.find(close_token, i)
        if next_open == -1 and next_close == -1:
            break
        if next_open != -1 and (next_close == -1 or next_open < next_close):
            depth += 1
            i = next_open + len(open_token)
        else:
            depth -= 1
            i = next_close + len(close_token)
            if depth == 0:
                return start_idx, i
            if depth < 0:
                # A closing tag appeared before any opening tag: the scan
                # did not start on an element, so there is nothing to balance.
                break
    return start_idx, -1


def extract_widget(html, data_module_name):
    """Cut the first ``<div>`` carrying ``data-module="<name>"`` out of *html*.

    Args:
        html: The markup to search.
        data_module_name: Value of the ``data-module`` attribute to look for.
            It is matched literally (regex metacharacters are escaped).

    Returns:
        ``(widget, remaining_html)``. When no such div exists, or the div is
        never closed, returns ``("", html)`` with *html* unchanged.
    """
    # Match on the exact attribute value to avoid false positives.
    pattern = rf'<div[^>]*data-module="{re.escape(data_module_name)}"[^>]*>'
    match = re.search(pattern, html)
    if match is None:
        return "", html
    start, end = get_balanced_div(html, match.start())
    if end == -1:
        # Slicing with -1 would silently drop almost the whole document.
        return "", html
    return html[start:end], html[:start] + html[end:]


def extract_by_comment(html, comment_str):
    """Cut the ``<div>`` that follows the marker *comment_str* out of *html*.

    NOTE(review): the ``'<div'`` needle and the choice to include the marker
    comment in the extracted text are reconstructed from the variable names
    (``c_start``, ``div_start``); confirm against the intended behaviour.

    Args:
        html: The markup to search.
        comment_str: Literal marker text (e.g. an HTML comment) that precedes
            the element to extract.

    Returns:
        ``(widget, remaining_html)`` where ``widget`` spans from the marker to
        the end of the following div. Returns ``("", html)`` when the marker
        is absent, no div follows it, or the div is never closed.
    """
    c_start = html.find(comment_str)
    if c_start == -1:
        return "", html
    div_start = html.find('<div', c_start + len(comment_str))
    if div_start == -1:
        return "", html
    _, end = get_balanced_div(html, div_start)
    if end == -1:
        return "", html
    return html[c_start:end], html[:c_start] + html[end:]


def extract_ul_nav(html):
    """Cut the navigation ``<ul>`` (plus a closely preceding comment) out of *html*.

    NOTE(review): the list is located as the first ``<ul ...>`` tag whose
    attributes contain the word ``nav``, and the preceding comment is located
    by its ``<!--`` opener; both needles are reconstructed and should be
    confirmed against the target template.

    Returns:
        ``(nav_markup, remaining_html)``. If a comment opener lies fewer than
        100 characters before the list it is removed together with the list.
        Returns ``("", html)`` when no such list exists or it is never closed.
    """
    nav_open = re.search(r'<ul\b[^>]*\bnav\b', html)
    if nav_open is None:
        return "", html
    start = nav_open.start()
    c_start = html.rfind('<!--', 0, start)
    # Only treat the comment as belonging to the list when it sits right
    # in front of it.
    if c_start != -1 and (start - c_start < 100):
        actual_start = c_start
    else:
        actual_start = start
    _, end = get_balanced_ul(html, start)
    if end == -1:
        return "", html
    return html[actual_start:end], html[:actual_start] + html[end:]


# Read the template only when executed as a script, so the helpers above can
# be imported without the template on disk. Run as a script, ``html`` is
# bound at module level for the statements that follow.
if __name__ == "__main__":
    with open('app/modules/sag/templates/detail.html', encoding='utf-8') as template_file:
        html = template_file.read()

# Extraction
process # 1. Quick Info Bar # The user wants "Status" in right side, but let's keep Quick Info over full width or right? # We will just leave it. # 2. Assignment assignment, html = extract_by_comment(html, '') # 3. Widgets customers, html = extract_widget(html, "customers") contacts, html = extract_widget(html, "contacts") hardware, html = extract_widget(html, "hardware") locations, html = extract_widget(html, "locations") todo, html = extract_widget(html, "todo-steps") wiki, html = extract_widget(html, "wiki") # 4. Reminders - Currently it's a whole tab-pane. # Let's extract the reminders tab-pane inner content or the whole div pane. reminders_tab_pane, html = extract_widget(html, "reminders") # Clean up reminders to make it just a widget (remove tab-pane classes, maybe add card class if not present) reminders_content = reminders_tab_pane.replace('class="tab-pane fade"', 'class="card h-100 right-module-card pt-1"').replace('id="reminders" role="tabpanel" tabindex="0"', '') # Also remove reminders from the nav tab! nav_match = re.search(r'

\s*]*data-bs-target="#reminders"[^>]*>.*?Påmindelser\s*\s*

', html, flags=re.DOTALL) if nav_match: html = html[:nav_match.start()] + html[nav_match.end():] # 5. Sagsbeskrivelse - "ROW 1: Main Info" sagsbeskrivelse, html = extract_by_comment(html, '') # 6. Extract the whole Tabs Navigation and Tabs Content to manipulate them nav_tabs, html = extract_ul_nav(html) tab_content_start = html.find('

inside the #details tab. # We already extracted widgets, so the right column should be mostly empty. # Let's just remove the case-left-column / case-right-column wrapping, and replace it with just the remaining flow. # It's inside: #

#

#

# ... #

#

#

#

#

#

#

# A simple string replacement to remove those wrappers: tab_content = tab_content.replace('

\n

', '') # And the closing divs for them: # We have to be careful. Instead of regexing html parsing, we can just replace the left/right column structure. # Since it's easier, I'll just use string manipulation for exactly what it says. left_col_str = '

' idx_l = tab_content.find(left_col_str) if idx_l != -1: tab_content = tab_content[:idx_l] + tab_content[idx_l+len(left_col_str):] idx_row = tab_content.rfind('

', 0, idx_l) if idx_row != -1: tab_content = tab_content[:idx_row] + tab_content[idx_row+len('

'):] right_col_str = '

' idx_r = tab_content.find(right_col_str) if idx_r != -1: # find the end of this div and remove the whole thing r_start, r_end = get_balanced_div(tab_content, idx_r) tab_content = tab_content[:idx_r] + tab_content[r_end:] # Now tab_content has two extra

at the end of the details tab? Yes. We can just leave them if they don't break much? # Wait, unclosed/unopened divs will break the layout. # Let's write the new body! # Find the marker where we removed Tabs and Tab content. insertion_point = html.find('

', html.find('')) # wait, no. # Best insertion point is after the Quick Info Bar. quick_info, html = extract_by_comment(html, '') # Re-assemble the layout new_grid = f""" {quick_info}

Kontekst & Stamdata

{customers} {contacts} {hardware} {locations} {wiki}

{sagsbeskrivelse}

{nav_tabs}

{tab_content}

Opsummering & Opgaver

{assignment} {todo} {reminders_content}

""" # Let's insert where Quick Info Bar was. # To find it, let's just insert after # Wait, actually let's reconstruct the content inside

...

# The rest of html (like modals etc.) should follow. container_start = html.find('

', container_start) # find where top bar ends top_bar_end_div = get_balanced_div(html, top_bar_start - 30) # wait, top bar is just a div... # Let's just find the exact text # Alternatively, just string replace replacing an arbitrary known stable block. # The html already had Assignment, tabs, quick info pulled out. # So we can just put `new_grid` exactly where Quick Info Bar was pulled out! pass """ pass