# Filename: parse_dsvdc_log_to_json.py
import json
import re
import sys

# Each test case in the log sits between these two markers; re.DOTALL lets
# '.' span the newlines inside a case.
_CASE_PATTERN = re.compile(
    r"--- START DSVDC ---(.*?)--- END DSVDC ---", re.DOTALL
)

# Prefixes of structural (non-data) lines. Data readers never consume a line
# starting with one of these, so a short or missing data section cannot
# swallow the header that follows it.
_HEADER_PREFIXES = ("INPUT", "OUTPUT", "---")

# Labels (text before the first ':') of the integer-valued input lines.
_SCALAR_INPUT_LABELS = ("INPUT n", "INPUT p", "INPUT job")

# Placeholder line the log prints instead of values for 's'.
_S_EMPTY_TEXT = "s is empty or m is zero"

# header line -> (output key, "not computed" placeholder, input giving row count)
_MATRIX_OUTPUTS = {
    "OUTPUT u:": ("u", "Not computed (wantu is false)", "n"),
    "OUTPUT v:": ("v", "Not computed (wantv is false)", "p"),
}


def _parse_floats(line):
    """Convert a whitespace-separated line of numbers to a list of floats."""
    return [float(token) for token in line.split()]


def _read_vector(lines, idx):
    """Read a one-line vector at ``lines[idx]``.

    Returns ``(values, next_idx)``. If ``idx`` is past the end or points at a
    structural line, returns ``([], idx)`` without consuming anything.
    """
    if idx < len(lines) and not lines[idx].startswith(_HEADER_PREFIXES):
        return _parse_floats(lines[idx]), idx + 1
    return [], idx


def _read_matrix(lines, idx, max_rows):
    """Read up to ``max_rows`` matrix rows starting at ``lines[idx]``.

    Stops early at the end of the block or at a structural line. Returns
    ``(rows, next_idx)``; a non-positive ``max_rows`` yields ``([], idx)``.
    """
    rows = []
    while (
        len(rows) < max_rows
        and idx < len(lines)
        and not lines[idx].startswith(_HEADER_PREFIXES)
    ):
        rows.append(_parse_floats(lines[idx]))
        idx += 1
    return rows, idx


def _parse_case(case_text):
    """Parse the text between one START/END marker pair into a case dict."""
    lines = [ln.strip() for ln in case_text.split("\n") if ln.strip()]
    inputs = {}
    outputs = {}

    idx = 0
    while idx < len(lines):
        line = lines[idx]
        idx += 1  # idx now points at the line after the one being handled
        label, colon, value = line.partition(":")

        if colon and label in _SCALAR_INPUT_LABELS:
            # "INPUT n: 3" -> inputs["n"] = 3 (int() tolerates the padding)
            inputs[label.split()[1]] = int(value)
        elif line == "INPUT x:":
            # x has n rows; n must have appeared earlier in the block,
            # otherwise no rows are read.
            inputs["x"], idx = _read_matrix(lines, idx, inputs.get("n", 0))
        elif colon and label == "OUTPUT info":
            outputs["info"] = int(value)
        elif line == "OUTPUT s:":
            if idx < len(lines) and lines[idx] == _S_EMPTY_TEXT:
                outputs["s"] = None
                idx += 1
            else:
                # Real values (or nothing at all): parse like 'e' instead of
                # silently discarding the following line.
                outputs["s"], idx = _read_vector(lines, idx)
        elif line == "OUTPUT e:":
            outputs["e"], idx = _read_vector(lines, idx)
        elif line in _MATRIX_OUTPUTS:
            key, placeholder, dim_name = _MATRIX_OUTPUTS[line]
            if idx < len(lines) and lines[idx] == placeholder:
                outputs[key] = None
                idx += 1
            else:
                # u has n rows, v has p rows.
                outputs[key], idx = _read_matrix(
                    lines, idx, inputs.get(dim_name, 0)
                )
        # Any other line (e.g. "--- OUTPUTS DSVDC ---") is a marker or
        # unrecognised text and is skipped.

    return {"inputs": inputs, "outputs": outputs}


def parse_dsvdc_log_content(log_text):
    """
    Parse DSVDC log text and return a list of test case dictionaries.

    Every span between "--- START DSVDC ---" and "--- END DSVDC ---" becomes
    one dict of the form ``{"inputs": {...}, "outputs": {...}}``:

    * inputs: ``n``, ``p``, ``job`` (ints) and ``x`` (list of float rows).
    * outputs: ``info`` (int); ``s`` and ``e`` (list of floats, ``[]`` when no
      data line follows; ``s`` is ``None`` when the log prints its "empty"
      placeholder); ``u`` and ``v`` (list of float rows, or ``None`` when the
      log says they were not computed).

    Only keys whose header lines appear in the log are present. Cases with
    neither inputs nor outputs are dropped.

    Raises:
        ValueError: if a scalar or data line contains a non-numeric token.
    """
    test_cases = []
    for match in _CASE_PATTERN.finditer(log_text):
        case = _parse_case(match.group(1))
        if case["inputs"] or case["outputs"]:
            test_cases.append(case)
    return test_cases

def main(argv):
    """Read the log file named in ``argv[1]`` and print its cases as JSON.

    The JSON goes to standard output. Usage and error messages go to
    standard error so they cannot end up inside redirected JSON output.

    Args:
        argv: Command-line arguments, with the program name at index 0.

    Returns:
        Process exit status: 0 on success, 1 on a usage or read error.
    """
    if len(argv) != 2:
        print(f"Usage: python {argv[0]} <logfile>", file=sys.stderr)
        return 1

    logfile_path = argv[1]

    try:
        with open(logfile_path, 'r') as f:
            log_content = f.read()
    except FileNotFoundError:
        print(f"Error: File not found at {logfile_path}", file=sys.stderr)
        return 1
    except (OSError, UnicodeDecodeError) as e:
        # Other I/O failures (permissions, is-a-directory, bad encoding, ...).
        print(f"Error reading file {logfile_path}: {e}", file=sys.stderr)
        return 1

    parsed_data = parse_dsvdc_log_content(log_content)

    # Output the JSON to standard output
    print(json.dumps(parsed_data, indent=2))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))