#!/usr/bin/env python3
import pathlib
import re
import sys

# Default prefix of the two output files (".docs.txt" / ".code.txt" are
# appended to it); used when no <output_prefix> argument is given.
OUTPUT_PREFIX = "./typedefs_output"
# Written on its own line between consecutive declarations in each output
# file, so that entries stay visually separated when the files are diffed.
SEPARATOR = "==="  # separator line used in output files


def read_statement(text, start):
    """Return the C statement that begins at index ``start`` of ``text``.

    The statement runs up to and including the first ";" that is not
    enclosed in "{ }" braces, so a struct/enum body with its own semicolons
    is kept in one piece.

    Raises IndexError if the text ends before such a ";" is found.
    """
    pos = start
    nesting = 0  # how many "{" are currently open
    while True:
        char = text[pos]  # IndexError here means: no terminating ";"
        if char == ";" and nesting == 0:
            break
        if char == "{":
            nesting += 1
        elif char == "}":
            nesting -= 1
        pos += 1

    # pos points at the terminating ";" -- include it in the result
    return text[start : pos + 1]


def standardize(text):
    """Normalize horizontal whitespace so declarations compare equal.

    Every run of spaces and/or tabs is collapsed into a single space.
    Newlines are left untouched.
    """
    # A tab expands to spaces which are then squashed anyway, so any mix of
    # tabs and spaces ends up as exactly one space.
    return re.sub("[ \t]+", " ", text)


def process_files(path_list, statement_regexp, name_regexp):
    """Collect declarations from files into a {declared_name: declaration} dict.

    path_list        -- iterable of path objects; each is read with read_text()
    statement_regexp -- compiled regexp; every match marks a statement start
    name_regexp      -- regexp (pattern) used to pull the declared name out of
                        the whitespace-normalized statement

    A later declaration with the same name replaces an earlier one.
    Statements that cannot be read to their end are reported on stderr and
    skipped. If name_regexp does not match a statement, the problem is
    reported and the exception is re-raised.
    """
    declarations = {}
    for path in path_list:
        contents = path.read_text()

        for found in statement_regexp.finditer(contents):
            offset = found.start(0)
            try:
                statement = read_statement(contents, offset)
            except IndexError:
                # Code with unmatched '{' in comments may break this little
                # parser. Report such cases and go on.
                print(
                    f"Failed parsing {path} at index {offset}.",
                    "First 100 symbols:",
                    contents[offset : offset + 100],
                    "",
                    sep="\n",
                    file=sys.stderr,
                )
                continue

            statement = standardize(statement)
            try:
                name = re.search(name_regexp, statement)[0]
            except Exception as e:
                # No match: re.search returned None, which is not subscriptable.
                print(statement)
                print(
                    f"Regexp {name_regexp!r} could not find declaration name in {statement!r}",
                    file=sys.stderr,
                )
                raise e
            declarations[name] = statement
    return declarations


# Help text printed when the script is called with a wrong number of arguments.
# Kept as a raw string so the regexp examples show their backslashes verbatim.
_USAGE = r"""
Usage:
    ./typedefs.py <stmt_regexp> <name_regexp> [<output_prefix>]

    # For finding function type typedefs
    ./typedefs.py '^typedef[^\{;]*\(' '\([^\)]*\)'

    # For finding struct/enum typedefs
    ./typedefs.py '^typedef\s+(struct|enum)' '[^\s]*;$'

    # Then compare
    diff ./typedefs_output*

Description:
    Scans working directory for statements matching <stmt_regexp> in code
    (**/*.h) and code excerpts in docs (**/*.sgml). <name_regexp> is used to
    extract name from declaration.
    Writes all found statements from docs into <output_prefix>.docs.txt and
    corresponding declarations from code into <output_prefix>.code.txt
    (<output_prefix> is "./typedefs_output" by default)
""".strip()


def main():
    """Command-line entry point.

    Reads <stmt_regexp>, <name_regexp> and the optional <output_prefix> from
    sys.argv, collects matching declarations from "**/*.sgml" (docs) and
    "**/*.h" (code) under the current directory, and writes two files whose
    entries line up one-to-one so that they can be diffed:

    * <output_prefix>.docs.txt -- every declaration found in the docs
    * <output_prefix>.code.txt -- the declaration of the same name from the
      code, or a "... is not present in code" placeholder

    Prints the usage text and exits with status 1 when the number of
    arguments is wrong.
    """
    if len(sys.argv) not in (3, 4):
        print(_USAGE)
        # sys.exit rather than the builtin exit(): the latter is injected by
        # the site module and is missing e.g. under "python -S".
        sys.exit(1)

    # MULTILINE so that "^" / "$" in <stmt_regexp> anchor at line boundaries.
    statement_regexp = re.compile(sys.argv[1], re.MULTILINE)
    name_regexp = sys.argv[2]
    output_prefix = sys.argv[3] if len(sys.argv) == 4 else OUTPUT_PREFIX

    docs_typedefs = process_files(
        pathlib.Path(".").glob("**/*.sgml"), statement_regexp, name_regexp
    )
    code_typedefs = process_files(
        pathlib.Path(".").glob("**/*.h"), statement_regexp, name_regexp
    )

    # The docs drive the comparison: one output entry per name found in docs,
    # in the same order in both files.
    docs_output = list(docs_typedefs.values())
    code_output = [
        code_typedefs.get(name, f"{name!r} is not present in code")
        for name in docs_typedefs
    ]

    joiner = f"\n{SEPARATOR}\n"
    pathlib.Path(f"{output_prefix}.docs.txt").write_text(joiner.join(docs_output))
    pathlib.Path(f"{output_prefix}.code.txt").write_text(joiner.join(code_output))


if __name__ == "__main__":
    main()