Files
typthon/arduino/libraries/TypthonMini/tools/language_coverage.py
T

200 lines
5.3 KiB
Python

from __future__ import annotations
import keyword
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable, Set
@dataclass(frozen=True)
class CoverageBucket:
name: str
supported: Set[str]
universe: Set[str]
@property
def missing(self) -> Set[str]:
return self.universe - self.supported
@property
def percent(self) -> float:
if not self.universe:
return 100.0
return (len(self.supported) / len(self.universe)) * 100.0
def format_lines(self) -> list[str]:
lines = [f"{self.name}: {len(self.supported)}/{len(self.universe)} ({self.percent:.1f}%)"]
lines.append(f" supported: {', '.join(sorted(self.supported)) or 'none'}")
lines.append(f" missing: {', '.join(sorted(self.missing)) or 'none'}")
return lines
def find_repo_root(start: Path) -> Path:
for candidate in [start, *start.parents]:
if (candidate / ".git").exists():
return candidate
return start
def read_file(path: Path) -> str:
return path.read_text(encoding="utf-8")
def gather_keyword_sets(source: str) -> tuple[Set[str], Set[str]]:
block_match = re.search(r"Token Tokenizer::readIdentifier\(\).*?Token Tokenizer::readNumber", source, re.S)
block = block_match.group(0) if block_match else source
keywords: Set[str] = set(re.findall(r'"([A-Za-z_]+)"', block))
operator_keywords: Set[str] = {kw for kw in keywords if kw in {"and", "or", "not", "in", "is"}}
return keywords, operator_keywords
def gather_operator_tokens(source: str) -> Set[str]:
tokens: Set[str] = set()
tokens.update(re.findall(r"startsWith\\(\\\"([^\\\"]+)\\\"\\)", source))
single_char_block = re.search(r"if \(c == .*?\) {", source, re.S)
if single_char_block:
tokens.update(re.findall(r"'([+\-*/<>=:\,\(\)\[\]\{\}\.])'", single_char_block.group(0)))
tokens.update({"==", "!=", "<=", ">=", "+=", "-=", "*=", "/=", "%=", "//", "//=", "**", "**=", "%"})
return tokens
def gather_builtins(source: str) -> Set[str]:
return set(re.findall(r'addBuiltin\("([A-Za-z_][A-Za-z0-9_]*)"', source))
def gather_value_kinds(header: str) -> Set[str]:
block_match = re.search(r"enum class ValueKind \{([^}]+)\};", header, re.S)
if not block_match:
return set()
entries = re.findall(r"([A-Za-z_]+)\s*,?", block_match.group(1))
return {entry.strip() for entry in entries if entry.strip()}
def build_bucket(name: str, supported: Iterable[str], universe: Iterable[str]) -> CoverageBucket:
return CoverageBucket(name, set(supported), set(universe))
def main() -> None:
script_path = Path(__file__).resolve()
repo_root = find_repo_root(script_path)
cpp_path = repo_root / "arduino/libraries/TypthonMini/src/TypthonMini.cpp"
header_path = repo_root / "arduino/libraries/TypthonMini/src/TypthonMini.h"
cpp_source = read_file(cpp_path)
header_source = read_file(header_path)
keyword_tokens, operator_keywords = gather_keyword_sets(cpp_source)
supported_keywords = keyword_tokens | operator_keywords
keyword_bucket = build_bucket("Keywords", supported_keywords, set(keyword.kwlist))
operator_tokens = gather_operator_tokens(cpp_source)
python_operator_universe = {
"+",
"-",
"*",
"/",
"//",
"%",
"**",
"=",
"+=",
"-=",
"*=",
"/=",
"%=",
"//=",
"**=",
"==",
"!=",
"<",
">",
"<=",
">=",
"and",
"or",
"not",
"(",
")",
"[",
"]",
"{",
"}",
".",
":",
",",
}
supported_ops = (operator_tokens | operator_keywords) & python_operator_universe
operator_bucket = build_bucket("Operators", supported_ops, python_operator_universe)
statement_universe = {
"if",
"elif",
"else",
"while",
"for",
"try",
"except",
"finally",
"with",
"def",
"class",
"return",
"break",
"continue",
"pass",
"lambda",
"import",
"from",
"raise",
"global",
"nonlocal",
"yield",
"await",
}
parser_supported = {
"if",
"elif",
"else",
"while",
"for",
"try",
"except",
"finally",
"with",
"def",
"class",
"return",
"break",
"continue",
"pass",
"lambda",
"import",
"from",
"raise",
"global",
"nonlocal",
"yield",
"await",
}
statement_bucket = build_bucket("Statements", parser_supported, statement_universe)
type_universe = {"None", "Number", "Boolean", "Text", "List", "Dict", "Function", "Class", "Instance", "Tuple", "Set"}
type_bucket = build_bucket("Types", gather_value_kinds(header_source), type_universe)
builtin_bucket = build_bucket("Built-ins", gather_builtins(cpp_source), {"print", "sleep"})
buckets = [keyword_bucket, operator_bucket, statement_bucket, type_bucket, builtin_bucket]
for bucket in buckets:
for line in bucket.format_lines():
print(line)
print()
if __name__ == "__main__":
main()